init push
This commit is contained in:
583
deps/oblib/unittest/lib/regex/test_regex.cpp
vendored
Normal file
583
deps/oblib/unittest/lib/regex/test_regex.cpp
vendored
Normal file
@ -0,0 +1,583 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "lib/ob_define.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include "lib/regex/regex/ob_regex.h"
|
||||
#include <assert.h>
|
||||
|
||||
#include "test_regex.ih"
|
||||
|
||||
#include "lib/utility/ob_macro_utils.h"
|
||||
|
||||
using namespace oceanbase::common;
|
||||
|
||||
int debug = 0;
|
||||
int line = 0;
|
||||
int status = 0;
|
||||
|
||||
int copts = OB_REG_EXTENDED;
|
||||
int eopts = 0;
|
||||
ob_regoff_t startoff = 0;
|
||||
ob_regoff_t endoff = 0;
|
||||
|
||||
// extern void regprint();
|
||||
|
||||
/*
|
||||
- split - divide a string into fields, like awk split()
|
||||
= int split(char *string, char *fields[], int nfields, char *sep);
|
||||
*/
|
||||
int /* number of fields, including overflow */
|
||||
split(char* string, char* fields[], /* list is not NULL-terminated */
|
||||
int nfields, /* number of entries available in fields[] */
|
||||
char* sep /* "" white, "c" single char, "ab" [ab]+ */
|
||||
)
|
||||
{
|
||||
register char* p = string;
|
||||
register char c; /* latest character */
|
||||
register char sepc = sep[0];
|
||||
register char sepc2;
|
||||
register int fn;
|
||||
register char** fp = fields;
|
||||
register char* sepp;
|
||||
register int trimtrail;
|
||||
|
||||
/* white space */
|
||||
if (sepc == '\0') {
|
||||
while ((c = *p++) == ' ' || c == '\t')
|
||||
continue;
|
||||
p--;
|
||||
trimtrail = 1;
|
||||
static char static_sep[5] = " \t";
|
||||
sep = static_sep; /* note, code below knows this is 2 long */
|
||||
sepc = ' ';
|
||||
} else
|
||||
trimtrail = 0;
|
||||
sepc2 = sep[1]; /* now we can safely pick this up */
|
||||
|
||||
/* catch empties */
|
||||
if (*p == '\0')
|
||||
return (0);
|
||||
|
||||
/* single separator */
|
||||
if (sepc2 == '\0') {
|
||||
fn = nfields;
|
||||
for (;;) {
|
||||
*fp++ = p;
|
||||
fn--;
|
||||
if (fn == 0)
|
||||
break;
|
||||
while ((c = *p++) != sepc)
|
||||
if (c == '\0')
|
||||
return (nfields - fn);
|
||||
*(p - 1) = '\0';
|
||||
}
|
||||
/* we have overflowed the fields vector -- just count them */
|
||||
fn = nfields;
|
||||
for (;;) {
|
||||
while ((c = *p++) != sepc)
|
||||
if (c == '\0')
|
||||
return (fn);
|
||||
fn++;
|
||||
}
|
||||
/* not reached */
|
||||
}
|
||||
|
||||
/* two separators */
|
||||
if (sep[2] == '\0') {
|
||||
fn = nfields;
|
||||
for (;;) {
|
||||
*fp++ = p;
|
||||
fn--;
|
||||
while ((c = *p++) != sepc && c != sepc2)
|
||||
if (c == '\0') {
|
||||
if (trimtrail && **(fp - 1) == '\0')
|
||||
fn++;
|
||||
return (nfields - fn);
|
||||
}
|
||||
if (fn == 0)
|
||||
break;
|
||||
*(p - 1) = '\0';
|
||||
while ((c = *p++) == sepc || c == sepc2)
|
||||
continue;
|
||||
p--;
|
||||
}
|
||||
/* we have overflowed the fields vector -- just count them */
|
||||
fn = nfields;
|
||||
while (c != '\0') {
|
||||
while ((c = *p++) == sepc || c == sepc2)
|
||||
continue;
|
||||
p--;
|
||||
fn++;
|
||||
while ((c = *p++) != '\0' && c != sepc && c != sepc2)
|
||||
continue;
|
||||
}
|
||||
/* might have to trim trailing white space */
|
||||
if (trimtrail) {
|
||||
p--;
|
||||
while ((c = *--p) == sepc || c == sepc2)
|
||||
continue;
|
||||
p++;
|
||||
if (*p != '\0') {
|
||||
if (fn == nfields + 1)
|
||||
*p = '\0';
|
||||
fn--;
|
||||
}
|
||||
}
|
||||
return (fn);
|
||||
}
|
||||
|
||||
/* n separators */
|
||||
fn = 0;
|
||||
for (;;) {
|
||||
if (fn < nfields)
|
||||
*fp++ = p;
|
||||
fn++;
|
||||
for (;;) {
|
||||
c = *p++;
|
||||
if (c == '\0')
|
||||
return (fn);
|
||||
sepp = sep;
|
||||
while ((sepc = *sepp++) != '\0' && sepc != c)
|
||||
continue;
|
||||
if (sepc != '\0') /* it was a separator */
|
||||
break;
|
||||
}
|
||||
if (fn < nfields)
|
||||
*(p - 1) = '\0';
|
||||
for (;;) {
|
||||
c = *p++;
|
||||
sepp = sep;
|
||||
while ((sepc = *sepp++) != '\0' && sepc != c)
|
||||
continue;
|
||||
if (sepc == '\0') /* it wasn't a separator */
|
||||
break;
|
||||
}
|
||||
p--;
|
||||
}
|
||||
|
||||
/* not reached */
|
||||
}
|
||||
|
||||
/*
|
||||
- regress - main loop of regression test
|
||||
== bool regress(FILE *in);
|
||||
*/
|
||||
bool regress(FILE* in)
|
||||
{
|
||||
char inbuf[1000];
|
||||
#define MAXF 10
|
||||
char* f[MAXF];
|
||||
int nf;
|
||||
int i;
|
||||
char erbuf[100];
|
||||
size_t ne;
|
||||
const char* badpat = "invalid regular expression";
|
||||
#define SHORT 10
|
||||
const char* bpname = "OB_REG_BADPAT";
|
||||
ob_regex_t re;
|
||||
char sep[5] = "\t\t";
|
||||
|
||||
while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
|
||||
line++;
|
||||
if (inbuf[0] == '#' || inbuf[0] == '\n')
|
||||
continue; /* NOTE CONTINUE */
|
||||
inbuf[strlen(inbuf) - 1] = '\0'; /* get rid of stupid \n */
|
||||
if (debug)
|
||||
fprintf(stdout, "%d:\n", line);
|
||||
nf = split(inbuf, f, MAXF, sep);
|
||||
if (nf < 3) {
|
||||
fprintf(stderr, "bad input, line %d\n", line);
|
||||
return false;
|
||||
}
|
||||
for (i = 0; i < nf; i++)
|
||||
if (strcmp(f[i], "\"\"") == 0)
|
||||
f[i][0] = '\0';
|
||||
// f[i] = "";
|
||||
if (nf <= 3)
|
||||
f[3] = NULL;
|
||||
if (nf <= 4)
|
||||
f[4] = NULL;
|
||||
try_case(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
|
||||
if (opt('&', f[1])) /* try with either type of RE */
|
||||
try_case(f[0], f[1], f[2], f[3], f[4], options('c', f[1]) & ~OB_REG_EXTENDED);
|
||||
}
|
||||
|
||||
ne = ob_regerror(OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, sizeof(erbuf));
|
||||
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat) + 1) {
|
||||
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", erbuf, badpat);
|
||||
status = 1;
|
||||
}
|
||||
ne = ob_regerror(OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, (size_t)SHORT);
|
||||
if (strncmp(erbuf, badpat, SHORT - 1) != 0 || erbuf[SHORT - 1] != '\0' || ne != strlen(badpat) + 1) {
|
||||
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", erbuf, SHORT - 1, badpat);
|
||||
status = 1;
|
||||
}
|
||||
ne = ob_regerror(OB_REG_ITOA | OB_REG_BADPAT, (ob_regex_t*)NULL, erbuf, sizeof(erbuf));
|
||||
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname) + 1) {
|
||||
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", erbuf, bpname);
|
||||
status = 1;
|
||||
}
|
||||
re.re_endp = bpname;
|
||||
ne = ob_regerror(OB_REG_ATOI, &re, erbuf, sizeof(erbuf));
|
||||
if (atoi(erbuf) != (int)OB_REG_BADPAT) {
|
||||
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", erbuf, (long)OB_REG_BADPAT);
|
||||
status = 1;
|
||||
} else if (ne != strlen(erbuf) + 1) {
|
||||
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", erbuf, (long)OB_REG_BADPAT);
|
||||
status = 1;
|
||||
}
|
||||
return 0 == status;
|
||||
}
|
||||
|
||||
/*
|
||||
- try_case - try it, and report on problems
|
||||
== void try_case(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
|
||||
*/
|
||||
void try_case(char* f0, char* f1, char* f2, char* f3, char* f4, int opts /* may not match f1 */
|
||||
)
|
||||
{
|
||||
ob_regex_t re;
|
||||
#define NSUBS 10
|
||||
ob_regmatch_t subs[NSUBS];
|
||||
#define NSHOULD 15
|
||||
char* should[NSHOULD];
|
||||
int nshould;
|
||||
char erbuf[100];
|
||||
int err;
|
||||
int len;
|
||||
const char* type = (opts & OB_REG_EXTENDED) ? "ERE" : "BRE";
|
||||
register int i;
|
||||
char* grump;
|
||||
char f0copy[1000];
|
||||
char f2copy[1000];
|
||||
char sep[5] = ",";
|
||||
|
||||
strcpy(f0copy, f0);
|
||||
re.re_endp = (opts & OB_REG_PEND) ? f0copy + strlen(f0copy) : NULL;
|
||||
fixstr(f0copy);
|
||||
err = ob_regcomp(&re, f0copy, opts, &ob_charset_utf8mb4_general_ci);
|
||||
if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
|
||||
/* unexpected error or wrong error */
|
||||
len = (int)ob_regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", line, type, eprint(err), len, (int)sizeof(erbuf), erbuf);
|
||||
status = 1;
|
||||
} else if (err == 0 && opt('C', f1)) {
|
||||
/* unexpected success */
|
||||
fprintf(stderr, "%d: %s should have given OB_REG_%s\n", line, type, f2);
|
||||
status = 1;
|
||||
err = 1; /* so we won't try regexec */
|
||||
}
|
||||
|
||||
if (err != 0) {
|
||||
ob_regfree(&re);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(f2copy, f2);
|
||||
fixstr(f2copy);
|
||||
|
||||
if (options('e', f1) & OB_REG_STARTEND) {
|
||||
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
|
||||
fprintf(stderr, "%d: bad STARTEND syntax\n", line);
|
||||
subs[0].rm_so = strchr(f2, '(') - f2 + 1;
|
||||
subs[0].rm_eo = strchr(f2, ')') - f2;
|
||||
}
|
||||
err = ob_regexec(&re, f2copy, NSUBS, subs, options('e', f1));
|
||||
|
||||
if (err != 0 && (f3 != NULL || err != OB_REG_NOMATCH)) {
|
||||
/* unexpected error or wrong error */
|
||||
len = (int)ob_regerror(err, &re, erbuf, sizeof(erbuf));
|
||||
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", line, type, eprint(err), len, (int)sizeof(erbuf), erbuf);
|
||||
status = 1;
|
||||
} else if (err != 0) {
|
||||
/* nothing more to check */
|
||||
} else if (f3 == NULL) {
|
||||
/* unexpected success */
|
||||
fprintf(stderr, "%d: %s exec should have failed\n", line, type);
|
||||
status = 1;
|
||||
err = 1; /* just on principle */
|
||||
} else if (opts & OB_REG_NOSUB) {
|
||||
/* nothing more to check */
|
||||
} else if ((grump = check(f2, subs[0], f3)) != NULL) {
|
||||
fprintf(stderr, "%d: %s %s\n", line, type, grump);
|
||||
status = 1;
|
||||
err = 1;
|
||||
}
|
||||
|
||||
if (err != 0 || f4 == NULL) {
|
||||
ob_regfree(&re);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 1; i < NSHOULD; i++)
|
||||
should[i] = NULL;
|
||||
nshould = split(f4, should + 1, NSHOULD - 1, sep);
|
||||
if (nshould == 0) {
|
||||
nshould = 1;
|
||||
should[1][0] = '\0';
|
||||
// should[1] = "";
|
||||
}
|
||||
for (i = 1; i < NSUBS; i++) {
|
||||
grump = check(f2, subs[i], should[i]);
|
||||
if (grump != NULL) {
|
||||
fprintf(stderr, "%d: %s $%d %s\n", line, type, i, grump);
|
||||
status = 1;
|
||||
err = 1;
|
||||
}
|
||||
}
|
||||
|
||||
ob_regfree(&re);
|
||||
}
|
||||
|
||||
/*
|
||||
- options - pick options out of a regression-test string
|
||||
== int options(int type, char *s);
|
||||
*/
|
||||
int options(int type, /* 'c' compile, 'e' exec */
|
||||
char* s)
|
||||
{
|
||||
register char* p;
|
||||
register int o = (type == 'c') ? copts : eopts;
|
||||
register const char* legal = (type == 'c') ? "bisnmp" : "^$#tl";
|
||||
|
||||
for (p = s; *p != '\0'; p++)
|
||||
if (strchr(legal, *p) != NULL)
|
||||
switch (*p) {
|
||||
case 'b':
|
||||
o &= ~OB_REG_EXTENDED;
|
||||
break;
|
||||
case 'i':
|
||||
o |= OB_REG_ICASE;
|
||||
break;
|
||||
case 's':
|
||||
o |= OB_REG_NOSUB;
|
||||
break;
|
||||
case 'n':
|
||||
o |= OB_REG_NEWLINE;
|
||||
break;
|
||||
case 'm':
|
||||
o &= ~OB_REG_EXTENDED;
|
||||
o |= OB_REG_NOSPEC;
|
||||
break;
|
||||
case 'p':
|
||||
o |= OB_REG_PEND;
|
||||
break;
|
||||
case '^':
|
||||
o |= OB_REG_NOTBOL;
|
||||
break;
|
||||
case '$':
|
||||
o |= OB_REG_NOTEOL;
|
||||
break;
|
||||
case '#':
|
||||
o |= OB_REG_STARTEND;
|
||||
break;
|
||||
case 't': /* trace */
|
||||
o |= OB_REG_TRACE;
|
||||
break;
|
||||
case 'l': /* force long representation */
|
||||
o |= OB_REG_LARGE;
|
||||
break;
|
||||
case 'r': /* force backref use */
|
||||
o |= OB_REG_BACKR;
|
||||
break;
|
||||
}
|
||||
return (o);
|
||||
}
|
||||
|
||||
/*
|
||||
- opt - is a particular option in a regression string?
|
||||
== int opt(int c, char *s);
|
||||
*/
|
||||
int /* predicate */
|
||||
opt(int c, char* s)
|
||||
{
|
||||
return (strchr(s, c) != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
- fixstr - transform magic characters in strings
|
||||
== void fixstr(register char *p);
|
||||
*/
|
||||
void fixstr(register char* p)
|
||||
{
|
||||
if (p == NULL)
|
||||
return;
|
||||
|
||||
for (; *p != '\0'; p++)
|
||||
if (*p == 'N')
|
||||
*p = '\n';
|
||||
else if (*p == 'T')
|
||||
*p = '\t';
|
||||
else if (*p == 'S')
|
||||
*p = ' ';
|
||||
else if (*p == 'Z')
|
||||
*p = '\0';
|
||||
}
|
||||
|
||||
/*
|
||||
- check - check a substring match
|
||||
== char *check(char *str, ob_regmatch_t sub, char *should);
|
||||
*/
|
||||
char* /* NULL or complaint */
|
||||
check(char* str, ob_regmatch_t sub, char* should)
|
||||
{
|
||||
register int len;
|
||||
register int shlen;
|
||||
register char* p;
|
||||
static char grump[500];
|
||||
register char* at = NULL;
|
||||
|
||||
if (should != NULL && strcmp(should, "-") == 0)
|
||||
should = NULL;
|
||||
if (should != NULL && should[0] == '@') {
|
||||
at = should + 1;
|
||||
should[0] = '\0';
|
||||
}
|
||||
|
||||
/* check rm_so and rm_eo for consistency */
|
||||
if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || (sub.rm_so != -1 && sub.rm_eo == -1) ||
|
||||
(sub.rm_so != -1 && sub.rm_so < 0) || (sub.rm_eo != -1 && sub.rm_eo < 0)) {
|
||||
sprintf(grump, "start %ld end %ld", (long)sub.rm_so, (long)sub.rm_eo);
|
||||
return (grump);
|
||||
}
|
||||
|
||||
/* check for no match */
|
||||
if (sub.rm_so == -1 && should == NULL)
|
||||
return (NULL);
|
||||
if (sub.rm_so == -1) {
|
||||
static char ret_err_buf[50] = "did not match";
|
||||
return (ret_err_buf);
|
||||
}
|
||||
|
||||
/* check for in range */
|
||||
if (sub.rm_eo > strlen(str)) {
|
||||
sprintf(grump, "start %ld end %ld, past end of string", (long)sub.rm_so, (long)sub.rm_eo);
|
||||
return (grump);
|
||||
}
|
||||
|
||||
len = (int)(sub.rm_eo - sub.rm_so);
|
||||
shlen = (int)strlen(should);
|
||||
p = str + sub.rm_so;
|
||||
|
||||
/* check for not supposed to match */
|
||||
if (should == NULL) {
|
||||
sprintf(grump, "matched `%.*s'", len, p);
|
||||
return (grump);
|
||||
}
|
||||
|
||||
/* check for wrong match */
|
||||
if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
|
||||
sprintf(grump, "matched `%.*s' instead", len, p);
|
||||
return (grump);
|
||||
}
|
||||
if (shlen > 0)
|
||||
return (NULL);
|
||||
|
||||
/* check null match in right place */
|
||||
if (at == NULL)
|
||||
return (NULL);
|
||||
shlen = (int)strlen(at);
|
||||
if (shlen == 0)
|
||||
shlen = 1; /* force check for end-of-string */
|
||||
if (strncmp(p, at, shlen) != 0) {
|
||||
sprintf(grump, "matched null at `%.20s'", p);
|
||||
return (grump);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
- eprint - convert error number to name
|
||||
== static char *eprint(int err);
|
||||
*/
|
||||
static char* eprint(int err)
|
||||
{
|
||||
static char epbuf[100];
|
||||
size_t len;
|
||||
|
||||
len = ob_regerror(OB_REG_ITOA | err, (ob_regex_t*)NULL, epbuf, sizeof(epbuf));
|
||||
assert(len <= sizeof(epbuf));
|
||||
UNUSED(len);
|
||||
return (epbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
- efind - convert error name to number
|
||||
== static int efind(char *name);
|
||||
*/
|
||||
static int efind(char* name)
|
||||
{
|
||||
static char efbuf[100];
|
||||
// size_t n;
|
||||
ob_regex_t re;
|
||||
|
||||
sprintf(efbuf, "OB_REG_%s", name);
|
||||
assert(strlen(efbuf) < sizeof(efbuf));
|
||||
re.re_endp = efbuf;
|
||||
(void)ob_regerror(OB_REG_ATOI, &re, efbuf, sizeof(efbuf));
|
||||
return (atoi(efbuf));
|
||||
}
|
||||
|
||||
class ObRegexTest : public ::testing::Test {
|
||||
public:
|
||||
ObRegexTest();
|
||||
virtual ~ObRegexTest();
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
private:
|
||||
// disallow copy
|
||||
ObRegexTest(const ObRegexTest& other);
|
||||
ObRegexTest& operator=(const ObRegexTest& other);
|
||||
|
||||
private:
|
||||
// data members
|
||||
};
|
||||
ObRegexTest::ObRegexTest()
|
||||
{}
|
||||
|
||||
ObRegexTest::~ObRegexTest()
|
||||
{}
|
||||
|
||||
void ObRegexTest::SetUp()
|
||||
{}
|
||||
|
||||
void ObRegexTest::TearDown()
|
||||
{}
|
||||
|
||||
TEST_F(ObRegexTest, basic_test)
|
||||
{
|
||||
/*
|
||||
- main - do the simple case, hand off to regress() for regression
|
||||
*/
|
||||
char test_filename[] = "regex/tests";
|
||||
FILE* test_file = fopen(test_filename, "r");
|
||||
if (NULL == test_file) {
|
||||
fprintf(stderr, "fail to open file '%s'\n", test_filename);
|
||||
} else {
|
||||
ASSERT_TRUE(regress(test_file));
|
||||
fclose(test_file);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
OB_LOGGER.set_log_level("INFO");
|
||||
// ob_init_memory_pool();
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
Reference in New Issue
Block a user