[parser] *: Support full-text search (#592)

Add support for the MATCH(col1,col2,...) AGAINST(expr [search_modifier]) syntax.
This commit is contained in:
Xiaoguang Sun
2019-11-01 15:03:47 +08:00
committed by Ti Chi Robot
parent 9b11c3d2ad
commit cc817e1324
8 changed files with 8609 additions and 8328 deletions

View File

@ -2608,3 +2608,24 @@ func (n *SplitOption) Restore(ctx *RestoreCtx) error {
}
return nil
}
// FulltextSearchModifier encodes the search modifier of a full-text search.
// The bits selected by FulltextSearchModifierModeMask hold the search mode;
// FulltextSearchModifierWithQueryExpansion is a separate flag bit.
type FulltextSearchModifier int

// Search modes and flags of a FulltextSearchModifier. The constants are
// untyped, so they can be combined with | and converted to
// FulltextSearchModifier where a typed value is needed.
const (
	FulltextSearchModifierNaturalLanguageMode = 0
	FulltextSearchModifierBooleanMode         = 1
	FulltextSearchModifierModeMask            = 0xF
	FulltextSearchModifierWithQueryExpansion  = 1 << 4
)

// IsBooleanMode reports whether the mode bits select boolean mode.
func (m FulltextSearchModifier) IsBooleanMode() bool {
	mode := m & FulltextSearchModifierModeMask
	return mode == FulltextSearchModifierBooleanMode
}

// IsNaturalLanguageMode reports whether the mode bits select natural
// language mode.
func (m FulltextSearchModifier) IsNaturalLanguageMode() bool {
	mode := m & FulltextSearchModifierModeMask
	return mode == FulltextSearchModifierNaturalLanguageMode
}

// WithQueryExpansion reports whether the query expansion flag is set.
func (m FulltextSearchModifier) WithQueryExpansion() bool {
	// The flag is a single bit, so a non-zero result means it is set.
	return m&FulltextSearchModifierWithQueryExpansion != 0
}

View File

@ -396,3 +396,9 @@ func (ts *testDMLSuite) TestWindowSpecRestore(c *C) {
}
RunNodeRestoreTest(c, testCases, "select rank() over %s from t window w as (order by a)", extractNodeFunc)
}
// TestFulltextSearchModifier checks the three predicates of
// FulltextSearchModifier for every mode and for the query expansion flag.
func (ts *testDMLSuite) TestFulltextSearchModifier(c *C) {
	naturalLanguage := FulltextSearchModifier(FulltextSearchModifierNaturalLanguageMode)
	c.Assert(naturalLanguage.IsBooleanMode(), IsFalse)
	c.Assert(naturalLanguage.IsNaturalLanguageMode(), IsTrue)
	c.Assert(naturalLanguage.WithQueryExpansion(), IsFalse)

	booleanMode := FulltextSearchModifier(FulltextSearchModifierBooleanMode)
	c.Assert(booleanMode.IsBooleanMode(), IsTrue)
	c.Assert(booleanMode.IsNaturalLanguageMode(), IsFalse)
	c.Assert(booleanMode.WithQueryExpansion(), IsFalse)

	// The expansion flag must not disturb the mode bits.
	expanded := FulltextSearchModifier(FulltextSearchModifierNaturalLanguageMode | FulltextSearchModifierWithQueryExpansion)
	c.Assert(expanded.IsBooleanMode(), IsFalse)
	c.Assert(expanded.IsNaturalLanguageMode(), IsTrue)
	c.Assert(expanded.WithQueryExpansion(), IsTrue)
}

View File

@ -45,6 +45,7 @@ var (
_ ExprNode = &UnaryOperationExpr{}
_ ExprNode = &ValuesExpr{}
_ ExprNode = &VariableExpr{}
_ ExprNode = &MatchAgainst{}
_ Node = &ColumnName{}
_ Node = &WhenClause{}
@ -1273,3 +1274,82 @@ func (n *MaxValueExpr) Accept(v Visitor) (Node, bool) {
}
return v.Leave(n)
}
// MatchAgainst is the expression for matching against fulltext index.
// It models MATCH (col1, col2, ...) AGAINST (expr [search_modifier]).
type MatchAgainst struct {
exprNode
// ColumnNames are the columns to match.
ColumnNames []*ColumnName
// Against is the expression written inside AGAINST (...), i.e. what the
// columns are matched against.
Against ExprNode
// Modifier is the search modifier: the search mode plus the optional
// query expansion flag.
Modifier FulltextSearchModifier
}
// Restore writes the SQL text of the expression to ctx in the form
// MATCH (cols) AGAINST (expr [modifier]).
//
// It returns an error if the modifier combines boolean mode with query
// expansion, or if restoring a column name or the Against expression fails.
func (n *MatchAgainst) Restore(ctx *RestoreCtx) error {
	// Reject the invalid modifier combination up front so that nothing is
	// written to ctx before the error is returned.
	if n.Modifier.IsBooleanMode() && n.Modifier.WithQueryExpansion() {
		return errors.New("BOOLEAN MODE doesn't support QUERY EXPANSION")
	}

	ctx.WriteKeyWord("MATCH")
	ctx.WritePlain(" (")
	for i, v := range n.ColumnNames {
		if i != 0 {
			ctx.WritePlain(",")
		}
		if err := v.Restore(ctx); err != nil {
			return errors.Annotatef(err, "An error occurred while restore MatchAgainst.ColumnNames[%d]", i)
		}
	}
	ctx.WritePlain(") ")
	ctx.WriteKeyWord("AGAINST")
	ctx.WritePlain(" (")
	if err := n.Against.Restore(ctx); err != nil {
		return errors.Annotate(err, "An error occurred while restore MatchAgainst.Against")
	}

	// Natural language mode is the default and is not spelled out. The
	// modifier keywords go through WriteKeyWord, like MATCH and AGAINST above.
	switch {
	case n.Modifier.IsBooleanMode():
		ctx.WritePlain(" ")
		ctx.WriteKeyWord("IN BOOLEAN MODE")
	case n.Modifier.WithQueryExpansion():
		ctx.WritePlain(" ")
		ctx.WriteKeyWord("WITH QUERY EXPANSION")
	}
	ctx.WritePlain(")")
	return nil
}
// Format writes a textual form of the expression to w:
// MATCH(col1,col2) AGAINST(expr [modifier]). Natural language mode without
// query expansion produces no modifier text.
func (n *MatchAgainst) Format(w io.Writer) {
	fmt.Fprint(w, "MATCH(")
	for idx, col := range n.ColumnNames {
		name := col.String()
		if idx == 0 {
			fmt.Fprint(w, name)
			continue
		}
		// Every column after the first is preceded by a comma.
		fmt.Fprintf(w, ",%s", name)
	}
	fmt.Fprint(w, ") AGAINST(")
	n.Against.Format(w)
	switch {
	case n.Modifier.IsBooleanMode():
		fmt.Fprint(w, " IN BOOLEAN MODE")
	case n.Modifier.WithQueryExpansion():
		fmt.Fprint(w, " WITH QUERY EXPANSION")
	}
	fmt.Fprint(w, ")")
}
// Accept walks the node with visitor v. Enter is called first; if it asks to
// skip children, the node it returned is passed straight to Leave. Otherwise
// each column name and then the Against expression are visited and replaced
// by the node the visitor returns, and Leave is called on the result.
// If visiting a child reports failure, n and false are returned.
func (n *MatchAgainst) Accept(v Visitor) (Node, bool) {
	newNode, skipChildren := v.Enter(n)
	if skipChildren {
		return v.Leave(newNode)
	}
	// Continue with the node returned by Enter so that a replacement made by
	// the visitor is not silently dropped.
	n = newNode.(*MatchAgainst)
	for i, colName := range n.ColumnNames {
		newColName, ok := colName.Accept(v)
		if !ok {
			return n, false
		}
		n.ColumnNames[i] = newColName.(*ColumnName)
	}
	newAgainst, ok := n.Against.Accept(v)
	if !ok {
		return n, false
	}
	n.Against = newAgainst.(ExprNode)
	return v.Leave(n)
}

View File

@ -379,3 +379,19 @@ func (tc *testExpressionsSuite) TestVariableExpr(c *C) {
}
RunNodeRestoreTest(c, testCases, "select %s", extractNodeFunc)
}
// TestMatchAgainstExpr parses MATCH ... AGAINST expressions in a WHERE clause
// and checks the text produced when the parsed node is restored.
func (tc *testExpressionsSuite) TestMatchAgainstExpr(c *C) {
	// The node under test is the WHERE clause of the parsed SELECT.
	whereOf := func(stmt Node) Node {
		sel := stmt.(*SelectStmt)
		return sel.Where
	}
	cases := []NodeRestoreTestCase{
		{`MATCH(content, title) AGAINST ('search for')`, "MATCH (`content`,`title`) AGAINST ('search for')"},
		{`MATCH(content) AGAINST ('search for' IN BOOLEAN MODE)`, "MATCH (`content`) AGAINST ('search for' IN BOOLEAN MODE)"},
		{`MATCH(content, title) AGAINST ('search for' WITH QUERY EXPANSION)`, "MATCH (`content`,`title`) AGAINST ('search for' WITH QUERY EXPANSION)"},
		{`MATCH(content) AGAINST ('search for' IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)`, "MATCH (`content`) AGAINST ('search for' WITH QUERY EXPANSION)"},
		{`MATCH(content) AGAINST ('search') AND id = 1`, "MATCH (`content`) AGAINST ('search') AND `id`=1"},
		{`MATCH(content) AGAINST ('search') OR id = 1`, "MATCH (`content`) AGAINST ('search') OR `id`=1"},
		{`MATCH(content) AGAINST (X'40404040' | X'01020304') OR id = 1`, "MATCH (`content`) AGAINST (x'40404040'|x'01020304') OR `id`=1"},
	}
	RunNodeRestoreTest(c, cases, "SELECT * FROM t WHERE %s", whereOf)
}

View File

@ -140,6 +140,7 @@ var tokenMap = map[string]int{
"ADDDATE": addDate,
"ADMIN": admin,
"AFTER": after,
"AGAINST": against,
"AGG_TO_COP": hintAggToCop,
"ALL": all,
"ALGORITHM": algorithm,
@ -274,6 +275,7 @@ var tokenMap = map[string]int{
"EXCHANGE": exchange,
"EXECUTE": execute,
"EXISTS": exists,
"EXPANSION": expansion,
"EXPIRE": expire,
"EXPLAIN": explain,
"EXTRACT": extract,
@ -353,6 +355,7 @@ var tokenMap = map[string]int{
"KEYS": keys,
"KILL": kill,
"LABELS": labels,
"LANGUAGE": language,
"LAST": last,
"LEADING": leading,
"LEFT": left,

File diff suppressed because it is too large Load Diff

View File

@ -160,6 +160,7 @@ import (
keys "KEYS"
kill "KILL"
lag "LAG"
language "LANGUAGE"
lastValue "LAST_VALUE"
lead "LEAD"
leading "LEADING"
@ -280,6 +281,7 @@ import (
account "ACCOUNT"
action "ACTION"
after "AFTER"
against "AGAINST"
always "ALWAYS"
algorithm "ALGORITHM"
any "ANY"
@ -342,6 +344,7 @@ import (
exchange "EXCHANGE"
exclusive "EXCLUSIVE"
execute "EXECUTE"
expansion "EXPANSION"
expire "EXPIRE"
faultsSym "FAULTS"
fields "FIELDS"
@ -834,6 +837,7 @@ import (
FieldList "field expression list"
FieldTerminator "Field terminator"
FlushOption "Flush option"
FulltextSearchModifierOpt "Fulltext modifier"
PluginNameList "Plugin Name List"
TableRefsClause "Table references clause"
FieldItem "Field item for load data clause"
@ -3897,6 +3901,14 @@ Expression:
$$ = &ast.UnaryOperationExpr{Op: opcode.Not, V: $2}
}
}
| "MATCH" '(' ColumnNameList ')' "AGAINST" '(' BitExpr FulltextSearchModifierOpt ')'
{
$$ = &ast.MatchAgainst {
ColumnNames: $3.([]*ast.ColumnName),
Against: $7,
Modifier: ast.FulltextSearchModifier($8.(int)),
}
}
| BoolPri IsOrNotOp trueKwd %prec is
{
$$ = &ast.IsTruthExpr{Expr:$1, Not: !$2.(bool), True: int64(1)}
@ -3922,6 +3934,27 @@ MaxValueOrExpression:
$$ = $1
}
/* FulltextSearchModifierOpt is the optional search modifier that may follow
 * the expression inside AGAINST (...). Every action yields one of the ast
 * FulltextSearchModifier constants, optionally combined with the query
 * expansion flag. There is no alternative for boolean mode together with
 * query expansion, so that combination is a syntax error. */
FulltextSearchModifierOpt:
/* empty */
{
// No modifier given: natural language mode.
$$ = ast.FulltextSearchModifierNaturalLanguageMode
}
| "IN" "NATURAL" "LANGUAGE" "MODE"
{
// Explicit natural language mode, same value as the empty alternative.
$$ = ast.FulltextSearchModifierNaturalLanguageMode
}
| "IN" "NATURAL" "LANGUAGE" "MODE" "WITH" "QUERY" "EXPANSION"
{
// Natural language mode with the query expansion flag set.
$$ = ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion
}
| "IN" "BOOLEAN" "MODE"
{
$$ = ast.FulltextSearchModifierBooleanMode
}
| "WITH" "QUERY" "EXPANSION"
{
// Only the flag is set; the mode bits stay zero, i.e. natural language mode.
$$ = ast.FulltextSearchModifierWithQueryExpansion
}
logOr:
pipesAsOr
@ -4449,7 +4482,7 @@ UnReservedKeyword:
| "WITHOUT" | "RTREE" | "EXCHANGE" | "COLUMN_FORMAT" | "REPAIR" | "IMPORT" | "DISCARD" | "TABLE_CHECKSUM" | "UNICODE"
| "SQL_TSI_DAY" | "SQL_TSI_HOUR" | "SQL_TSI_MINUTE" | "SQL_TSI_MONTH" | "SQL_TSI_QUARTER" | "SQL_TSI_SECOND" |
"SQL_TSI_WEEK" | "SQL_TSI_YEAR" | "INVISIBLE" | "VISIBLE" | "TYPE" | "NOWAIT" | "REPLICA" | "LOCATION" | "LABELS"
| "LOGS" | "HOSTS"
| "LOGS" | "HOSTS" | "AGAINST" | "EXPANSION"
TiDBKeyword:
"ADMIN" | "AGG_TO_COP" |"BUCKETS" | "BUILTINS" | "CANCEL" | "CMSKETCH" | "DDL" | "DEPTH" | "DRAINER" | "JOBS" | "JOB" | "NODE_ID" | "NODE_STATE" | "PUMP" | "SAMPLES" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB"

View File

@ -14,6 +14,7 @@
package parser_test
import (
"bytes"
"fmt"
"runtime"
"strings"
@ -70,6 +71,7 @@ func (s *testParserSuite) TestSimple(c *C) {
"delayed", "high_priority", "low_priority",
"cumeDist", "denseRank", "firstValue", "lag", "lastValue", "lead", "nthValue", "ntile",
"over", "percentRank", "rank", "row", "rows", "rowNumber", "window", "linear",
"match", "language",
// TODO: support the following keywords
// "with",
}
@ -104,7 +106,7 @@ func (s *testParserSuite) TestSimple(c *C) {
"ln", "log", "log2", "log10", "timestampdiff", "pi", "quote", "none", "super", "shared", "exclusive",
"always", "stats", "stats_meta", "stats_histogram", "stats_buckets", "stats_healthy", "tidb_version", "replication", "slave", "client",
"max_connections_per_hour", "max_queries_per_hour", "max_updates_per_hour", "max_user_connections", "event", "reload", "routine", "temporary",
"following", "preceding", "unbounded", "respect", "nulls", "current", "last",
"following", "preceding", "unbounded", "respect", "nulls", "current", "last", "against", "expansion",
}
for _, kw := range unreservedKws {
src := fmt.Sprintf("SELECT %s FROM tbl;", kw)
@ -4538,6 +4540,51 @@ func (s *testParserSuite) TestCharset(c *C) {
c.Assert(st.(*ast.AlterDatabaseStmt), NotNil)
}
// TestFulltextSearch parses MATCH ... AGAINST predicates. Invalid forms must
// fail to parse and yield no statement; valid forms must parse to a SELECT
// whose WHERE clause formats to the expected text.
func (s *testParserSuite) TestFulltextSearch(c *C) {
	const prefix = "SELECT * FROM fulltext_test WHERE "
	cases := []struct {
		where  string // predicate appended to prefix
		ok     bool   // whether parsing is expected to succeed
		format string // expected Format output of the WHERE clause when ok
	}{
		{"MATCH(content) AGAINST('search')", true, "MATCH(content) AGAINST(\"search\")"},
		{"MATCH() AGAINST('search')", false, ""},
		{"MATCH(content) AGAINST()", false, ""},
		{"MATCH(content) AGAINST('search' IN)", false, ""},
		{"MATCH(content) AGAINST('search' IN BOOLEAN MODE WITH QUERY EXPANSION)", false, ""},
		{"MATCH(title,content) AGAINST('search' IN NATURAL LANGUAGE MODE)", true, "MATCH(title,content) AGAINST(\"search\")"},
		{"MATCH(title,content) AGAINST('search' IN BOOLEAN MODE)", true, "MATCH(title,content) AGAINST(\"search\" IN BOOLEAN MODE)"},
		{"MATCH(title,content) AGAINST('search' WITH QUERY EXPANSION)", true, "MATCH(title,content) AGAINST(\"search\" WITH QUERY EXPANSION)"},
		{"MATCH(title,content) AGAINST('search' IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)", true, "MATCH(title,content) AGAINST(\"search\" WITH QUERY EXPANSION)"},
	}

	// Named p rather than parser so the imported parser package is not shadowed.
	p := parser.New()
	for _, tc := range cases {
		st, err := p.ParseOneStmt(prefix+tc.where, "", "")
		if !tc.ok {
			c.Assert(err, NotNil)
			c.Assert(st, IsNil)
			continue
		}
		c.Assert(err, IsNil)
		sel := st.(*ast.SelectStmt)
		c.Assert(sel, NotNil)

		var buf bytes.Buffer
		sel.Where.Format(&buf)
		c.Assert(buf.String(), Equals, tc.format)
	}
}
// CleanNodeText sets the text of the node and all of its child nodes to empty.
// For test only.
func CleanNodeText(node ast.Node) {