[parser] *: Support full-text search (#592)
add MATCH(col1,col2,...) AGAINST(expr [search_modifier]) syntax support
This commit is contained in:
committed by
Ti Chi Robot
parent
9b11c3d2ad
commit
cc817e1324
@ -2608,3 +2608,24 @@ func (n *SplitOption) Restore(ctx *RestoreCtx) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FulltextSearchModifier is the search modifier of a MATCH ... AGAINST
// expression. The bits selected by FulltextSearchModifierModeMask hold the
// search mode; FulltextSearchModifierWithQueryExpansion is an independent flag.
type FulltextSearchModifier int

// The constants below are intentionally untyped: the grammar produces them as
// plain int values and converts the result with FulltextSearchModifier(...).
const (
	// FulltextSearchModifierNaturalLanguageMode is the mode value for natural
	// language search; it is also the zero value.
	FulltextSearchModifierNaturalLanguageMode = 0
	// FulltextSearchModifierBooleanMode is the mode value for boolean search.
	FulltextSearchModifierBooleanMode = 1
	// FulltextSearchModifierModeMask selects the bits that carry the mode.
	FulltextSearchModifierModeMask = 0xF
	// FulltextSearchModifierWithQueryExpansion is the query expansion flag bit.
	FulltextSearchModifierWithQueryExpansion = 1 << 4
)

// IsBooleanMode reports whether the mode bits equal
// FulltextSearchModifierBooleanMode.
func (m FulltextSearchModifier) IsBooleanMode() bool {
	mode := m & FulltextSearchModifierModeMask
	return mode == FulltextSearchModifierBooleanMode
}

// IsNaturalLanguageMode reports whether the mode bits equal
// FulltextSearchModifierNaturalLanguageMode.
func (m FulltextSearchModifier) IsNaturalLanguageMode() bool {
	mode := m & FulltextSearchModifierModeMask
	return mode == FulltextSearchModifierNaturalLanguageMode
}

// WithQueryExpansion reports whether the query expansion flag is set.
func (m FulltextSearchModifier) WithQueryExpansion() bool {
	// The flag is a single bit, so a non-zero intersection means it is set.
	return m&FulltextSearchModifierWithQueryExpansion != 0
}
|
||||
|
||||
@ -396,3 +396,9 @@ func (ts *testDMLSuite) TestWindowSpecRestore(c *C) {
|
||||
}
|
||||
RunNodeRestoreTest(c, testCases, "select rank() over %s from t window w as (order by a)", extractNodeFunc)
|
||||
}
|
||||
|
||||
func (ts *testDMLSuite) TestFulltextSearchModifier(c *C) {
|
||||
c.Assert(FulltextSearchModifier(FulltextSearchModifierNaturalLanguageMode).IsBooleanMode(), IsFalse)
|
||||
c.Assert(FulltextSearchModifier(FulltextSearchModifierNaturalLanguageMode).IsNaturalLanguageMode(), IsTrue)
|
||||
c.Assert(FulltextSearchModifier(FulltextSearchModifierNaturalLanguageMode).WithQueryExpansion(), IsFalse)
|
||||
}
|
||||
|
||||
@ -45,6 +45,7 @@ var (
|
||||
_ ExprNode = &UnaryOperationExpr{}
|
||||
_ ExprNode = &ValuesExpr{}
|
||||
_ ExprNode = &VariableExpr{}
|
||||
_ ExprNode = &MatchAgainst{}
|
||||
|
||||
_ Node = &ColumnName{}
|
||||
_ Node = &WhenClause{}
|
||||
@ -1273,3 +1274,82 @@ func (n *MaxValueExpr) Accept(v Visitor) (Node, bool) {
|
||||
}
|
||||
return v.Leave(n)
|
||||
}
|
||||
|
||||
// MatchAgainst is the expression for matching against fulltext index.
|
||||
type MatchAgainst struct {
|
||||
exprNode
|
||||
// ColumnNames are the columns to match.
|
||||
ColumnNames []*ColumnName
|
||||
// Against
|
||||
Against ExprNode
|
||||
// Modifier
|
||||
Modifier FulltextSearchModifier
|
||||
}
|
||||
|
||||
func (n *MatchAgainst) Restore(ctx *RestoreCtx) error {
|
||||
ctx.WriteKeyWord("MATCH")
|
||||
ctx.WritePlain(" (")
|
||||
for i, v := range n.ColumnNames {
|
||||
if i != 0 {
|
||||
ctx.WritePlain(",")
|
||||
}
|
||||
if err := v.Restore(ctx); err != nil {
|
||||
return errors.Annotatef(err, "An error occurred while restore MatchAgainst.ColumnNames[%d]", i)
|
||||
}
|
||||
}
|
||||
ctx.WritePlain(") ")
|
||||
ctx.WriteKeyWord("AGAINST")
|
||||
ctx.WritePlain(" (")
|
||||
if err := n.Against.Restore(ctx); err != nil {
|
||||
return errors.Annotate(err, "An error occurred while restore MatchAgainst.Against")
|
||||
}
|
||||
if n.Modifier.IsBooleanMode() {
|
||||
ctx.WritePlain(" IN BOOLEAN MODE")
|
||||
if n.Modifier.WithQueryExpansion() {
|
||||
return errors.New("BOOLEAN MODE doesn't support QUERY EXPANSION")
|
||||
}
|
||||
} else if n.Modifier.WithQueryExpansion() {
|
||||
ctx.WritePlain(" WITH QUERY EXPANSION")
|
||||
}
|
||||
ctx.WritePlain(")")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *MatchAgainst) Format(w io.Writer) {
|
||||
fmt.Fprint(w, "MATCH(")
|
||||
for i, v := range n.ColumnNames {
|
||||
if i != 0 {
|
||||
fmt.Fprintf(w, ",%s", v.String())
|
||||
} else {
|
||||
fmt.Fprint(w, v.String())
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, ") AGAINST(")
|
||||
n.Against.Format(w)
|
||||
if n.Modifier.IsBooleanMode() {
|
||||
fmt.Fprint(w, " IN BOOLEAN MODE")
|
||||
} else if n.Modifier.WithQueryExpansion() {
|
||||
fmt.Fprint(w, " WITH QUERY EXPANSION")
|
||||
}
|
||||
fmt.Fprint(w, ")")
|
||||
}
|
||||
|
||||
func (n *MatchAgainst) Accept(v Visitor) (Node, bool) {
|
||||
newNode, skipChildren := v.Enter(n)
|
||||
if skipChildren {
|
||||
return v.Leave(newNode)
|
||||
}
|
||||
for i, colName := range n.ColumnNames {
|
||||
newColName, ok := colName.Accept(v)
|
||||
if !ok {
|
||||
return n, false
|
||||
}
|
||||
n.ColumnNames[i] = newColName.(*ColumnName)
|
||||
}
|
||||
newAgainst, ok := n.Against.Accept(v)
|
||||
if !ok {
|
||||
return n, false
|
||||
}
|
||||
n.Against = newAgainst.(ExprNode)
|
||||
return v.Leave(n)
|
||||
}
|
||||
|
||||
@ -379,3 +379,19 @@ func (tc *testExpressionsSuite) TestVariableExpr(c *C) {
|
||||
}
|
||||
RunNodeRestoreTest(c, testCases, "select %s", extractNodeFunc)
|
||||
}
|
||||
|
||||
func (tc *testExpressionsSuite) TestMatchAgainstExpr(c *C) {
|
||||
testCases := []NodeRestoreTestCase{
|
||||
{`MATCH(content, title) AGAINST ('search for')`, "MATCH (`content`,`title`) AGAINST ('search for')"},
|
||||
{`MATCH(content) AGAINST ('search for' IN BOOLEAN MODE)`, "MATCH (`content`) AGAINST ('search for' IN BOOLEAN MODE)"},
|
||||
{`MATCH(content, title) AGAINST ('search for' WITH QUERY EXPANSION)`, "MATCH (`content`,`title`) AGAINST ('search for' WITH QUERY EXPANSION)"},
|
||||
{`MATCH(content) AGAINST ('search for' IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION)`, "MATCH (`content`) AGAINST ('search for' WITH QUERY EXPANSION)"},
|
||||
{`MATCH(content) AGAINST ('search') AND id = 1`, "MATCH (`content`) AGAINST ('search') AND `id`=1"},
|
||||
{`MATCH(content) AGAINST ('search') OR id = 1`, "MATCH (`content`) AGAINST ('search') OR `id`=1"},
|
||||
{`MATCH(content) AGAINST (X'40404040' | X'01020304') OR id = 1`, "MATCH (`content`) AGAINST (x'40404040'|x'01020304') OR `id`=1"},
|
||||
}
|
||||
extractNodeFunc := func(node Node) Node {
|
||||
return node.(*SelectStmt).Where
|
||||
}
|
||||
RunNodeRestoreTest(c, testCases, "SELECT * FROM t WHERE %s", extractNodeFunc)
|
||||
}
|
||||
|
||||
@ -140,6 +140,7 @@ var tokenMap = map[string]int{
|
||||
"ADDDATE": addDate,
|
||||
"ADMIN": admin,
|
||||
"AFTER": after,
|
||||
"AGAINST": against,
|
||||
"AGG_TO_COP": hintAggToCop,
|
||||
"ALL": all,
|
||||
"ALGORITHM": algorithm,
|
||||
@ -274,6 +275,7 @@ var tokenMap = map[string]int{
|
||||
"EXCHANGE": exchange,
|
||||
"EXECUTE": execute,
|
||||
"EXISTS": exists,
|
||||
"EXPANSION": expansion,
|
||||
"EXPIRE": expire,
|
||||
"EXPLAIN": explain,
|
||||
"EXTRACT": extract,
|
||||
@ -353,6 +355,7 @@ var tokenMap = map[string]int{
|
||||
"KEYS": keys,
|
||||
"KILL": kill,
|
||||
"LABELS": labels,
|
||||
"LANGUAGE": language,
|
||||
"LAST": last,
|
||||
"LEADING": leading,
|
||||
"LEFT": left,
|
||||
|
||||
16727
parser/parser.go
16727
parser/parser.go
File diff suppressed because it is too large
Load Diff
@ -160,6 +160,7 @@ import (
|
||||
keys "KEYS"
|
||||
kill "KILL"
|
||||
lag "LAG"
|
||||
language "LANGUAGE"
|
||||
lastValue "LAST_VALUE"
|
||||
lead "LEAD"
|
||||
leading "LEADING"
|
||||
@ -280,6 +281,7 @@ import (
|
||||
account "ACCOUNT"
|
||||
action "ACTION"
|
||||
after "AFTER"
|
||||
against "AGAINST"
|
||||
always "ALWAYS"
|
||||
algorithm "ALGORITHM"
|
||||
any "ANY"
|
||||
@ -342,6 +344,7 @@ import (
|
||||
exchange "EXCHANGE"
|
||||
exclusive "EXCLUSIVE"
|
||||
execute "EXECUTE"
|
||||
expansion "EXPANSION"
|
||||
expire "EXPIRE"
|
||||
faultsSym "FAULTS"
|
||||
fields "FIELDS"
|
||||
@ -834,6 +837,7 @@ import (
|
||||
FieldList "field expression list"
|
||||
FieldTerminator "Field terminator"
|
||||
FlushOption "Flush option"
|
||||
FulltextSearchModifierOpt "Fulltext modifier"
|
||||
PluginNameList "Plugin Name List"
|
||||
TableRefsClause "Table references clause"
|
||||
FieldItem "Field item for load data clause"
|
||||
@ -3897,6 +3901,14 @@ Expression:
|
||||
$$ = &ast.UnaryOperationExpr{Op: opcode.Not, V: $2}
|
||||
}
|
||||
}
|
||||
| "MATCH" '(' ColumnNameList ')' "AGAINST" '(' BitExpr FulltextSearchModifierOpt ')'
|
||||
{
|
||||
$$ = &ast.MatchAgainst {
|
||||
ColumnNames: $3.([]*ast.ColumnName),
|
||||
Against: $7,
|
||||
Modifier: ast.FulltextSearchModifier($8.(int)),
|
||||
}
|
||||
}
|
||||
| BoolPri IsOrNotOp trueKwd %prec is
|
||||
{
|
||||
$$ = &ast.IsTruthExpr{Expr:$1, Not: !$2.(bool), True: int64(1)}
|
||||
@ -3922,6 +3934,27 @@ MaxValueOrExpression:
|
||||
$$ = $1
|
||||
}
|
||||
|
||||
/*
 * FulltextSearchModifierOpt is the optional search modifier that follows the
 * search expression inside AGAINST (...). Every alternative yields an untyped
 * int built from the ast.FulltextSearchModifier* constants; the MATCH ... AGAINST
 * rule converts that value to ast.FulltextSearchModifier.
 */
FulltextSearchModifierOpt:
	/* empty */
	{
		/* No modifier: same value as an explicit natural language mode. */
		$$ = ast.FulltextSearchModifierNaturalLanguageMode
	}
|	"IN" "NATURAL" "LANGUAGE" "MODE"
	{
		$$ = ast.FulltextSearchModifierNaturalLanguageMode
	}
|	"IN" "NATURAL" "LANGUAGE" "MODE" "WITH" "QUERY" "EXPANSION"
	{
		$$ = ast.FulltextSearchModifierNaturalLanguageMode | ast.FulltextSearchModifierWithQueryExpansion
	}
|	"IN" "BOOLEAN" "MODE"
	{
		$$ = ast.FulltextSearchModifierBooleanMode
	}
|	"WITH" "QUERY" "EXPANSION"
	{
		$$ = ast.FulltextSearchModifierWithQueryExpansion
	}
|
||||
|
||||
logOr:
|
||||
pipesAsOr
|
||||
@ -4449,7 +4482,7 @@ UnReservedKeyword:
|
||||
| "WITHOUT" | "RTREE" | "EXCHANGE" | "COLUMN_FORMAT" | "REPAIR" | "IMPORT" | "DISCARD" | "TABLE_CHECKSUM" | "UNICODE"
|
||||
| "SQL_TSI_DAY" | "SQL_TSI_HOUR" | "SQL_TSI_MINUTE" | "SQL_TSI_MONTH" | "SQL_TSI_QUARTER" | "SQL_TSI_SECOND" |
|
||||
"SQL_TSI_WEEK" | "SQL_TSI_YEAR" | "INVISIBLE" | "VISIBLE" | "TYPE" | "NOWAIT" | "REPLICA" | "LOCATION" | "LABELS"
|
||||
| "LOGS" | "HOSTS"
|
||||
| "LOGS" | "HOSTS" | "AGAINST" | "EXPANSION"
|
||||
|
||||
TiDBKeyword:
|
||||
"ADMIN" | "AGG_TO_COP" |"BUCKETS" | "BUILTINS" | "CANCEL" | "CMSKETCH" | "DDL" | "DEPTH" | "DRAINER" | "JOBS" | "JOB" | "NODE_ID" | "NODE_STATE" | "PUMP" | "SAMPLES" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "STATS_HEALTHY" | "TIDB"
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
package parser_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
@ -70,6 +71,7 @@ func (s *testParserSuite) TestSimple(c *C) {
|
||||
"delayed", "high_priority", "low_priority",
|
||||
"cumeDist", "denseRank", "firstValue", "lag", "lastValue", "lead", "nthValue", "ntile",
|
||||
"over", "percentRank", "rank", "row", "rows", "rowNumber", "window", "linear",
|
||||
"match", "language",
|
||||
// TODO: support the following keywords
|
||||
// "with",
|
||||
}
|
||||
@ -104,7 +106,7 @@ func (s *testParserSuite) TestSimple(c *C) {
|
||||
"ln", "log", "log2", "log10", "timestampdiff", "pi", "quote", "none", "super", "shared", "exclusive",
|
||||
"always", "stats", "stats_meta", "stats_histogram", "stats_buckets", "stats_healthy", "tidb_version", "replication", "slave", "client",
|
||||
"max_connections_per_hour", "max_queries_per_hour", "max_updates_per_hour", "max_user_connections", "event", "reload", "routine", "temporary",
|
||||
"following", "preceding", "unbounded", "respect", "nulls", "current", "last",
|
||||
"following", "preceding", "unbounded", "respect", "nulls", "current", "last", "against", "expansion",
|
||||
}
|
||||
for _, kw := range unreservedKws {
|
||||
src := fmt.Sprintf("SELECT %s FROM tbl;", kw)
|
||||
@ -4538,6 +4540,51 @@ func (s *testParserSuite) TestCharset(c *C) {
|
||||
c.Assert(st.(*ast.AlterDatabaseStmt), NotNil)
|
||||
}
|
||||
|
||||
func (s *testParserSuite) TestFulltextSearch(c *C) {
|
||||
parser := parser.New()
|
||||
|
||||
st, err := parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(content) AGAINST('search')", "", "")
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(st.(*ast.SelectStmt), NotNil)
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH() AGAINST('search')", "", "")
|
||||
c.Assert(err, NotNil)
|
||||
c.Assert(st, IsNil)
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(content) AGAINST()", "", "")
|
||||
c.Assert(err, NotNil)
|
||||
c.Assert(st, IsNil)
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(content) AGAINST('search' IN)", "", "")
|
||||
c.Assert(err, NotNil)
|
||||
c.Assert(st, IsNil)
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(content) AGAINST('search' IN BOOLEAN MODE WITH QUERY EXPANSION)", "", "")
|
||||
c.Assert(err, NotNil)
|
||||
c.Assert(st, IsNil)
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(title,content) AGAINST('search' IN NATURAL LANGUAGE MODE)", "", "")
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(st.(*ast.SelectStmt), NotNil)
|
||||
writer := bytes.NewBufferString("")
|
||||
st.(*ast.SelectStmt).Where.Format(writer)
|
||||
c.Assert(writer.String(), Equals, "MATCH(title,content) AGAINST(\"search\")")
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(title,content) AGAINST('search' IN BOOLEAN MODE)", "", "")
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(st.(*ast.SelectStmt), NotNil)
|
||||
writer.Reset()
|
||||
st.(*ast.SelectStmt).Where.Format(writer)
|
||||
c.Assert(writer.String(), Equals, "MATCH(title,content) AGAINST(\"search\" IN BOOLEAN MODE)")
|
||||
|
||||
st, err = parser.ParseOneStmt("SELECT * FROM fulltext_test WHERE MATCH(title,content) AGAINST('search' WITH QUERY EXPANSION)", "", "")
|
||||
c.Assert(err, IsNil)
|
||||
c.Assert(st.(*ast.SelectStmt), NotNil)
|
||||
writer.Reset()
|
||||
st.(*ast.SelectStmt).Where.Format(writer)
|
||||
c.Assert(writer.String(), Equals, "MATCH(title,content) AGAINST(\"search\" WITH QUERY EXPANSION)")
|
||||
}
|
||||
|
||||
// CleanNodeText set the text of node and all child node empty.
|
||||
// For test only.
|
||||
func CleanNodeText(node ast.Node) {
|
||||
|
||||
Reference in New Issue
Block a user