Files
tidb/pkg/lightning/mydump/parser.rl

189 lines
3.6 KiB
Ragel

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Please edit `parser.rl` if you want to modify this file. To generate
// `parser_generated.go`, please execute
//
// ```sh
// make data_parsers
// ```
package mydump
import (
"io"
"github.com/pingcap/errors"
)
%%{
#`
# This is a ragel parser to quickly scan through a data source file consisting
# of INSERT statements only. You may find detailed syntax explanation on its
# website <https://www.colm.net/open-source/ragel/>.
machine chunk_parser;
# We treat all unimportant patterns as "comments". This includes:
# - Real SQL comments `/* ... */` and `-- ...`
# - Whitespace
# - Separators `,` and `;`
# - The keyword `INTO` (suffix `i` means case-insensitive).
# - The parts of the function `CONVERT(` and `USING UTF8MB4)`
# (to strip the unnecessary detail from mydumper JSON output)
# NOTE(review): `INTO` is listed above, but the `comment` rule below has no
# alternative for it; confirm whether the list or the rule is out of date.
block_comment = '/*' any* :>> '*/';
line_comment = /--[^\r\n]*/;
comment =
block_comment |
line_comment |
space |
[,;] |
'convert('i |
'using utf8mb4)'i;
# The patterns parse quoted strings.
# `bs` is a backslash that is only accepted as an escape introducer when
# parser.escFlavor is not escapeFlavorNone.
bs = '\\' when { parser.escFlavor != escapeFlavorNone };
# Single- and double-quoted strings may contain a `bs`-escaped character or a
# doubled copy of the enclosing quote character.
single_quoted = "'" (^"'" | bs any | "''")** "'";
double_quoted = '"' (^'"' | bs any | '""')** '"';
# Back-quoted identifiers have no `bs` escapes; only a doubled back quote is
# accepted inside.
back_quoted = '`' (^'`' | '``')* '`';
# One or more characters that are neither whitespace nor any of the
# separator, parenthesis, quote, slash and star characters listed in the class.
unquoted = ^([,;()'"`/*] | space)+;
# Decimal digits with an optional leading minus sign.
integer = '-'? [0-9]+;
# Hex and binary literals, each in a prefixed form (`0x`, `0b`) that needs at
# least one digit and a quoted form (case-insensitive `x` or `b` prefix) whose
# digit list may be empty.
hex_string = '0x' [0-9a-fA-F]+ | "x'"i [0-9a-fA-F]* "'";
bin_string = '0b' [01]+ | "b'"i [01]* "'";
# The scanner. `comment` has no action, so a match is dropped and scanning
# continues. Every other pattern stores its token kind in consumedToken and
# leaves the scanner with fbreak so that lex can return the token.
# Per the Ragel scanner rules the longest match wins and ties go to the
# pattern listed first, so the keyword and literal patterns must stay above
# `unquoted`.
main := |*
comment;
'(' => {
consumedToken = tokRowBegin
fbreak;
};
')' => {
consumedToken = tokRowEnd
fbreak;
};
'values'i => {
consumedToken = tokValues
fbreak;
};
'null'i => {
consumedToken = tokNull
fbreak;
};
'true'i => {
consumedToken = tokTrue
fbreak;
};
'false'i => {
consumedToken = tokFalse
fbreak;
};
integer => {
consumedToken = tokInteger
fbreak;
};
hex_string => {
consumedToken = tokHexString
fbreak;
};
bin_string => {
consumedToken = tokBinString
fbreak;
};
single_quoted => {
consumedToken = tokSingleQuoted
fbreak;
};
double_quoted => {
consumedToken = tokDoubleQuoted
fbreak;
};
back_quoted => {
consumedToken = tokBackQuoted
fbreak;
};
unquoted => {
consumedToken = tokUnquoted
fbreak;
};
*|;
#`
}%%
// Ragel expands the directive below into the static data of the chunk_parser
// machine in the generated Go file.
%% write data;
// lex runs the chunk_parser scanner over parser.buf and returns the next
// token together with the bytes it matched.
//
// Input matched by the comment rule carries no action and is skipped. When the
// buffered data runs out before a token is produced and more input may follow,
// the unconsumed tail is moved to the front of the buffer, parser.readBlock is
// called, and scanning resumes.
//
// Returns:
//   - (tok, bytes, nil) when a token is recognized. bytes is a sub-slice of
//     the buffer, and parser.buf and parser.pos are advanced past the token.
//   - (tokNil, nil, io.EOF) when the last chunk has been consumed up to its end.
//   - (tokNil, nil, err) when the scanner enters its error state, when the
//     last chunk ends in the middle of a token, or when parser.readBlock fails.
func (parser *ChunkParser) lex() (token, []byte, error) {
	// Scanner state used by the code Ragel generates for the write init and
	// write exec directives. These names, as well as data, pe, eof and
	// consumedToken below, are referenced by that code and by the scanner
	// actions, so they must not be renamed.
	var cs, ts, te, act, p int
	%% write init;

	for {
		data := parser.buf
		// Overwritten by a scanner action when a pattern with an action matches.
		consumedToken := tokNil
		pe := len(data)
		// eof is only set to the end of the buffer once the last chunk has
		// been loaded; it is -1 while more input may still arrive.
		eof := -1
		if parser.isLastChunk {
			eof = pe
		}

		%% write exec;

		if cs == %%{ write error; }%% {
			parser.logSyntaxError()
			return tokNil, nil, errors.New("syntax error")
		}

		if consumedToken != tokNil {
			// Hand out the matched bytes and drop them from the buffer.
			result := data[ts:te]
			parser.buf = data[te:]
			parser.pos += int64(te)
			return consumedToken, result, nil
		}

		if parser.isLastChunk {
			// No token and no more input: either everything up to the end was
			// consumed, or the input stops inside an unfinished token.
			if te == eof {
				return tokNil, nil, io.EOF
			}
			return tokNil, nil, errors.New("syntax error: unexpected EOF")
		}

		// Discard everything before the start of the current match and rebase
		// the scanner offsets so they stay valid for the shortened buffer.
		parser.buf = parser.buf[ts:]
		parser.pos += int64(ts)
		p -= ts
		te -= ts
		ts = 0
		if err := parser.readBlock(); err != nil {
			return tokNil, nil, errors.Trace(err)
		}
	}

	// NOTE(review): this looks unreachable because the loop above only leaves
	// through return statements; kept in case the generated scanner code can
	// break out of the loop.
	return tokNil, nil, nil
}