189 lines
3.6 KiB
Ragel
189 lines
3.6 KiB
Ragel
// Copyright 2019 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Please edit `parser.rl` if you want to modify this file. To generate
|
|
// `parser_generated.go`, please execute
|
|
//
|
|
// ```sh
|
|
// make data_parsers
|
|
// ```
|
|
|
|
package mydump
|
|
|
|
import (
|
|
"io"
|
|
|
|
"github.com/pingcap/errors"
|
|
)
|
|
|
|
%%{
#`

# Ragel scanner that quickly tokenizes a data source file made up solely of
# INSERT statements. The Ragel syntax itself is explained in detail at
# <https://www.colm.net/open-source/ragel/>.

machine chunk_parser;

# Input the caller has no use for is grouped under `insignificant` and is
# matched without producing a token. This includes:
#  - Real SQL comments `/* ... */` and `-- ...`
#  - Whitespace
#  - Separators `,` and `;`
#  - The parts of the function `CONVERT(` and `USING UTF8MB4)`
#    (to strip the unnecessary detail from mydumper JSON output)
# A trailing `i` on a string literal makes it case-insensitive.

# `:>>` ends the `any*` run as soon as the closing `*/` has been seen.
block_comment = '/*' any* :>> '*/';
line_comment = '--' [^\r\n]*;
insignificant =
	block_comment |
	line_comment |
	space |
	[,;] |
	'convert('i |
	'using utf8mb4)'i;

# Quoted strings.
#
# A backslash only acts as an escape lead-in when the parser's escape flavor
# is something other than `escapeFlavorNone`.
backslash_escape = '\\' when { parser.escFlavor != escapeFlavorNone };

# Inside quotes, a doubled quote character stands for itself. `**` is the
# longest-match repetition, so the body is extended as far as possible before
# the closing quote is accepted.
single_quoted = "'" (^"'" | backslash_escape any | "''")** "'";
double_quoted = '"' (^'"' | backslash_escape any | '""')** '"';
back_quoted = '`' (^'`' | '``')* '`';

# A run of one or more characters that are neither whitespace nor any of the
# punctuation characters listed in the set.
unquoted = ^([,;()'"`/*] | space)+;

# Numeric-looking literals.
integer = '-'? digit+;
hex_string = '0x' xdigit+ | "x'"i xdigit* "'";
bin_string = '0b' [01]+ | "b'"i [01]* "'";

# The scanner proper. The longest match wins, and when several patterns match
# the same span the one listed first wins, so the keyword and literal patterns
# must stay ahead of `unquoted`. Every action records the token kind in
# `consumedToken` and uses `fbreak` to leave the generated scanning code.
main := |*
	# No action: the match is dropped and scanning carries on.
	insignificant;

	'(' => {
		consumedToken = tokRowBegin
		fbreak;
	};

	')' => {
		consumedToken = tokRowEnd
		fbreak;
	};

	'values'i => {
		consumedToken = tokValues
		fbreak;
	};

	'null'i => {
		consumedToken = tokNull
		fbreak;
	};

	'true'i => {
		consumedToken = tokTrue
		fbreak;
	};

	'false'i => {
		consumedToken = tokFalse
		fbreak;
	};

	integer => {
		consumedToken = tokInteger
		fbreak;
	};

	hex_string => {
		consumedToken = tokHexString
		fbreak;
	};

	bin_string => {
		consumedToken = tokBinString
		fbreak;
	};

	single_quoted => {
		consumedToken = tokSingleQuoted
		fbreak;
	};

	double_quoted => {
		consumedToken = tokDoubleQuoted
		fbreak;
	};

	back_quoted => {
		consumedToken = tokBackQuoted
		fbreak;
	};

	unquoted => {
		consumedToken = tokUnquoted
		fbreak;
	};
*|;

#`
}%%

%% write data;
|
|
|
|
func (parser *ChunkParser) lex() (token, []byte, error) {
|
|
var cs, ts, te, act, p int
|
|
%% write init;
|
|
|
|
for {
|
|
data := parser.buf
|
|
consumedToken := tokNil
|
|
pe := len(data)
|
|
eof := -1
|
|
if parser.isLastChunk {
|
|
eof = pe
|
|
}
|
|
|
|
%% write exec;
|
|
|
|
if cs == %%{ write error; }%% {
|
|
parser.logSyntaxError()
|
|
return tokNil, nil, errors.New("syntax error")
|
|
}
|
|
|
|
if consumedToken != tokNil {
|
|
result := data[ts:te]
|
|
parser.buf = data[te:]
|
|
parser.pos += int64(te)
|
|
return consumedToken, result, nil
|
|
}
|
|
|
|
if parser.isLastChunk {
|
|
if te == eof {
|
|
return tokNil, nil, io.EOF
|
|
} else {
|
|
return tokNil, nil, errors.New("syntax error: unexpected EOF")
|
|
}
|
|
}
|
|
|
|
parser.buf = parser.buf[ts:]
|
|
parser.pos += int64(ts)
|
|
p -= ts
|
|
te -= ts
|
|
ts = 0
|
|
if err := parser.readBlock(); err != nil {
|
|
return tokNil, nil, errors.Trace(err)
|
|
}
|
|
}
|
|
|
|
return tokNil, nil, nil
|
|
}
|