// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Please edit `parser.rl` if you want to modify this file. To generate
// `parser_generated.go`, please execute
//
// ```sh
// make data_parsers
// ```

package mydump

import (
	"io"

	"github.com/pingcap/errors"
)

%%{
#`

# This is a ragel parser to quickly scan through a data source file consisting
# of INSERT statements only. You may find a detailed syntax explanation on the
# Ragel website <https://www.colm.net/open-source/ragel/>.

machine chunk_parser;

# We treat all unimportant patterns as "comments". This includes:
#  - Real SQL comments `/* ... */` and `-- ...`
#  - Whitespace
#  - Separators `,` and `;`
#  - The parts of the function `CONVERT(` and `USING UTF8MB4)`
#    (to strip the unnecessary detail from mydumper JSON output;
#    the suffix `i` means case-insensitive).
# Note that the keyword `INTO` is not part of this rule: it is matched by
# `unquoted` below and reaches the caller as an unquoted token.

# `:>>` ends the block comment at the first `*/` that follows the opening.
block_comment = '/*' any* :>> '*/';
line_comment = /--[^\r\n]*/;
comment =
	block_comment |
	line_comment |
	space |
	[,;] |
	'convert('i |
	'using utf8mb4)'i;

# These patterns parse quoted strings. A backslash only acts as an escape
# character when the parser's escape flavor is not `escapeFlavorNone`.
bs = '\\' when { parser.escFlavor != escapeFlavorNone };

# A doubled quote character inside a quoted string stands for one literal quote.
single_quoted = "'" (^"'" | bs any | "''")** "'";
double_quoted = '"' (^'"' | bs any | '""')** '"';
back_quoted = '`' (^'`' | '``')* '`';
unquoted = ^([,;()'"`/*] | space)+;

integer = '-'? [0-9]+;
hex_string = '0x' [0-9a-fA-F]+ | "x'"i [0-9a-fA-F]* "'";
bin_string = '0b' [01]+ | "b'"i [01]* "'";

# The scanner takes the longest match; when two alternatives match the same
# length the one listed first wins. Keywords and literals are therefore listed
# before the catch-all `unquoted`.
main := |*
	comment;

	'(' => {
		consumedToken = tokRowBegin
		fbreak;
	};

	')' => {
		consumedToken = tokRowEnd
		fbreak;
	};

	'values'i => {
		consumedToken = tokValues
		fbreak;
	};

	'null'i => {
		consumedToken = tokNull
		fbreak;
	};

	'true'i => {
		consumedToken = tokTrue
		fbreak;
	};

	'false'i => {
		consumedToken = tokFalse
		fbreak;
	};

	integer => {
		consumedToken = tokInteger
		fbreak;
	};

	hex_string => {
		consumedToken = tokHexString
		fbreak;
	};

	bin_string => {
		consumedToken = tokBinString
		fbreak;
	};

	single_quoted => {
		consumedToken = tokSingleQuoted
		fbreak;
	};

	double_quoted => {
		consumedToken = tokDoubleQuoted
		fbreak;
	};

	back_quoted => {
		consumedToken = tokBackQuoted
		fbreak;
	};

	unquoted => {
		consumedToken = tokUnquoted
		fbreak;
	};
*|;

#`
}%%

%% write data;

// lex scans parser.buf for the next significant token.
//
// On success it returns the token kind together with the raw bytes of the
// token and a nil error. The returned slice is a sub-slice of the parser's
// buffer, not a copy. parser.buf and parser.pos are advanced past the token.
//
// If the state machine enters its error state, the error is reported through
// parser.logSyntaxError and a "syntax error" is returned. If the buffer is
// exhausted without producing a token, lex either asks parser.readBlock for
// more input and continues scanning, or, when parser.isLastChunk is set,
// returns io.EOF (all input consumed) or an "unexpected EOF" syntax error
// (input ends in the middle of a token).
//
// The names cs, ts, te, act, p, pe, eof and data are referenced by the code
// ragel generates for `write init` and `write exec`; do not rename them.
func (parser *ChunkParser) lex() (token, []byte, error) {
	var cs, ts, te, act, p int
	%% write init;

	for {
		data := parser.buf
		consumedToken := tokNil
		pe := len(data)
		// eof stays at -1 while more input may still be read, so that it can
		// never equal p; on the last chunk it marks the true end of input.
		eof := -1
		if parser.isLastChunk {
			eof = pe
		}

		%% write exec;

		if cs == %%{ write error; }%% {
			parser.logSyntaxError()
			return tokNil, nil, errors.New("syntax error")
		}

		if consumedToken != tokNil {
			result := data[ts:te]
			parser.buf = data[te:]
			parser.pos += int64(te)
			return consumedToken, result, nil
		}

		// No token was produced from the bytes currently in the buffer.
		if parser.isLastChunk {
			if te == eof {
				return tokNil, nil, io.EOF
			}
			return tokNil, nil, errors.New("syntax error: unexpected EOF")
		}

		// Drop everything before the start of the token in progress and
		// rebase the scanner offsets onto the shortened buffer before
		// reading more input.
		parser.buf = parser.buf[ts:]
		parser.pos += int64(ts)
		p -= ts
		te -= ts
		ts = 0
		if err := parser.readBlock(); err != nil {
			return tokNil, nil, errors.Trace(err)
		}
	}

	// No visible path falls out of the loop above; this return is retained
	// from the original so the generated file keeps compiling as before.
	return tokNil, nil, nil
}