From 01abdb1fbdf8706c03a47c19a996137fd488957e Mon Sep 17 00:00:00 2001 From: tiancaiamao Date: Thu, 14 Feb 2019 20:14:52 +0800 Subject: [PATCH] [parser] parser: fix lexer that treat 9eTSs as a float (#208) --- parser/lexer.go | 30 +++++++++++++++++++++++++----- parser/lexer_test.go | 11 +++++++++++ parser/parser_test.go | 5 +++-- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/parser/lexer.go b/parser/lexer.go index a8b3510b6f..2470f2cf57 100644 --- a/parser/lexer.go +++ b/parser/lexer.go @@ -639,17 +639,31 @@ func startWithNumber(s *Scanner) (tok int, pos Pos, lit string) { s.scanOct() case ch1 == 'x' || ch1 == 'X': s.r.inc() + p1 := s.r.pos() s.scanHex() + p2 := s.r.pos() + // 0x, 0x7fz3 are identifiers + if p1 == p2 || isDigit(s.r.peek()) { + s.r.incAsLongAs(isIdentChar) + return identifier, pos, s.r.data(&pos) + } tok = hexLit case ch1 == 'b': s.r.inc() + p1 := s.r.pos() s.scanBit() + p2 := s.r.pos() + // 0b, 0b123, 0b1ab are identifiers + if p1 == p2 || isDigit(s.r.peek()) { + s.r.incAsLongAs(isIdentChar) + return identifier, pos, s.r.data(&pos) + } tok = bitLit case ch1 == '.': return s.scanFloat(&pos) case ch1 == 'B': - tok = unicode.ReplacementChar - return + s.r.incAsLongAs(isIdentChar) + return identifier, pos, s.r.data(&pos) } } @@ -717,11 +731,17 @@ func (s *Scanner) scanFloat(beg *Pos) (tok int, pos Pos, lit string) { if ch0 == 'e' || ch0 == 'E' { s.r.inc() ch0 = s.r.peek() - if ch0 == '-' || ch0 == '+' { + if ch0 == '-' || ch0 == '+' || isDigit(ch0) { s.r.inc() + s.scanDigits() + tok = floatLit + } else { + // D1 . D2 e XX when XX is not D3, parse the result as an identifier.
+ // 9e9e = 9e9(float) + e(identifier) + // 9est = 9est(identifier) + s.r.incAsLongAs(isIdentChar) + tok = identifier } - s.scanDigits() - tok = floatLit } else { tok = decLit } diff --git a/parser/lexer_test.go b/parser/lexer_test.go index 4e50043ab8..9bcddf7eeb 100644 --- a/parser/lexer_test.go +++ b/parser/lexer_test.go @@ -113,6 +113,7 @@ func (s *testLexerSuite) TestLiteral(c *C) { {"132.313", decLit}, {"132.3e231", floatLit}, {"132.3e-231", floatLit}, + {"001e-12", floatLit}, {"23416", intLit}, {"123test", identifier}, {"123" + string(unicode.ReplacementChar) + "xxx", identifier}, @@ -126,6 +127,7 @@ func (s *testLexerSuite) TestLiteral(c *C) { {"\\N", null}, {".*", int('.')}, // `.`, `*` {".1_t_1_x", int('.')}, // `.`, `1_t_1_x` + {"9e9e", floatLit}, // 9e9e = 9e9 + e // Issue #3954 {".1e23", floatLit}, // `.1e23` {".123", decLit}, // `.123` @@ -232,6 +234,15 @@ func (s *testLexerSuite) TestIdentifier(c *C) { {"1_x", "1_x"}, {"0_x", "0_x"}, {replacementString, replacementString}, + {"9e", "9e"}, + {"0b", "0b"}, + {"0b123", "0b123"}, + {"0b1ab", "0b1ab"}, + {"0B01", "0B01"}, + {"0x", "0x"}, + {"0x7fz3", "0x7fz3"}, + {"023a4", "023a4"}, + {"9eTSs", "9eTSs"}, {fmt.Sprintf("t1%cxxx", 0), "t1"}, } l := &Scanner{} diff --git a/parser/parser_test.go b/parser/parser_test.go index a04e1f3d9b..1552372900 100755 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -2039,8 +2039,9 @@ func (s *testParserSuite) TestType(c *C) { // for bit {"select 0b01, 0b0, b'11', B'11'", true, "SELECT b'1',b'0',b'11',b'11'"}, - {"select 0B01", false, ""}, - {"select 0b21", false, ""}, + // 0B01 and 0b21 are identifiers, so the following two statements now parse. + // {"select 0B01", false, ""}, + // {"select 0b21", false, ""}, // for enum and set type {"create table t (c1 enum('a', 'b'), c2 set('a', 'b'))", true, ""},