expression: support ilike function (#40942)

close #40943
2023-03-17 13:37:15 +08:00
parent 8d8d608093
commit 5aceb2e525
32 changed files with 11244 additions and 10505 deletions
--- a/util/stringutil/string_util.go
+++ b/util/stringutil/string_util.go
@ -407,6 +407,87 @@ func ConvertPosInUtf8(str *string, pos int64) int64 {
 	return int64(preStrNum + 1)
 }

+// toLowerIfAlphaASCII sets the 0x20 bit of c, which turns an ASCII letter into
+// its lower-case form. No check is made that c is a letter, so callers are
+// expected to test the byte first.
+func toLowerIfAlphaASCII(c byte) byte {
+	const caseBit byte = 0x20
+	return c | caseBit
+}
+
+// toUpperIfAlphaASCII clears the 0x20 bit of c, which turns an ASCII letter
+// into its upper-case form. No check is made that c is a letter, so callers
+// are expected to test the byte first.
+//
+// The bit is cleared rather than toggled so that a letter which is already
+// upper case is returned unchanged instead of being flipped to lower case.
+func toUpperIfAlphaASCII(c byte) byte {
+	return c &^ 0x20
+}
+
+// IsUpperASCII reports whether c is an ASCII capital letter ('A' through 'Z').
+func IsUpperASCII(c byte) bool {
+	return 'A' <= c && c <= 'Z'
+}
+
+// IsLowerASCII reports whether c is an ASCII lower-case letter ('a' through 'z').
+func IsLowerASCII(c byte) bool {
+	return 'a' <= c && c <= 'z'
+}
+
+// LowerOneString converts every ASCII capital letter in str to lower case.
+// The slice is modified in place; all other bytes are left untouched.
+func LowerOneString(str []byte) {
+	for idx, ch := range str {
+		if !IsUpperASCII(ch) {
+			continue
+		}
+		str[idx] = toLowerIfAlphaASCII(ch)
+	}
+}
+
+// LowerOneStringExcludeEscapeChar lower-cases the ASCII capital letters of str
+// in place while keeping the escape character usable, and returns the escape
+// character that is in effect for the rewritten str.
+//
+// A capital escape char must survive the lowering. For example, for
+// 'aaaa' ilike 'AAAA' escape 'A' the pattern 'AAAA' has to become 'AaAa';
+// lowering it to 'aaaa' would lose the escapes and give a wrong result.
+//
+// A lower-case escape char is replaced by its capital form instead. Otherwise
+// lowering "ABC" with escape 'a' would produce "abc", whose 'a' would then
+// unexpectedly act as an escape char. Moreover, because the escape char may be
+// changed this way, the one actually used is returned to the caller.
+func LowerOneStringExcludeEscapeChar(str []byte, escapeChar byte) byte {
+	actualEscapeChar := escapeChar
+	if IsLowerASCII(escapeChar) {
+		actualEscapeChar = toUpperIfAlphaASCII(escapeChar)
+	}
+
+	// afterEscape is true only while the previous byte was an escape char that
+	// was not itself escaped.
+	afterEscape := false
+	for i := 0; i < len(str); i++ {
+		c := str[i]
+		switch {
+		case IsUpperASCII(c):
+			if c == escapeChar && !afterEscape {
+				// A capital escape char is kept as is.
+				afterEscape = true
+				continue
+			}
+			// Everything else is lowered, including a byte equal to the
+			// escape char that follows an escape char:
+			// "AA" (escape 'A') -> "Aa".
+			str[i] = toLowerIfAlphaASCII(c)
+		case c == escapeChar && !afterEscape:
+			// Non-capital escape char: store the escape char the caller
+			// will get back (the capital form when escapeChar is lower case).
+			afterEscape = true
+			str[i] = actualEscapeChar
+			continue
+		default:
+			// Skip the remaining bytes of this character; Utf8Len is
+			// presumably its encoded length given the first byte — defined
+			// outside this hunk, confirm there.
+			i += Utf8Len(c) - 1
+		}
+		afterEscape = false
+	}
+
+	return actualEscapeChar
+}
+
 // EscapeGlobExceptAsterisk escapes '?', '[', ']' for a glob path pattern.
 func EscapeGlobExceptAsterisk(s string) string {
 	var buf strings.Builder