1424 lines
35 KiB
Go
1424 lines
35 KiB
Go
// Copyright 2022 PingCAP, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package expression
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
"sync"
|
|
"unicode/utf8"
|
|
|
|
"github.com/pingcap/tidb/parser/charset"
|
|
"github.com/pingcap/tidb/parser/mysql"
|
|
"github.com/pingcap/tidb/sessionctx"
|
|
"github.com/pingcap/tidb/types"
|
|
"github.com/pingcap/tidb/util/chunk"
|
|
"github.com/pingcap/tidb/util/collate"
|
|
"github.com/pingcap/tidb/util/set"
|
|
"github.com/pingcap/tidb/util/stringutil"
|
|
"github.com/pingcap/tipb/go-tipb"
|
|
)
|
|
|
|
type empty struct{}
|
|
|
|
const patternIdx = 1
|
|
const regexpLikeMatchTypeIdx = 2
|
|
const regexpSubstrMatchTypeIdx = 4
|
|
const regexpInstrMatchTypeIdx = 5
|
|
const regexpReplaceMatchTypeIdx = 5
|
|
|
|
// Valid flags in match type
|
|
const (
|
|
flagI = "i"
|
|
flagC = "c"
|
|
flagM = "m"
|
|
flagS = "s"
|
|
)
|
|
|
|
const (
|
|
invalidMatchType = "Invalid match type"
|
|
invalidIndex = "Index out of bounds in regular expression search"
|
|
invalidReturnOption = "Incorrect arguments to regexp_instr: return_option must be 1 or 0"
|
|
binaryCollateErr = "Not support binary collation so far"
|
|
emptyPatternErr = "Empty pattern is invalid"
|
|
)
|
|
|
|
var validMatchType = set.NewStringSet(
|
|
flagI, // Case-insensitive matching
|
|
flagC, // Case-sensitive matching
|
|
flagM, // Multiple-line mode
|
|
flagS, // The . character matches line terminators
|
|
)
|
|
|
|
type regexpBaseFuncSig struct {
|
|
baseBuiltinFunc
|
|
regexpMemorizedSig
|
|
once sync.Once
|
|
}
|
|
|
|
// check binary collation, not xxx_bin collation!
|
|
func (re *regexpBaseFuncSig) isBinaryCollation() bool {
|
|
return re.collation == charset.CollationBin && re.charset == charset.CharsetBin
|
|
}
|
|
|
|
func (re *regexpBaseFuncSig) clone() *regexpBaseFuncSig {
|
|
newSig := ®expBaseFuncSig{once: sync.Once{}}
|
|
if re.memorizedRegexp != nil {
|
|
newSig.memorizedRegexp = re.memorizedRegexp
|
|
}
|
|
newSig.memorizedErr = re.memorizedErr
|
|
newSig.cloneFrom(&re.baseBuiltinFunc)
|
|
return newSig
|
|
}
|
|
|
|
// If characters specifying contradictory options are specified
|
|
// within match_type, the rightmost one takes precedence.
|
|
func (re *regexpBaseFuncSig) getMatchType(userInputMatchType string) (string, error) {
|
|
flag := ""
|
|
matchTypeSet := set.NewStringSet()
|
|
|
|
if collate.IsCICollation(re.baseBuiltinFunc.collation) {
|
|
matchTypeSet.Insert(flagI)
|
|
}
|
|
|
|
for _, val := range userInputMatchType {
|
|
c := string(val)
|
|
|
|
// Check validation of the flag
|
|
_, err := validMatchType[c]
|
|
if !err {
|
|
return "", ErrRegexp.GenWithStackByArgs(invalidMatchType)
|
|
}
|
|
|
|
if c == flagC {
|
|
// re2 is case-sensitive by default, so we only need to delete 'i' flag
|
|
// to enable the case-sensitive for the regexp
|
|
delete(matchTypeSet, flagI)
|
|
continue
|
|
}
|
|
|
|
matchTypeSet[c] = empty{} // add this flag
|
|
}
|
|
|
|
// generate flag
|
|
for key := range matchTypeSet {
|
|
flag += key
|
|
}
|
|
|
|
return flag, nil
|
|
}
|
|
|
|
// To get a unified compile interface in initMemoizedRegexp, we need to process many things in genCompile
|
|
func (re *regexpBaseFuncSig) genCompile(matchType string) (func(string) (*regexp.Regexp, error), error) {
|
|
matchType, err := re.getMatchType(matchType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return func(pat string) (*regexp.Regexp, error) {
|
|
if len(matchType) == 0 {
|
|
return regexp.Compile(pat)
|
|
}
|
|
return regexp.Compile(fmt.Sprintf("(?%s)%s", matchType, pat))
|
|
}, nil
|
|
}
|
|
|
|
func (re *regexpBaseFuncSig) genRegexp(pat string, matchType string) (*regexp.Regexp, error) {
|
|
if len(pat) == 0 {
|
|
return nil, ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
if re.isMemorizedRegexpInitialized() {
|
|
return re.memorizedRegexp, re.memorizedErr
|
|
}
|
|
|
|
var err error
|
|
|
|
// Generate compiler first
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return compile(pat)
|
|
}
|
|
|
|
// we can memorize the regexp when:
|
|
// 1. pattern and match type are constant
|
|
// 2. pattern is const and there is no match type argument
|
|
//
|
|
// return true: need, false: needless
|
|
func (re *regexpBaseFuncSig) canMemorize(matchTypeIdx int) bool {
|
|
return re.args[patternIdx].ConstItem(re.ctx.GetSessionVars().StmtCtx) && (len(re.args) <= matchTypeIdx || re.args[matchTypeIdx].ConstItem(re.ctx.GetSessionVars().StmtCtx))
|
|
}
|
|
|
|
func (re *regexpBaseFuncSig) initMemoizedRegexp(params []*regexpParam, matchTypeIdx int) error {
|
|
pat := params[patternIdx].getStringVal(0)
|
|
if len(pat) == 0 {
|
|
return ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
// Generate compile
|
|
compile, err := re.genCompile(params[matchTypeIdx].getStringVal(0))
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
// Compile this constant pattern, so that we can avoid this repeated work
|
|
re.memorize(compile, pat)
|
|
|
|
return re.memorizedErr
|
|
}
|
|
|
|
// As multiple threads may memorize regexp and cause data race, only the first thread
|
|
// who gets the lock is permitted to do the memorization and others should wait for him
|
|
// until the memorization has been finished.
|
|
func (re *regexpBaseFuncSig) tryToMemorize(params []*regexpParam, matchTypeIdx int, n int) error {
|
|
// Check memorization
|
|
if n == 0 || !re.canMemorize(matchTypeIdx) {
|
|
return nil
|
|
}
|
|
|
|
var err error
|
|
memorize := func() {
|
|
if re.isMemorizedRegexpInitialized() {
|
|
err = nil
|
|
return
|
|
}
|
|
|
|
err = re.initMemoizedRegexp(params, matchTypeIdx)
|
|
}
|
|
|
|
re.once.Do(memorize)
|
|
|
|
return err
|
|
}
|
|
|
|
// https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-like
|
|
type regexpLikeFunctionClass struct {
|
|
baseFunctionClass
|
|
}
|
|
|
|
func (c *regexpLikeFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
|
|
if err := c.verifyArgs(args); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
argTp := []types.EvalType{types.ETString, types.ETString}
|
|
if len(args) == 3 {
|
|
argTp = append(argTp, types.ETString)
|
|
}
|
|
|
|
bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, argTp...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
bf.tp.SetFlen(1)
|
|
sig := builtinRegexpLikeFuncSig{
|
|
regexpBaseFuncSig: regexpBaseFuncSig{baseBuiltinFunc: bf},
|
|
}
|
|
|
|
sig.setPbCode(tipb.ScalarFuncSig_RegexpLikeSig)
|
|
|
|
return &sig, nil
|
|
}
|
|
|
|
type builtinRegexpLikeFuncSig struct {
|
|
regexpBaseFuncSig
|
|
}
|
|
|
|
func (re *builtinRegexpLikeFuncSig) Clone() builtinFunc {
|
|
newSig := &builtinRegexpLikeFuncSig{}
|
|
newSig.regexpBaseFuncSig = *re.regexpBaseFuncSig.clone()
|
|
return newSig
|
|
}
|
|
|
|
func (re *builtinRegexpLikeFuncSig) vectorized() bool {
|
|
return true
|
|
}
|
|
|
|
func (re *builtinRegexpLikeFuncSig) evalInt(row chunk.Row) (int64, bool, error) {
|
|
expr, isNull, err := re.args[0].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
|
|
pat, isNull, err := re.args[1].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
} else if len(pat) == 0 {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
matchType := ""
|
|
if len(re.args) == 3 {
|
|
matchType, isNull, err = re.args[2].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
}
|
|
|
|
memorize := func() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
re.memorizedErr = err
|
|
return
|
|
}
|
|
re.memorize(compile, pat)
|
|
}
|
|
|
|
if re.canMemorize(regexpLikeMatchTypeIdx) {
|
|
re.once.Do(memorize) // Avoid data race
|
|
}
|
|
|
|
if !re.isMemorizedRegexpInitialized() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
reg, err := compile(pat)
|
|
if err != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
return boolToInt64(reg.MatchString(expr)), false, nil
|
|
}
|
|
|
|
if re.memorizedErr != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(re.memorizedErr)
|
|
}
|
|
|
|
return boolToInt64(re.memorizedRegexp.MatchString(expr)), false, nil
|
|
}
|
|
|
|
// REGEXP_LIKE(expr, pat[, match_type])
|
|
func (re *builtinRegexpLikeFuncSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error {
|
|
n := input.NumRows()
|
|
params := make([]*regexpParam, 0, 3)
|
|
defer releaseBuffers(&re.baseBuiltinFunc, params)
|
|
|
|
for i := 0; i < 2; i++ {
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, i, input, false)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
params = append(params, param)
|
|
}
|
|
|
|
// user may ignore match type parameter
|
|
hasMatchType := (len(re.args) == 3)
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, 2, input, !hasMatchType)
|
|
params = append(params, param)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
|
|
err = re.tryToMemorize(params, regexpLikeMatchTypeIdx, n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.ResizeInt64(n, false)
|
|
result.MergeNulls(getBuffers(params)...)
|
|
i64s := result.Int64s()
|
|
for i := 0; i < n; i++ {
|
|
if result.IsNull(i) {
|
|
continue
|
|
}
|
|
|
|
matchType := params[2].getStringVal(i)
|
|
re, err := re.genRegexp(params[1].getStringVal(i), matchType)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
i64s[i] = boolToInt64(re.MatchString(params[0].getStringVal(i)))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-substr
|
|
type regexpSubstrFunctionClass struct {
|
|
baseFunctionClass
|
|
}
|
|
|
|
func (c *regexpSubstrFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
|
|
if err := c.verifyArgs(args); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
argTp := []types.EvalType{types.ETString, types.ETString}
|
|
switch len(args) {
|
|
case 3:
|
|
argTp = append(argTp, types.ETInt)
|
|
case 4:
|
|
argTp = append(argTp, types.ETInt, types.ETInt)
|
|
case 5:
|
|
argTp = append(argTp, types.ETInt, types.ETInt, types.ETString)
|
|
}
|
|
|
|
bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETString, argTp...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
argType := args[0].GetType()
|
|
bf.tp.SetFlen(argType.GetFlen())
|
|
sig := builtinRegexpSubstrFuncSig{
|
|
regexpBaseFuncSig: regexpBaseFuncSig{baseBuiltinFunc: bf},
|
|
}
|
|
sig.setPbCode(tipb.ScalarFuncSig_RegexpSubstrSig)
|
|
|
|
if sig.isBinaryCollation() {
|
|
return nil, ErrRegexp.GenWithStackByArgs(binaryCollateErr)
|
|
}
|
|
|
|
return &sig, nil
|
|
}
|
|
|
|
type builtinRegexpSubstrFuncSig struct {
|
|
regexpBaseFuncSig
|
|
}
|
|
|
|
func (re *builtinRegexpSubstrFuncSig) vectorized() bool {
|
|
return true
|
|
}
|
|
|
|
func (re *builtinRegexpSubstrFuncSig) Clone() builtinFunc {
|
|
newSig := &builtinRegexpSubstrFuncSig{}
|
|
newSig.regexpBaseFuncSig = *re.regexpBaseFuncSig.clone()
|
|
return newSig
|
|
}
|
|
|
|
func (re *builtinRegexpSubstrFuncSig) findString(reg *regexp.Regexp, expr string, occurrence int64) (string, bool, error) {
|
|
matches := reg.FindAllString(expr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
return "", true, nil
|
|
}
|
|
return matches[occurrence-1], false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpSubstrFuncSig) findBinString(reg *regexp.Regexp, bexpr []byte, occurrence int64) (string, bool, error) {
|
|
matches := reg.FindAll(bexpr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
return "", true, nil
|
|
}
|
|
|
|
return fmt.Sprintf("0x%s", strings.ToUpper(hex.EncodeToString(matches[occurrence-1]))), false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpSubstrFuncSig) evalString(row chunk.Row) (string, bool, error) {
|
|
expr, isNull, err := re.args[0].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
pat, isNull, err := re.args[1].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
} else if len(pat) == 0 {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
occurrence := int64(1)
|
|
matchType := ""
|
|
argNum := len(re.args)
|
|
var bexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
}
|
|
|
|
if argNum >= 3 {
|
|
pos, isNull, err := re.args[2].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
// Check position and trim expr
|
|
if re.isBinaryCollation() {
|
|
if pos < 1 || pos > int64(len(bexpr)) {
|
|
if checkOutRangePos(len(bexpr), pos) {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
bexpr = bexpr[pos-1:] // Trim
|
|
} else {
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(expr)) {
|
|
if checkOutRangePos(len(expr), pos) {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
stringutil.TrimUtf8String(&expr, pos-1) // Trim
|
|
}
|
|
}
|
|
|
|
if argNum >= 4 {
|
|
occurrence, isNull, err = re.args[3].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
if occurrence < 1 {
|
|
occurrence = 1
|
|
}
|
|
}
|
|
|
|
if argNum == 5 {
|
|
matchType, isNull, err = re.args[4].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
}
|
|
|
|
memorize := func() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
re.memorizedErr = err
|
|
return
|
|
}
|
|
re.memorize(compile, pat)
|
|
}
|
|
|
|
if re.canMemorize(regexpSubstrMatchTypeIdx) {
|
|
re.once.Do(memorize) // Avoid data race
|
|
}
|
|
|
|
if !re.isMemorizedRegexpInitialized() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
reg, err := compile(pat)
|
|
if err != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.findBinString(reg, bexpr, occurrence)
|
|
}
|
|
return re.findString(reg, expr, occurrence)
|
|
}
|
|
|
|
if re.memorizedErr != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(re.memorizedErr)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.findBinString(re.memorizedRegexp, bexpr, occurrence)
|
|
}
|
|
|
|
return re.findString(re.memorizedRegexp, expr, occurrence)
|
|
}
|
|
|
|
// REGEXP_SUBSTR(expr, pat[, pos[, occurrence[, match_type]]])
|
|
func (re *builtinRegexpSubstrFuncSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error {
|
|
n := input.NumRows()
|
|
params := make([]*regexpParam, 0, 5)
|
|
defer releaseBuffers(&re.baseBuiltinFunc, params)
|
|
|
|
for i := 0; i < 2; i++ {
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, i, input, false)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
params = append(params, param)
|
|
}
|
|
|
|
paramLen := len(re.args)
|
|
|
|
// Handle position parameter
|
|
hasPosition := (paramLen >= 3)
|
|
param, isConstNull, err := buildIntParam(&re.baseBuiltinFunc, 2, input, !hasPosition, 1)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Handle occurrence parameter
|
|
hasOccur := (paramLen >= 4)
|
|
param, isConstNull, err = buildIntParam(&re.baseBuiltinFunc, 3, input, !hasOccur, 1)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Handle match type
|
|
hasMatchType := (paramLen == 5)
|
|
param, isConstNull, err = buildStringParam(&re.baseBuiltinFunc, 4, input, !hasMatchType)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Check memorization
|
|
err = re.tryToMemorize(params, regexpSubstrMatchTypeIdx, n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.ReserveString(n)
|
|
buffers := getBuffers(params)
|
|
|
|
// Start to calculate
|
|
for i := 0; i < n; i++ {
|
|
if isResultNull(buffers, i) {
|
|
result.AppendNull()
|
|
continue
|
|
}
|
|
|
|
expr := params[0].getStringVal(i)
|
|
var bexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
}
|
|
|
|
// Check position and trim expr
|
|
pos := params[2].getIntVal(i)
|
|
if re.isBinaryCollation() {
|
|
if pos < 1 || pos > int64(len(bexpr)) {
|
|
if checkOutRangePos(len(bexpr), pos) {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
bexpr = bexpr[pos-1:] // Trim
|
|
} else {
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(expr)) {
|
|
if checkOutRangePos(len(expr), pos) {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
stringutil.TrimUtf8String(&expr, pos-1) // Trim
|
|
}
|
|
|
|
// Get occurrence
|
|
occurrence := params[3].getIntVal(i)
|
|
if occurrence < 1 {
|
|
occurrence = 1
|
|
}
|
|
|
|
// Get match type and generate regexp
|
|
matchType := params[4].getStringVal(i)
|
|
reg, err := re.genRegexp(params[1].getStringVal(i), matchType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Find string
|
|
if re.isBinaryCollation() {
|
|
matches := reg.FindAll(bexpr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
result.AppendNull()
|
|
continue
|
|
}
|
|
|
|
result.AppendString(fmt.Sprintf("0x%s", strings.ToUpper(hex.EncodeToString(matches[occurrence-1]))))
|
|
} else {
|
|
matches := reg.FindAllString(expr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
result.AppendNull()
|
|
continue
|
|
}
|
|
|
|
result.AppendString(matches[occurrence-1])
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-instr
|
|
type regexpInStrFunctionClass struct {
|
|
baseFunctionClass
|
|
}
|
|
|
|
func (c *regexpInStrFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
|
|
if err := c.verifyArgs(args); err != nil {
|
|
return nil, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
argTp := []types.EvalType{types.ETString, types.ETString}
|
|
switch len(args) {
|
|
case 3:
|
|
argTp = append(argTp, types.ETInt)
|
|
case 4:
|
|
argTp = append(argTp, types.ETInt, types.ETInt)
|
|
case 5:
|
|
argTp = append(argTp, types.ETInt, types.ETInt, types.ETInt)
|
|
case 6:
|
|
argTp = append(argTp, types.ETInt, types.ETInt, types.ETInt, types.ETString)
|
|
}
|
|
|
|
bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, argTp...)
|
|
if err != nil {
|
|
return nil, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
bf.tp.SetFlen(mysql.MaxIntWidth)
|
|
sig := builtinRegexpInStrFuncSig{
|
|
regexpBaseFuncSig: regexpBaseFuncSig{baseBuiltinFunc: bf},
|
|
}
|
|
sig.setPbCode(tipb.ScalarFuncSig_RegexpInStrSig)
|
|
|
|
if sig.isBinaryCollation() {
|
|
return nil, ErrRegexp.GenWithStackByArgs(binaryCollateErr)
|
|
}
|
|
|
|
return &sig, nil
|
|
}
|
|
|
|
type builtinRegexpInStrFuncSig struct {
|
|
regexpBaseFuncSig
|
|
}
|
|
|
|
func (re *builtinRegexpInStrFuncSig) Clone() builtinFunc {
|
|
newSig := &builtinRegexpInStrFuncSig{}
|
|
newSig.regexpBaseFuncSig = *re.regexpBaseFuncSig.clone()
|
|
return newSig
|
|
}
|
|
|
|
func (re *builtinRegexpInStrFuncSig) vectorized() bool {
|
|
return true
|
|
}
|
|
|
|
func (re *builtinRegexpInStrFuncSig) findBinIndex(reg *regexp.Regexp, bexpr []byte, pos int64, occurrence int64, returnOption int64) (int64, bool, error) {
|
|
matches := reg.FindAllIndex(bexpr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
return 0, false, nil
|
|
}
|
|
|
|
if returnOption == 0 {
|
|
return int64(matches[occurrence-1][0]) + pos, false, nil
|
|
}
|
|
return int64(matches[occurrence-1][1]) + pos, false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpInStrFuncSig) findIndex(reg *regexp.Regexp, expr string, pos int64, occurrence int64, returnOption int64) (int64, bool, error) {
|
|
matches := reg.FindAllStringIndex(expr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
return 0, false, nil
|
|
}
|
|
|
|
if returnOption == 0 {
|
|
return stringutil.ConvertPosInUtf8(&expr, int64(matches[occurrence-1][0])) + pos - 1, false, nil
|
|
}
|
|
return stringutil.ConvertPosInUtf8(&expr, int64(matches[occurrence-1][1])) + pos - 1, false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpInStrFuncSig) evalInt(row chunk.Row) (int64, bool, error) {
|
|
expr, isNull, err := re.args[0].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
|
|
pat, isNull, err := re.args[1].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
} else if len(pat) == 0 {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
pos := int64(1)
|
|
occurrence := int64(1)
|
|
returnOption := int64(0)
|
|
matchType := ""
|
|
argNum := len(re.args)
|
|
var bexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
}
|
|
|
|
if argNum >= 3 {
|
|
pos, isNull, err = re.args[2].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
|
|
// Check position and trim expr
|
|
if re.isBinaryCollation() {
|
|
bexprLen := int64(len(bexpr))
|
|
if (pos < 1 || pos > bexprLen) && bexprLen != 0 {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
|
|
if bexprLen != 0 {
|
|
bexpr = bexpr[pos-1:] // Trim
|
|
}
|
|
} else {
|
|
exprLen := int64(len(expr))
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(expr)) && exprLen != 0 {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
|
|
if exprLen != 0 {
|
|
stringutil.TrimUtf8String(&expr, pos-1) // Trim
|
|
}
|
|
}
|
|
}
|
|
|
|
if argNum >= 4 {
|
|
occurrence, isNull, err = re.args[3].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
|
|
if occurrence < 1 {
|
|
occurrence = 1
|
|
}
|
|
}
|
|
|
|
if argNum >= 5 {
|
|
returnOption, isNull, err = re.args[4].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
|
|
if returnOption != 0 && returnOption != 1 {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(invalidReturnOption)
|
|
}
|
|
}
|
|
|
|
if argNum == 6 {
|
|
matchType, isNull, err = re.args[5].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return 0, true, err
|
|
}
|
|
}
|
|
|
|
memorize := func() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
re.memorizedErr = err
|
|
return
|
|
}
|
|
re.memorize(compile, pat)
|
|
}
|
|
|
|
if re.canMemorize(regexpInstrMatchTypeIdx) {
|
|
re.once.Do(memorize) // Avoid data race
|
|
}
|
|
|
|
if !re.isMemorizedRegexpInitialized() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
reg, err := compile(pat)
|
|
if err != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.findBinIndex(reg, bexpr, pos, occurrence, returnOption)
|
|
}
|
|
return re.findIndex(reg, expr, pos, occurrence, returnOption)
|
|
}
|
|
|
|
if re.memorizedErr != nil {
|
|
return 0, true, ErrRegexp.GenWithStackByArgs(re.memorizedErr)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.findBinIndex(re.memorizedRegexp, bexpr, pos, occurrence, returnOption)
|
|
}
|
|
|
|
return re.findIndex(re.memorizedRegexp, expr, pos, occurrence, returnOption)
|
|
}
|
|
|
|
// REGEXP_INSTR(expr, pat[, pos[, occurrence[, return_option[, match_type]]]])
|
|
func (re *builtinRegexpInStrFuncSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error {
|
|
n := input.NumRows()
|
|
params := make([]*regexpParam, 0, 5)
|
|
defer releaseBuffers(&re.baseBuiltinFunc, params)
|
|
|
|
for i := 0; i < 2; i++ {
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, i, input, false)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
params = append(params, param)
|
|
}
|
|
|
|
paramLen := len(re.args)
|
|
|
|
// Handle position parameter
|
|
hasPosition := (paramLen >= 3)
|
|
param, isConstNull, err := buildIntParam(&re.baseBuiltinFunc, 2, input, !hasPosition, 1)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
|
|
// Handle occurrence parameter
|
|
hasOccur := (paramLen >= 4)
|
|
param, isConstNull, err = buildIntParam(&re.baseBuiltinFunc, 3, input, !hasOccur, 1)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
|
|
// Handle return_option parameter
|
|
hasRetOpt := (paramLen >= 5)
|
|
param, isConstNull, err = buildIntParam(&re.baseBuiltinFunc, 4, input, !hasRetOpt, 0)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
|
|
// Handle match type
|
|
hasMatchType := (paramLen == 6)
|
|
param, isConstNull, err = buildStringParam(&re.baseBuiltinFunc, 5, input, !hasMatchType)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
result.ResizeInt64(n, true)
|
|
return nil
|
|
}
|
|
|
|
err = re.tryToMemorize(params, regexpInstrMatchTypeIdx, n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Start to calculate
|
|
result.ResizeInt64(n, false)
|
|
result.MergeNulls(getBuffers(params)...)
|
|
i64s := result.Int64s()
|
|
for i := 0; i < n; i++ {
|
|
if result.IsNull(i) {
|
|
continue
|
|
}
|
|
|
|
expr := params[0].getStringVal(i)
|
|
var bexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
}
|
|
|
|
// Check position and trim expr
|
|
pos := params[2].getIntVal(i)
|
|
if re.isBinaryCollation() {
|
|
bexprLen := int64(len(bexpr))
|
|
if pos < 1 || pos > bexprLen {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
|
|
if bexprLen != 0 {
|
|
bexpr = bexpr[pos-1:] // Trim
|
|
}
|
|
} else {
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(expr)) {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
|
|
if len(expr) != 0 {
|
|
stringutil.TrimUtf8String(&expr, pos-1) // Trim
|
|
}
|
|
}
|
|
|
|
// Get occurrence
|
|
occurrence := params[3].getIntVal(i)
|
|
if occurrence < 1 {
|
|
occurrence = 1
|
|
}
|
|
|
|
returnOption := params[4].getIntVal(i)
|
|
if returnOption != 0 && returnOption != 1 {
|
|
return ErrRegexp.GenWithStackByArgs(invalidReturnOption)
|
|
}
|
|
|
|
// Get match type and generate regexp
|
|
matchType := params[5].getStringVal(i)
|
|
reg, err := re.genRegexp(params[1].getStringVal(i), matchType)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
// Find index
|
|
if re.isBinaryCollation() {
|
|
matches := reg.FindAllIndex(bexpr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
i64s[i] = 0
|
|
continue
|
|
}
|
|
|
|
if returnOption == 0 {
|
|
i64s[i] = int64(matches[occurrence-1][0]) + pos
|
|
} else {
|
|
i64s[i] = int64(matches[occurrence-1][1]) + pos
|
|
}
|
|
} else {
|
|
matches := reg.FindAllStringIndex(expr, -1)
|
|
length := int64(len(matches))
|
|
if length == 0 || occurrence > length {
|
|
i64s[i] = 0
|
|
continue
|
|
}
|
|
|
|
if returnOption == 0 {
|
|
i64s[i] = stringutil.ConvertPosInUtf8(&expr, int64(matches[occurrence-1][0])) + pos - 1
|
|
} else {
|
|
i64s[i] = stringutil.ConvertPosInUtf8(&expr, int64(matches[occurrence-1][1])) + pos - 1
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// https://dev.mysql.com/doc/refman/8.0/en/regexp.html#function_regexp-replace
|
|
type regexpReplaceFunctionClass struct {
|
|
baseFunctionClass
|
|
}
|
|
|
|
func (c *regexpReplaceFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) {
|
|
if err := c.verifyArgs(args); err != nil {
|
|
return nil, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
argTp := []types.EvalType{types.ETString, types.ETString}
|
|
switch len(args) {
|
|
case 3:
|
|
argTp = append(argTp, types.ETString)
|
|
case 4:
|
|
argTp = append(argTp, types.ETString, types.ETInt)
|
|
case 5:
|
|
argTp = append(argTp, types.ETString, types.ETInt, types.ETInt)
|
|
case 6:
|
|
argTp = append(argTp, types.ETString, types.ETInt, types.ETInt, types.ETString)
|
|
}
|
|
|
|
bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETString, argTp...)
|
|
if err != nil {
|
|
return nil, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
argType := args[0].GetType()
|
|
bf.tp.SetFlen(argType.GetFlen())
|
|
sig := builtinRegexpReplaceFuncSig{
|
|
regexpBaseFuncSig: regexpBaseFuncSig{baseBuiltinFunc: bf},
|
|
}
|
|
sig.setPbCode(tipb.ScalarFuncSig_RegexpReplaceSig)
|
|
|
|
if sig.isBinaryCollation() {
|
|
return nil, ErrRegexp.GenWithStackByArgs(binaryCollateErr)
|
|
}
|
|
|
|
return &sig, nil
|
|
}
|
|
|
|
type builtinRegexpReplaceFuncSig struct {
|
|
regexpBaseFuncSig
|
|
}
|
|
|
|
func (re *builtinRegexpReplaceFuncSig) vectorized() bool {
|
|
return true
|
|
}
|
|
|
|
func (re *builtinRegexpReplaceFuncSig) Clone() builtinFunc {
|
|
newSig := &builtinRegexpReplaceFuncSig{}
|
|
newSig.regexpBaseFuncSig = *re.regexpBaseFuncSig.clone()
|
|
return newSig
|
|
}
|
|
|
|
func (re *builtinRegexpReplaceFuncSig) getReplacedBinStr(reg *regexp.Regexp, bexpr []byte, trimmedBexpr []byte, repl string, pos int64, occurrence int64) (string, bool, error) {
|
|
count := occurrence
|
|
repFunc := func(matchedStr []byte) []byte {
|
|
if occurrence == 0 {
|
|
return []byte(repl)
|
|
}
|
|
|
|
count--
|
|
if count == 0 {
|
|
return []byte(repl)
|
|
}
|
|
|
|
return matchedStr
|
|
}
|
|
|
|
replacedBStr := reg.ReplaceAllFunc(trimmedBexpr, repFunc)
|
|
|
|
return fmt.Sprintf("0x%s", strings.ToUpper(hex.EncodeToString(append(bexpr[:pos-1], replacedBStr...)))), false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpReplaceFuncSig) getReplacedStr(reg *regexp.Regexp, expr string, trimmedExpr string, repl string, trimmedLen int64, occurrence int64) (string, bool, error) {
|
|
count := occurrence
|
|
repFunc := func(matchedStr string) string {
|
|
if occurrence == 0 {
|
|
return repl
|
|
}
|
|
|
|
count--
|
|
if count == 0 {
|
|
return repl
|
|
}
|
|
|
|
return matchedStr
|
|
}
|
|
|
|
replacedStr := reg.ReplaceAllStringFunc(trimmedExpr, repFunc)
|
|
return expr[:trimmedLen] + replacedStr, false, nil
|
|
}
|
|
|
|
func (re *builtinRegexpReplaceFuncSig) evalString(row chunk.Row) (string, bool, error) {
|
|
expr, isNull, err := re.args[0].EvalString(re.ctx, row)
|
|
trimmedExpr := expr
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
pat, isNull, err := re.args[1].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
} else if len(pat) == 0 {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(emptyPatternErr)
|
|
}
|
|
|
|
repl, isNull, err := re.args[2].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
pos := int64(1)
|
|
occurrence := int64(0)
|
|
matchType := ""
|
|
argNum := len(re.args)
|
|
var bexpr []byte
|
|
var trimmedBexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
trimmedBexpr = bexpr
|
|
}
|
|
|
|
trimmedLen := int64(0)
|
|
if argNum >= 4 {
|
|
pos, isNull, err = re.args[3].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
// Check position and trim expr
|
|
if re.isBinaryCollation() {
|
|
if pos < 1 || pos > int64(len(trimmedBexpr)) {
|
|
if checkOutRangePos(len(trimmedBexpr), pos) {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
trimmedBexpr = bexpr[pos-1:] // Trim
|
|
} else {
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(trimmedExpr)) {
|
|
if checkOutRangePos(len(trimmedExpr), pos) {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
trimmedLen = stringutil.TrimUtf8String(&trimmedExpr, pos-1) // Trim
|
|
}
|
|
}
|
|
|
|
if argNum >= 5 {
|
|
occurrence, isNull, err = re.args[4].EvalInt(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
|
|
if occurrence < 0 {
|
|
occurrence = 1
|
|
}
|
|
}
|
|
|
|
if argNum == 6 {
|
|
matchType, isNull, err = re.args[5].EvalString(re.ctx, row)
|
|
if isNull || err != nil {
|
|
return "", true, err
|
|
}
|
|
}
|
|
|
|
memorize := func() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
re.memorizedErr = err
|
|
return
|
|
}
|
|
re.memorize(compile, pat)
|
|
}
|
|
|
|
if re.canMemorize(regexpReplaceMatchTypeIdx) {
|
|
re.once.Do(memorize) // Avoid data race
|
|
}
|
|
|
|
if !re.isMemorizedRegexpInitialized() {
|
|
compile, err := re.genCompile(matchType)
|
|
if err != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
reg, err := compile(pat)
|
|
if err != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.getReplacedBinStr(reg, bexpr, trimmedBexpr, repl, pos, occurrence)
|
|
}
|
|
return re.getReplacedStr(reg, expr, trimmedExpr, repl, trimmedLen, occurrence)
|
|
}
|
|
|
|
if re.memorizedErr != nil {
|
|
return "", true, ErrRegexp.GenWithStackByArgs(re.memorizedErr)
|
|
}
|
|
|
|
if re.isBinaryCollation() {
|
|
return re.getReplacedBinStr(re.memorizedRegexp, bexpr, trimmedBexpr, repl, pos, occurrence)
|
|
}
|
|
return re.getReplacedStr(re.memorizedRegexp, expr, trimmedExpr, repl, trimmedLen, occurrence)
|
|
}
|
|
|
|
// REGEXP_REPLACE(expr, pat, repl[, pos[, occurrence[, match_type]]])
|
|
func (re *builtinRegexpReplaceFuncSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error {
|
|
n := input.NumRows()
|
|
params := make([]*regexpParam, 0, 6)
|
|
defer releaseBuffers(&re.baseBuiltinFunc, params)
|
|
|
|
for i := 0; i < 2; i++ {
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, i, input, false)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
params = append(params, param)
|
|
}
|
|
|
|
paramLen := len(re.args)
|
|
|
|
// Handle repl parameter
|
|
hasRepl := (paramLen >= 3)
|
|
param, isConstNull, err := buildStringParam(&re.baseBuiltinFunc, 2, input, !hasRepl)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Handle position parameter
|
|
hasPosition := (paramLen >= 4)
|
|
param, isConstNull, err = buildIntParam(&re.baseBuiltinFunc, 3, input, !hasPosition, 1)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Handle occurrence parameter
|
|
hasOccur := (paramLen >= 5)
|
|
param, isConstNull, err = buildIntParam(&re.baseBuiltinFunc, 4, input, !hasOccur, 0)
|
|
params = append(params, param)
|
|
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
// Handle match type
|
|
hasMatchType := (paramLen == 6)
|
|
param, isConstNull, err = buildStringParam(&re.baseBuiltinFunc, 5, input, !hasMatchType)
|
|
params = append(params, param)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
if isConstNull {
|
|
fillNullStringIntoResult(result, n)
|
|
return nil
|
|
}
|
|
|
|
err = re.tryToMemorize(params, regexpReplaceMatchTypeIdx, n)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
result.ReserveString(n)
|
|
buffers := getBuffers(params)
|
|
|
|
// Start to calculate
|
|
for i := 0; i < n; i++ {
|
|
if isResultNull(buffers, i) {
|
|
result.AppendNull()
|
|
continue
|
|
}
|
|
|
|
expr := params[0].getStringVal(i)
|
|
trimmedExpr := expr
|
|
var bexpr []byte
|
|
var trimmedBexpr []byte
|
|
|
|
if re.isBinaryCollation() {
|
|
bexpr = []byte(expr)
|
|
trimmedBexpr = bexpr
|
|
}
|
|
|
|
repl := params[2].getStringVal(i)
|
|
|
|
// Check position and trim expr
|
|
pos := params[3].getIntVal(i)
|
|
trimmedLen := int64(0)
|
|
if re.isBinaryCollation() {
|
|
if pos < 1 || pos > int64(len(trimmedBexpr)) {
|
|
if checkOutRangePos(len(trimmedBexpr), pos) {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
trimmedBexpr = bexpr[pos-1:] // Trim
|
|
} else {
|
|
if pos < 1 || pos > int64(utf8.RuneCountInString(trimmedExpr)) {
|
|
if checkOutRangePos(len(trimmedExpr), pos) {
|
|
return ErrRegexp.GenWithStackByArgs(invalidIndex)
|
|
}
|
|
}
|
|
|
|
trimmedLen = stringutil.TrimUtf8String(&trimmedExpr, pos-1) // Trim
|
|
}
|
|
|
|
// Get occurrence
|
|
occurrence := params[4].getIntVal(i)
|
|
if occurrence < 0 {
|
|
occurrence = 1
|
|
}
|
|
|
|
// Get match type and generate regexp
|
|
matchType := params[5].getStringVal(i)
|
|
reg, err := re.genRegexp(params[1].getStringVal(i), matchType)
|
|
if err != nil {
|
|
return ErrRegexp.GenWithStackByArgs(err)
|
|
}
|
|
|
|
// Start to replace
|
|
count := occurrence
|
|
if re.isBinaryCollation() {
|
|
repFunc := func(matchedStr []byte) []byte {
|
|
if occurrence == 0 {
|
|
return []byte(repl)
|
|
}
|
|
|
|
count--
|
|
if count == 0 {
|
|
return []byte(repl)
|
|
}
|
|
|
|
return matchedStr
|
|
}
|
|
|
|
replacedBStr := reg.ReplaceAllFunc(trimmedBexpr, repFunc)
|
|
result.AppendString(fmt.Sprintf("0x%s", strings.ToUpper(hex.EncodeToString(append(bexpr[:pos-1], replacedBStr...)))))
|
|
} else {
|
|
repFunc := func(matchedStr string) string {
|
|
if occurrence == 0 {
|
|
return repl
|
|
}
|
|
|
|
count--
|
|
if count == 0 {
|
|
return repl
|
|
}
|
|
|
|
return matchedStr
|
|
}
|
|
|
|
replacedStr := reg.ReplaceAllStringFunc(trimmedExpr, repFunc)
|
|
result.AppendString(expr[:trimmedLen] + replacedStr)
|
|
}
|
|
}
|
|
return nil
|
|
}
|