Files
doris/be/src/exprs/like_predicate.h
yubingpeng 064f253177 fix escape character in like predicate (#75)
now select "abcd%1" like "abcd\%%" is true
2017-09-05 19:54:10 +08:00

182 lines
6.8 KiB
C++

// Modifications copyright (C) 2017, Baidu.com, Inc.
// Copyright 2017 The Apache Software Foundation
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef BDG_PALO_BE_SRC_QUERY_EXPRS_LIKE_PREDICATE_H
#define BDG_PALO_BE_SRC_QUERY_EXPRS_LIKE_PREDICATE_H
#include <string>
#include <memory>
#include <re2/re2.h>
#include "exprs/predicate.h"
#include "gen_cpp/Exprs_types.h"
#include "runtime/string_search.hpp"
namespace palo {
class LikePredicate {
public:
static void init();
private:
typedef palo_udf::BooleanVal (*LikePredicateFunction) (
palo_udf::FunctionContext*, const palo_udf::StringVal&, const palo_udf::StringVal&);
struct LikePredicateState {
char escape_char;
/// This is the function, set in the prepare function, that will be used to determine
/// the value of the predicate. It will be set depending on whether the expression is
/// a LIKE, RLIKE or REGEXP predicate, whether the pattern is a constant argument
/// and whether the pattern has any constant substrings. If the pattern is not a
/// constant argument, none of the following fields can be set because we cannot know
/// the format of the pattern in the prepare function and must deal with each pattern
/// seperately.
LikePredicateFunction function;
/// Holds the string the StringValue points to and is set any time StringValue is
/// used.
std::string search_string;
/// Used for LIKE predicates if the pattern is a constant argument, and is either a
/// constant string or has a constant string at the beginning or end of the pattern.
/// This will be set in order to check for that pattern in the corresponding part of
/// the string.
StringValue search_string_sv;
/// Used for LIKE predicates if the pattern is a constant argument and has a constant
/// string in the middle of it. This will be use in order to check for the substring
/// in the value.
StringSearch substring_pattern;
/// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
std::unique_ptr<re2::RE2> regex;
LikePredicateState() : escape_char('\\') {
}
void set_search_string(const std::string& search_string_arg) {
search_string = search_string_arg;
search_string_sv = StringValue(search_string);
substring_pattern = StringSearch(&search_string_sv);
}
};
friend class OpcodeRegistry;
static void like_prepare(
palo_udf::FunctionContext* context,
palo_udf::FunctionContext::FunctionStateScope scope);
static palo_udf::BooleanVal like(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
static void like_close(
palo_udf::FunctionContext* context,
palo_udf::FunctionContext::FunctionStateScope scope);
static void regex_prepare(
palo_udf::FunctionContext* context,
palo_udf::FunctionContext::FunctionStateScope scope);
static palo_udf::BooleanVal regex(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
/// Prepare function for regexp_like() when a third optional parameter is used
static void regexp_like_prepare(
palo_udf::FunctionContext* context,
palo_udf::FunctionContext::FunctionStateScope scope);
/// Handles regexp_like() when 3 parameters are passed to it
static palo_udf::BooleanVal regexp_like(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern,
const palo_udf::StringVal& match_parameter);
static void regex_close(
palo_udf::FunctionContext*,
palo_udf::FunctionContext::FunctionStateScope scope);
static palo_udf::BooleanVal regex_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
static palo_udf::BooleanVal like_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
/// Handling of like predicates that map to strstr
static palo_udf::BooleanVal constant_substring_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
/// Handling of like predicates that can be implemented using strncmp
static palo_udf::BooleanVal constant_starts_with_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
/// Handling of like predicates that can be implemented using strncmp
static palo_udf::BooleanVal constant_ends_with_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
/// Handling of like predicates that can be implemented using strcmp
static palo_udf::BooleanVal constant_equals_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
static palo_udf::BooleanVal constant_regex_fn_partial(
palo_udf::FunctionContext* context, const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
static palo_udf::BooleanVal constant_regex_fn(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern);
static palo_udf::BooleanVal regex_match(
palo_udf::FunctionContext* context, const palo_udf::StringVal& val,
const palo_udf::StringVal& pattern, bool is_like_pattern);
/// Convert a LIKE pattern (with embedded % and _) into the corresponding
/// regular expression pattern. Escaped chars are copied verbatim.
static void convert_like_pattern(
palo_udf::FunctionContext* context,
const palo_udf::StringVal& pattern,
std::string* re_pattern);
static void remove_escape_character(std::string* search_string);
};
}
#endif