// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. // This file is copied from // https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/string-functions.cpp // and modified by Doris #include "exprs/string_functions.h" #include #include #include // NOTE: be careful not to use string::append. It is not performant. namespace doris { // This function sets options in the RE2 library before pattern matching. bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str, re2::RE2::Options* opts) { for (int i = 0; i < match_parameter.size; i++) { char match = match_parameter.data[i]; switch (match) { case 'i': opts->set_case_sensitive(false); break; case 'c': opts->set_case_sensitive(true); break; case 'm': opts->set_posix_syntax(true); opts->set_one_line(false); break; case 'n': opts->set_never_nl(false); opts->set_dot_nl(true); break; default: std::stringstream error; error << "Illegal match parameter " << match; *error_str = error.str(); return false; } } return true; } // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str, const StringRef& match_parameter, std::unique_ptr& re) { re2::StringPiece pattern_sp(pattern.data, pattern.size); re2::RE2::Options options; // Disable error logging in case e.g. every row causes an error options.set_log_errors(false); // ATTN(cmy): no set it, or the lazy mode of regex won't work. See Doris #6587 // Return the leftmost longest match (rather than the first match). // options.set_longest_match(true); options.set_dot_nl(true); if (match_parameter.size > 0 && !StringFunctions::set_re2_options(match_parameter, error_str, &options)) { return false; } re.reset(new re2::RE2(pattern_sp, options)); if (!re->ok()) { std::stringstream ss; ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size) << std::endl << "Error: " << re->error(); *error_str = ss.str(); re.reset(); return false; } return true; } } // namespace doris