Currently, the codebase contains a number of unnecessary includes. We can use the include-what-you-use tool to clean them up, and adopting a strict include-what-you-use policy brings many benefits.
91 lines
3.3 KiB
C++
91 lines
3.3 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
// This file is copied from
|
|
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/string-functions.cpp
|
|
// and modified by Doris
|
|
|
|
#include "exprs/string_functions.h"
|
|
|
|
#include <re2/re2.h>
|
|
#include <re2/stringpiece.h>
|
|
|
|
#include <sstream>
|
|
|
|
// NOTE: be careful not to use string::append. It is not performant.
|
|
namespace doris {
|
|
|
|
// This function sets options in the RE2 library before pattern matching.
|
|
bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str,
|
|
re2::RE2::Options* opts) {
|
|
for (int i = 0; i < match_parameter.size; i++) {
|
|
char match = match_parameter.data[i];
|
|
switch (match) {
|
|
case 'i':
|
|
opts->set_case_sensitive(false);
|
|
break;
|
|
case 'c':
|
|
opts->set_case_sensitive(true);
|
|
break;
|
|
case 'm':
|
|
opts->set_posix_syntax(true);
|
|
opts->set_one_line(false);
|
|
break;
|
|
case 'n':
|
|
opts->set_never_nl(false);
|
|
opts->set_dot_nl(true);
|
|
break;
|
|
default:
|
|
std::stringstream error;
|
|
error << "Illegal match parameter " << match;
|
|
*error_str = error.str();
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// The caller owns the returned regex. Returns nullptr if the pattern could not be compiled.
|
|
bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str,
|
|
const StringRef& match_parameter,
|
|
std::unique_ptr<re2::RE2>& re) {
|
|
re2::StringPiece pattern_sp(pattern.data, pattern.size);
|
|
re2::RE2::Options options;
|
|
// Disable error logging in case e.g. every row causes an error
|
|
options.set_log_errors(false);
|
|
// ATTN(cmy): no set it, or the lazy mode of regex won't work. See Doris #6587
|
|
// Return the leftmost longest match (rather than the first match).
|
|
// options.set_longest_match(true);
|
|
options.set_dot_nl(true);
|
|
if (match_parameter.size > 0 &&
|
|
!StringFunctions::set_re2_options(match_parameter, error_str, &options)) {
|
|
return false;
|
|
}
|
|
re.reset(new re2::RE2(pattern_sp, options));
|
|
if (!re->ok()) {
|
|
std::stringstream ss;
|
|
ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size)
|
|
<< std::endl
|
|
<< "Error: " << re->error();
|
|
*error_str = ss.str();
|
|
re.reset();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
} // namespace doris
|