[improvement] Improve sig handler (#8545)

* Refactor glog's default signal handler

Co-authored-by: Zhengguo Yang <780531911@qq.com>
This commit is contained in:
yiguolei
2022-03-22 10:40:31 +08:00
committed by GitHub
parent 011985e7e3
commit 989e03ddf9
13 changed files with 519 additions and 9 deletions

View File

@ -34,6 +34,7 @@ header:
- 'tsan_suppressions'
- 'docs/.markdownlintignore'
- 'fe/fe-core/src/test/resources/data/net_snmp_normal'
- 'be/src/common/signal_handler.h'
- 'be/src/olap/lru_cache.cpp'
- 'be/src/olap/lru_cache.h'
- 'be/src/olap/skiplist.h'

View File

@ -26,6 +26,7 @@ jmockit/*
status.*
env*
lru*
signal_handler.h
skiplist.h
string_search.hpp
coding.*

View File

@ -576,3 +576,36 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
be/src/common/signal_handler.h:
Copyright (c) 2008, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -143,6 +143,9 @@ set_target_properties(gflags PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/
add_library(glog STATIC IMPORTED)
set_target_properties(glog PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libglog.a)
add_library(backtrace STATIC IMPORTED)
set_target_properties(backtrace PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libbacktrace.a)
add_library(re2 STATIC IMPORTED)
set_target_properties(re2 PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libre2.a)
@ -529,6 +532,7 @@ endif()
# When adding new dependencies, If you don’t know if it can run on all platforms,
# add it here first.
set(COMMON_THIRDPARTY
backtrace
rocksdb
cyrus-sasl
libs2

View File

@ -224,7 +224,7 @@ void Daemon::init(int argc, char** argv, const std::vector<StorePath>& paths) {
// google::SetVersionString(get_build_version(false));
// google::ParseCommandLineFlags(&argc, &argv, true);
google::ParseCommandLineFlags(&argc, &argv, true);
init_glog("be", true);
init_glog("be");
LOG(INFO) << get_version_string(false);

View File

@ -47,7 +47,7 @@ static bool iequals(const std::string& a, const std::string& b) {
return true;
}
bool init_glog(const char* basename, bool install_signal_handler) {
bool init_glog(const char* basename) {
std::lock_guard<std::mutex> logging_lock(logging_mutex);
if (logging_initialized) {
@ -58,10 +58,6 @@ bool init_glog(const char* basename, bool install_signal_handler) {
FLAGS_alsologtostderr = true;
}
if (install_signal_handler) {
google::InstallFailureSignalHandler();
}
// don't log to stderr
FLAGS_stderrthreshold = 5;
// set glog log dir

View File

@ -0,0 +1,446 @@
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Satoru Takabayashi
//
// Implementation of InstallFailureSignalHandler().
#define BOOST_STACKTRACE_USE_BACKTRACE
#include <boost/stacktrace.hpp>
#include <glog/logging.h>
#include <gutil/macros.h>
#include <csignal>
#include <ctime>
#ifdef HAVE_UCONTEXT_H
# include <ucontext.h>
#endif
#ifdef HAVE_SYS_UCONTEXT_H
# include <sys/ucontext.h>
#endif
#include <algorithm>
namespace doris::signal {
namespace {
// Signals for which InstallFailureSignalHandler() registers the failure
// handler, paired with the name printed by DumpSignalInfo(). The names are
// spelled out by hand instead of calling strsignal() to avoid introducing
// yet another #ifdef complication.
struct FailureSignalEntry {
    int number;
    const char* name;
};
const FailureSignalEntry kFailureSignals[] = {
        {SIGSEGV, "SIGSEGV"},
        {SIGILL, "SIGILL"},
        {SIGFPE, "SIGFPE"},
        {SIGABRT, "SIGABRT"},
        {SIGBUS, "SIGBUS"},
        {SIGTERM, "SIGTERM"},
};
// Set to true at the end of InstallFailureSignalHandler().
static bool kFailureSignalHandlerInstalled = false;
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* The signal explainers below are copied from Meta's Folly.
*/
// Translates the si_code delivered with a SIGILL into a short description.
// Returns nullptr when the code is not one of the ILL_* values listed below.
const char* sigill_reason(int si_code) {
    struct CodeText {
        int code;
        const char* text;
    };
    static constexpr CodeText kReasons[] = {
            {ILL_ILLOPC, "illegal opcode"},
            {ILL_ILLOPN, "illegal operand"},
            {ILL_ILLADR, "illegal addressing mode"},
            {ILL_ILLTRP, "illegal trap"},
            {ILL_PRVOPC, "privileged opcode"},
            {ILL_PRVREG, "privileged register"},
            {ILL_COPROC, "coprocessor error"},
            {ILL_BADSTK, "internal stack error"},
    };
    for (const CodeText& entry : kReasons) {
        if (entry.code == si_code) {
            return entry.text;
        }
    }
    return nullptr;
}
// Translates the si_code delivered with a SIGFPE into a short description.
// Returns nullptr when the code is not one of the FPE_* values listed below.
const char* sigfpe_reason(int si_code) {
    struct CodeText {
        int code;
        const char* text;
    };
    static constexpr CodeText kReasons[] = {
            {FPE_INTDIV, "integer divide by zero"},
            {FPE_INTOVF, "integer overflow"},
            {FPE_FLTDIV, "floating-point divide by zero"},
            {FPE_FLTOVF, "floating-point overflow"},
            {FPE_FLTUND, "floating-point underflow"},
            {FPE_FLTRES, "floating-point inexact result"},
            {FPE_FLTINV, "floating-point invalid operation"},
            {FPE_FLTSUB, "subscript out of range"},
    };
    for (const CodeText& entry : kReasons) {
        if (entry.code == si_code) {
            return entry.text;
        }
    }
    return nullptr;
}
// Translates the si_code delivered with a SIGSEGV into a short description.
// Returns nullptr for any code other than SEGV_MAPERR and SEGV_ACCERR.
const char* sigsegv_reason(int si_code) {
    if (si_code == SEGV_MAPERR) {
        return "address not mapped to object";
    }
    if (si_code == SEGV_ACCERR) {
        return "invalid permissions for mapped object";
    }
    return nullptr;
}
// Translates the si_code delivered with a SIGBUS into a short description.
// Returns nullptr for any code other than the three BUS_* values below.
// MCEERR_AR and MCEERR_AO: in sigaction(2) but not in headers.
const char* sigbus_reason(int si_code) {
    if (si_code == BUS_ADRALN) {
        return "invalid address alignment";
    }
    if (si_code == BUS_ADRERR) {
        return "nonexistent physical address";
    }
    if (si_code == BUS_OBJERR) {
        return "object-specific hardware error";
    }
    return nullptr;
}
// Picks the per-signal explainer for `signum` and returns its description of
// `si_code`. Returns nullptr when the signal is not SIGILL, SIGFPE, SIGSEGV or
// SIGBUS, or when the chosen explainer does not recognise the code.
const char* signal_reason(int signum, int si_code) {
    if (signum == SIGILL) {
        return sigill_reason(si_code);
    }
    if (signum == SIGFPE) {
        return sigfpe_reason(si_code);
    }
    if (signum == SIGSEGV) {
        return sigsegv_reason(si_code);
    }
    if (signum == SIGBUS) {
        return sigbus_reason(si_code);
    }
    return nullptr;
}
// Bounded text formatter used to build the crash messages. We don't use
// printf() as it's not async signal safe. All output is written into the
// caller-supplied buffer; anything that does not fit is silently dropped and
// no terminating '\0' is ever appended.
class MinimalFormatter {
public:
    // `buffer` must point to at least `size` writable bytes.
    MinimalFormatter(char* buffer, size_t size)
            : buffer_(buffer), cursor_(buffer), end_(buffer + size) {}

    // Returns the number of bytes written in the buffer.
    std::size_t num_bytes_written() const { return static_cast<std::size_t>(cursor_ - buffer_); }

    // Appends string from "str" and updates the internal cursor. Copying stops
    // at the terminating '\0' or at the end of the buffer, whichever is first.
    void AppendString(const char* str) {
        ptrdiff_t i = 0;
        while (str[i] != '\0' && cursor_ + i < end_) {
            cursor_[i] = str[i];
            ++i;
        }
        cursor_ += i;
    }

    // Formats "number" in "radix" and updates the internal cursor.
    // Lowercase letters are used for 'a' - 'z'. Digits are produced least
    // significant first and then reversed, so if the buffer fills up it is the
    // most significant digits that are lost.
    void AppendUint64(uint64 number, unsigned radix) {
        // A radix below 2 cannot represent a number: 0 would divide by zero and
        // 1 would never reduce `number`. Write nothing in that case.
        if (radix < 2) {
            return;
        }
        unsigned i = 0;
        while (cursor_ + i < end_) {
            const uint64 tmp = number % radix;
            number /= radix;
            cursor_[i] = static_cast<char>(tmp < 10 ? '0' + tmp : 'a' + tmp - 10);
            ++i;
            if (number == 0) {
                break;
            }
        }
        // Reverse the bytes written.
        std::reverse(cursor_, cursor_ + i);
        cursor_ += i;
    }

    // Formats "number" as hexadecimal number with a "0x" prefix, and updates
    // the internal cursor. The text is right-aligned in a field of "width"
    // characters by inserting spaces in front; the field is clamped to the
    // space left in the buffer so padding can never run past its end.
    void AppendHexWithPadding(uint64 number, int width) {
        char* start = cursor_;
        AppendString("0x");
        AppendUint64(number, 16);
        const ptrdiff_t available = end_ - start;
        const ptrdiff_t field = std::min<ptrdiff_t>(width, available);
        // Move to right and add padding in front if needed.
        if (cursor_ - start < field) {
            const ptrdiff_t delta = field - (cursor_ - start);
            // Source and destination overlap with the destination further
            // right, so the copy has to run back to front.
            std::copy_backward(start, cursor_, cursor_ + delta);
            std::fill(start, start + delta, ' ');
            cursor_ = start + field;
        }
    }

private:
    char* buffer_;           // Start of the output buffer.
    char* cursor_;           // Next byte to write; always within [buffer_, end_].
    const char* const end_;  // One past the last writable byte.
};
// Default failure writer: sends `size` bytes starting at `data` to the
// standard error descriptor with a single write() call.
void WriteToStderr(const char* data, size_t size) {
    const auto written = write(STDERR_FILENO, data, size);
    // Errors are deliberately ignored; the result is only captured so that it
    // is not reported as an unused return value.
    (void)written;
}
// Sink through which DumpTimeInfo() and DumpSignalInfo() emit their output.
// It is initialised to WriteToStderr.
// NOTE(review): the previous comment said this can be changed by
// InstallFailureWriter(), but no such function is visible in this file —
// confirm whether a setter still exists anywhere.
void (*g_failure_writer)(const char* data, size_t size) = WriteToStderr;
// Dumps time information. We don't dump human-readable time information
// as localtime() is not guaranteed to be async signal safe.
void DumpTimeInfo() {
time_t time_in_sec = time(NULL);
char buf[256]; // Big enough for time info.
MinimalFormatter formatter(buf, sizeof(buf));
formatter.AppendString("*** Aborted at ");
formatter.AppendUint64(static_cast<uint64>(time_in_sec), 10);
formatter.AppendString(" (unix time)");
formatter.AppendString(" try \"date -d @");
formatter.AppendUint64(static_cast<uint64>(time_in_sec), 10);
formatter.AppendString("\" if you are using GNU date ***\n");
g_failure_writer(buf, formatter.num_bytes_written());
}
// Formats a one-line description of the received signal and hands it to
// g_failure_writer.
//
// The line contains, in order: the signal name (or "Signal <n>" if it is not
// in kFailureSignals), the explanation of siginfo->si_code (or the numeric
// code when no explanation is known), siginfo->si_addr, the current PID, the
// current thread id and siginfo->si_pid.
//
// signal_number: the signal being handled.
// siginfo:       the siginfo_t passed to the signal handler; must not be null.
void DumpSignalInfo(int signal_number, siginfo_t* siginfo) {
    // Get the signal name.
    const char* signal_name = nullptr;
    for (const auto& entry : kFailureSignals) {
        if (signal_number == entry.number) {
            signal_name = entry.name;
            break;
        }
    }
    char buf[256]; // Big enough for signal info.
    MinimalFormatter formatter(buf, sizeof(buf));
    formatter.AppendString("*** ");
    if (signal_name != nullptr) {
        formatter.AppendString(signal_name);
    } else {
        // Use the signal number if the name is unknown. The signal name
        // should be known, but just in case.
        formatter.AppendString("Signal ");
        formatter.AppendUint64(static_cast<uint64>(signal_number), 10);
    }
    formatter.AppendString(" ");
    // Detail reason explain
    const int code = siginfo->si_code;
    const char* reason = signal_reason(signal_number, code);
    if (reason != nullptr) {
        formatter.AppendString(reason);
    } else {
        // No textual explanation is known: print the (int) code itself so the
        // reader can look it up. The formatter only handles unsigned values,
        // so a negative code is written as '-' followed by its magnitude.
        formatter.AppendString("unknown detail explain (si_code ");
        if (code < 0) {
            formatter.AppendString("-");
            formatter.AppendUint64(static_cast<uint64>(-static_cast<long long>(code)), 10);
        } else {
            formatter.AppendUint64(static_cast<uint64>(code), 10);
        }
        formatter.AppendString(")");
    }
    // NOTE(review): si_addr and si_pid are both printed for every signal; per
    // sigaction(2) each is only meaningful for some signals, so one of the two
    // values may be noise — confirm whether the output should be narrowed.
    formatter.AppendString(" (@0x");
    formatter.AppendUint64(reinterpret_cast<uintptr_t>(siginfo->si_addr), 16);
    formatter.AppendString(")");
    formatter.AppendString(" received by PID ");
    formatter.AppendUint64(static_cast<uint64>(getpid()), 10);
    formatter.AppendString(" (TID 0x");
    // We assume pthread_t is an integral number or a pointer, rather
    // than a complex struct. In some environments, pthread_self()
    // returns an uint64 but in some other environments pthread_self()
    // returns a pointer.
    pthread_t id = pthread_self();
    formatter.AppendUint64(reinterpret_cast<uint64>(reinterpret_cast<const char*>(id)), 16);
    formatter.AppendString(") ");
    // Only linux has the PID of the signal sender in si_pid.
    formatter.AppendString("from PID ");
    formatter.AppendUint64(static_cast<uint64>(siginfo->si_pid), 10);
    formatter.AppendString("; ");
    formatter.AppendString("stack trace: ***\n");
    g_failure_writer(buf, formatter.num_bytes_written());
}
// Restores the default disposition (SIG_DFL) for `signal_number` and then
// sends that signal to the current process with kill().
void InvokeDefaultSignalHandler(int signal_number) {
    struct sigaction default_action = {};
    sigemptyset(&default_action.sa_mask);
    default_action.sa_handler = SIG_DFL;
    sigaction(signal_number, &default_action, nullptr);

    const pid_t self = getpid();
    kill(self, signal_number);
}
// This variable is used for protecting FailureSignalHandler() from
// dumping stuff while another thread is doing it. Our policy is to let
// the first thread dump stuff and let other threads wait.
// It starts out NULL; the first thread to enter FailureSignalHandler()
// swaps in the address of its own local pthread_t, and nothing in this
// file resets it afterwards.
// See also comments in FailureSignalHandler().
static pthread_t* g_entered_thread_id_pointer = NULL;
// Atomic compare-and-swap: if *ptr equals `oldval`, stores `newval` into
// *ptr. Always returns the value *ptr held before the call, so the swap
// happened exactly when the return value equals `oldval`.
//
// The __sync_val_compare_and_swap builtin is used whenever the compiler
// identifies itself as GNU-compatible (__GNUC__) or the build system defines
// HAVE___SYNC_VAL_COMPARE_AND_SWAP. Nothing in this header defines the HAVE_
// macro, so relying on it alone would send every non-x86 build to the
// fallback below.
//
// Any other compiler gets a naive implementation, which has a race
// condition.
template <typename T>
inline T sync_val_compare_and_swap(T* ptr, T oldval, T newval) {
#if defined(HAVE___SYNC_VAL_COMPARE_AND_SWAP) || defined(__GNUC__)
    return __sync_val_compare_and_swap(ptr, oldval, newval);
#else
    // Not atomic: another thread can modify *ptr between the read and the
    // write.
    T ret = *ptr;
    if (ret == oldval) {
        *ptr = newval;
    }
    return ret;
#endif
}
// Signal handler registered by InstallFailureSignalHandler().
//
// Dumps the time and signal information through g_failure_writer, prints a
// boost stack trace to std::cout, flushes the glog files, and finally
// re-sends the signal to this process with the default disposition restored
// (see InvokeDefaultSignalHandler()).
//
// signal_number: the signal being handled.
// signal_info:   the siginfo_t passed to the handler; forwarded to
//                DumpSignalInfo().
// ucontext:      not used by this function.
void FailureSignalHandler(int signal_number,
siginfo_t *signal_info,
void *ucontext)
{
// First check if we've already entered the function. We use an atomic
// compare and swap operation for platforms that support it. For other
// platforms, we use a naive method that could lead to a subtle race.
// We assume pthread_self() is async signal safe, though it's not
// officially guaranteed.
pthread_t my_thread_id = pthread_self();
// NOTE: We could simply use pthread_t rather than pthread_t* for this,
// if pthread_self() is guaranteed to return non-zero value for thread
// ids, but there is no such guarantee. We need to distinguish if the
// old value (value returned from __sync_val_compare_and_swap) is
// different from the original value (in this case NULL).
pthread_t* old_thread_id_pointer =
sync_val_compare_and_swap(
&g_entered_thread_id_pointer,
static_cast<pthread_t*>(NULL),
&my_thread_id);
if (old_thread_id_pointer != NULL) {
// We've already entered the signal handler. What should we do?
if (pthread_equal(my_thread_id, *g_entered_thread_id_pointer)) {
// It looks the current thread is reentering the signal handler.
// Something must be going wrong (maybe we are reentering by another
// type of signal?). Kill ourself by the default signal handler.
InvokeDefaultSignalHandler(signal_number);
}
// Another thread is dumping stuff. Let's wait until that thread
// finishes the job and kills the process.
// (The re-entrant case above also ends up here if the re-sent signal
// has not terminated the process yet.)
while (true) {
sleep(1);
}
}
// This is the first time we enter the signal handler. We are going to
// do some interesting stuff from here.
// TODO(satorux): We might want to set timeout here using alarm(), but
// mixing alarm() and sleep() can be a bad idea.
// First dump time info.
DumpTimeInfo();
// Then the signal name, reason, address and PID/TID details.
DumpSignalInfo(signal_number, signal_info);
// *** TRANSITION ***
//
// BEFORE this point, all code must be async-termination-safe!
// NOTE(review): the original text said "See WARNING above", but no such
// warning block exists in this file.
//
// AFTER this point, we do unsafe things, like using LOG()!
// The process could be terminated or hung at any time. We try to
// do more useful things first and riskier things later.
// Use boost stacktrace to print more detail info
// Note that this goes to std::cout, whereas the time and signal info above
// go through g_failure_writer.
std::cout << boost::stacktrace::stacktrace() << std::endl;
// Flush the logs before we do anything in case 'anything'
// causes problems.
google::FlushLogFilesUnsafe(0);
// Kill ourself by the default signal handler.
InvokeDefaultSignalHandler(signal_number);
}
} // namespace
// Registers FailureSignalHandler (with SA_SIGINFO, so it receives the
// siginfo_t) for every signal listed in kFailureSignals, then records that
// installation has happened. Each sigaction() call is wrapped in CHECK_ERR.
//
// Declared `inline` because this function is defined in a header: without it
// every translation unit that includes the header would emit its own external
// definition and the link would fail with duplicate symbols.
inline void InstallFailureSignalHandler() {
    // Build the sigaction struct.
    struct sigaction sig_action;
    memset(&sig_action, 0, sizeof(sig_action));
    sigemptyset(&sig_action.sa_mask);
    sig_action.sa_flags |= SA_SIGINFO;
    sig_action.sa_sigaction = &FailureSignalHandler;
    for (const auto& entry : kFailureSignals) {
        CHECK_ERR(sigaction(entry.number, &sig_action, nullptr));
    }
    kFailureSignalHandlerInstalled = true;
}
} // namespace doris

View File

@ -42,6 +42,7 @@
#include "common/daemon.h"
#include "common/logging.h"
#include "common/resource_tls.h"
#include "common/signal_handler.h"
#include "common/status.h"
#include "common/utils.h"
#include "env/env.h"
@ -274,6 +275,7 @@ struct Checker
;
int main(int argc, char** argv) {
doris::signal::InstallFailureSignalHandler();
// check if print version or help
if (argc > 1) {

View File

@ -32,7 +32,7 @@ namespace doris {
// glog doesn't allow multiple invocations of InitGoogleLogging. This method conditionally
// calls InitGoogleLogging only if it hasn't been called before.
bool init_glog(const char* basename, bool install_signal_handler = false);
bool init_glog(const char* basename);
// Shuts down the google logging library. Call before exit to ensure that log files are
// flushed. May only be called once.

View File

@ -177,8 +177,10 @@ if [[ ${HELP} -eq 1 ]]; then
fi
# build thirdparty libraries if necessary
if [[ ! -f ${DORIS_THIRDPARTY}/installed/lib/libsimdjson.a ]]; then
if [[ ! -f ${DORIS_THIRDPARTY}/installed/lib/libbacktrace.a ]]; then
echo "Thirdparty libraries need to be build ..."
# Remove all installed packages first, because some libraries (e.g. lz4) fail to build if their library files already exist.
rm -rf ${DORIS_THIRDPARTY}/installed
${DORIS_THIRDPARTY}/build-thirdparty.sh -j $PARALLEL
fi

View File

@ -2,6 +2,9 @@
This file contains version of the third-party dependency libraries in the build-env image. The docker build-env image is apache/incubator-doris, and the tag is `build-env-${version}`
## v20220321
- Added: libbacktrace, which is used by Boost.Stacktrace to print stack traces.
## v20220316
- Modified: CRoaring 0.3.4 -> 0.4.0

View File

@ -220,6 +220,20 @@ check_if_archieve_exist() {
fi
}
# libbacktrace: configure with -fPIC, build, and install under ${TP_INSTALL_DIR}.
build_libbacktrace() {
    check_if_source_exist ${LIBBACKTRACE_SOURCE}
    cd ${TP_SOURCE_DIR}/${LIBBACKTRACE_SOURCE}

    # The flags are passed only to this configure invocation.
    CFLAGS="-fPIC" \
    CPPFLAGS="-I${TP_INCLUDE_DIR} -fPIC" \
    CXXFLAGS="-I${TP_INCLUDE_DIR} -fPIC" \
    LDFLAGS="-L${TP_LIB_DIR}" \
    ./configure --prefix=${TP_INSTALL_DIR}

    make -j ${PARALLEL}
    make install
}
# libevent
build_libevent() {
check_if_source_exist $LIBEVENT_SOURCE
@ -983,6 +997,7 @@ build_hdfs3
build_benchmark
build_breakpad
build_simdjson
build_libbacktrace
echo "Finished to build all thirdparties"

9
thirdparty/vars.sh vendored
View File

@ -371,6 +371,12 @@ SIMDJSON_NAME=simdjson-1.0.2.tar.gz
SIMDJSON_SOURCE=simdjson-1.0.2
SIMDJSON_MD5SUM="5bb34cca7087a99c450dbdfe406bdc7d"
# libbacktrace
LIBBACKTRACE_DOWNLOAD="https://codeload.github.com/ianlancetaylor/libbacktrace/zip/2446c66076480ce07a6bd868badcbceb3eeecc2e"
LIBBACKTRACE_NAME=libbacktrace-2446c66076480ce07a6bd868badcbceb3eeecc2e.zip
LIBBACKTRACE_SOURCE=libbacktrace-2446c66076480ce07a6bd868badcbceb3eeecc2e
LIBBACKTRACE_MD5SUM="6c79a8012870a24610c0d9c3621b23fe"
# all thirdparties which need to be downloaded is set in array TP_ARCHIVES
export TP_ARCHIVES="LIBEVENT
OPENSSL
@ -423,4 +429,5 @@ PDQSORT
BENCHMARK
BREAKPAD
XSIMD
SIMDJSON"
SIMDJSON
LIBBACKTRACE"