DEV: Log Unicorn worker timeout backtraces to Rails.logger (#27257)

This commit introduces the following changes:

1. Introduce the `SignalTrapLogger` singleton which starts a single
   thread that polls a queue to log messages with the specified logger.
   This thread is necessary because most loggers cannot be used inside
   the `Signal.trap` context as they rely on mutexes which are not
   allowed within the context.

2. Move the monkey patch in `freedom_patches/unicorn_http_server_patch.rb` to
   `config/unicorn.config.rb`, which already monkey patches
   `Unicorn::HttpServer`.

3. `Unicorn::HttpServer` will now automatically send a `USR2` signal to
   a unicorn worker 2 seconds before the worker is timed out by the
   Unicorn master.

4. When a Unicorn worker receives a `USR2` signal, it will now log only
   the main thread's backtrace to `Rails.logger`. Previously, it was
   writing the backtraces to `STDOUT` with `puts`, which most people
   wouldn't read. Logging via `Rails.logger` makes the backtraces easily
   accessible via `/logs`.
This commit is contained in:
Alan Guo Xiang Tan
2024-06-03 12:51:12 +08:00
committed by GitHub
parent 4b2bd4d682
commit 23c38cbf11
6 changed files with 118 additions and 105 deletions

View File

@ -9,9 +9,9 @@ class Demon::Base
@demons
end
def self.start(count = 1, verbose: false)
def self.start(count = 1, verbose: false, logger: nil)
@demons ||= {}
count.times { |i| (@demons["#{prefix}_#{i}"] ||= new(i, verbose: verbose)).start }
count.times { |i| (@demons["#{prefix}_#{i}"] ||= new(i, verbose:, logger:)).start }
end
def self.stop
@ -39,7 +39,7 @@ class Demon::Base
attr_reader :pid, :parent_pid, :started, :index
attr_accessor :stop_timeout
def initialize(index, rails_root: nil, parent_pid: nil, verbose: false)
def initialize(index, rails_root: nil, parent_pid: nil, verbose: false, logger: nil)
@index = index
@pid = nil
@parent_pid = parent_pid || Process.pid
@ -47,6 +47,11 @@ class Demon::Base
@stop_timeout = 10
@rails_root = rails_root || Rails.root
@verbose = verbose
@logger = logger || Logger.new(STDERR)
end
def log(message, level: :info)
@logger.public_send(level, message)
end
def pid_file
@ -72,6 +77,7 @@ class Demon::Base
def stop
@started = false
if @pid
Process.kill(stop_signal, @pid)
@ -99,7 +105,7 @@ class Demon::Base
wait_for_stop.call
if alive?
STDERR.puts "Process would not terminate cleanly, force quitting. pid: #{@pid} #{self.class}"
log("Process would not terminate cleanly, force quitting. pid: #{@pid} #{self.class}")
Process.kill("KILL", @pid)
end
@ -125,8 +131,9 @@ class Demon::Base
rescue StandardError
-1
end
if dead
STDERR.puts "Detected dead worker #{@pid}, restarting..."
log("Detected dead worker #{@pid}, restarting...")
@pid = nil
@started = false
start
@ -138,7 +145,7 @@ class Demon::Base
if existing = already_running?
# should not happen ... so kill violently
STDERR.puts "Attempting to kill pid #{existing}"
log("Attempting to kill pid #{existing}")
Process.kill("TERM", existing)
end
@ -199,7 +206,7 @@ class Demon::Base
Process.kill "KILL", Process.pid
end
rescue => e
STDERR.puts "URGENT monitoring thread had an exception #{e}"
log("URGENT monitoring thread had an exception #{e}")
end
sleep 1
end

View File

@ -77,14 +77,14 @@ class Demon::EmailSync < ::Demon::Base
end
def after_fork
puts "[EmailSync] Loading EmailSync in process id #{Process.pid}"
log("[EmailSync] Loading EmailSync in process id #{Process.pid}")
loop do
break if Discourse.redis.set(HEARTBEAT_KEY, Time.now.to_i, ex: HEARTBEAT_INTERVAL, nx: true)
sleep HEARTBEAT_INTERVAL
end
puts "[EmailSync] Starting EmailSync main thread"
log("[EmailSync] Starting EmailSync main thread")
@running = true
@sync_data = {}
@ -158,18 +158,20 @@ class Demon::EmailSync < ::Demon::Base
Discourse.redis.del(HEARTBEAT_KEY)
exit 0
rescue => e
STDERR.puts e.message
STDERR.puts e.backtrace.join("\n")
log("#{e.message}: #{e.backtrace.join("\n")}")
exit 1
end
def kill_and_disconnect!(data)
data[:thread].kill
data[:thread].join
begin
data[:syncer]&.disconnect!
rescue Net::IMAP::ResponseError => err
puts "[EmailSync] Encountered a response error when disconnecting: #{err}"
log(
"[EmailSync] Encountered a response error when disconnecting: #{err}\n#{err.backtrace.join("\n")}",
)
end
end
end

View File

@ -3,8 +3,6 @@
require "demon/base"
class Demon::Sidekiq < ::Demon::Base
cattr_accessor :logger
def self.prefix
"sidekiq"
end
@ -13,37 +11,6 @@ class Demon::Sidekiq < ::Demon::Base
blk ? (@blk = blk) : @blk
end
def self.format(message)
"[#{Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.%6N")} ##{Process.pid}] #{message}"
end
def self.log(message, level: :info)
# We use an IO pipe and log messages using the logger in a seperate thread to avoid the `log writing failed. can't be called from trap context`
# error message that is raised when trying to log from within a `Signal.trap` block.
if logger
if !defined?(@logger_read_pipe)
@logger_read_pipe, @logger_write_pipe = IO.pipe
@logger_thread =
Thread.new do
begin
while readable_io = IO.select([@logger_read_pipe])
logger.public_send(level, readable_io.first[0].gets.strip)
end
rescue => e
STDOUT.puts self.format(
"Error in Sidekiq demon logger thread: #{e.message}\n#{e.backtrace.join("\n")}",
)
end
end
end
@logger_write_pipe.puts(message)
else
STDOUT.puts self.format(message)
end
end
private
def suppress_stdout
@ -54,12 +21,13 @@ class Demon::Sidekiq < ::Demon::Base
false
end
def log(message, level: :info)
self.class.log(message, level:)
def log_in_trap(message, level: :info)
SignalTrapLogger.instance.log(@logger, message, level: level)
end
def after_fork
Demon::Sidekiq.after_fork&.call
SignalTrapLogger.instance.after_fork
log("Loading Sidekiq in process id #{Process.pid}")
require "sidekiq/cli"
@ -67,9 +35,9 @@ class Demon::Sidekiq < ::Demon::Base
# Unicorn uses USR1 to indicate that log files have been rotated
Signal.trap("USR1") do
log("Sidekiq reopening logs...")
log_in_trap("Sidekiq reopening logs...")
Unicorn::Util.reopen_logs
log("Sidekiq done reopening logs...")
log_in_trap("Sidekiq done reopening logs...")
end
options = ["-c", GlobalSetting.sidekiq_workers.to_s]
@ -91,8 +59,7 @@ class Demon::Sidekiq < ::Demon::Base
load Rails.root + "config/initializers/100-sidekiq.rb"
cli.run
rescue => e
STDERR.puts e.message
STDERR.puts e.backtrace.join("\n")
log("Error encountered while starting Sidekiq: #{e.message}\n#{e.backtrace.join("\n")}")
exit 1
end
end