FIX: crawler requests exceptions for non UTF-8 user agents with invalid bytes

This commit is contained in:
Arkshine
2024-05-24 03:49:17 +02:00
committed by Régis Hanol
parent b757275c1e
commit 1fffb236b2
5 changed files with 86 additions and 13 deletions

View File

@ -4,6 +4,7 @@ require "mobile_detection"
require "crawler_detection"
require "guardian"
require "http_language_parser"
require "http_user_agent_encoder"
module Middleware
class AnonymousCache
@ -73,6 +74,7 @@ module Middleware
def initialize(env, request = nil)
@env = env
@user_agent = HttpUserAgentEncoder.ensure_utf8(@env[USER_AGENT])
@request = request || Rack::Request.new(@env)
end
@ -82,7 +84,7 @@ module Middleware
@request[Auth::DefaultCurrentUserProvider::API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::USER_API_KEY].nil? &&
@env[Auth::DefaultCurrentUserProvider::HEADER_API_KEY].nil? &&
CrawlerDetection.is_blocked_crawler?(@env[USER_AGENT])
CrawlerDetection.is_blocked_crawler?(@user_agent)
end
# rubocop:disable Lint/BooleanSymbol
@ -98,7 +100,7 @@ module Middleware
# otherwise you get a broken params on the request
params = {}
MobileDetection.resolve_mobile_view!(@env[USER_AGENT], params, session) ? :true : :false
MobileDetection.resolve_mobile_view!(@user_agent, params, session) ? :true : :false
end
@is_mobile == :true
@ -126,14 +128,12 @@ module Middleware
def is_crawler?
@is_crawler ||=
begin
user_agent = @env[USER_AGENT]
if @env[DISCOURSE_RENDER] == "crawler" ||
CrawlerDetection.crawler?(user_agent, @env["HTTP_VIA"])
CrawlerDetection.crawler?(@user_agent, @env["HTTP_VIA"])
:true
else
if user_agent.downcase.include?("discourse") &&
!user_agent.downcase.include?("mobile")
if @user_agent.downcase.include?("discourse") &&
!@user_agent.downcase.include?("mobile")
:true
else
:false
@ -146,11 +146,11 @@ module Middleware
# rubocop:enable Lint/BooleanSymbol
def key_is_modern_mobile_device?
MobileDetection.modern_mobile_device?(@env[USER_AGENT]) if @env[USER_AGENT]
MobileDetection.modern_mobile_device?(@user_agent) if @user_agent
end
def key_is_old_browser?
CrawlerDetection.show_browser_update?(@env[USER_AGENT]) if @env[USER_AGENT]
CrawlerDetection.show_browser_update?(@user_agent) if @user_agent
end
def cache_key

View File

@ -2,6 +2,7 @@
require "method_profiler"
require "middleware/anonymous_cache"
require "http_user_agent_encoder"
class Middleware::RequestTracker
@@detailed_request_loggers = nil
@ -186,10 +187,7 @@ class Middleware::RequestTracker
if h[:is_crawler]
user_agent = env["HTTP_USER_AGENT"]
if user_agent && (user_agent.encoding != Encoding::UTF_8)
user_agent = user_agent.encode("utf-8")
user_agent.scrub!
end
user_agent = HttpUserAgentEncoder.ensure_utf8(user_agent) if user_agent
h[:user_agent] = user_agent
end