mirror of
https://github.com/discourse/discourse.git
synced 2025-05-31 14:48:03 +08:00
FIX: crawler requests not tracked for non UTF-8 user agents
Non-UTF-8 user_agent requests were bypassing logging because PostgreSQL requires UTF-8 strings. This adds conversion so that we are always dealing with UTF-8.
This commit is contained in:
@@ -15,6 +15,26 @@ describe Middleware::RequestTracker do
|
||||
}.merge(opts)
|
||||
end
|
||||
|
||||
context "full request" do
  # Force WebCrawlerRequest to persist counts on every increment so the
  # example's query sees a fresh row; restore the original setting afterwards.
  before do
    @saved_autoflush = WebCrawlerRequest.autoflush
    WebCrawlerRequest.autoflush = 1
  end

  after { WebCrawlerRequest.autoflush = @saved_autoflush }

  it "can handle rogue user agents" do
    # Bytes \xc3\x28 are invalid UTF-8; tagging the string with a legacy
    # encoding simulates a crawler sending a non-UTF-8 User-Agent header.
    agent = (+"Evil Googlebot String \xc3\x28").force_encoding("Windows-1252")

    app = ->(env) { ["200", { "Content-Type" => "text/html" }, [""]] }
    Middleware::RequestTracker.new(app).call(env("HTTP_USER_AGENT" => agent))

    # The tracker should have stored the agent converted to UTF-8.
    expect(WebCrawlerRequest.where(user_agent: agent.encode('utf-8')).count).to eq(1)
  end
end
|
||||
|
||||
context "log_request" do
|
||||
before do
|
||||
freeze_time Time.now
|
||||
|
Reference in New Issue
Block a user