FIX: crawler requests exceptions for non UTF-8 user agents with invalid bytes

This commit is contained in:
Arkshine
2024-05-24 03:49:17 +02:00
committed by Régis Hanol
parent b757275c1e
commit 1fffb236b2
5 changed files with 86 additions and 13 deletions

View File

@ -112,6 +112,18 @@ RSpec.describe Middleware::AnonymousCache do
expect(key1).not_to eq(key2)
end
# Regression spec: a user agent string forced to ASCII while containing a
# non-ASCII byte (\xc3) must not make cache key generation raise, and must
# produce the same cache key as the same string re-encoded to UTF-8 with the
# invalid/undefined bytes replaced.
it "handles user agents with invalid bytes" do
  agent = (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII")

  expect do
    # Key computed straight from the raw agent with the invalid byte.
    raw_key = new_helper("HTTP_USER_AGENT" => agent).cache_key

    # Key computed from the UTF-8 re-encoded copy of the agent.
    reencoded_agent = agent.encode("utf-8", invalid: :replace, undef: :replace)
    reencoded_key = new_helper("HTTP_USER_AGENT" => reencoded_agent).cache_key

    expect(raw_key).to eq(reencoded_key)
  end.not_to raise_error
end
context "when cached" do
let!(:helper) { new_helper("ANON_CACHE_DURATION" => 10) }
@ -351,6 +363,15 @@ RSpec.describe Middleware::AnonymousCache do
expect(@status).to eq(403)
expect {
get "/",
headers: {
"HTTP_USER_AGENT" => (+"Evil Googlebot String \xc3\x28").force_encoding("ASCII"),
}
expect(@status).to eq(403)
}.not_to raise_error
get "/",
headers: {
"HTTP_USER_AGENT" => "Twitterbot/2.1 (+http://www.notgoogle.com/bot.html)",