FIX: Apply crawler rate limits to cached requests (#27174)

This commit moves the crawler rate-limit logic out of the application controller and into the request tracker middleware, so that the limits apply to all crawler requests rather than only those that reach the application controller. For performance reasons, some requests are served early in the middleware stack without ever reaching the Rails app (e.g. by `AnonymousCache`), so crawlers that have exceeded their limits still receive 200 responses when they should be getting 429s.
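
For context, the middleware-level check might look roughly like the sketch below. This is not the actual patch: the method name `crawler_rate_limited?` and the exact wiring are illustrative assumptions modelled on the previous controller-level check, and the sketch assumes Discourse's `RateLimiter` class together with the `slow_down_crawler_user_agents` and `slow_down_crawler_rate` site settings.

```ruby
# Illustrative sketch only, not the actual diff. Assumes Discourse's
# RateLimiter and the slow_down_crawler_* site settings; the method
# name crawler_rate_limited? is hypothetical, and the real #call does
# far more than shown here.
module Middleware
  class RequestTracker
    def call(env)
      # Check crawler limits before anything else (e.g. AnonymousCache)
      # can short-circuit the request and serve a cached 200.
      if crawler_rate_limited?(env)
        return 429,
               { "Retry-After" => SiteSetting.slow_down_crawler_rate.to_s },
               ["Too many crawler requests."]
      end

      @app.call(env)
    end

    private

    def crawler_rate_limited?(env)
      agents = SiteSetting.slow_down_crawler_user_agents
      return false if agents.blank?

      user_agent = env["HTTP_USER_AGENT"]&.downcase
      return false if user_agent.blank?

      agents
        .downcase
        .split("|")
        .each do |crawler|
          next if !user_agent.include?(crawler)

          # Only the first matching crawler's bucket is checked/consumed.
          RateLimiter.new(nil, "#{crawler}_crawler_rate_limit", 1, SiteSetting.slow_down_crawler_rate).performed!
          break
        end

      false
    rescue RateLimiter::LimitExceeded
      true
    end
  end
end
```

Because the check runs at the top of the middleware stack, even requests that would be answered from the anonymous cache are counted against the crawler's limit and can be rejected with a 429.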

Internal topic: t/128810.
Author: Osama Sayegh
Date: 2024-05-27 16:26:35 +03:00 (committed by GitHub)
Parent: 7992d7a65a
Commit: 361992bb74
4 changed files with 87 additions and 26 deletions


@@ -369,6 +369,31 @@ RSpec.describe Middleware::RequestTracker do
    end
  end

  describe "crawler rate limits" do
    context "when there are multiple matching crawlers" do
      before { SiteSetting.slow_down_crawler_user_agents = "badcrawler2|badcrawler22" }

      it "only checks limits for the first match" do
        env = env("HTTP_USER_AGENT" => "badcrawler22") # matches both "badcrawler2" and "badcrawler22"
        status, _ = middleware.call(env)
        expect(status).to eq(200)
      end
    end
it "compares user agents in a case-insensitive manner" do
SiteSetting.slow_down_crawler_user_agents = "BaDCRawLer"
env1 = env("HTTP_USER_AGENT" => "bADcrAWLer")
env2 = env("HTTP_USER_AGENT" => "bADcrAWLer")
status, _ = middleware.call(env1)
expect(status).to eq(200)
status, _ = middleware.call(env2)
expect(status).to eq(429)
end
end
describe "register_ip_skipper" do
before do
Middleware::RequestTracker.register_ip_skipper { |ip| ip == "1.1.1.2" }