FEATURE: control which web crawlers can access the site using a whitelist or blacklist

This commit is contained in:
Neil Lalonde
2018-03-15 17:10:45 -04:00
parent cbbeedf53b
commit ced7e9a691
22 changed files with 722 additions and 97 deletions

View File

@@ -1,5 +1,6 @@
# frozen_string_literal: true
class ApplicationRequest < ActiveRecord::Base
enum req_type: %i(http_total
http_2xx
http_background
@@ -12,41 +13,12 @@ class ApplicationRequest < ActiveRecord::Base
page_view_logged_in_mobile
page_view_anon_mobile)
cattr_accessor :autoflush, :autoflush_seconds, :last_flush
# auto flush if backlog is larger than this
self.autoflush = 2000
# auto flush if older than this
self.autoflush_seconds = 5.minutes
self.last_flush = Time.now.utc
include CachedCounting
# Bump the redis counter for a request type, flushing the aggregated
# counts to the database once a backlog or age threshold is crossed.
#
# type - a req_type enum key (e.g. :http_total, :page_view_anon_mobile)
# opts - optional Hash; opts[:autoflush] overrides the class-level
#        autoflush backlog threshold
#
# NOTE(review): this span appears to be a unified diff with its +/-
# markers stripped — everything above the final perform_increment! call
# looks like the OLD implementation being removed, and the trailing
# perform_increment! the NEW one. Verify against the original commit
# before treating this text as runnable code.
def self.increment!(type, opts = nil)
key = redis_key(type)
val = $redis.incr(key).to_i
# readonly mode it is going to be 0, skip
return if val == 0
# 3.days, see: https://github.com/rails/rails/issues/21296
$redis.expire(key, 259200)
# per-call override takes precedence over the cattr default
autoflush = (opts && opts[:autoflush]) || self.autoflush
# flush immediately once this key's backlog reaches the threshold
if autoflush > 0 && val >= autoflush
write_cache!
return
end
# time-based flush: also write when the last flush is too old
if (Time.now.utc - last_flush).to_i > autoflush_seconds
write_cache!
end
# presumably provided by the CachedCounting concern included above — confirm
perform_increment!(redis_key(type), opts)
end
# Lua script run server-side by redis: read the counter's current value
# and reset it to '0' as one atomic operation, so increments arriving
# between the read and the reset cannot be lost or double counted.
GET_AND_RESET = <<~LUA
local val = redis.call('get', KEYS[1])
redis.call('set', KEYS[1], '0')
return val
LUA
def self.write_cache!(date = nil)
if date.nil?
write_cache!(Time.now.utc)
@@ -58,13 +30,9 @@ class ApplicationRequest < ActiveRecord::Base
date = date.to_date
# this may seem a bit fancy, but it allows
# concurrent calls without double counting
req_types.each do |req_type, _|
key = redis_key(req_type, date)
val = get_and_reset(redis_key(req_type, date))
namespaced_key = $redis.namespace_key(key)
val = $redis.without_namespace.eval(GET_AND_RESET, keys: [namespaced_key]).to_i
next if val == 0
id = req_id(date, req_type)