FEATURE: Only count topic views for explicit/deferred tracked views (#27533)

Followup 2f2da7274732cba30d03b6c5c3a4194652cb6783

This commit moves topic view tracking from happening
every time a Topic is requested, which is susceptible
to inflating numbers of views from web crawlers, to
our request tracker middleware.

In this new location, topic views are only tracked when
the following headers are sent:

* HTTP_DISCOURSE_TRACK_VIEW - This is sent on every page navigation when
  clicking around the ember app. We count these as browser page views
  because we know it comes from the AJAX call in our app. The topic ID
  is extracted from HTTP_DISCOURSE_TRACK_VIEW_TOPIC_ID
* HTTP_DISCOURSE_DEFERRED_TRACK_VIEW - Sent when MessageBus initializes
  after first loading the page to count the initial page load view. The
  topic ID is extracted from HTTP_DISCOURSE_DEFERRED_TRACK_VIEW.

This will bring topic views more in line with the change we
made to page views in the referenced commit and result in
more realistic topic view counts.
This commit is contained in:
Martin Brennan
2024-07-03 10:38:49 +10:00
committed by GitHub
parent 57af5d6f0d
commit 527f02e99f
12 changed files with 641 additions and 183 deletions

View File

@ -75,18 +75,32 @@ class Middleware::RequestTracker
elsif data[:has_auth_cookie]
ApplicationRequest.increment!(:page_view_logged_in)
ApplicationRequest.increment!(:page_view_logged_in_mobile) if data[:is_mobile]
if data[:explicit_track_view]
# Must be a browser if it had this header from our ajax implementation
ApplicationRequest.increment!(:page_view_logged_in_browser)
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile]
if data[:topic_id].present? && data[:current_user_id].present?
TopicsController.defer_topic_view(
data[:topic_id],
data[:request_remote_ip],
data[:current_user_id],
)
end
end
elsif !SiteSetting.login_required
ApplicationRequest.increment!(:page_view_anon)
ApplicationRequest.increment!(:page_view_anon_mobile) if data[:is_mobile]
if data[:explicit_track_view]
# Must be a browser if it had this header from our ajax implementation
ApplicationRequest.increment!(:page_view_anon_browser)
ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile]
if data[:topic_id].present?
TopicsController.defer_topic_view(data[:topic_id], data[:request_remote_ip])
end
end
end
end
@ -97,9 +111,21 @@ class Middleware::RequestTracker
if data[:has_auth_cookie]
ApplicationRequest.increment!(:page_view_logged_in_browser)
ApplicationRequest.increment!(:page_view_logged_in_browser_mobile) if data[:is_mobile]
if data[:topic_id].present? && data[:current_user_id].present?
TopicsController.defer_topic_view(
data[:topic_id],
data[:request_remote_ip],
data[:current_user_id],
)
end
elsif !SiteSetting.login_required
ApplicationRequest.increment!(:page_view_anon_browser)
ApplicationRequest.increment!(:page_view_anon_browser_mobile) if data[:is_mobile]
if data[:topic_id].present?
TopicsController.defer_topic_view(data[:topic_id], data[:request_remote_ip])
end
end
end
@ -129,7 +155,15 @@ class Middleware::RequestTracker
request ||= Rack::Request.new(env)
helper = Middleware::AnonymousCache::Helper.new(env, request)
# Value of the discourse-track-view request header
# Since ActionDispatch::RemoteIp middleware is run before this middleware,
# we have access to the normalised remote IP based on ActionDispatch::RemoteIp::GetIp
#
# NOTE: Locally with MessageBus requests, the remote IP ends up as ::1 because
# of the X-Forwarded-For header set...somewhere, whereas all other requests
# end up as 127.0.0.1.
request_remote_ip = env["action_dispatch.remote_ip"].to_s
# Value of the discourse-track-view request header, set in `lib/ajax.js`
env_track_view = env["HTTP_DISCOURSE_TRACK_VIEW"]
# Was the discourse-track-view request header set to true? Likely
@ -141,10 +175,25 @@ class Middleware::RequestTracker
status == 200 && !%w[0 false].include?(env_track_view) && request.get? && !request.xhr? &&
headers["Content-Type"] =~ %r{text/html}
# This header is sent on a follow-up request after a real browser loads up a page
# see `scripts/pageview.js` and `instance-initializers/page-tracking.js`
deferred_track_view = %w[1 true].include?(env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW"])
# This is treated separately from deferred tracking in #log_request, this is
# why deferred_track_view is not counted here.
track_view = !!(explicit_track_view || implicit_track_view)
has_auth_cookie = Auth::DefaultCurrentUserProvider.find_v0_auth_cookie(request).present?
has_auth_cookie ||= Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env).present?
# These are set in the same place as the respective track view headers in the client.
topic_id =
if deferred_track_view
env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW_TOPIC_ID"]
elsif explicit_track_view || implicit_track_view
env["HTTP_DISCOURSE_TRACK_VIEW_TOPIC_ID"]
end
auth_cookie = Auth::DefaultCurrentUserProvider.find_v0_auth_cookie(request)
auth_cookie ||= Auth::DefaultCurrentUserProvider.find_v1_auth_cookie(env)
has_auth_cookie = auth_cookie.present?
is_api ||= !!env[Auth::DefaultCurrentUserProvider::API_KEY_ENV]
is_user_api ||= !!env[Auth::DefaultCurrentUserProvider::USER_API_KEY_ENV]
@ -152,14 +201,31 @@ class Middleware::RequestTracker
is_message_bus = request.path.start_with?("#{Discourse.base_path}/message-bus/")
is_topic_timings = request.path.start_with?("#{Discourse.base_path}/topics/timings")
# This header is sent on a follow-up request after a real browser loads up a page
# see `scripts/pageview.js` and `instance-initializers/page-tracking.js`
has_deferred_track_header = %w[1 true].include?(env["HTTP_DISCOURSE_DEFERRED_TRACK_VIEW"])
# Auth cookie can be used to find the ID for logged in users, but API calls must look up the
# current user based on env variables.
#
# We only care about this for topic views, other pageviews it's enough to know if the user is
# logged in or not, and we have separate pageview tracking for API views.
current_user_id =
if topic_id.present?
begin
(auth_cookie&.[](:user_id) || CurrentUser.lookup_from_env(env)&.id)
rescue Discourse::InvalidAccess => err
# This error is raised when the API key is invalid, no need to stop the show.
Discourse.warn_exception(
err,
message: "RequestTracker.get_data failed with an invalid API key error",
)
nil
end
end
h = {
status: status,
is_crawler: helper.is_crawler?,
has_auth_cookie: has_auth_cookie,
current_user_id: current_user_id,
topic_id: topic_id,
is_api: is_api,
is_user_api: is_user_api,
is_background: is_message_bus || is_topic_timings,
@ -168,7 +234,8 @@ class Middleware::RequestTracker
timing: timing,
queue_seconds: env["REQUEST_QUEUE_SECONDS"],
explicit_track_view: explicit_track_view,
deferred_track: has_deferred_track_header,
deferred_track: deferred_track_view,
request_remote_ip: request_remote_ip,
}
if h[:is_background]
@ -199,12 +266,16 @@ class Middleware::RequestTracker
end
def log_request_info(env, result, info, request = nil)
# we got to skip this on error ... its just logging
# We've got to skip this on error ... its just logging
data =
begin
self.class.get_data(env, result, info, request)
rescue StandardError => e
Discourse.warn_exception(e, message: "RequestTracker.get_data failed")
rescue StandardError => err
Discourse.warn_exception(err, message: "RequestTracker.get_data failed")
# This is super hard to find if in testing, we should still raise in this case.
raise err if Rails.env.test?
nil
end