From 22768a4b68b213c0b0b43fbf5f82c9a893a94d08 Mon Sep 17 00:00:00 2001
From: Sam
Date: Mon, 4 Aug 2014 12:35:55 +1000
Subject: [PATCH] PERF: refactor incoming links table

---
Review notes: the port pattern in the migration now uses [0-9] instead of
"\d" (which a double-quoted Ruby string collapses to "d"), and referers that
normalize to the same (path, incoming_domain_id) are merged before the unique
index is created. Blob ids on the "index" lines predate these changes.

 app/models/incoming_domain.rb                 |  17 +++
 app/models/incoming_link.rb                   |  19 ++-
 app/models/incoming_referer.rb                |  17 +++
 app/models/post_timing.rb                     |   5 +-
 ...40804010803_incoming_link_normalization.rb | 110 ++++++++++++++++++
 5 files changed, 155 insertions(+), 13 deletions(-)
 create mode 100644 app/models/incoming_domain.rb
 create mode 100644 app/models/incoming_referer.rb
 create mode 100644 db/migrate/20140804010803_incoming_link_normalization.rb

diff --git a/app/models/incoming_domain.rb b/app/models/incoming_domain.rb
new file mode 100644
index 00000000000..ce8e3816c7c
--- /dev/null
+++ b/app/models/incoming_domain.rb
@@ -0,0 +1,17 @@
+# One referring host, identified by name, scheme (https flag) and port.
+class IncomingDomain < ActiveRecord::Base
+end
+
+# == Schema Information
+#
+# Table name: incoming_domains
+#
+#  id    :integer          not null, primary key
+#  name  :string(100)      not null
+#  https :boolean          default(FALSE), not null
+#  port  :integer          not null
+#
+# Indexes
+#
+#  index_incoming_domains_on_name_and_https_and_port  (name,https,port) UNIQUE
+#
diff --git a/app/models/incoming_link.rb b/app/models/incoming_link.rb
index 6f000158b1d..38f968d4576 100644
--- a/app/models/incoming_link.rb
+++ b/app/models/incoming_link.rb
@@ -89,20 +89,17 @@ end
 #
 # Table name: incoming_links
 #
-#  id              :integer          not null, primary key
-#  referer         :string(1000)
-#  domain          :string(100)
-#  topic_id        :integer
-#  post_number     :integer
-#  created_at      :datetime
-#  user_id         :integer
-#  ip_address      :inet
-#  current_user_id :integer
-#  post_id         :integer          not null
+#  id                  :integer          not null, primary key
+#  topic_id            :integer
+#  created_at          :datetime
+#  user_id             :integer
+#  ip_address          :inet
+#  current_user_id     :integer
+#  post_id             :integer          not null
+#  incoming_referer_id :integer
 #
 # Indexes
 #
-#  index_incoming_links_on_created_at_and_domain   (created_at,domain)
 #  index_incoming_links_on_created_at_and_user_id  (created_at,user_id)
 #  index_incoming_links_on_post_id                 (post_id)
 #
diff --git a/app/models/incoming_referer.rb b/app/models/incoming_referer.rb
new file mode 100644
index 00000000000..3c50461e15b
--- /dev/null
+++ b/app/models/incoming_referer.rb
@@ -0,0 +1,17 @@
+# One referring URL: its path plus the incoming_domains row it belongs to.
+class IncomingReferer < ActiveRecord::Base
+end
+
+# == Schema Information
+#
+# Table name: incoming_referers
+#
+#  id                 :integer          not null, primary key
+#  url                :string(1000)     not null
+#  path               :string(1000)     not null
+#  incoming_domain_id :integer          not null
+#
+# Indexes
+#
+#  index_incoming_referers_on_path_and_incoming_domain_id  (path,incoming_domain_id) UNIQUE
+#
diff --git a/app/models/post_timing.rb b/app/models/post_timing.rb
index 489b14f73fa..1f6c070d0e9 100644
--- a/app/models/post_timing.rb
+++ b/app/models/post_timing.rb
@@ -101,6 +101,7 @@ end
 #
 # Indexes
 #
-#  post_timings_summary  (topic_id,post_number)
-#  post_timings_unique   (topic_id,post_number,user_id) UNIQUE
+#  index_post_timings_on_user_id  (user_id)
+#  post_timings_summary           (topic_id,post_number)
+#  post_timings_unique            (topic_id,post_number,user_id) UNIQUE
 #
diff --git a/db/migrate/20140804010803_incoming_link_normalization.rb b/db/migrate/20140804010803_incoming_link_normalization.rb
new file mode 100644
index 00000000000..2c3743541a8
--- /dev/null
+++ b/db/migrate/20140804010803_incoming_link_normalization.rb
@@ -0,0 +1,110 @@
+class IncomingLinkNormalization < ActiveRecord::Migration
+  # Normalizes incoming_links.referer into two lookup tables:
+  # incoming_domains (name, https, port) and incoming_referers (url, path,
+  # incoming_domain_id). incoming_links keeps only incoming_referer_id, which
+  # stays NULL for referers that do not parse as an http(s) URL.
+  def up
+    remove_column :incoming_links, :post_number
+    remove_column :incoming_links, :domain
+    add_column :incoming_links, :incoming_referer_id, :integer
+
+    create_table :incoming_referers do |t|
+      t.string :url, limit: 1000, null: false
+      t.string :domain, limit: 100, null: false
+      t.string :path, limit: 1000, null: false
+      t.integer :port, null: false
+      t.boolean :https, null: false
+      t.integer :incoming_domain_id
+    end
+
+    # start the shuffle
+    #
+    # a[1] is 's' for https, a[2] the host, a[4] an explicit port, a[5] the rest.
+    # The port uses [0-9], not \d: in a double-quoted Ruby string "\d" collapses
+    # to "d", so the port group could never match. It is capped at 5 digits so
+    # the ::integer cast cannot overflow, and the pattern is anchored so the host
+    # is always taken from the start of the referer.
+    execute "INSERT INTO incoming_referers(url, https, domain, port, path)
+              SELECT referer,
+                    CASE WHEN a[1] = 's' THEN true ELSE false END,
+                    a[2] as domain,
+                    CASE WHEN a[1] = 's' THEN
+                      COALESCE(a[4]::integer, 443)::integer
+                    ELSE
+                      COALESCE(a[4]::integer, 80)::integer
+                    END,
+                    COALESCE(a[5], '') path
+              FROM
+              (
+                SELECT referer, regexp_matches(referer, '^http(s)?://([^/:]+)(:([0-9]{1,5}))?(.*)') a
+                FROM
+                (
+                  SELECT DISTINCT referer
+                  FROM incoming_links WHERE referer ~ '^https?://.+'
+                ) Z
+              ) X
+              WHERE a[2] IS NOT NULL"
+
+
+    execute "UPDATE incoming_links l
+             SET incoming_referer_id = r.id
+             FROM incoming_referers r
+             WHERE r.url = l.referer"
+
+    create_table :incoming_domains do |t|
+      t.string :name, limit: 100, null: false
+      t.boolean :https, null: false, default: false
+      t.integer :port, null: false
+    end
+
+    # shuffle part 2
+    #
+    execute "INSERT INTO incoming_domains(name, port, https)
+             SELECT DISTINCT domain, port, https
+             FROM incoming_referers"
+
+    execute "UPDATE incoming_referers l
+             SET incoming_domain_id = d.id
+             FROM incoming_domains d
+             WHERE d.name = l.domain AND d.https = l.https AND d.port = l.port"
+
+    # shuffle part 3
+    #
+    # Different URLs can normalize to the same (path, incoming_domain_id), e.g.
+    # with and without an explicit default port. Repoint their links at the
+    # lowest referer id and delete the extras so the unique index can be built.
+    execute "UPDATE incoming_links l
+             SET incoming_referer_id = k.keep_id
+             FROM incoming_referers r,
+                  (SELECT MIN(id) AS keep_id, path, incoming_domain_id
+                   FROM incoming_referers
+                   GROUP BY path, incoming_domain_id) k
+             WHERE l.incoming_referer_id = r.id
+               AND k.path = r.path
+               AND k.incoming_domain_id = r.incoming_domain_id
+               AND r.id <> k.keep_id"
+
+    execute "DELETE FROM incoming_referers r
+             USING incoming_referers k
+             WHERE k.path = r.path
+               AND k.incoming_domain_id = r.incoming_domain_id
+               AND k.id < r.id"
+
+
+    remove_column :incoming_referers, :domain
+    remove_column :incoming_referers, :port
+    remove_column :incoming_referers, :https
+
+    change_column :incoming_referers, :incoming_domain_id, :integer, null: false
+
+    add_index :incoming_referers, [:path, :incoming_domain_id], unique: true
+    add_index :incoming_domains, [:name, :https, :port], unique: true
+
+    remove_column :incoming_links, :referer
+  end
+
+  # The dropped columns cannot be rebuilt, so there is no way back.
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+end