From cabe8f0d2de98289b7636345e60988f8a0eca8ad Mon Sep 17 00:00:00 2001 From: Neil Lalonde Date: Wed, 16 Oct 2013 16:39:13 -0400 Subject: [PATCH] Clean up ScreenUrl normalization and matching --- app/models/screened_url.rb | 19 +++++++--- spec/models/screened_url_spec.rb | 63 ++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 8 deletions(-) diff --git a/app/models/screened_url.rb b/app/models/screened_url.rb index 6191b2679b7..5da8d044799 100644 --- a/app/models/screened_url.rb +++ b/app/models/screened_url.rb @@ -17,14 +17,23 @@ class ScreenedUrl < ActiveRecord::Base validates :domain, presence: true def normalize - if self.url - self.url.gsub!(/http(s?):\/\//i, '') - self.url.gsub!(/(\/)+$/, '') # trim trailing slashes - end + self.url = ScreenedUrl.normalize_url(self.url) if self.url + self.domain = self.domain.downcase if self.domain end def self.watch(url, domain, opts={}) - find_by_url(url) || create(opts.slice(:action_type, :ip_address).merge(url: url, domain: domain)) + find_match(url) || create(opts.slice(:action_type, :ip_address).merge(url: url, domain: domain)) + end + + def self.find_match(url) + find_by_url normalize_url(url) + end + + def self.normalize_url(url) + normalized = url.gsub(/http(s?):\/\//i, '') + normalized.gsub!(/(\/)+$/, '') # trim trailing slashes + normalized.gsub!(/^([^\/]+)(?:\/)?/) { |m| m.downcase } # downcase the domain part of the url + normalized end end diff --git a/spec/models/screened_url_spec.rb b/spec/models/screened_url_spec.rb index 60bb56d495e..2a099494f5d 100644 --- a/spec/models/screened_url_spec.rb +++ b/spec/models/screened_url_spec.rb @@ -16,18 +16,75 @@ describe ScreenedUrl do described_class.create(valid_params).last_match_at.should be_nil end + it "normalizes the url and domain" do + record = described_class.new(valid_params) + record.expects(:normalize).once + record.valid? + end + end + + describe 'normalize' do + let(:record) { described_class.new(@params) } + subject { record.normalize; record } + ['http://', 'HTTP://', 'https://', 'HTTPS://'].each do |prefix| it "strips #{prefix}" do - described_class.create(valid_params.merge(url: url.gsub('http://', prefix))).url.should == url.gsub('http://', '') + @params = valid_params.merge(url: url.gsub('http://', prefix)) + subject.url.should == url.gsub('http://', '') end end it "strips trailing slash" do - described_class.create(valid_params.merge(url: 'silverbullet.in/')).url.should == 'silverbullet.in' + @params = valid_params.merge(url: 'silverbullet.in/') + subject.url.should == 'silverbullet.in' end it "strips trailing slashes" do - described_class.create(valid_params.merge(url: 'silverbullet.in/buy///')).url.should == 'silverbullet.in/buy' + @params = valid_params.merge(url: 'silverbullet.in/buy///') + subject.url.should == 'silverbullet.in/buy' + end + + it "downcases domains" do + record1 = described_class.new(valid_params.merge(domain: 'DuB30.com', url: 'DuB30.com/Gems/Gems-of-Power')) + record1.normalize + record1.domain.should == 'dub30.com' + record1.url.should == 'dub30.com/Gems/Gems-of-Power' + record1.should be_valid + + record2 = described_class.new(valid_params.merge(domain: 'DuB30.com', url: 'DuB30.com')) + record2.normalize + record2.domain.should == 'dub30.com' + record2.url.should == 'dub30.com' + record2.should be_valid + end + + it "doesn't modify the url argument" do + expect { + described_class.new(valid_params).normalize + }.to_not change { valid_params[:url] } + end + + it "doesn't modify the domain argument" do + params = valid_params.merge(domain: domain.upcase) + expect { + described_class.new(params).normalize + }.to_not change { params[:domain] } + end + end + + describe 'find_match' do + it 'returns nil when there is no match' do + described_class.find_match('http://spamspot.com/buy/it').should be_nil + end + + it 'returns the record when there is an exact match' do + match = described_class.create(valid_params) + described_class.find_match(valid_params[:url]).should == match + end + + it 'ignores case of the domain' do + match = described_class.create(valid_params.merge(url: 'spamexchange.com/Good/Things')) + described_class.find_match("http://SPAMExchange.com/Good/Things").should == match end end