From cabe8f0d2de98289b7636345e60988f8a0eca8ad Mon Sep 17 00:00:00 2001
From: Neil Lalonde <neillalonde@gmail.com>
Date: Wed, 16 Oct 2013 16:39:13 -0400
Subject: [PATCH] Clean up ScreenUrl normalization and matching

---
 app/models/screened_url.rb       | 19 +++++++---
 spec/models/screened_url_spec.rb | 63 ++++++++++++++++++++++++++++++--
 2 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/app/models/screened_url.rb b/app/models/screened_url.rb
index 6191b2679b7..5da8d044799 100644
--- a/app/models/screened_url.rb
+++ b/app/models/screened_url.rb
@@ -17,14 +17,23 @@ class ScreenedUrl < ActiveRecord::Base
   validates :domain, presence: true
 
   def normalize
-    if self.url
-      self.url.gsub!(/http(s?):\/\//i, '')
-      self.url.gsub!(/(\/)+$/, '') # trim trailing slashes
-    end
+    self.url = ScreenedUrl.normalize_url(self.url) if self.url
+    self.domain = self.domain.downcase if self.domain
   end
 
   def self.watch(url, domain, opts={})
-    find_by_url(url) || create(opts.slice(:action_type, :ip_address).merge(url: url, domain: domain))
+    find_match(url) || create(opts.slice(:action_type, :ip_address).merge(url: url, domain: domain))
+  end
+
+  def self.find_match(url)
+    find_by_url normalize_url(url)
+  end
+
+  def self.normalize_url(url)
+    normalized = url.gsub(/http(s?):\/\//i, '')
+    normalized.gsub!(/(\/)+$/, '') # trim trailing slashes
+    normalized.gsub!(/^([^\/]+)(?:\/)?/) { |m| m.downcase } # downcase the domain part of the url
+    normalized
   end
 end
 
diff --git a/spec/models/screened_url_spec.rb b/spec/models/screened_url_spec.rb
index 60bb56d495e..2a099494f5d 100644
--- a/spec/models/screened_url_spec.rb
+++ b/spec/models/screened_url_spec.rb
@@ -16,18 +16,75 @@ describe ScreenedUrl do
       described_class.create(valid_params).last_match_at.should be_nil
     end
 
+    it "normalizes the url and domain" do
+      record = described_class.new(valid_params)
+      record.expects(:normalize).once
+      record.valid?
+    end
+  end
+
+  describe 'normalize' do
+    let(:record) { described_class.new(@params) }
+    subject { record.normalize; record }
+
     ['http://', 'HTTP://', 'https://', 'HTTPS://'].each do |prefix|
       it "strips #{prefix}" do
-        described_class.create(valid_params.merge(url: url.gsub('http://', prefix))).url.should == url.gsub('http://', '')
+        @params = valid_params.merge(url: url.gsub('http://', prefix))
+        subject.url.should == url.gsub('http://', '')
       end
     end
 
     it "strips trailing slash" do
-      described_class.create(valid_params.merge(url: 'silverbullet.in/')).url.should == 'silverbullet.in'
+      @params = valid_params.merge(url: 'silverbullet.in/')
+      subject.url.should == 'silverbullet.in'
     end
 
     it "strips trailing slashes" do
-      described_class.create(valid_params.merge(url: 'silverbullet.in/buy///')).url.should == 'silverbullet.in/buy'
+      @params = valid_params.merge(url: 'silverbullet.in/buy///')
+      subject.url.should == 'silverbullet.in/buy'
+    end
+
+    it "downcases domains" do
+      record1 = described_class.new(valid_params.merge(domain: 'DuB30.com', url: 'DuB30.com/Gems/Gems-of-Power'))
+      record1.normalize
+      record1.domain.should == 'dub30.com'
+      record1.url.should == 'dub30.com/Gems/Gems-of-Power'
+      record1.should be_valid
+
+      record2 = described_class.new(valid_params.merge(domain: 'DuB30.com', url: 'DuB30.com'))
+      record2.normalize
+      record2.domain.should == 'dub30.com'
+      record2.url.should == 'dub30.com'
+      record2.should be_valid
+    end
+
+    it "doesn't modify the url argument" do
+      expect {
+        described_class.new(valid_params).normalize
+      }.to_not change { valid_params[:url] }
+    end
+
+    it "doesn't modify the domain argument" do
+      params = valid_params.merge(domain: domain.upcase)
+      expect {
+        described_class.new(params).normalize
+      }.to_not change { params[:domain] }
+    end
+  end
+
+  describe 'find_match' do
+    it 'returns nil when there is no match' do
+      described_class.find_match('http://spamspot.com/buy/it').should be_nil
+    end
+
+    it 'returns the record when there is an exact match' do
+      match = described_class.create(valid_params)
+      described_class.find_match(valid_params[:url]).should == match
+    end
+
+    it 'ignores case of the domain' do
+      match = described_class.create(valid_params.merge(url: 'spamexchange.com/Good/Things'))
+      described_class.find_match("http://SPAMExchange.com/Good/Things").should == match
     end
   end