From c4b5455c2194b3346edb3ae10f62687aefd82263 Mon Sep 17 00:00:00 2001
From: Robin Ward
Date: Thu, 20 Feb 2014 16:07:02 -0500
Subject: [PATCH] REFACTOR: Rename `GooglebotDetection` to `CrawlerDetection` because we will likely whitelist more crawlers in the future.

---
Reviewer note (text between the `---` line and the first `diff --git` is
commentary and is not part of the commit): this change is a pure rename.
The matcher /Googlebot|Mediapartners|AdsBot/ is carried over as an unchanged
context line, so `CrawlerDetection.crawler?` still recognizes exactly the
same user agents as `GooglebotDetection.googlebot?` did. The layout template
is renamed at 100% similarity, and the new spec has the same assertions as
the deleted one with only the module/method names and example descriptions
updated.

 app/controllers/application_controller.rb     |  4 +--
 .../{googlebot.html.erb => crawler.html.erb}  |  0
 ...ebot_detection.rb => crawler_detection.rb} |  4 +--
 spec/components/crawler_detection_spec.rb     | 30 +++++++++++++++++++
 spec/components/googlebot_detection_spec.rb   | 30 -------------------
 5 files changed, 34 insertions(+), 34 deletions(-)
 rename app/views/layouts/{googlebot.html.erb => crawler.html.erb} (100%)
 rename lib/{googlebot_detection.rb => crawler_detection.rb} (54%)
 create mode 100644 spec/components/crawler_detection_spec.rb
 delete mode 100644 spec/components/googlebot_detection_spec.rb

diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb
index 30c7ce510..f9a081a3e 100644
--- a/app/controllers/application_controller.rb
+++ b/app/controllers/application_controller.rb
@@ -4,7 +4,7 @@ require_dependency 'discourse'
 require_dependency 'custom_renderer'
 require_dependency 'archetype'
 require_dependency 'rate_limiter'
-require_dependency 'googlebot_detection'
+require_dependency 'crawler_detection'
 
 class ApplicationController < ActionController::Base
   include CurrentUser
@@ -40,7 +40,7 @@ class ApplicationController < ActionController::Base
   layout :set_layout
 
   def set_layout
-    GooglebotDetection.googlebot?(request.user_agent) ? 'googlebot' : 'application'
+    CrawlerDetection.crawler?(request.user_agent) ? 'crawler' : 'application'
   end
 
   rescue_from Exception do |exception|
diff --git a/app/views/layouts/googlebot.html.erb b/app/views/layouts/crawler.html.erb
similarity index 100%
rename from app/views/layouts/googlebot.html.erb
rename to app/views/layouts/crawler.html.erb
diff --git a/lib/googlebot_detection.rb b/lib/crawler_detection.rb
similarity index 54%
rename from lib/googlebot_detection.rb
rename to lib/crawler_detection.rb
index a8164a4f7..2287bc7c8 100644
--- a/lib/googlebot_detection.rb
+++ b/lib/crawler_detection.rb
@@ -1,5 +1,5 @@
-module GooglebotDetection
-  def self.googlebot?(user_agent)
+module CrawlerDetection
+  def self.crawler?(user_agent)
     !/Googlebot|Mediapartners|AdsBot/.match(user_agent).nil?
   end
 end
diff --git a/spec/components/crawler_detection_spec.rb b/spec/components/crawler_detection_spec.rb
new file mode 100644
index 000000000..2e8f80ace
--- /dev/null
+++ b/spec/components/crawler_detection_spec.rb
@@ -0,0 +1,30 @@
+require 'spec_helper'
+require_dependency 'crawler_detection'
+
+describe CrawlerDetection do
+  describe "crawler?" do
+    it "returns true for crawler user agents" do
+      # https://support.google.com/webmasters/answer/1061943?hl=en
+      described_class.crawler?("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)").should == true
+      described_class.crawler?("Googlebot/2.1 (+http://www.google.com/bot.html)").should == true
+      described_class.crawler?("Googlebot-News").should == true
+      described_class.crawler?("Googlebot-Image/1.0").should == true
+      described_class.crawler?("Googlebot-Video/1.0").should == true
+      described_class.crawler?("(compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)").should == true
+      described_class.crawler?("Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)").should == true
+      described_class.crawler?("(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)").should == true
+      described_class.crawler?("Mediapartners-Google").should == true
+      described_class.crawler?("AdsBot-Google (+http://www.google.com/adsbot.html)").should == true
+    end
+
+    it "returns false for non-crawler user agents" do
+      described_class.crawler?("Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36").should == false
+      described_class.crawler?("Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko").should == false
+      described_class.crawler?("Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)").should == false
+      described_class.crawler?("Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25").should == false
+      described_class.crawler?("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").should == false
+      described_class.crawler?("Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30").should == false
+    end
+
+  end
+end
diff --git a/spec/components/googlebot_detection_spec.rb b/spec/components/googlebot_detection_spec.rb
deleted file mode 100644
index 59fb08c0d..000000000
--- a/spec/components/googlebot_detection_spec.rb
+++ /dev/null
@@ -1,30 +0,0 @@
-require 'spec_helper'
-require_dependency 'googlebot_detection'
-
-describe GooglebotDetection do
-  describe "googlebot?" do
-    it "returns true for googlebot user agents" do
-      # https://support.google.com/webmasters/answer/1061943?hl=en
-      described_class.googlebot?("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)").should == true
-      described_class.googlebot?("Googlebot/2.1 (+http://www.google.com/bot.html)").should == true
-      described_class.googlebot?("Googlebot-News").should == true
-      described_class.googlebot?("Googlebot-Image/1.0").should == true
-      described_class.googlebot?("Googlebot-Video/1.0").should == true
-      described_class.googlebot?("(compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)").should == true
-      described_class.googlebot?("Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)").should == true
-      described_class.googlebot?("(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)").should == true
-      described_class.googlebot?("Mediapartners-Google").should == true
-      described_class.googlebot?("AdsBot-Google (+http://www.google.com/adsbot.html)").should == true
-    end
-
-    it "returns false for non-googlebot user agents" do
-      described_class.googlebot?("Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36").should == false
-      described_class.googlebot?("Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko").should == false
-      described_class.googlebot?("Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0)").should == false
-      described_class.googlebot?("Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25").should == false
-      described_class.googlebot?("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0").should == false
-      described_class.googlebot?("Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30").should == false
-    end
-
-  end
-end