From 3d647a4b412bea1313af1d3c866b9052e6702772 Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Wed, 16 Oct 2013 16:39:18 +1100
Subject: [PATCH] remove rack cache, it has been causing trouble instead
 implement an aggressive anonymous cache that is stored in redis this cache is
 sitting in the front of the middleware stack enabled only in production TODO:
 expire it more intelligently when stuff is created

---
 Gemfile                                       |  4 -
 Gemfile.lock                                  | 10 --
 Gemfile_rails4.lock                           | 16 +---
 app/controllers/application_controller.rb     | 24 +----
 app/controllers/list_controller.rb            | 32 +++----
 app/controllers/topics_controller.rb          | 33 ++++---
 config/environments/development.rb            |  2 +
 config/environments/production.rb.sample      |  3 -
 config/environments/profile.rb                |  3 -
 config/initializers/99-rack-cache.rb          | 35 ++-----
 lib/middleware/anonymous_cache.rb             | 92 +++++++++++++++++++
 .../middleware/anonymous_cache_spec.rb        | 41 +++++++++
 12 files changed, 180 insertions(+), 115 deletions(-)
 create mode 100644 lib/middleware/anonymous_cache.rb
 create mode 100644 spec/components/middleware/anonymous_cache_spec.rb

diff --git a/Gemfile b/Gemfile
index 56c042a51..b27401ad4 100644
--- a/Gemfile
+++ b/Gemfile
@@ -192,10 +192,6 @@ gem 'flamegraph', git: 'https://github.com/SamSaffron/flamegraph.git', require:
 gem 'rack-mini-profiler',  git: 'https://github.com/MiniProfiler/rack-mini-profiler.git', require: false
 
 # used for caching, optional
-# redis-rack-cache is missing a sane expiry policy, it hogs redis
-# https://github.com/jodosha/redis-store/pull/183
-gem 'redis-rack-cache', git: 'https://github.com/SamSaffron/redis-rack-cache.git', require: false
-gem 'rack-cache', require: false
 gem 'rack-cors', require: false
 gem 'unicorn', require: false
 gem 'puma', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index c48647492..c27c426bf 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -54,14 +54,6 @@ GIT
       redis
       thin
 
-GIT
-  remote: https://github.com/SamSaffron/redis-rack-cache.git
-  revision: 379ef30e31d4e185cb1d7f8badca0cc06403eba2
-  specs:
-    redis-rack-cache (1.2.1)
-      rack-cache (~> 1.2)
-      redis-store (~> 1.1.0)
-
 GIT
   remote: https://github.com/SamSaffron/sprockets.git
   revision: bacf2ec4d4d10cd8d1ab25a6360740314c512237
@@ -526,7 +518,6 @@ DEPENDENCIES
   pry-rails
   puma
   qunit-rails
-  rack-cache
   rack-cors
   rack-mini-profiler!
   rails (= 3.2.12)
@@ -536,7 +527,6 @@ DEPENDENCIES
   rb-inotify (~> 0.9)
   redcarpet
   redis
-  redis-rack-cache!
   redis-rails
   rest-client
   rinku
diff --git a/Gemfile_rails4.lock b/Gemfile_rails4.lock
index a0d4531c6..62abe0984 100644
--- a/Gemfile_rails4.lock
+++ b/Gemfile_rails4.lock
@@ -23,7 +23,7 @@ GIT
 
 GIT
   remote: git://github.com/rails/rails.git
-  revision: cfd9186e34a25a29f7cd2018e91548f5d1be22af
+  revision: 0785008a64f4af8d09379683303d49d160cd444d
   branch: 4-0-stable
   specs:
     actionmailer (4.0.0)
@@ -111,14 +111,6 @@ GIT
       redis
       thin
 
-GIT
-  remote: https://github.com/SamSaffron/redis-rack-cache.git
-  revision: 379ef30e31d4e185cb1d7f8badca0cc06403eba2
-  specs:
-    redis-rack-cache (1.2.1)
-      rack-cache (~> 1.2)
-      redis-store (~> 1.1.0)
-
 GIT
   remote: https://github.com/callahad/omniauth-browserid.git
   revision: af62d667626c1622de6fe13b60849c3640765ab1
@@ -281,7 +273,7 @@ GEM
     mocha (0.14.0)
       metaclass (~> 0.0.1)
     mock_redis (0.9.0)
-    multi_json (1.8.1)
+    multi_json (1.8.2)
     multipart-post (1.2.0)
     mustache (0.99.4)
     net-scp (1.1.2)
@@ -340,8 +332,6 @@ GEM
     qunit-rails (0.0.4)
       railties (>= 3.2.3)
     rack (1.5.2)
-    rack-cache (1.2)
-      rack (>= 0.4)
     rack-cors (0.2.8)
       rack
     rack-openid (1.3.1)
@@ -531,7 +521,6 @@ DEPENDENCIES
   pry-rails
   puma
   qunit-rails
-  rack-cache
   rack-cors
   rack-mini-profiler!
   rails!
@@ -542,7 +531,6 @@ DEPENDENCIES
   rb-inotify (~> 0.9)
   redcarpet
   redis
-  redis-rack-cache!
   redis-rails!
   rest-client
   rinku
diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb
index 1223f00a0..3715481ce 100644
--- a/app/controllers/application_controller.rb
+++ b/app/controllers/application_controller.rb
@@ -155,35 +155,15 @@ class ApplicationController < ActionController::Base
   end
 
   def can_cache_content?
-    # Don't cache unless we're in production mode
-    return false unless Rails.env.production? || Rails.env == "profile"
-
-    # Don't cache logged in users
-    return false if current_user.present?
-
-    true
+    !current_user.present?
   end
 
   # Our custom cache method
   def discourse_expires_in(time_length)
     return unless can_cache_content?
-    expires_in time_length, public: true
+    Middleware::AnonymousCache.anon_cache(request.env, 1.minute)
   end
 
-  # Helper method - if no logged in user (anonymous), use Rails' conditional GET
-  # support. Should be very fast behind a cache.
-  def anonymous_etag(*args)
-    if can_cache_content?
-      yield if stale?(*args)
-
-      # Add a one minute expiry
-      expires_in 1.minute, public: true
-    else
-      yield
-    end
-  end
-
-
   def fetch_user_from_params
     username_lower = params[:username].downcase
     username_lower.gsub!(/\.json$/, '')
diff --git a/app/controllers/list_controller.rb b/app/controllers/list_controller.rb
index d9b32ed86..dc3e329a7 100644
--- a/app/controllers/list_controller.rb
+++ b/app/controllers/list_controller.rb
@@ -22,14 +22,14 @@ class ListController < ApplicationController
 
   [:latest, :hot].each do |filter|
     define_method("#{filter}_feed") do
-      anonymous_etag(@category) do
-        @title = "#{filter.capitalize} Topics"
-        @link = "#{Discourse.base_url}/#{filter}"
-        @description = I18n.t("rss_description.#{filter}")
-        @atom_link = "#{Discourse.base_url}/#{filter}.rss"
-        @topic_list = TopicQuery.new(current_user).public_send("list_#{filter}")
-        render 'list', formats: [:rss]
-      end
+      discourse_expires_in 1.minute
+
+      @title = "#{filter.capitalize} Topics"
+      @link = "#{Discourse.base_url}/#{filter}"
+      @description = I18n.t("rss_description.#{filter}")
+      @atom_link = "#{Discourse.base_url}/#{filter}.rss"
+      @topic_list = TopicQuery.new(current_user).public_send("list_#{filter}")
+      render 'list', formats: [:rss]
     end
   end
 
@@ -99,14 +99,14 @@ class ListController < ApplicationController
 
     guardian.ensure_can_see!(@category)
 
-    anonymous_etag(@category) do
-      @title = @category.name
-      @link = "#{Discourse.base_url}/category/#{@category.slug}"
-      @description = "#{I18n.t('topics_in_category', category: @category.name)} #{@category.description}"
-      @atom_link = "#{Discourse.base_url}/category/#{@category.slug}.rss"
-      @topic_list = TopicQuery.new.list_new_in_category(@category)
-      render 'list', formats: [:rss]
-    end
+    discourse_expires_in 1.minute
+
+    @title = @category.name
+    @link = "#{Discourse.base_url}/category/#{@category.slug}"
+    @description = "#{I18n.t('topics_in_category', category: @category.name)} #{@category.description}"
+    @atom_link = "#{Discourse.base_url}/category/#{@category.slug}.rss"
+    @topic_list = TopicQuery.new.list_new_in_category(@category)
+    render 'list', formats: [:rss]
   end
 
   def popular_redirect
diff --git a/app/controllers/topics_controller.rb b/app/controllers/topics_controller.rb
index 9c1068ff8..3a682334e 100644
--- a/app/controllers/topics_controller.rb
+++ b/app/controllers/topics_controller.rb
@@ -40,22 +40,22 @@ class TopicsController < ApplicationController
       return redirect_to(topic.relative_url)
     end
 
-    anonymous_etag(@topic_view.topic) do
-      redirect_to_correct_topic && return if slugs_do_not_match
+    discourse_expires_in 1.minute
 
-      # render workaround pseudo-static HTML page for old crawlers which ignores <noscript>
-      # (see http://meta.discourse.org/t/noscript-tag-and-some-search-engines/8078)
-      return render 'topics/plain', layout: false if (SiteSetting.enable_escaped_fragments && params.has_key?('_escaped_fragment_'))
+    redirect_to_correct_topic && return if slugs_do_not_match
 
-      track_visit_to_topic
+    # render workaround pseudo-static HTML page for old crawlers which ignores <noscript>
+    # (see http://meta.discourse.org/t/noscript-tag-and-some-search-engines/8078)
+    return render 'topics/plain', layout: false if (SiteSetting.enable_escaped_fragments && params.has_key?('_escaped_fragment_'))
 
-      if should_track_visit_to_topic?
-        @topic_view.draft = Draft.get(current_user, @topic_view.draft_key, @topic_view.draft_sequence)
-      end
+    track_visit_to_topic
 
-      perform_show_response
+    if should_track_visit_to_topic?
+      @topic_view.draft = Draft.get(current_user, @topic_view.draft_key, @topic_view.draft_sequence)
     end
 
+    perform_show_response
+
     canonical_url @topic_view.canonical_path
   end
 
@@ -75,10 +75,10 @@ class TopicsController < ApplicationController
           only_moderator_liked: params[:only_moderator_liked].to_s == "true"
     )
 
-    anonymous_etag(@topic_view.topic) do
-      wordpress_serializer = TopicViewWordpressSerializer.new(@topic_view, scope: guardian, root: false)
-      render_json_dump(wordpress_serializer)
-    end
+    discourse_expires_in 1.minute
+
+    wordpress_serializer = TopicViewWordpressSerializer.new(@topic_view, scope: guardian, root: false)
+    render_json_dump(wordpress_serializer)
   end
 
 
@@ -272,9 +272,8 @@ class TopicsController < ApplicationController
 
   def feed
     @topic_view = TopicView.new(params[:topic_id])
-    anonymous_etag(@topic_view.topic) do
-      render 'topics/show', formats: [:rss]
-    end
+    discourse_expires_in 1.minute
+    render 'topics/show', formats: [:rss]
   end
 
   private
diff --git a/config/environments/development.rb b/config/environments/development.rb
index 11458f74a..485d13ef6 100644
--- a/config/environments/development.rb
+++ b/config/environments/development.rb
@@ -44,5 +44,7 @@ Discourse::Application.configure do
 
   require 'middleware/turbo_dev'
   config.middleware.insert 0, Middleware::TurboDev
+
+  config.enable_anon_caching = false
 end
 
diff --git a/config/environments/production.rb.sample b/config/environments/production.rb.sample
index ef934d5e7..3b19c592f 100644
--- a/config/environments/production.rb.sample
+++ b/config/environments/production.rb.sample
@@ -68,9 +68,6 @@ Discourse::Application.configure do
   # this will cause all handlebars templates to be pre-compiles, making your page faster
   config.handlebars.precompile = true
 
-  # this setting enables rack_cache so it caches various requests in redis
-  config.enable_rack_cache = true
-
   # allows admins to use mini profiler
   config.enable_mini_profiler = true
 
diff --git a/config/environments/profile.rb b/config/environments/profile.rb
index 6047a697a..33fed04e8 100644
--- a/config/environments/profile.rb
+++ b/config/environments/profile.rb
@@ -40,9 +40,6 @@ Discourse::Application.configure do
   # precompile handlebar assets
   config.handlebars.precompile = true
 
-  # this setting enable rack_cache so it caches various requests in redis
-  config.enable_rack_cache = false
-
   # allows users to use mini profiler
   config.enable_mini_profiler = false
 
diff --git a/config/initializers/99-rack-cache.rb b/config/initializers/99-rack-cache.rb
index 822d1e328..17719fcc5 100644
--- a/config/initializers/99-rack-cache.rb
+++ b/config/initializers/99-rack-cache.rb
@@ -1,29 +1,12 @@
-if Rails.configuration.respond_to?(:enable_rack_cache) && Rails.configuration.enable_rack_cache
-  require 'rack-cache'
-  require 'redis-rack-cache'
+require_dependency "middleware/anonymous_cache"
 
-  # by default we will cache up to 3 minutes in redis, if you want to cut down on redis usage
-  #  cut down this number
-  RedisRackCache.max_cache_seconds = 60 * 3
+enabled = if Rails.configuration.respond_to?(:enable_anon_caching)
+            Rails.configuration.enable_anon_caching
+          else
+            Rails.env.production?
+          end
 
-  url = DiscourseRedis.url
-
-  class Rack::Cache::Discourse < Rack::Cache::Context
-    def initialize(app, options={})
-      @app = app
-      super
-    end
-    def call(env)
-      if CurrentUser.has_auth_cookie?(env)
-        @app.call(env)
-      else
-        super
-      end
-    end
-  end
-
-  Rails.configuration.middleware.insert 0, Rack::Cache::Discourse,
-    metastore: "#{url}/metastore",
-    entitystore: "#{url}/entitystore",
-    verbose: !Rails.env.production?
+if enabled
+  Rails.configuration.middleware.insert 0, Middleware::AnonymousCache
 end
+
diff --git a/lib/middleware/anonymous_cache.rb b/lib/middleware/anonymous_cache.rb
new file mode 100644
index 000000000..bb1d64124
--- /dev/null
+++ b/lib/middleware/anonymous_cache.rb
@@ -0,0 +1,92 @@
+module Middleware
+  class AnonymousCache
+
+    def self.anon_cache(env, duration)
+      env["ANON_CACHE_DURATION"] = duration
+    end
+
+    class Helper
+      def initialize(env)
+        @env = env
+      end
+
+      def cache_key
+        @cache_key ||= "ANON_CACHE_#{@env["HTTP_HOST"]}#{@env["REQUEST_URI"]}"
+      end
+
+      def cache_key_body
+        @cache_key_body ||= "#{cache_key}_body"
+      end
+
+      def cache_key_other
+        @cache_key_other || "#{cache_key}_other"
+      end
+
+      def get?
+        @env["REQUEST_METHOD"] == "GET"
+      end
+
+      def cacheable?
+        !!(!CurrentUser.has_auth_cookie?(@env) && get?)
+      end
+
+      def cached
+        if body = $redis.get(cache_key_body)
+          if other = $redis.get(cache_key_other)
+            other = JSON.parse(other)
+            [other[0], other[1], [body]]
+          end
+        end
+      end
+
+      def cache_duration
+        @env["ANON_CACHE_DURATION"]
+      end
+
+      # NOTE in an ideal world cache still serves out cached content except for one magic worker
+      #  that fills it up, this avoids a herd killing you, we can probably do this using a job or redis tricks
+      #  but coordinating this is tricky
+      def cache(result)
+        status,headers,response = result
+
+        if status == 200 && cache_duration
+          headers_stripped = headers.dup.delete_if{|k,v| ["Set-Cookie","X-MiniProfiler-Ids"].include? k}
+          parts = []
+          response.each do |part|
+            parts << part
+          end
+
+          $redis.setex(cache_key_body,  cache_duration, parts.join)
+          $redis.setex(cache_key_other, cache_duration, [status,headers_stripped].to_json)
+        else
+          parts = response
+        end
+
+        [status,headers,parts]
+      end
+
+      def clear_cache
+        $redis.del(cache_key_body)
+        $redis.del(cache_key_other)
+      end
+
+    end
+
+    def initialize(app, settings={})
+      @app = app
+    end
+
+    def call(env)
+      helper = Helper.new(env)
+
+      if helper.cacheable?
+        helper.cached or helper.cache(@app.call(env))
+      else
+        @app.call(env)
+      end
+
+    end
+
+  end
+
+end
diff --git a/spec/components/middleware/anonymous_cache_spec.rb b/spec/components/middleware/anonymous_cache_spec.rb
new file mode 100644
index 000000000..10bc1c128
--- /dev/null
+++ b/spec/components/middleware/anonymous_cache_spec.rb
@@ -0,0 +1,41 @@
+require "spec_helper"
+require_dependency "middleware/anonymous_cache"
+
+describe Middleware::AnonymousCache::Helper do
+
+  def new_helper(env={})
+    Middleware::AnonymousCache::Helper.new({
+      "HTTP_HOST" => "http://test.com",
+      "REQUEST_URI" => "/path?bla=1",
+      "REQUEST_METHOD" => "GET"
+    }.merge(env))
+  end
+
+  context "cachable?" do
+    it "true by default" do
+      new_helper.cacheable?.should be_true
+    end
+
+    it "is false for non GET" do
+      new_helper("ANON_CACHE_DURATION" => 10, "REQUEST_METHOD" => "POST").cacheable?.should be_false
+    end
+  end
+
+  context "cached" do
+    let!(:helper) do
+      new_helper("ANON_CACHE_DURATION" => 10)
+    end
+
+    after do
+      helper.clear_cache
+    end
+
+    it "returns cached data for cached requests" do
+      helper.cached.should be_nil
+      helper.cache([200, {"HELLO" => "WORLD"}, ["hello ", "world"]])
+      helper.cached.should == [200, {"HELLO" => "WORLD"}, ["hello world"]]
+    end
+  end
+
+end
+