PERF: reduce storage requirements for incoming links

Only store incoming links for topics.
This commit is contained in:
Sam 2014-08-04 11:06:06 +10:00
parent b36273e4ac
commit 0920c4bea6
8 changed files with 129 additions and 120 deletions

View file

@ -36,7 +36,6 @@ class ApplicationController < ActionController::Base
before_filter :disable_customization
before_filter :block_if_readonly_mode
before_filter :authorize_mini_profiler
before_filter :store_incoming_links
before_filter :preload_json
before_filter :check_xhr
before_filter :redirect_to_login_if_required
@ -313,10 +312,6 @@ class ApplicationController < ActionController::Base
Rack::MiniProfiler.authorize_request
end
def store_incoming_links
IncomingLink.add(request, current_user) unless request.xhr?
end
def check_xhr
# bypass xhr check on PUT / POST / DELETE provided api key is there, otherwise calling api is annoying
return if !request.get? && api_key_valid?

View file

@ -31,6 +31,9 @@ class TopicsController < ApplicationController
skip_before_filter :check_xhr, only: [:show, :feed]
def show
flash["referer"] ||= request.referer
# We'd like to migrate the wordpress feed to another url. This keeps up backwards compatibility with
# existing installs.
return wordpress if params[:best].present?
@ -380,6 +383,18 @@ class TopicsController < ApplicationController
user_id = (current_user.id if current_user)
track_visit = should_track_visit_to_topic?
Scheduler::Defer.later "Track Link" do
IncomingLink.add(
referer: request.referer || flash[:referer],
host: request.host,
current_user: current_user,
topic_id: @topic_view.topic.id,
post_number: params[:post_number],
username: request['u'],
ip_address: request.remote_ip
)
end unless request.xhr?
Scheduler::Defer.later "Track Visit" do
View.create_for_parent(Topic, topic_id, ip, user_id)
if track_visit

View file

@ -2,38 +2,50 @@ class IncomingLink < ActiveRecord::Base
belongs_to :topic
belongs_to :user
validates :url, presence: true
validate :referer_valid
validate :post_id, presence: true
before_validation :extract_domain
before_validation :extract_topic_and_post
after_create :update_link_counts
def self.add(request,current_user=nil)
user_id, host, referer = nil
attr_accessor :url
if request['u']
u = User.select(:id).find_by(username_lower: request["u"].downcase)
def self.add(opts)
user_id, host, referer = nil
current_user = opts[:current_user]
if username = opts[:username]
u = User.select(:id).find_by(username_lower: username.downcase)
user_id = u.id if u
end
if request.referer.present?
if opts[:referer].present?
begin
host = URI.parse(request.referer).host
referer = request.referer[0..999]
host = URI.parse(opts[:referer]).host
referer = opts[:referer][0..999]
rescue URI::InvalidURIError
# bad uri, skip
end
end
if host != request.host && (user_id || referer)
if host != opts[:host] && (user_id || referer)
post_id = opts[:post_id]
post_id ||= Post.where(topic_id: opts[:topic_id],
post_number: opts[:post_number] || 1)
.pluck(:id).first
cid = current_user ? (current_user.id) : (nil)
unless cid && cid == user_id
IncomingLink.create(url: request.url,
referer: referer,
IncomingLink.create(referer: referer,
user_id: user_id,
post_id: post_id,
current_user_id: cid,
ip_address: request.remote_ip)
ip_address: opts[:ip_address]) if post_id
end
end
@ -49,35 +61,14 @@ class IncomingLink < ActiveRecord::Base
end
end
# Internal: If link is internal and points to topic/post, extract their IDs.
def extract_topic_and_post
if url.present?
parsed = URI.parse(url)
begin
# TODO achieve same thing with no exception
params = Rails.application.routes.recognize_path(parsed.path)
if self.topic_id = params[:topic_id]
self.post_number = params[:post_number] || 1
end
rescue ActionController::RoutingError
# If we can't route to the url, that's OK. Don't save those two fields.
end
end
end
# Internal: Update appropriate link counts.
def update_link_counts
if topic_id.present?
exec_sql("UPDATE topics
SET incoming_link_count = incoming_link_count + 1
WHERE id = ?", topic_id)
if post_number.present?
exec_sql("UPDATE posts
SET incoming_link_count = incoming_link_count + 1
WHERE topic_id = ? and post_number = ?", topic_id, post_number)
end
end
exec_sql("UPDATE topics
SET incoming_link_count = incoming_link_count + 1
WHERE id = (SELECT topic_id FROM posts where id = ?)", post_id)
exec_sql("UPDATE posts
SET incoming_link_count = incoming_link_count + 1
WHERE id = ?", post_id)
end
protected

View file

@ -0,0 +1,21 @@
class FixIncomingLinks < ActiveRecord::Migration
def up
execute "DROP INDEX incoming_index"
add_column :incoming_links, :post_id, :integer
remove_column :incoming_links, :updated_at
remove_column :incoming_links, :url
execute "UPDATE incoming_links l SET post_id = (
SELECT p.id FROM posts p WHERE p.topic_id = l.topic_id AND p.post_number = l.post_number
)"
execute "DELETE FROM incoming_links WHERE post_id IS NULL"
change_column :incoming_links, :post_id, :integer, null: false
add_index :incoming_links, :post_id
end
def down
raise ActiveRecord::IrreversibleMigration
end
end

View file

@ -24,13 +24,6 @@ describe TopicsController do
lambda { get :show, {id: topic.id} }.should_not raise_error
end
it "stores an incoming link when there is an off-site referer" do
lambda {
set_referer("http://google.com/search")
get :show, {id: topic.id}
}.should change(IncomingLink, :count).by(1)
end
describe "has_escaped_fragment?" do
render_views
@ -92,19 +85,6 @@ describe TopicsController do
end
describe 'after inserting an incoming link' do
it 'sets last link correctly' do
set_referer("http://google.com/search")
get :show, {topic_id: topic.id}
last_link = IncomingLink.last
last_link.topic_id.should == topic.id
last_link.post_number.should == 1
end
end
describe 'set_locale' do
it 'sets the one the user prefers' do
SiteSetting.stubs(:allow_user_locale).returns(true)

View file

@ -590,6 +590,24 @@ describe TopicsController do
lambda { xhr :get, :show, topic_id: topic.id, slug: topic.slug }.should change(View, :count).by(1)
end
it 'records incoming links' do
user = Fabricate(:user)
get :show, topic_id: topic.id, slug: topic.slug, u: user.username
IncomingLink.count.should == 1
end
it 'records redirects' do
@request.env['HTTP_REFERER'] = 'http://twitter.com'
get :show, { id: topic.id }
@request.env['HTTP_REFERER'] = nil
get :show, topic_id: topic.id, slug: topic.slug
link = IncomingLink.first
link.referer.should == 'http://twitter.com'
end
it 'tracks a visit for all html requests' do
current_user = log_in(:coding_horror)
TopicUser.expects(:track_visit!).with(topic.id, current_user.id)

View file

@ -1,9 +0,0 @@
Fabricator(:incoming_link) do
url 'http://localhost:3000/t/pinball/76/6'
referer 'https://twitter.com/evil_trout'
end
Fabricator(:incoming_link_not_topic, from: :incoming_link) do
url 'http://localhost:3000/made-up-url'
referer 'https://twitter.com/evil_trout'
end

View file

@ -3,95 +3,93 @@ require 'spec_helper'
describe IncomingLink do
it { should belong_to :topic }
it { should validate_presence_of :url }
let(:post) { Fabricate(:post) }
let(:topic) { post.topic }
let :incoming_link do
IncomingLink.create(url: "/t/slug/#{topic.id}/#{post.post_number}",
referer: "http://twitter.com")
IncomingLink.add(host: "a.com", referer: "http://twitter.com", post_id: post.id, ip_address: '1.1.1.1')
end
describe 'local topic link' do
it 'should validate properly' do
Fabricate.build(:incoming_link).should be_valid
end
describe 'tracking link counts' do
it "increases the incoming link counts" do
incoming_link
lambda { post.reload }.should change(post, :incoming_link_count).by(1)
lambda { topic.reload }.should change(topic, :incoming_link_count).by(1)
link = incoming_link
link.domain.should == "twitter.com"
link.post_id.should == post.id
post.reload
post.incoming_link_count.should == 1
topic.reload
topic.incoming_link_count.should == 1
end
end
describe 'saving local link' do
it 'has correct info set' do
incoming_link.domain.should == "twitter.com"
incoming_link.topic_id.should == topic.id
incoming_link.post_number.should == post.post_number
end
end
end
describe 'add' do
class TestRequest<Rack::Request
attr_accessor :remote_ip
def req(opts)
{
referer: opts[:referer],
host: opts[:host] || 'test.com',
current_user: opts[:current_user],
topic_id: opts[:topic_id],
post_number: opts[:post_number],
post_id: opts[:post_id],
username: opts[:username],
ip_address: opts[:ip_address]
}
end
def req(url, referer=nil)
env = Rack::MockRequest.env_for(url)
env['HTTP_REFERER'] = referer if referer
TestRequest.new(env)
def add(opts)
IncomingLink.add(req(opts))
end
it "does not explode with bad referer" do
IncomingLink.add(req('http://sam.com','file:///Applications/Install/75067ABC-C9D1-47B7-8ACE-76AEDE3911B2/Install/'))
add(
referer: 'file:///Applications/Install/75067ABC-C9D1-47B7-8ACE-76AEDE3911B2/Install/',
post_id: 1
)
end
it "does not explode with bad referer 2" do
IncomingLink.add(req('http://sam.com','http://disqus.com/embed/comments/?disqus_version=42750f96&base=default&f=sergeiklimov&t_i=871%20http%3A%2F%2Fsergeiklimov.biz%2F%3Fp%3D871&t_u=http%3A%2F%2Fsergeiklimov.biz%2F2014%2F02%2Fweb%2F&t_e=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%26%238212%3B%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&t_d=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%E2%80%94%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&t_t=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%26%238212%3B%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&s_o=default&l='))
add(
post_id: 1,
referer: 'http://disqus.com/embed/comments/?disqus_version=42750f96&base=default&f=sergeiklimov&t_i=871%20http%3A%2F%2Fsergeiklimov.biz%2F%3Fp%3D871&t_u=http%3A%2F%2Fsergeiklimov.biz%2F2014%2F02%2Fweb%2F&t_e=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%26%238212%3B%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&t_d=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%E2%80%94%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&t_t=%D0%91%D0%BB%D0%BE%D0%B3%20%2F%20%D1%84%D0%BE%D1%80%D1%83%D0%BC%20%2F%20%D1%81%D0%B0%D0%B9%D1%82%20%D0%B4%D0%BB%D1%8F%20Gremlins%2C%20Inc.%20%26%238212%3B%20%D0%B8%D1%89%D0%B5%D0%BC%20%D1%81%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D0%B8%D1%81%D1%82%D0%B0%20(UPD%3A%20%D0%9D%D0%90%D0%A8%D0%9B%D0%98!!)&s_o=default&l='
)
end
it "does nothing if referer is empty" do
IncomingLink.expects(:create).never
IncomingLink.add(req('http://somesite.com'))
add(post_id: 1)
IncomingLink.count.should == 0
end
it "does nothing if referer is same as host" do
IncomingLink.expects(:create).never
IncomingLink.add(req('http://somesite.com', 'http://somesite.com'))
add(post_id: 1, host: 'somesite.com', referer: 'http://somesite.com')
IncomingLink.count.should == 0
end
it "tracks visits for invalid referers" do
IncomingLink.add(req('http://somesite.com', 'bang bang bang'))
# no current user, don't track
it "tracks not visits for invalid referers" do
add(post_id: 1, referer: 'bang bang bang')
IncomingLink.count.should == 0
end
it "expects to be called with referer and user id" do
IncomingLink.expects(:create).once.returns(true)
IncomingLink.add(req('http://somesite.com', 'http://some.other.site.com'), build(:user))
add(host: "test.com", referer: 'http://some.other.site.com', post_id: 1)
IncomingLink.count.should == 1
end
it "is able to look up user_id and log it from the GET params" do
user = Fabricate(:user, username: "Bob")
IncomingLink.add(req('http://somesite.com?u=bob'))
add(host: 'test.com', username: "bob", post_id: 1)
first = IncomingLink.first
first.user_id.should == user.id
end
end
describe 'non-topic url' do
it 'has nothing set' do
link = Fabricate.build(:incoming_link_not_topic)
link.topic_id.should be_blank
link.user_id.should be_blank
end
end
end