2013-02-05 14:16:51 -05:00
require 'uri'
require_dependency 'slug'
class TopicLink < ActiveRecord :: Base
2015-09-25 14:07:04 -04:00
# Longest host name, in characters, that is stored for a link.
# Matches the 100-character `domain` column of topic_links.
def self.max_domain_length
  100
end
# Longest URL, in characters, that is stored for a link.
# Matches the 500-character `url` column of topic_links.
def self.max_url_length
  500
end
2014-06-26 11:38:23 +10:00
2013-02-05 14:16:51 -05:00
# Where the link was posted (topic / post / user) and, when it resolves to
# one, the topic and post the link points at.
belongs_to :topic
belongs_to :user
belongs_to :post
belongs_to :link_topic, class_name: 'Topic'
belongs_to :link_post, class_name: 'Post'

validates_presence_of :url
# Use the shared limit so the validation cannot drift away from the
# truncation that extract_from applies before saving.
validates_length_of :url, maximum: max_url_length
validates_uniqueness_of :url, scope: [:topic_id, :post_id]

has_many :topic_link_clicks, dependent: :destroy

validate :link_to_self

after_commit :crawl_link_title
2013-02-05 14:16:51 -05:00
# Make sure a topic can't link to itself.
#
# Validation hook: adds a base error when the topic that owns the link and the
# topic the link points at are the same record.
def link_to_self
  errors.add(:base, "can't link to the same topic") if topic_id == link_topic_id
end
2013-11-15 12:15:46 -05:00
# Lists the links posted in a topic, aggregated per URL.
#
# guardian - object whose secure_category_ids limits the visible categories
# topic_id - id of the topic whose links are listed
#
# Rows whose linked topic is deleted or is a private message are excluded.
# Returns the result of SqlBuilder#exec converted to an Array, ordered by
# total clicks (highest first).
def self.topic_map(guardian, topic_id)
  # Sam: complicated reports are really hard in AR
  #
  # The /*where*/ marker must be spelled exactly like this: it is presumably
  # the slot SqlBuilder fills with the conditions registered below.
  builder = SqlBuilder.new("SELECT ftl.url,
                     COALESCE(ft.title, ftl.title) AS title,
                     ftl.link_topic_id,
                     ftl.reflection,
                     ftl.internal,
                     ftl.domain,
                     MIN(ftl.user_id) AS user_id,
                     SUM(clicks) AS clicks
              FROM topic_links AS ftl
              LEFT JOIN topics AS ft ON ftl.link_topic_id = ft.id
              LEFT JOIN categories AS c ON c.id = ft.category_id
              /*where*/
              GROUP BY ftl.url, ft.title, ftl.title, ftl.link_topic_id, ftl.reflection, ftl.internal, ftl.domain
              ORDER BY clicks DESC")

  builder.where('ftl.topic_id = :topic_id', topic_id: topic_id)
  builder.where('ft.deleted_at IS NULL')
  # External links have no joined topic, so a missing archetype counts as 'regular'.
  builder.where("COALESCE(ft.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)

  builder.secure_category(guardian.secure_category_ids)

  builder.exec.to_a
end
# Collects the links belonging to a set of posts, grouped by post id.
#
# guardian - object whose secure_category_ids limits the visible categories
# topic    - not used by this method; kept so existing callers keep working
# posts    - collection of objects responding to #id
#
# Returns a Hash of post_id => Array of
# { url:, clicks:, title:, internal:, reflection: } hashes, or an empty Hash
# when posts is blank. Links whose target topic is deleted or is a private
# message are left out.
def self.counts_for(guardian, topic, posts)
  return {} if posts.blank?

  # Sam: I don't know how to write this cleanly in AR,
  #   in particular the securing logic is tricky and would fallback to SQL anyway
  #
  # The /*where*/ marker must be spelled exactly like this: it is presumably
  # the slot SqlBuilder fills with the conditions registered below.
  builder = SqlBuilder.new("SELECT
                    l.post_id,
                    l.url,
                    l.clicks,
                    COALESCE(t.title, l.title) AS title,
                    l.internal,
                    l.reflection,
                    l.domain
            FROM topic_links l
            LEFT JOIN topics t ON t.id = l.link_topic_id
            LEFT JOIN categories AS c ON c.id = t.category_id
            /*where*/
            ORDER BY reflection ASC, clicks DESC")

  builder.where('t.deleted_at IS NULL')
  # External links have no joined topic, so a missing archetype counts as 'regular'.
  builder.where("COALESCE(t.archetype, 'regular') <> :archetype", archetype: Archetype.private_message)

  # not certain if pluck is right, cause it may interfere with caching
  builder.where('l.post_id IN (:post_ids)', post_ids: posts.map(&:id))
  builder.secure_category(guardian.secure_category_ids)

  # NOTE(review): l.domain is selected above but not copied into the result.
  builder.map_exec(OpenStruct).each_with_object({}) do |l, result|
    result[l.post_id] ||= []
    result[l.post_id] << { url: l.url,
                           clicks: l.clicks,
                           title: l.title,
                           internal: l.internal,
                           reflection: l.reflection }
  end
end
2013-02-05 14:16:51 -05:00
# Extract any urls in body
#
# Brings the topic_links rows for +post+ in line with the links found in its
# cooked HTML, inside a single transaction:
#
# * every unique, parseable, non-mailto link gets a TopicLink row if it does
#   not have one yet;
# * a link that resolves to another topic on this site also gets a
#   "reflection" row on the linked topic, unless either topic is a private
#   message;
# * rows (and reflections) for links that are no longer in the post are
#   deleted.
#
# Returns nil when post is not present; otherwise the value of the
# transaction block.
def self.extract_from(post)
  return unless post.present?

  added_urls = []
  reflected_ids = []

  TopicLink.transaction do
    PrettyText
      .extract_links(post.cooked)
      # Best effort: a link that cannot be parsed becomes nil and is dropped below.
      .map { |link| [link, URI.parse(link.url)] rescue nil }
      .reject { |_, uri| uri.nil? || "mailto".freeze == uri.scheme }
      .uniq { |_, uri| uri }
      .each do |link, parsed|
        begin
          url = link.url
          internal = false
          topic_id = nil
          post_number = nil
          parsed_path = parsed.path || ""

          if Discourse.store.has_been_uploaded?(url)
            internal = Discourse.store.internal?
          elsif (parsed.host == Discourse.current_hostname && parsed_path.start_with?(Discourse.base_uri)) || !parsed.host
            internal = true

            # Strip base_uri only when it really is a prefix. Host-less links
            # reach this branch without the start_with? check, and removing
            # the first occurrence anywhere would mangle their path.
            base_uri = Discourse.base_uri
            parsed_path = parsed_path[base_uri.length..-1] if parsed_path.start_with?(base_uri)

            route = Rails.application.routes.recognize_path(parsed_path)

            # We aren't interested in tracking internal links to users
            next if route[:controller] == 'users'

            # nil.to_i is 0, which matches no topic and is reset to nil below
            topic_id = route[:topic_id].to_i
            post_number = route[:post_number] || 1

            # Store the canonical URL
            topic = Topic.find_by(id: topic_id)
            topic_id = nil unless topic

            if topic.present?
              url = "#{Discourse.base_url_no_prefix}#{topic.relative_url}"
              url << "/#{post_number}" if post_number.to_i > 1
            end
          end

          # Skip linking to ourselves
          next if topic_id == post.topic_id

          reflected_post = nil
          if post_number && topic_id
            reflected_post = Post.find_by(topic_id: topic_id, post_number: post_number.to_i)
          end

          # Keep within the column limits: truncate long URLs, skip over-long hosts
          url = url[0...TopicLink.max_url_length]
          next if parsed && parsed.host && parsed.host.length > TopicLink.max_domain_length

          added_urls << url

          topic_link = TopicLink.find_by(topic_id: post.topic_id,
                                         post_id: post.id,
                                         url: url)

          unless topic_link
            TopicLink.create!(post_id: post.id,
                              user_id: post.user_id,
                              topic_id: post.topic_id,
                              url: url,
                              domain: parsed.host || Discourse.current_hostname,
                              internal: internal,
                              link_topic_id: topic_id,
                              link_post_id: reflected_post.try(:id),
                              quote: link.is_quote)
          end

          # Create the reflection if we can
          if topic_id.present?
            topic = Topic.find_by(id: topic_id)

            if topic && post.topic && post.topic.archetype != 'private_message' && topic.archetype != 'private_message'
              prefix = Discourse.base_url_no_prefix
              reflected_url = "#{prefix}#{post.topic.relative_url(post.post_number)}"

              tl = TopicLink.find_by(topic_id: topic_id,
                                     post_id: reflected_post.try(:id),
                                     url: reflected_url)

              unless tl
                tl = TopicLink.create!(user_id: post.user_id,
                                       topic_id: topic_id,
                                       post_id: reflected_post.try(:id),
                                       url: reflected_url,
                                       domain: Discourse.current_hostname,
                                       reflection: true,
                                       internal: true,
                                       link_topic_id: post.topic_id,
                                       link_post_id: post.id)
              end

              reflected_ids << tl.try(:id)
            end
          end
        rescue URI::InvalidURIError
          # if the URI is invalid, don't store it.
        rescue ActionController::RoutingError
          # If we can't find the route, no big deal
        end
      end

    # Remove links that aren't there anymore
    if added_urls.present?
      TopicLink.delete_all(["(url not in (:urls)) AND (post_id = :post_id AND NOT reflection)",
                            urls: added_urls, post_id: post.id])

      reflected_ids.compact!
      if reflected_ids.present?
        TopicLink.delete_all(["(id not in (:reflected_ids)) AND (link_post_id = :post_id AND reflection)",
                              reflected_ids: reflected_ids, post_id: post.id])
      else
        TopicLink.delete_all(["link_post_id = :post_id AND reflection", post_id: post.id])
      end
    else
      TopicLink.delete_all(["(post_id = :post_id AND NOT reflection) OR (link_post_id = :post_id AND reflection)",
                            post_id: post.id])
    end
  end
end
2014-04-05 14:47:25 -04:00
# Crawl a link's title after it's saved.
#
# Registered as an after_commit callback: enqueues the :crawl_topic_link job
# with this record's id.
def crawl_link_title
  Jobs.enqueue(:crawl_topic_link, topic_link_id: id)
end
2016-06-06 16:58:35 -04:00
# Builds a lookup of links already posted in a topic.
#
# topic - object responding to #id
#
# Considers the last 200 non-reflection links of the topic that still have
# both a post and a user. Returns a Hash keyed by the normalized URL
# (lower-cased, without a leading http:// or https:// and without a trailing
# slash) whose values are { domain:, username:, posted_at:, post_number: }.
# When several links normalize to the same key, the later one wins.
def self.duplicate_lookup(topic)
  results = TopicLink
    .includes(:post, :user)
    .joins(:post, :user)
    .where("posts.id IS NOT NULL AND users.id IS NOT NULL")
    .where(topic_id: topic.id, reflection: false)
    .last(200)

  results.each_with_object({}) do |tl, lookup|
    # Anchor on the whole string so only a leading scheme / final slash is removed.
    normalized = tl.url.downcase.sub(%r{\Ahttps?://}, '').sub(%r{/\z}, '')

    lookup[normalized] = { domain: tl.domain,
                           username: tl.user.username_lower,
                           posted_at: tl.post.created_at,
                           post_number: tl.post.post_number }
  end
end
2013-02-05 14:16:51 -05:00
end
2013-05-24 12:48:32 +10:00
# == Schema Information
#
# Table name: topic_links
#
# id :integer not null, primary key
# topic_id :integer not null
# post_id :integer
# user_id :integer not null
# url :string(500) not null
# domain :string(100) not null
# internal :boolean default(FALSE), not null
# link_topic_id :integer
2014-08-27 15:19:25 +10:00
# created_at :datetime not null
# updated_at :datetime not null
2013-05-24 12:48:32 +10:00
# reflection :boolean default(FALSE)
# clicks :integer default(0), not null
# link_post_id :integer
2016-02-23 10:33:53 +11:00
# title :string
2014-04-08 17:35:44 +02:00
# crawled_at :datetime
2014-07-15 11:29:44 +10:00
# quote :boolean default(FALSE), not null
2013-05-24 12:48:32 +10:00
#
# Indexes
#
2015-09-18 10:41:10 +10:00
# index_topic_links_on_link_post_id_and_reflection (link_post_id,reflection)
# index_topic_links_on_post_id (post_id)
# index_topic_links_on_topic_id (topic_id)
# unique_post_links (topic_id,post_id,url) UNIQUE
2013-05-24 12:48:32 +10:00
#