# discourse/lib/oneboxer.rb
require 'open-uri'
require 'digest/sha1'
# Project-local dependencies, loaded through require_dependency.
require_dependency 'oneboxer/base'
require_dependency 'oneboxer/whitelist'
# Load every lib/oneboxer/*_onebox.rb file so the onebox classes are defined
# before the Oneboxer module registers them. The list is sorted because the
# order Dir[] returns is not guaranteed on older Rubies.
Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].sort.each do |f|
  # Keep only the last two path segments, e.g. "oneboxer/foo_onebox.rb".
  require_dependency(f.split('/')[-2..-1].join('/'))
end
module Oneboxer
extend Oneboxer::Base
# Hand each onebox class to add_onebox (provided by Oneboxer::Base, which is
# not shown in this file). The class name is derived from the file name:
# ".../foo_onebox.rb" becomes "Oneboxer::FooOnebox". Files are visited in
# sorted order so the calls happen in a stable sequence.
Dir["#{Rails.root}/lib/oneboxer/*_onebox.rb"].sort.each do |f|
  add_onebox "Oneboxer::#{File.basename(f, '.rb').classify}".constantize
end
# Lifetime given to the cache entries written by fetch_and_cache.
def self.default_expiry
  1.day
end
# Return a oneboxer for a given URL.
#
# Walks the registered matchers in order and instantiates the class of the
# first one whose pattern matches the url. A matcher's regexp may be a
# callable (for patterns that must be computed lazily); it is invoked to
# obtain the actual pattern.
#
# Returns a new instance of the matching class, or nil when nothing matches.
def self.onebox_for_url(url)
  matchers.each do |matcher|
    pattern = matcher.regexp
    # Duck-typed so lambdas, procs and any other callable are all accepted.
    pattern = pattern.call if pattern.respond_to?(:call)
    return matcher.klass.new(url) if url =~ pattern
  end

  nil
end
# Retrieve the onebox for a url without caching.
#
# Resolution order:
#   1. a dedicated onebox class whose matcher accepts the url;
#   2. for whitelisted urls whose entry allows oEmbed, the first oEmbed
#      <link> advertised by the fetched page;
#   3. for whitelisted urls, the page's OpenGraph data.
#
# Returns whatever the selected onebox's #onebox returns, or nil when no
# strategy applies or the page fetch raises OpenURI::HTTPError.
def self.onebox_nocache(url)
  oneboxer = onebox_for_url(url)
  return oneboxer.onebox if oneboxer.present?

  whitelist_entry = Whitelist.entry_for_url(url)
  return nil unless whitelist_entry.present?

  # Block form so the handle returned by open-uri is closed as soon as the
  # body has been read instead of lingering until garbage collection.
  # NOTE(review): Kernel#open runs strings starting with "|" as commands and
  # no longer accepts URLs on Ruby 3.0+; presumably the whitelist check above
  # restricts what reaches this line -- confirm.
  page_html = open(url) { |io| io.read }
  return nil unless page_html.present?

  doc = Nokogiri::HTML(page_html)

  if whitelist_entry.allows_oembed?
    # See if it has an oembed link we can use; application/json is preferred
    # and text/json is only consulted when the former is absent.
    oembed = doc.search("link[@type='application/json+oembed']").first ||
             doc.search("link[@type='text/json+oembed']").first
    return OembedOnebox.new(oembed[:href]).onebox if oembed
  end

  # Check for opengraph
  open_graph = Oneboxer.parse_open_graph(doc)
  return OpenGraphOnebox.new(url, open_graph).onebox if open_graph.present?

  nil
rescue OpenURI::HTTPError
  nil
end
# Parse URLs out of HTML, returning the document when finished.
#
# string_or_doc - an HTML String (parsed with Nokogiri::HTML) or an object
#                 that already responds to #search.
#
# Yields the href and the link element for every "a.onebox" match whose
# href is present.
def self.each_onebox_link(string_or_doc)
  doc = string_or_doc
  doc = Nokogiri::HTML(doc) if doc.is_a?(String)

  # Iterating an empty result set is a no-op, so no emptiness guard is needed.
  doc.search("a.onebox").each do |link|
    href = link['href']
    yield href, link if href.present?
  end

  doc
end
# Cache key for the full onebox of a url. The url is hashed with SHA1 so the
# key has a fixed length regardless of the url.
def self.cache_key_for(url)
  "onebox:#{Digest::SHA1.hexdigest(url)}"
end
# Cache key for the preview variant of a url's onebox; namespaced separately
# from the full onebox key.
def self.preview_cache_key_for(url)
  digest = Digest::SHA1.hexdigest(url)
  "onebox:preview:" + digest
end
# Read the full onebox for a url from Rails.cache.
# Returns whatever the cache holds under the url's key (nil on a miss).
def self.render_from_cache(url)
  Rails.cache.read(cache_key_for(url))
end
# Cache results from a onebox call.
#
# url  - the url to build a onebox for.
# args - accepted for parity with preview/onebox; not used here. Defaults to
#        an empty hash like those methods.
#
# Looks the url up with onebox_nocache, stores the contents (and the preview,
# when there is one) in Rails.cache for default_expiry, and returns
# [contents, preview]. Returns nil without writing anything when the lookup
# yields no contents.
def self.fetch_and_cache(url, args={})
  # onebox_nocache may return a single value; destructuring then leaves
  # preview as nil.
  contents, preview = onebox_nocache(url)
  return nil if contents.blank?

  Rails.cache.write(cache_key_for(url), contents, expires_in: default_expiry)
  Rails.cache.write(preview_cache_key_for(url), preview, expires_in: default_expiry) if preview.present?

  [contents, preview]
end
# Drop the cached onebox for a url.
#
# The preview entry is removed as well: preview consults it before anything
# else, so leaving it behind would keep serving a stale preview after the
# full onebox has been invalidated.
#
# Returns the result of deleting the full-onebox key.
def self.invalidate(url)
  Rails.cache.delete(preview_cache_key_for(url))
  Rails.cache.delete(cache_key_for(url))
end
# Return the best available preview for a url.
#
# Lookup order: the cached preview (skipped when args[:no_cache] is present),
# then the cached full onebox, then a fresh fetch. After a fresh fetch the
# preview is returned when there is one, otherwise the full contents.
def self.preview(url, args={})
  unless args[:no_cache].present?
    preview_hit = Rails.cache.read(preview_cache_key_for(url))
    return preview_hit if preview_hit.present?
  end

  full_hit = render_from_cache(url)
  return full_hit if full_hit.present?

  contents, fresh_preview = fetch_and_cache(url, args)
  fresh_preview.present? ? fresh_preview : contents
end
# Return the cooked content for a url, caching the result for performance.
#
# args[:invalidate_oneboxes] - when truthy, the cached entry is dropped and
#                              the onebox is fetched again.
#
# Always returns the contents alone (or nil), whether they came from the
# cache or from a fresh fetch; fetch_and_cache's [contents, preview] pair is
# unpacked so callers get the same shape on a hit and on a miss.
def self.onebox(url, args={})
  if args[:invalidate_oneboxes]
    # Remove the onebox from the cache
    invalidate(url)
  else
    contents = render_from_cache(url)
    return contents if contents.present?
  end

  contents, _preview = fetch_and_cache(url, args)
  contents
end
end