2013-02-05 14:16:51 -05:00
require 'v8'
require 'nokogiri'
2013-05-28 09:48:47 +10:00
require_dependency 'excerpt_parser'
2013-02-05 14:16:51 -05:00
module PrettyText
# Sanitize configuration used when cooking posts (see cook): the permitted
# elements, the attributes allowed per element (:all applies to every
# element), and the URL protocols accepted for link-like attributes.
#
# Returns a freshly built Hash on every call.
def self.whitelist
  {
    elements: %w[
      a abbr aside b bdo blockquote br caption cite code col colgroup dd div del dfn dl
      dt em hr figcaption figure h1 h2 h3 h4 h5 h6 hgroup i img ins kbd li mark
      ol p pre q rp rt ruby s samp small span strike strong sub sup table tbody td
      tfoot th thead time tr u ul var wbr
    ],
    attributes: {
      :all         => %w[dir lang title class],
      'aside'      => %w[data-post data-full data-topic],
      'a'          => %w[href],
      'blockquote' => %w[cite],
      'col'        => %w[span width],
      'colgroup'   => %w[span width],
      'del'        => %w[cite datetime],
      'img'        => %w[align alt height src width],
      'ins'        => %w[cite datetime],
      'ol'         => %w[start reversed type],
      'q'          => %w[cite],
      'span'       => %w[style],
      'table'      => %w[summary width style cellpadding cellspacing],
      'td'         => %w[abbr axis colspan rowspan width style],
      'th'         => %w[abbr axis colspan rowspan scope width style],
      'time'       => %w[datetime pubdate],
      'ul'         => %w[type]
    },
    protocols: {
      'a'          => { 'href' => ['ftp', 'http', 'https', 'mailto', :relative] },
      'blockquote' => { 'cite' => ['http', 'https', :relative] },
      'del'        => { 'cite' => ['http', 'https', :relative] },
      'img'        => { 'src'  => ['http', 'https', :relative] },
      'ins'        => { 'cite' => ['http', 'https', :relative] },
      'q'          => { 'cite' => ['http', 'https', :relative] }
    }
  }
end
# Methods defined here are callable from JavaScript: an instance is
# registered in the V8 context under the name "helpers" (see v8), and the
# markdown options call helpers.avatar_template / helpers.is_username_valid.
# The method names are therefore part of the JS-facing interface.
class Helpers
  # Looks up the avatar template for a username (case-insensitive).
  #
  # Returns an empty string when no username is given, nil when no user
  # matches, otherwise the user's avatar_template.
  def avatar_template(username)
    return "" unless username

    user = User.where(username_lower: username.downcase).first
    user.avatar_template if user
  end

  # Returns true when exactly one user row matches the lower-cased
  # username, false otherwise (including when username is nil).
  def is_username_valid(username)
    return false unless username

    lowered = username.downcase
    User.exec_sql('select 1 from users where username_lower = ?', lowered).values.length == 1
  end
end
# Lock taken by markdown and avatar_img around every use of the memoized V8 context.
@mutex = Mutex . new
# Regexp that captures an @mention: "@" followed by word characters
# ([a-zA-Z0-9_]) whose count is bounded by User.username_length
# (a Range; its begin/end become the {min,max} quantifier).
#
# The pattern is rebuilt on each call so it always reflects the current
# User.username_length.
def self.mention_matcher
  length = User.username_length
  /(@[a-zA-Z0-9_]{#{length.begin},#{length.end}})/
end
2013-02-05 14:16:51 -05:00
# Base location that relative asset paths are resolved against
# (see ctx_load and v8). Simply hands back Rails.root.
def self.app_root
  Rails.root
end
# Returns the memoized V8 context, building it on first use.
#
# Building the context registers a Helpers instance as "helpers", loads the
# external and Discourse JavaScript files, evaluates the current site
# settings, loads any plugin-registered server side scripts, and compiles
# the quote templates into HANDLEBARS_TEMPLATES.
#
# If any step raises, the partially built context is discarded so the next
# call starts over instead of handing out a half-initialised context.
def self.v8
  return @ctx if @ctx

  # ctx_load reads @ctx, so the ivar has to be assigned before loading
  @ctx = V8::Context.new
  @ctx["helpers"] = Helpers.new

  ctx_load("app/assets/javascripts/external/md5.js",
           "app/assets/javascripts/external/underscore.js",
           "app/assets/javascripts/external/Markdown.Converter.js",
           "app/assets/javascripts/external/twitter-text-1.5.0.js",
           "lib/headless-ember.js",
           "app/assets/javascripts/external/rsvp.js",
           Rails.configuration.ember.handlebars_location)

  @ctx.eval("var Discourse = {}; Discourse.SiteSettings = #{SiteSetting.client_settings_json};")
  @ctx.eval("var window = {}; window.devicePixelRatio = 2;") # hack to make code think stuff is retina

  ctx_load("app/assets/javascripts/discourse/components/bbcode.js",
           "app/assets/javascripts/discourse/components/utilities.js",
           "app/assets/javascripts/discourse/components/markdown.js")

  # Load server side javascripts
  if DiscoursePluginRegistry.server_side_javascripts.present?
    DiscoursePluginRegistry.server_side_javascripts.each do |ssjs|
      @ctx.load(ssjs)
    end
  end

  @ctx['quoteTemplate'] = File.read(app_root + 'app/assets/javascripts/discourse/templates/quote.js.shbrs')
  @ctx['quoteEmailTemplate'] = File.read(app_root + 'lib/assets/quote_email.js.shbrs')
  @ctx.eval("HANDLEBARS_TEMPLATES = {
    'quote' : Handlebars.compile(quoteTemplate),
    'quote_email' : Handlebars.compile(quoteEmailTemplate),
   };")

  @ctx
rescue StandardError
  # drop the half-built context so a later call retries from scratch
  @ctx = nil
  raise
end
# Converts raw post text to HTML by running the Discourse markdown converter
# inside the shared V8 context, then rewrites local asset URLs to the
# configured asset host via apply_cdn.
#
# text - raw text handed to the converter as the JS variable `raw`
# opts - optional Hash exposed to JS as `opts`; mentionLookup and
#        lookupAvatar callbacks are added to it on the JS side
#
# Returns the resulting HTML. No sanitizing happens here (see cook).
def self.markdown(text, opts = nil)
  # we use the exact same markdown converter as the client
  # TODO: use the same extensions on both client and server (in particular the template for mentions)
  baked = @mutex.synchronize do
    context = v8

    # we need to do this to work in a multi site environment, many sites, many settings
    context.eval("Discourse.SiteSettings = #{SiteSetting.client_settings_json};")
    context.eval("Discourse.BaseUrl = 'http://#{RailsMultisite::ConnectionManagement.current_hostname}';")
    context.eval("Discourse.getURL = function(url) {return '#{Discourse::base_uri}' + url};")

    context['opts'] = opts || {}
    context['raw'] = text
    context.eval('opts["mentionLookup"] = function(u){return helpers.is_username_valid(u);}')
    context.eval('opts["lookupAvatar"] = function(p){return Discourse.Utilities.avatarImg({username: p, size: "tiny", avatarTemplate: helpers.avatar_template(p)});}')

    # value of the block (and therefore of synchronize) is the converted HTML
    context.eval('Discourse.Markdown.markdownConverter(opts).makeHtml(raw)')
  end

  # we need some minimal server side stuff, apply CDN and TODO filter disallowed markup
  apply_cdn(baked, Rails.configuration.action_controller.asset_host)
end
# leaving this here, cause it invokes v8, don't want to implement twice
#
# Builds avatar markup by calling Discourse.Utilities.avatarImg in the shared
# V8 context, after refreshing the site settings, CDN and base URL there.
#
# username - exposed to JS as `username`
# size     - exposed to JS as `size`
#
# Returns whatever the JS avatarImg call evaluates to.
def self.avatar_img(username, size)
  @mutex.synchronize do
    context = v8
    context['username'] = username
    context['size'] = size
    context.eval("Discourse.SiteSettings = #{SiteSetting.client_settings_json};")
    context.eval("Discourse.CDN = '#{Rails.configuration.action_controller.asset_host}';")
    # NOTE(review): unlike markdown, no 'http://' prefix is added here — confirm intended
    context.eval("Discourse.BaseUrl = '#{RailsMultisite::ConnectionManagement.current_hostname}';")
    context.eval("Discourse.Utilities.avatarImg({ username: username, size: size });")
  end
end
# Prefixes site-relative asset URLs in an HTML fragment with the CDN host.
#
# html - HTML fragment to rewrite
# url  - CDN base URL; when nil/false the html is returned untouched
#
# Rewrites <a href> only when the path points at an image file, and every
# <img src>. Only paths starting with a single "/" are touched:
# protocol-relative URLs ("//host/...") already name a host and are left alone.
#
# Returns the serialized fragment.
def self.apply_cdn(html, url)
  return html unless url

  image = /\.(jpg|jpeg|gif|png|tiff|tif)$/
  site_relative = lambda { |path| path.start_with?('/') && !path.start_with?('//') }

  doc = Nokogiri::HTML.fragment(html)

  doc.css("a").each do |link|
    href = link["href"].to_s
    link["href"] = url + href if site_relative.call(href) && href =~ image
  end

  doc.css("img").each do |img|
    src = img["src"].to_s
    img["src"] = url + src if site_relative.call(src)
  end

  doc.to_s
end
# Turns raw post text into sanitized HTML.
#
# text - raw text; a copy is handed to markdown
# opts - options Hash; left unmodified. The copy passed on to markdown also
#        carries opts[:topic_id] under :topicId (the "minor inconsistency"
#        in key naming — presumably the key the JS side reads; confirm).
#
# The markdown output is cleaned with Sanitize using PrettyText.whitelist,
# and rel=nofollow is added to links when the
# add_rel_nofollow_to_user_content site setting is on.
def self.cook(text, opts = {})
  markdown_opts = opts.merge(topicId: opts[:topic_id])
  cooked = Sanitize.clean(markdown(text.dup, markdown_opts), PrettyText.whitelist)

  return cooked unless SiteSetting.add_rel_nofollow_to_user_content

  add_rel_nofollow_to_user_content(cooked)
end
2013-02-25 19:42:20 +03:00
2013-02-11 11:43:07 +11:00
# Adds rel="nofollow" to every link in the fragment that points off-site.
#
# A link is left alone when its URL has no host (relative links), or when
# the host is the site's own host, a whitelisted domain from the
# exclude_rel_nofollow_domains setting (comma separated), or a subdomain of
# either. Matching is on whole domain labels, so "evilexample.com" is NOT
# treated as "example.com". Links whose href cannot be parsed as a URI get
# nofollow as well.
#
# html - HTML fragment to process
#
# Returns the processed fragment as HTML.
def self.add_rel_nofollow_to_user_content(html)
  # tolerate "a.com, b.com" and ".a.com" style entries; ignore empty ones
  whitelist = SiteSetting.exclude_rel_nofollow_domains.to_s.split(",").map do |domain|
    domain.strip.sub(/\A\./, '')
  end.reject(&:empty?)

  within_domain = lambda do |host, domain|
    host == domain || host.end_with?(".#{domain}")
  end

  site_uri = nil
  doc = Nokogiri::HTML.fragment(html)
  doc.css("a").each do |link|
    href = link["href"].to_s
    begin
      host = URI(href).host.to_s
      # resolved lazily, inside the rescue scope, only once a link is seen
      site_uri ||= URI(Discourse.base_url)
      trusted = [site_uri.host, *whitelist].compact

      external = !host.empty? && trusted.none? { |domain| within_domain.call(host, domain) }
      link["rel"] = "nofollow" if external
    rescue URI::InvalidURIError
      # add a nofollow anyway
      link["rel"] = "nofollow"
    end
  end
  doc.to_html
end
# Collects the links referenced by a cooked post.
#
# Links inside quotes (aside.quote) are blanked first so they are not
# reported; instead each quote contributes a link to the quoted topic
# ("/t/topic/<data-topic>"), extended with "/<data-post>" when present.
# Anchors without an href and quotes without a data-topic are skipped.
#
# html - HTML fragment to inspect
#
# Returns an Array of URL strings: post links first, then quote links.
def self.extract_links(html)
  links = []
  doc = Nokogiri::HTML.fragment(html)

  # remove href inside quotes
  doc.css("aside.quote a").each { |anchor| anchor["href"] = "" }

  # extract all links from the post
  doc.css("a").each do |anchor|
    href = anchor["href"].to_s # nil when the anchor has no href attribute
    links << href unless href.empty?
  end

  # extract links to quotes
  doc.css("aside.quote").each do |quote|
    topic_id = quote['data-topic'].to_s
    next if topic_id.empty?

    post_number = quote['data-post']
    links << (post_number ? "/t/topic/#{topic_id}/#{post_number}" : "/t/topic/#{topic_id}")
  end

  links
end
2013-05-28 09:48:47 +10:00
# Produces an excerpt of the given HTML by delegating to
# ExcerptParser.get_excerpt with the same arguments.
#
# html       - source HTML
# max_length - length limit forwarded to the parser
# options    - Hash forwarded to the parser unchanged
def self.excerpt(html, max_length, options = {})
  ExcerptParser.get_excerpt(html, max_length, options)
end
2013-02-05 14:16:51 -05:00
2013-06-05 15:28:10 -04:00
# Replaces every <a> element in the given HTML with its text content.
# Blank input is returned as-is.
#
# Per the original note this is meant for bios of users who are not basic
# (caller-side policy; not enforced here).
#
# string - HTML string to process
#
# Returns the HTML with anchors flattened to plain text.
def self.strip_links(string)
  return string if string.blank?

  doc = Nokogiri::HTML.fragment(string)
  doc.css('a').each do |anchor|
    anchor.replace(anchor.text)
  end
  doc.to_html
end
2013-05-28 09:48:47 +10:00
# NOTE: `protected` only affects instance methods defined after it; methods
# defined with `def self.` (such as ctx_load below) keep their visibility.
protected

# Loads each given JavaScript file into the V8 context held in @ctx.
#
# files - paths that are appended to app_root before loading
#
# Expects @ctx to be set already (v8 assigns it before calling this).
def self.ctx_load(*files)
  files.each { |path| @ctx.load(app_root + path) }
end
end