diff --git a/Gemfile b/Gemfile index 1476657d6f9..be623dde769 100644 --- a/Gemfile +++ b/Gemfile @@ -37,7 +37,6 @@ gem 'babel-transpiler' gem 'message_bus' gem 'rails_multisite', path: 'vendor/gems/rails_multisite' -gem 'redcarpet', require: false gem 'fast_xs' gem 'fast_xor' diff --git a/Gemfile.lock b/Gemfile.lock index dad42251ff9..d96defd9c0a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -320,7 +320,6 @@ GEM ffi (>= 1.0.6) msgpack (>= 0.4.3) trollop (>= 1.16.2) - redcarpet (3.3.2) redis (3.2.1) redis-namespace (1.5.2) redis (~> 3.0, >= 3.0.4) @@ -510,7 +509,6 @@ DEPENDENCIES rb-fsevent rb-inotify (~> 0.9) rbtrace - redcarpet redis rest-client rinku diff --git a/app/models/topic.rb b/app/models/topic.rb index 71111defccc..12be5183eac 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -5,6 +5,7 @@ require_dependency 'rate_limiter' require_dependency 'text_sentinel' require_dependency 'text_cleaner' require_dependency 'archetype' +require_dependency 'html_prettify' class Topic < ActiveRecord::Base include ActionView::Helpers::SanitizeHelper @@ -164,6 +165,9 @@ class Topic < ActiveRecord::Base cancel_auto_close_job ensure_topic_has_a_category end + if title_changed? + write_attribute :fancy_title, Topic.fancy_title(title) + end end after_save do @@ -267,17 +271,28 @@ class Topic < ActiveRecord::Base apply_per_day_rate_limit_for("pms", :max_private_messages_per_day) end + def self.fancy_title(title) + escaped = ERB::Util.html_escape(title) + return unless escaped + HtmlPrettify.render(escaped) + end + def fancy_title - sanitized_title = ERB::Util.html_escape(title) + return ERB::Util.html_escape(title) unless SiteSetting.title_fancy_entities? - return unless sanitized_title - return sanitized_title unless SiteSetting.title_fancy_entities? + unless fancy_title = read_attribute(:fancy_title) - # We don't always have to require this, if fancy is disabled - # see: http://meta.discourse.org/t/pattern-for-defer-loading-gems-and-profiling-with-perftools-rb/4629 - require 'redcarpet' unless defined? Redcarpet + fancy_title = Topic.fancy_title(title) + write_attribute(:fancy_title, fancy_title) - Redcarpet::Render::SmartyPants.render(sanitized_title) + unless new_record? + # make sure data is set in table, this also allows us to change algorithm + # by simply nulling this column + exec_sql("UPDATE topics SET fancy_title = :fancy_title where id = :id", id: self.id, fancy_title: fancy_title) + end + end + + fancy_title end def pending_posts_count @@ -698,6 +713,7 @@ class Topic < ActiveRecord::Base def title=(t) slug = Slug.for(t.to_s) write_attribute(:slug, slug) + write_attribute(:fancy_title, nil) write_attribute(:title,t) end diff --git a/db/migrate/20150924022040_add_fancy_title_to_topic.rb b/db/migrate/20150924022040_add_fancy_title_to_topic.rb new file mode 100644 index 00000000000..49863f89f5d --- /dev/null +++ b/db/migrate/20150924022040_add_fancy_title_to_topic.rb @@ -0,0 +1,5 @@ +class AddFancyTitleToTopic < ActiveRecord::Migration + def change + add_column :topics, :fancy_title, :string, limit: 400, null: true + end +end diff --git a/lib/html_prettify.rb b/lib/html_prettify.rb new file mode 100644 index 00000000000..6d0b3d065bf --- /dev/null +++ b/lib/html_prettify.rb @@ -0,0 +1,407 @@ +# heavily based off +# https://github.com/vmg/redcarpet/blob/master/ext/redcarpet/html_smartypants.c +# and +# https://github.com/jmcnevin/rubypants/blob/master/lib/rubypants/core.rb +# 99% of the code here is by Jeremy McNevin +# +# This Source File is available under BSD/MIT license as well as standard GPL +# + +class HtmlPrettify < String + def self.render(html) + new(html).to_html + end + + # Create a new RubyPants instance with the text in +string+. + # + # Allowed elements in the options array: + # + # 0 :: do nothing + # 1 :: enable all, using only em-dash shortcuts + # 2 :: enable all, using old school en- and em-dash shortcuts (*default*) + # 3 :: enable all, using inverted old school en and em-dash shortcuts + # -1 :: stupefy (translate HTML entities to their ASCII-counterparts) + # + # If you don't like any of these defaults, you can pass symbols to change + # RubyPants' behavior: + # + # :quotes :: quotes + # :backticks :: backtick quotes (``double'' only) + # :allbackticks :: backtick quotes (``double'' and `single') + # :dashes :: dashes + # :oldschool :: old school dashes + # :inverted :: inverted old school dashes + # :ellipses :: ellipses + # :convertquotes :: convert " entities to + # " + # :stupefy :: translate RubyPants HTML entities + # to their ASCII counterparts. + # + # In addition, you can customize the HTML entities that will be injected by + # passing in a hash for the final argument. The defaults for these entities + # are as follows: + # + # :single_left_quote :: ‘ + # :double_left_quote :: “ + # :single_right_quote :: ’ + # :double_right_quote :: ” + # :em_dash :: — + # :en_dash :: – + # :ellipsis :: … + # :html_quote :: " + # + def initialize(string, options=[2], entities = {}) + super string + + @options = [*options] + @entities = default_entities.update(entities) + end + + # Apply SmartyPants transformations. + def to_html + do_quotes = do_backticks = do_dashes = do_ellipses = nil + + if @options.include?(0) + # Do nothing. + return self + elsif @options.include?(1) + # Do everything, turn all options on. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :normal + elsif @options.include?(2) + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :oldschool + elsif @options.include?(3) + # Do everything, turn all options on, use inverted old school + # dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :inverted + elsif @options.include?(-1) + do_stupefy = true + else + do_quotes = @options.include?(:quotes) + do_backticks = @options.include?(:backticks) + do_backticks = :both if @options.include?(:allbackticks) + do_dashes = :normal if @options.include?(:dashes) + do_dashes = :oldschool if @options.include?(:oldschool) + do_dashes = :inverted if @options.include?(:inverted) + do_ellipses = @options.include?(:ellipses) + do_stupefy = @options.include?(:stupefy) + end + + # Parse the HTML + tokens = tokenize + + # Keep track of when we're inside
or tags.
+ in_pre = false
+
+ # Here is the result stored in.
+ result = ""
+
+ # This is a cheat, used to get some context for one-character
+ # tokens that consist of just a quote char. What we do is remember
+ # the last character of the previous text token, to use as context
+ # to curl single- character quote tokens correctly.
+ prev_token_last_char = nil
+
+ tokens.each do |token|
+ if token.first == :tag
+ result << token[1]
+ if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]!
+ in_pre = ($1 != "/") # Opening or closing tag?
+ end
+ else
+ t = token[1]
+
+ # Remember last char of this token before processing.
+ last_char = t[-1].chr
+
+ unless in_pre
+
+ t.gsub!("'", "'")
+
+ t = process_escapes t
+
+ t.gsub!(""", '"')
+
+ if do_dashes
+ t = educate_dashes t if do_dashes == :normal
+ t = educate_dashes_oldschool t if do_dashes == :oldschool
+ t = educate_dashes_inverted t if do_dashes == :inverted
+ end
+
+ t = educate_ellipses t if do_ellipses
+
+ t = educate_fractions t
+
+ # Note: backticks need to be processed before quotes.
+ if do_backticks
+ t = educate_backticks t
+ t = educate_single_backticks t if do_backticks == :both
+ end
+
+ if do_quotes
+ if t == "'"
+ # Special case: single-character ' token
+ if prev_token_last_char =~ /\S/
+ t = entity(:single_right_quote)
+ else
+ t = entity(:single_left_quote)
+ end
+ elsif t == '"'
+ # Special case: single-character " token
+ if prev_token_last_char =~ /\S/
+ t = entity(:double_right_quote)
+ else
+ t = entity(:double_left_quote)
+ end
+ else
+ # Normal case:
+ t = educate_quotes t
+ end
+ end
+
+ t = stupefy_entities t if do_stupefy
+ end
+
+ prev_token_last_char = last_char
+ result << t
+ end
+ end
+
+ # Done
+ result
+ end
+
+ protected
+
+ # Return the string, with after processing the following backslash
+ # escape sequences. This is useful if you want to force a "dumb" quote
+ # or other character to appear.
+ #
+ # Escaped are:
+ # \\ \" \' \. \- \`
+ #
+ def process_escapes(str)
+ str = str.gsub('\\\\', '\')
+ str.gsub!('\"', '"')
+ str.gsub!("\\\'", ''')
+ str.gsub!('\.', '.')
+ str.gsub!('\-', '-')
+ str.gsub!('\`', '`')
+ str
+ end
+
+ # The string, with each instance of "--" translated to an
+ # em-dash HTML entity.
+ #
+ def educate_dashes(str)
+ str.
+ gsub(/--/, entity(:em_dash))
+ end
+
+ # The string, with each instance of "--" translated to an
+ # en-dash HTML entity, and each "---" translated to an
+ # em-dash HTML entity.
+ #
+ def educate_dashes_oldschool(str)
+ str.
+ gsub(/---/, entity(:em_dash)).
+ gsub(/--/, entity(:en_dash))
+ end
+
+ # Return the string, with each instance of "--" translated
+ # to an em-dash HTML entity, and each "---" translated to
+ # an en-dash HTML entity. Two reasons why: First, unlike the en- and
+ # em-dash syntax supported by +educate_dashes_oldschool+, it's
+ # compatible with existing entries written before SmartyPants 1.1,
+ # back when "--" was only used for em-dashes. Second,
+ # em-dashes are more common than en-dashes, and so it sort of makes
+ # sense that the shortcut should be shorter to type. (Thanks to
+ # Aaron Swartz for the idea.)
+ #
+ def educate_dashes_inverted(str)
+ str.
+ gsub(/---/, entity(:en_dash)).
+ gsub(/--/, entity(:em_dash))
+ end
+
+ # Return the string, with each instance of "..." translated
+ # to an ellipsis HTML entity. Also converts the case where there are
+ # spaces between the dots.
+ #
+ def educate_ellipses(str)
+ str.
+ gsub('...', entity(:ellipsis)).
+ gsub('. . .', entity(:ellipsis))
+ end
+
+ # Return the string, with "``backticks''"-style single quotes
+ # translated into HTML curly quote entities.
+ #
+ def educate_backticks(str)
+ str.
+ gsub("``", entity(:double_left_quote)).
+ gsub("''", entity(:double_right_quote))
+ end
+
+ # Return the string, with "`backticks'"-style single quotes
+ # translated into HTML curly quote entities.
+ #
+ def educate_single_backticks(str)
+ str.
+ gsub("`", entity(:single_left_quote)).
+ gsub("'", entity(:single_right_quote))
+ end
+
+ def educate_fractions(str)
+ str.gsub(/(\s+|^)(1\/4|1\/2|3\/4)([,.;\s]|$)/) do
+ frac =
+ if $2 == "1/2".freeze
+ entity(:frac12)
+ elsif $2 == "1/4".freeze
+ entity(:frac14)
+ elsif $2 == "3/4".freeze
+ entity(:frac34)
+ end
+ "#{$1}#{frac}#{$3}"
+ end
+ end
+
+ # Return the string, with "educated" curly quote HTML entities.
+ #
+ def educate_quotes(str)
+ punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
+
+ # normalize html
+ str = str.dup
+ # Special case if the very first character is a quote followed by
+ # punctuation at a non-word-break. Close the quotes by brute
+ # force:
+ str.gsub!(/^'(?=#{punct_class}\B)/,
+ entity(:single_right_quote))
+ str.gsub!(/^"(?=#{punct_class}\B)/,
+ entity(:double_right_quote))
+
+ # Special case for double sets of quotes, e.g.:
+ # He said, "'Quoted' words in a larger quote."
+ str.gsub!(/"'(?=\w)/,
+ "#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
+ str.gsub!(/'"(?=\w)/,
+ "#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
+
+ # Special case for decade abbreviations (the '80s):
+ str.gsub!(/'(?=\d\ds)/,
+ entity(:single_right_quote))
+
+ close_class = %![^\ \t\r\n\\[\{\(\-]!
+ dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
+
+ # Get most opening single quotes:
+ str.gsub!(/(\s| |=|--|&[mn]dash;|#{dec_dashes}|ȁ[34];)'(?=\w)/,
+ '\1' + entity(:single_left_quote))
+
+ # Single closing quotes:
+ str.gsub!(/(#{close_class})'/,
+ '\1' + entity(:single_right_quote))
+ str.gsub!(/'(\s|s\b|$)/,
+ entity(:single_right_quote) + '\1')
+
+ # Any remaining single quotes should be opening ones:
+ str.gsub!(/'/,
+ entity(:single_left_quote))
+
+ # Get most opening double quotes:
+ str.gsub!(/(\s| |=|--|&[mn]dash;|#{dec_dashes}|ȁ[34];)"(?=\w)/,
+ '\1' + entity(:double_left_quote))
+
+ # Double closing quotes:
+ str.gsub!(/(#{close_class})"/,
+ '\1' + entity(:double_right_quote))
+ str.gsub!(/"(\s|s\b|$)/,
+ entity(:double_right_quote) + '\1')
+
+ # Any remaining quotes should be opening ones:
+ str.gsub!(/"/,
+ entity(:double_left_quote))
+
+ str
+ end
+
+ # Return the string, with each RubyPants HTML entity translated to
+ # its ASCII counterpart.
+ #
+ # Note: This is not reversible (but exactly the same as in SmartyPants)
+ #
+ def stupefy_entities(str)
+ new_str = str.dup
+
+ {
+ :en_dash => '-',
+ :em_dash => '--',
+ :single_left_quote => "'",
+ :single_right_quote => "'",
+ :double_left_quote => '"',
+ :double_right_quote => '"',
+ :ellipsis => '...'
+ }.each do |k,v|
+ new_str.gsub!(/#{entity(k)}/, v)
+ end
+
+ new_str
+ end
+
+ # Return an array of the tokens comprising the string. Each token is
+ # either a tag (possibly with nested, tags contained therein, such
+ # as , or a run of text between
+ # tags. Each element of the array is a two-element array; the first
+ # is either :tag or :text; the second is the actual value.
+ #
+ # Based on the _tokenize() subroutine from Brad Choate's
+ # MTRegex plugin.
+ #
+ # This is actually the easier variant using tag_soup, as used by
+ # Chad Miller in the Python port of SmartyPants.
+ #
+ def tokenize
+ tag_soup = /([^<]*)(<[^>]*>)/
+
+ tokens = []
+
+ prev_end = 0
+
+ scan(tag_soup) do
+ tokens << [:text, $1] if $1 != ""
+ tokens << [:tag, $2]
+ prev_end = $~.end(0)
+ end
+
+ if prev_end < size
+ tokens << [:text, self[prev_end..-1]]
+ end
+
+ tokens
+ end
+
+ def default_entities
+ {
+ single_left_quote: "‘",
+ double_left_quote: "“",
+ single_right_quote: "’",
+ double_right_quote: "”",
+ em_dash: "—",
+ en_dash: "–",
+ ellipsis: "…",
+ html_quote: """,
+ frac12: "½",
+ frac14: "¼",
+ frac34: "¾",
+ }
+ end
+
+ def entity(key)
+ @entities[key]
+ end
+
+end
diff --git a/spec/components/html_prettify_spec.rb b/spec/components/html_prettify_spec.rb
new file mode 100644
index 00000000000..8f1126eeb72
--- /dev/null
+++ b/spec/components/html_prettify_spec.rb
@@ -0,0 +1,30 @@
+require 'spec_helper'
+require 'html_prettify'
+
+describe HtmlPrettify do
+
+ def t(source, expected)
+ expect(HtmlPrettify.render(source)).to eq(expected)
+ end
+
+ it 'correctly prettifies html' do
+ t "All's well!
", "All’s well!
"
+ t "Eatin' Lunch'.
", "Eatin’ Lunch’.
"
+ t "a 1/4. is a fraction but not 1/4/2000
", "a ¼. is a fraction but not 1/4/2000
"
+ t "Well that'll be the day
", "Well that’ll be the day
"
+ t %("Quoted text"
), %(“Quoted text”
)
+ t "I've been meaning to tell you ..
", "I’ve been meaning to tell you ..
"
+ t "single `backticks` in HTML should be preserved
", "single `backticks` in HTML should be preserved
"
+ t "double hyphen -- ndash --- mdash
", "double hyphen – ndash — mdash
"
+ t "a long time ago...", "a long time ago…"
+ t "is 'this a mistake'?", "is ‘this a mistake’?"
+ t ERB::Util.html_escape("'that went well'"), "‘that went well’"
+ t '"that went well"', "“that went well”"
+ t ERB::Util.html_escape('"that went well"'), "“that went well”"
+
+ t 'src="test.png"> yay', "src=“test.png”> yay"
+
+ t ERB::Util.html_escape('
yay'), "<img src=“test.png”> yay"
+ end
+
+end
diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb
index ccaed7b06e4..3ed9e895d60 100644
--- a/spec/models/topic_spec.rb
+++ b/spec/models/topic_spec.rb
@@ -214,7 +214,7 @@ describe Topic do
context 'title_fancy_entities disabled' do
before do
- SiteSetting.stubs(:title_fancy_entities).returns(false)
+ SiteSetting.title_fancy_entities = false
end
it "doesn't add entities to the title" do
@@ -224,11 +224,26 @@ describe Topic do
context 'title_fancy_entities enabled' do
before do
- SiteSetting.stubs(:title_fancy_entities).returns(true)
+ SiteSetting.title_fancy_entities = true
end
- it "converts the title to have fancy entities" do
+ it "converts the title to have fancy entities and updates" do
expect(topic.fancy_title).to eq("“this topic” – has “fancy stuff”")
+ topic.title = "this is my test hello world... yay"
+ topic.user.save!
+ topic.save!
+ topic.reload
+ expect(topic.fancy_title).to eq("This is my test hello world… yay")
+
+ topic.title = "I made a change to the title"
+ topic.save!
+
+ topic.reload
+ expect(topic.fancy_title).to eq("I made a change to the title")
+
+ # another edge case
+ topic.title = "this is another edge case"
+ expect(topic.fancy_title).to eq("this is another edge case")
end
end
end