From b0c489f161e173f2e1eac4a85698df6ea174f018 Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Sat, 29 Sep 2018 02:57:36 -0500 Subject: [PATCH 1/7] Changed frontmatter filtering to support YAML, JSON, TOML, and arbitrary languages --- .../52007-frontmatter-toml-json.yml | 5 + lib/banzai/filter/front_matter_filter.rb | 52 +++++++ lib/banzai/pipeline/pre_process_pipeline.rb | 2 +- .../banzai/filter/front_matter_filter_spec.rb | 140 ++++++++++++++++++ 4 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 changelogs/unreleased/52007-frontmatter-toml-json.yml create mode 100644 lib/banzai/filter/front_matter_filter.rb create mode 100644 spec/lib/banzai/filter/front_matter_filter_spec.rb diff --git a/changelogs/unreleased/52007-frontmatter-toml-json.yml b/changelogs/unreleased/52007-frontmatter-toml-json.yml new file mode 100644 index 00000000000..cb6a0bbca94 --- /dev/null +++ b/changelogs/unreleased/52007-frontmatter-toml-json.yml @@ -0,0 +1,5 @@ +--- +title: Changed frontmatter filtering to support YAML, JSON, TOML, and arbitrary languages +merge_request: +author: Travis Miller +type: changed diff --git a/lib/banzai/filter/front_matter_filter.rb b/lib/banzai/filter/front_matter_filter.rb new file mode 100644 index 00000000000..3a2038b956f --- /dev/null +++ b/lib/banzai/filter/front_matter_filter.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +module Banzai + module Filter + class FrontMatterFilter < HTML::Pipeline::Filter + DELIM = '[-+;]{3}' + + PATTERN = %r{ + \A(?:[^\r\n]*coding:[^\r\n]*\r?\n)? 
# ignore encoding on first line + (?:(?: # frontmatter by delimiter + (?<delim>#{DELIM})(?<lang>\S*)(?:[ ]*\r?\n)+ # opening delimiter line with optional language specifier + (?<frontmatter>.*)(?:[ ]*\r?\n)+ # frontmatter contents + ^\k<delim>(?:[ ]*\r?\n) # closing delimiter - backreferencing opening + )|(?: # frontmatter by json + (?<frontmatter_json>{.*})(?:[ ]*\r?\n){2} # json frontmatter contents + )) + (?:[ ]*\r?\n?)* # optional extra empty lines + (?<content>.*) # content + }msx + + def call + match = PATTERN.match(html) + + return html unless match + + frontmatter = match['frontmatter'] || match['frontmatter_json'] + lang = lang_by_delim(match['lang'], match['delim']) + + "```#{lang}\n#{frontmatter}\n```\n\n#{match['content']}" + end + + def lang_by_delim(lang, delim) + if lang.nil? + 'json' + elsif lang.empty? + case delim + when '---' + 'yaml' + when '+++' + 'toml' + when ';;;' + 'json' + else + '' + end + else + lang + end + end + end + end +end diff --git a/lib/banzai/pipeline/pre_process_pipeline.rb b/lib/banzai/pipeline/pre_process_pipeline.rb index c937f783180..4c2b4ca1665 100644 --- a/lib/banzai/pipeline/pre_process_pipeline.rb +++ b/lib/banzai/pipeline/pre_process_pipeline.rb @@ -5,7 +5,7 @@ module Pipeline class PreProcessPipeline < BasePipeline def self.filters FilterArray[ - Filter::YamlFrontMatterFilter, + Filter::FrontMatterFilter, Filter::BlockquoteFenceFilter, ] end diff --git a/spec/lib/banzai/filter/front_matter_filter_spec.rb b/spec/lib/banzai/filter/front_matter_filter_spec.rb new file mode 100644 index 00000000000..3f9809240b2 --- /dev/null +++ b/spec/lib/banzai/filter/front_matter_filter_spec.rb @@ -0,0 +1,140 @@ +require 'rails_helper' + +describe Banzai::Filter::FrontMatterFilter do + include FilterSpecHelper + + it 'allows for `encoding:` before the frontmatter' do + content = <<-MD.strip_heredoc + # encoding: UTF-8 + --- + foo: foo + bar: bar + --- + + # Header + + Content + MD + + output = filter(content) + + expect(output).not_to match 'encoding' + end + + it 'converts YAML frontmatter to 
a fenced code block' do + content = <<-MD.strip_heredoc + --- + foo: :foo_symbol + bar: :bar_symbol + --- + + # Header + + Content + MD + + output = filter(content) + + aggregate_failures do + expect(output).not_to include '---' + expect(output).to include "```yaml\nfoo: :foo_symbol\n" + end + end + + it 'converts TOML frontmatter to a fenced code block' do + content = <<-MD.strip_heredoc + +++ + foo = :foo_symbol + bar = :bar_symbol + +++ + + # Header + + Content + MD + + output = filter(content) + + aggregate_failures do + expect(output).not_to include '+++' + expect(output).to include "```toml\nfoo = :foo_symbol\n" + end + end + + it 'converts JSON frontmatter to a fenced code block' do + content = <<-MD.strip_heredoc + ;;; + { + "foo": ":foo_symbol", + "bar": ":bar_symbol" + } + ;;; + + # Header + + Content + MD + + output = filter(content) + + aggregate_failures do + expect(output).not_to include ';;;' + expect(output).to include "```json\n{\n \"foo\": \":foo_symbol\",\n" + end + end + + it 'converts arbitrary frontmatter to a fenced code block' do + content = <<-MD.strip_heredoc + ---arbitrary + foo = :foo_symbol + bar = :bar_symbol + --- + + # Header + + Content + MD + + output = filter(content) + + aggregate_failures do + expect(output).not_to include '---arbitrary' + expect(output).to include "```arbitrary\nfoo = :foo_symbol\n" + end + end + + it 'converts JSON (tokenless) frontmatter to a fenced code block' do + content = <<-MD.strip_heredoc + { + "foo": { + "baz": ":baz_symbol" + }, + "bar": ":bar_symbol" + } + + # Header + + ```json + {"x": true} + ``` + MD + + output = filter(content) + + aggregate_failures do + expect(output).to include "```json\n{\n \"foo\": {\n \"baz\": \":baz_symbol\"\n },\n" + end + end + + context 'on content without frontmatter' do + it 'returns the content unmodified' do + content = <<-MD.strip_heredoc + # This is some Markdown + + It has no YAML frontmatter to parse. 
+ MD + + expect(filter(content)).to eq content + end + end +end -- GitLab From ed42b21c97e0489cd2e13d738fcf278c7f3b60b0 Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Fri, 23 Nov 2018 10:17:13 -0600 Subject: [PATCH 2/7] Add merge request id to changelog entry --- changelogs/unreleased/52007-frontmatter-toml-json.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/unreleased/52007-frontmatter-toml-json.yml b/changelogs/unreleased/52007-frontmatter-toml-json.yml index cb6a0bbca94..bdada19f3a7 100644 --- a/changelogs/unreleased/52007-frontmatter-toml-json.yml +++ b/changelogs/unreleased/52007-frontmatter-toml-json.yml @@ -1,5 +1,5 @@ --- title: Changed frontmatter filtering to support YAML, JSON, TOML, and arbitrary languages -merge_request: +merge_request: 23331 author: Travis Miller type: changed -- GitLab From 210e33b445447d7472ca2dde7fe49bd5d47c164d Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Mon, 3 Dec 2018 09:27:34 -0600 Subject: [PATCH 3/7] Remove previous Yaml front matter filter --- .../filter/yaml_front_matter_filter_spec.rb | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 spec/lib/banzai/filter/yaml_front_matter_filter_spec.rb diff --git a/spec/lib/banzai/filter/yaml_front_matter_filter_spec.rb b/spec/lib/banzai/filter/yaml_front_matter_filter_spec.rb deleted file mode 100644 index 9f1b862ef19..00000000000 --- a/spec/lib/banzai/filter/yaml_front_matter_filter_spec.rb +++ /dev/null @@ -1,53 +0,0 @@ -require 'rails_helper' - -describe Banzai::Filter::YamlFrontMatterFilter do - include FilterSpecHelper - - it 'allows for `encoding:` before the frontmatter' do - content = <<-MD.strip_heredoc - # encoding: UTF-8 - --- - foo: foo - --- - - # Header - - Content - MD - - output = filter(content) - - expect(output).not_to match 'encoding' - end - - it 'converts YAML frontmatter to a fenced code block' do - content = <<-MD.strip_heredoc - --- - bar: :bar_symbol - --- - - # Header - - Content - MD - - 
output = filter(content) - - aggregate_failures do - expect(output).not_to include '---' - expect(output).to include "```yaml\nbar: :bar_symbol\n```" - end - end - - context 'on content without frontmatter' do - it 'returns the content unmodified' do - content = <<-MD.strip_heredoc - # This is some Markdown - - It has no YAML frontmatter to parse. - MD - - expect(filter(content)).to eq content - end - end -end -- GitLab From 76c65666432f7b8e3a646658bb8728f905213b9e Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Mon, 3 Dec 2018 09:29:33 -0600 Subject: [PATCH 4/7] Update front matter filter regex and remove "JSON without Delimiter" case --- lib/banzai/filter/front_matter_filter.rb | 51 +++++--------- .../banzai/filter/front_matter_filter_spec.rb | 68 +++++++++---------- 2 files changed, 52 insertions(+), 67 deletions(-) diff --git a/lib/banzai/filter/front_matter_filter.rb b/lib/banzai/filter/front_matter_filter.rb index 3a2038b956f..3367981401f 100644 --- a/lib/banzai/filter/front_matter_filter.rb +++ b/lib/banzai/filter/front_matter_filter.rb @@ -3,19 +3,24 @@ module Banzai module Filter class FrontMatterFilter < HTML::Pipeline::Filter - DELIM = '[-+;]{3}' + DELIM_LANG = { + "---" => "yaml", + "+++" => "toml", + ";;;" => "json" + }.freeze + + DELIM = DELIM_LANG.keys.map {|delim| Regexp.escape(delim) }.join("|") PATTERN = %r{ - \A(?:[^\r\n]*coding:[^\r\n]*\r?\n)? # ignore encoding on first line - (?:(?: # frontmatter by delimiter - (?<delim>#{DELIM})(?<lang>\S*)(?:[ ]*\r?\n)+ # opening delimiter line with optional language specifier - (?<frontmatter>.*)(?:[ ]*\r?\n)+ # frontmatter contents - ^\k<delim>(?:[ ]*\r?\n) # closing delimiter - backreferencing opening - )|(?: # frontmatter by json - (?<frontmatter_json>{.*})(?:[ ]*\r?\n){2} # json frontmatter contents - )) - (?:[ ]*\r?\n?)* # optional extra empty lines - (?<content>.*) # content + \A(?:[^\r\n]*coding:[^\r\n]*)? # optional encoding line + \s* + ^(?<delim>#{DELIM})(?<lang>\S*) # opening front matter marker (optional language specifier) + \s* + ^(?<front_matter>.*?) 
# front matter (not greedy) + \s* + ^\k<delim> # closing front matter marker + \s* + (?<content>^.*)? # content }msx def call @@ -23,29 +28,9 @@ def call return html unless match - frontmatter = match['frontmatter'] || match['frontmatter_json'] - lang = lang_by_delim(match['lang'], match['delim']) - - "```#{lang}\n#{frontmatter}\n```\n\n#{match['content']}" - end + lang = match['lang'].empty? ? DELIM_LANG[match['delim']] : match['lang'] - def lang_by_delim(lang, delim) - if lang.nil? - 'json' - elsif lang.empty? - case delim - when '---' - 'yaml' - when '+++' - 'toml' - when ';;;' - 'json' - else - '' - end - else - lang - end + "```#{lang}\n#{match['front_matter']}\n```\n\n#{match['content']}" end end end diff --git a/spec/lib/banzai/filter/front_matter_filter_spec.rb b/spec/lib/banzai/filter/front_matter_filter_spec.rb index 3f9809240b2..3071dc7cf21 100644 --- a/spec/lib/banzai/filter/front_matter_filter_spec.rb +++ b/spec/lib/banzai/filter/front_matter_filter_spec.rb @@ -3,8 +3,8 @@ describe Banzai::Filter::FrontMatterFilter do include FilterSpecHelper - it 'allows for `encoding:` before the frontmatter' do - content = <<-MD.strip_heredoc + it 'allows for `encoding:` before the front matter' do + content = <<~MD # encoding: UTF-8 --- foo: foo @@ -21,8 +21,8 @@ expect(output).not_to match 'encoding' end - it 'converts YAML frontmatter to a fenced code block' do - content = <<-MD.strip_heredoc + it 'converts YAML front matter to a fenced code block' do + content = <<~MD --- foo: :foo_symbol bar: :bar_symbol @@ -42,7 +42,7 @@ end it 'converts TOML frontmatter to a fenced code block' do - content = <<-MD.strip_heredoc + content = <<~MD +++ foo = :foo_symbol bar = :bar_symbol @@ -61,8 +61,8 @@ end end - it 'converts JSON frontmatter to a fenced code block' do - content = <<-MD.strip_heredoc + it 'converts JSON front matter to a fenced code block' do + content = <<~MD ;;; { "foo": ":foo_symbol", @@ -83,8 +83,8 @@ end end - it 'converts arbitrary frontmatter to a fenced code block' do 
- content = <<-MD.strip_heredoc + it 'converts arbitrary front matter to a fenced code block' do + content = <<~MD ---arbitrary foo = :foo_symbol bar = :bar_symbol @@ -103,38 +103,38 @@ end end - it 'converts JSON (tokenless) frontmatter to a fenced code block' do - content = <<-MD.strip_heredoc - { - "foo": { - "baz": ":baz_symbol" - }, - "bar": ":bar_symbol" - } - - # Header - - ```json - {"x": true} - ``` - MD + context 'on content without front matter' do + it 'returns the content unmodified' do + content = <<~MD + # This is some Markdown - output = filter(content) + It has no YAML front matter to parse. + MD - aggregate_failures do - expect(output).to include "```json\n{\n \"foo\": {\n \"baz\": \":baz_symbol\"\n },\n" + expect(filter(content)).to eq content end end - context 'on content without frontmatter' do - it 'returns the content unmodified' do - content = <<-MD.strip_heredoc - # This is some Markdown - - It has no YAML frontmatter to parse. + context 'on front matter without content' do + it 'converts YAML front matter to a fenced code block' do + content = <<~MD + --- + foo: :foo_symbol + bar: :bar_symbol + --- MD - expect(filter(content)).to eq content + output = filter(content) + + aggregate_failures do + expect(output).to eq <<~MD + ```yaml + foo: :foo_symbol + bar: :bar_symbol + ``` + + MD + end end end end -- GitLab From 4d25c8c8008f8fc3bcd0f208bfbf0d42157eca26 Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Mon, 3 Dec 2018 11:30:59 -0600 Subject: [PATCH 5/7] updates from code review --- lib/banzai/filter/front_matter_filter.rb | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/lib/banzai/filter/front_matter_filter.rb b/lib/banzai/filter/front_matter_filter.rb index 3367981401f..9c18d6e9154 100644 --- a/lib/banzai/filter/front_matter_filter.rb +++ b/lib/banzai/filter/front_matter_filter.rb @@ -4,33 +4,30 @@ module Banzai module Filter class FrontMatterFilter < HTML::Pipeline::Filter DELIM_LANG = { - 
"---" => "yaml", - "+++" => "toml", - ";;;" => "json" + '---' => 'yaml', + '+++' => 'toml', + ';;;' => 'json' }.freeze - DELIM = DELIM_LANG.keys.map {|delim| Regexp.escape(delim) }.join("|") + DELIM = Regexp.union(DELIM_LANG.keys) PATTERN = %r{ - \A(?:[^\r\n]*coding:[^\r\n]*)? # optional encoding line + \A(?:[^\r\n]*coding:[^\r\n]*)? # optional encoding line \s* - ^(?<delim>#{DELIM})(?<lang>\S*) # opening front matter marker (optional language specifier) + ^(?<delim>#{DELIM})[ \t]*(?<lang>\S*) # opening front matter marker (optional language specifier) \s* - ^(?<front_matter>.*?) # front matter (not greedy) + ^(?<front_matter>.*?) # front matter (not greedy) \s* - ^\k<delim> # closing front matter marker + ^\k<delim> # closing front matter marker \s* - (?<content>^.*)? # content }msx def call - match = PATTERN.match(html) + html.sub(PATTERN) do |_match| + lang = $~[:lang].presence || DELIM_LANG[$~[:delim]] - return html unless match - - lang = match['lang'].empty? ? DELIM_LANG[match['delim']] : match['lang'] - - "```#{lang}\n#{match['front_matter']}\n```\n\n#{match['content']}" + ["```#{lang}", $~[:front_matter], "```", "\n"].join("\n") + end end end end -- GitLab From cdfd9799487f834df368f6cfa0f8da7d5a3bbe4f Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Fri, 7 Dec 2018 10:33:28 -0600 Subject: [PATCH 6/7] Remove inappropriate encoding modifier from regex --- lib/banzai/filter/front_matter_filter.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/banzai/filter/front_matter_filter.rb b/lib/banzai/filter/front_matter_filter.rb index 9c18d6e9154..a27d18facd1 100644 --- a/lib/banzai/filter/front_matter_filter.rb +++ b/lib/banzai/filter/front_matter_filter.rb @@ -20,7 +20,7 @@ class FrontMatterFilter < HTML::Pipeline::Filter \s* ^\k<delim> # closing front matter marker \s* - }msx + }mx def call html.sub(PATTERN) do |_match| -- GitLab From 4a542c47bf8067b732fac8fa047004a2b6098f7e Mon Sep 17 00:00:00 2001 From: Travis Miller Date: Fri, 7 Dec 2018 10:41:59 -0600 Subject: [PATCH 7/7] remove the old yaml front matter filter --- 
lib/banzai/filter/yaml_front_matter_filter.rb | 27 ------------------- 1 file changed, 27 deletions(-) delete mode 100644 lib/banzai/filter/yaml_front_matter_filter.rb diff --git a/lib/banzai/filter/yaml_front_matter_filter.rb b/lib/banzai/filter/yaml_front_matter_filter.rb deleted file mode 100644 index 295964dd75d..00000000000 --- a/lib/banzai/filter/yaml_front_matter_filter.rb +++ /dev/null @@ -1,27 +0,0 @@ -# frozen_string_literal: true - -module Banzai - module Filter - class YamlFrontMatterFilter < HTML::Pipeline::Filter - DELIM = '---'.freeze - - # Hat-tip to Middleman: https://git.io/v2e0z - PATTERN = %r{ - \A(?:[^\r\n]*coding:[^\r\n]*\r?\n)? - (?<start>#{DELIM})[ ]*\r?\n - (?<frontmatter>.*?)[ ]*\r?\n? - ^(?<stop>#{DELIM})[ ]*\r?\n? - \r?\n? - (?<content>.*) - }mx.freeze - - def call - match = PATTERN.match(html) - - return html unless match - - "```yaml\n#{match['frontmatter']}\n```\n\n#{match['content']}" - end - end - end -end -- GitLab