...
 
Commits (2)
pelican-toc @ 29ab1611
Subproject commit 29ab1611aeb8531813cb8360778e94cf8160012a
This diff is collapsed.
pelican-toc [![Build Status](https://travis-ci.org/ingwinlu/pelican-toc.svg?branch=master)](https://travis-ci.org/ingwinlu/pelican-toc)
===================================
This plugin generates a table of contents for pelican articles and pages, available for themes via `article.toc`.
# Usage
## requirements
BeautifulSoup4 - install via `pip install beautifulsoup4`
## theme
```
{% if article.toc %}
<div class="col-lg-3 hidden-xs hidden-sm">
{{article.toc}}
</div>
{% endif %}
```
## article
```
Title: Peeking at erlang/chicagoboss
###Intro
###Chicagoboss Magic
###Result
```
## output
```
<div class="col-lg-3 hidden-xs hidden-sm">
<div id="toc">
<ul>
<li>
<a href="#" title="Peeking at&nbsp;erlang/chicagoboss">Peeking at&nbsp;erlang/chicagoboss</a>
<ul>
<li>
<a href="#intro" title="Intro">Intro</a>
</li>
<li>
<a href="#chicagoboss-magic" title="Chicagoboss&nbsp;Magic">Chicagoboss&nbsp;Magic</a>
</li>
<li>
<a href="#result" title="Result">Result</a>
</li>
</ul>
</li>
</ul>
</div>
</div>
```
## settings
```
TOC = {
'TOC_HEADERS' : '^h[1-6]', # What headers should be included in
# the generated toc
# Expected format is a regular expression
'TOC_RUN' : 'true', # Default value for toc generation,
# if it does not evaluate
# to 'true' no toc will be generated
'TOC_INCLUDE_TITLE': 'true', # If 'true' include title in toc
}
```
All of these settings can be overridden on a per-page/per-article basis via metadata.
Just use the respective keyword, in lower case, as a metadata field (example: `toc_headers: ^h[1-4]`)
# Differences between pelican-toc and pelican-extract-toc
`extract-toc` uses a markdown extension to generate a toc and then extracts it via BeautifulSoup.
This extension generates the toc itself, removing the need to write `[ToC]` in your articles.
There is also a 'health' check on header ids: they should be generated by `markdown.extensions.headerid` by default, but they don't always end up in the output, so any header without an id gets one generated from its text.
Title: Peeking at erlang/chicagoboss
Date: 2014-03-07 00:19:24
###Intro
an article with headers
###Magic
###Result
Title: headers of all sizes
Date: 2015-06-02 00:56:24
# 1
## 2
### 3
#### 4
##### 5
###### 6
Title: headers of all sizes
Date: 2015-06-02 00:56:24
toc_headers: ^h[1-3]
# 1
## 2
### 3
#### 4
##### 5
###### 6
Title: Peeking at erlang/chicagoboss
Date: 2014-03-07 00:19:24
toc_run: false
###Intro
an article with headers
###Magic
###Result
Title: headers in non ascii
Date: 2015-06-01 00:19:24
###введение
russian intro
###魔術
traditional chinese magic
###αποτέλεσμα
greek result
<div id="toc"><ul><li><a class="toc-href" href="#" title="Peeking at erlang/chicagoboss">Peeking at erlang/chicagoboss</a><ul><li><a class="toc-href" href="#intro" title="Intro">Intro</a></li><li><a class="toc-href" href="#magic" title="Magic">Magic</a></li><li><a class="toc-href" href="#result" title="Result">Result</a></li></ul></li></ul></div>
\ No newline at end of file
<div id="toc"><ul><li><a class="toc-href" href="#" title="headers of all sizes">headers of all sizes</a><ul><li><a class="toc-href" href="#1" title="1">1</a><ul><li><a class="toc-href" href="#2" title="2">2</a><ul><li><a class="toc-href" href="#3" title="3">3</a></li></ul></li></ul></li></ul></li></ul></div>
\ No newline at end of file
<div id="toc"><ul><li><a class="toc-href" href="#1" title="1">1</a><ul><li><a class="toc-href" href="#2" title="2">2</a><ul><li><a class="toc-href" href="#3" title="3">3</a></li></ul></li></ul></li></ul></div>
\ No newline at end of file
<div id="toc"><ul><li><a class="toc-href" href="#intro" title="Intro">Intro</a></li><li><a class="toc-href" href="#magic" title="Magic">Magic</a></li><li><a class="toc-href" href="#result" title="Result">Result</a></li></ul></div>
\ No newline at end of file
<div id="toc"><ul><li><a class="toc-href" href="#" title="headers in non ascii">headers in non ascii</a><ul><li><a class="toc-href" href="#vvedenie" title="введение">введение</a></li><li><a class="toc-href" href="#mo-shu" title="魔術">魔術</a></li><li><a class="toc-href" href="#apotelesma" title="&alpha;&pi;&omicron;&tau;έ&lambda;&epsilon;&sigma;&mu;&alpha;">&alpha;&pi;&omicron;&tau;έ&lambda;&epsilon;&sigma;&mu;&alpha;</a></li></ul></li></ul></div>
\ No newline at end of file
<div id="toc"><ul><li><a class="toc-href" href="#vvedenie" title="введение">введение</a></li><li><a class="toc-href" href="#mo-shu" title="魔術">魔術</a></li><li><a class="toc-href" href="#apotelesma" title="&alpha;&pi;&omicron;&tau;έ&lambda;&epsilon;&sigma;&mu;&alpha;">&alpha;&pi;&omicron;&tau;έ&lambda;&epsilon;&sigma;&mu;&alpha;</a></li></ul></div>
\ No newline at end of file
Title: Peeking at erlang/chicagoboss
Date: 2014-03-07 00:19:24
an article without headers
from io import open
import unittest
import re
import toc
from pelican.readers import MarkdownReader
from pelican.contents import Article
from pelican.tests.support import get_settings
class TestToCGeneration(unittest.TestCase):
    """Check the generated table of contents against stored HTML fixtures.

    Each test reads a Markdown article from ``test_data/``, runs
    ``toc.generate_toc`` on it and compares ``article.toc`` with the
    content of the matching expected ``.html`` file.
    """

    @classmethod
    def setUpClass(cls):
        toc.init_default_config(None)
        cls.settings = get_settings()
        cls.md_reader = MarkdownReader(cls.settings)

    def setUp(self):
        # have to reset the defaults before every test, because the settings
        # are shallow copies: a value changed by one test would otherwise
        # still be in place for the next one
        self.settings['TOC']['TOC_HEADERS'] = '^h[1-6]'
        self.settings['TOC']['TOC_RUN'] = 'true'
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        self.maxDiff = 9999

    def _handle_article_generation(self, path):
        """Read the Markdown file at ``path`` and wrap it in an ``Article``."""
        content, metadata = self.md_reader.read(path)
        return Article(content=content, metadata=metadata)

    def _generate_toc(self, article_path, expected_path):
        """Return ``(article, expected_html)`` for the two fixture paths.

        The article has already been passed through ``toc.generate_toc``.
        """
        result = self._handle_article_generation(article_path)
        toc.generate_toc(result)
        # Some fixtures contain non-ASCII text; read them as UTF-8 explicitly
        # so the comparison does not depend on the locale's default encoding.
        with open(expected_path, 'r', encoding='utf-8') as f:
            expected = f.read()
        return result, expected

    def _assert_no_toc(self, article_path):
        """Assert that no ``toc`` attribute is set for the given article."""
        article = self._handle_article_generation(article_path)
        toc.generate_toc(article)
        self.assertFalse(hasattr(article, 'toc'))

    def test_toc_generation(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        result, expected = self._generate_toc(
            "test_data/article_with_headers.md",
            "test_data/article_with_headers_toc.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_nonascii(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_nonascii.md",
            "test_data/article_with_headers_toc_nonascii.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_exclude_small_headers(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        self.settings['TOC']['TOC_HEADERS'] = '^h[1-3]'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers.md",
            "test_data/article_with_headers_toc_exclude_small_headers.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_exclude_small_headers_metadata(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers_metadata.md",
            "test_data/article_with_headers_toc_exclude_small_headers.html"
        )
        self.assertEqual(result.toc, expected)

    def test_bad_TOC_HEADERS(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'true'
        self.settings['TOC']['TOC_HEADERS'] = '^[1-'
        with self.assertRaises(re.error):
            self._generate_toc(
                "test_data/article_with_headers_exclude_small_headers.md",
                "test_data/article_with_headers_toc_exclude_small_headers.html"
            )

    def test_toc_generation_no_title(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'false'
        result, expected = self._generate_toc(
            "test_data/article_with_headers.md",
            "test_data/article_with_headers_toc_no_title.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_nonascii_no_title(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'false'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_nonascii.md",
            "test_data/article_with_headers_toc_nonascii_no_title.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_exclude_small_headers_no_title(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'false'
        self.settings['TOC']['TOC_HEADERS'] = '^h[1-3]'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers.md",
            "test_data/article_with_headers_toc_exclude_small_headers_no_title.html"
        )
        self.assertEqual(result.toc, expected)

    def test_toc_generation_exclude_small_headers_metadata_no_title(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'false'
        result, expected = self._generate_toc(
            "test_data/article_with_headers_exclude_small_headers_metadata.md",
            "test_data/article_with_headers_toc_exclude_small_headers_no_title.html"
        )
        self.assertEqual(result.toc, expected)

    # This used to be a second ``test_bad_TOC_HEADERS``; the duplicate name
    # replaced the include-title variant above, which therefore never ran.
    def test_bad_TOC_HEADERS_no_title(self):
        self.settings['TOC']['TOC_INCLUDE_TITLE'] = 'false'
        self.settings['TOC']['TOC_HEADERS'] = '^[1-'
        with self.assertRaises(re.error):
            self._generate_toc(
                "test_data/article_with_headers_exclude_small_headers.md",
                "test_data/article_with_headers_toc_exclude_small_headers_no_title.html"
            )

    def test_no_toc_generation(self):
        # an article without any header must not get a ``toc`` attribute
        self._assert_no_toc("test_data/article_without_headers.md")

    def test_no_toc_generation_metadata(self):
        # NOTE(review): this fixture presumably disables the toc through
        # ``toc_run`` metadata - confirm against the file in test_data/
        self._assert_no_toc("test_data/article_with_headers_metadata.md")
# Run the whole test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
'''
toc
===================================
This plugin generates tocs for pages and articles.
'''
from __future__ import unicode_literals
import logging
import re
from bs4 import BeautifulSoup, Comment
from pelican import contents, signals
from pelican.utils import python_2_unicode_compatible, slugify
# Name of the settings entry that holds this plugin's options.
TOC_KEY = 'TOC'

# Values used for every option that the user's settings do not provide.
# All values are strings; an option is considered enabled only when it is
# exactly 'true'. TOC_HEADERS is a regular expression matched against tag
# names.
TOC_DEFAULT = {
    'TOC_HEADERS': '^h[1-6]',
    'TOC_RUN': 'true',
    'TOC_INCLUDE_TITLE': 'true',
}

logger = logging.getLogger(__name__)
'''
https://github.com/waylan/Python-Markdown/blob/master/markdown/extensions/headerid.py
'''
# Splits an id of the form "<stem>_<number>" into its stem and its counter.
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')


def unique(id, ids):
    """Return ``id`` made unique within ``ids`` and record it there.

    An empty ``id``, or one already present in ``ids``, gets a numeric
    suffix: ``_1`` is appended when there is none yet, otherwise the
    existing ``_<n>`` suffix is incremented. This repeats until the result
    is non-empty and unused. The returned id is added to ``ids`` (a set).
    """
    candidate = id
    while True:
        if candidate not in ids and candidate:
            break
        counted = IDCOUNT_RE.match(candidate)
        if counted is None:
            candidate = '%s_%d' % (candidate, 1)
        else:
            stem, number = counted.groups()
            candidate = '%s_%d' % (stem, int(number) + 1)
    ids.add(candidate)
    return candidate
def _escape_html(text):
    """Return ``text`` with the HTML special characters turned into entities."""
    return ('{}'.format(text)
            .replace('&', '&amp;')
            .replace('<', '&lt;')
            .replace('>', '&gt;')
            .replace('"', '&quot;')
            .replace("'", '&#39;'))


@python_2_unicode_compatible
class HtmlTreeNode(object):
    """One entry of the table of contents; the root entry is the title.

    Nodes form a tree that mirrors the nesting of the headers. Converting
    the root node to a string renders the whole tree as nested ``<ul>``
    lists wrapped in ``<div id='toc'>``.
    """

    def __init__(self, parent, header, level, id, include_title):
        self.children = []  # sub-entries, in document order
        self.parent = parent  # enclosing node, None for the root
        self.header = header  # text shown for this entry
        self.level = level  # tag name ('h1'...'h6'); the root uses 'h0'
        self.id = id  # anchor the entry links to
        self.include_title = include_title  # render the root entry as well?

    def add(self, new_header, ids):
        """Insert the header tag ``new_header`` relative to this node.

        The header's ``id`` attribute is set to a value that is unique
        within ``ids`` (derived from the header text when it has no id yet).

        Returns ``(new_node, new_header)``: the node created for the header,
        which is the node the next header should be added to, and the
        (modified) header tag itself.
        """
        new_level = new_header.name
        new_string = new_header.string
        new_id = new_header.attrs.get('id')

        if not new_string:
            # the header holds nested markup: join all of its text nodes,
            # leaving out comments
            new_string = new_header.find_all(
                text=lambda t: not isinstance(t, Comment),
                recursive=True)
            new_string = "".join(new_string)

        if not new_id:
            new_id = slugify(new_string, ())

        # The id is settled exactly once per header. Recursing into the
        # parent with the already modified tag would run it through unique()
        # again and append another '_<n>' for every level stepped up.
        new_id = unique(new_id, ids)  # make sure id is unique
        new_header.attrs['id'] = new_id

        # Climb to the closest ancestor whose level is lower than the new
        # one (levels compare as strings: 'h0' < 'h1' < ... < 'h6'). Stopping
        # at the root keeps a tag that sorts below 'h0' from dereferencing
        # the root's missing parent.
        anchor = self
        while anchor.parent is not None and anchor.level >= new_level:
            anchor = anchor.parent

        new_node = HtmlTreeNode(anchor, new_string, new_level, new_id,
                                self.include_title)
        anchor.children.append(new_node)
        return new_node, new_header

    def __str__(self):
        ret = ''
        is_root = self.parent is None
        visible = not is_root or self.include_title

        if visible:
            if is_root:
                # NOTE(review): the title comes from the article metadata and
                # may already contain entities/markup (the README output shows
                # '&nbsp;' in it) - confirm. It is therefore not escaped
                # again; only the attribute delimiter is protected.
                text = '{}'.format(self.header)
                title = text.replace("'", '&#39;')
            else:
                # Header text is plain text taken from the parsed document;
                # escape it so that quotes, '<' or '&' cannot break the
                # attribute or the markup.
                text = title = _escape_html(self.header)
            ret = "<a class='toc-href' href='#{0}' title='{1}'>{2}</a>".format(
                _escape_html(self.id), title, text)

        if self.children:
            ret += "<ul>{}</ul>".format(
                ''.join('{}'.format(child) for child in self.children))

        # each list
        if visible:
            ret = "<li>{}</li>".format(ret)

        # end wrapper
        if is_root:
            if self.include_title:
                ret = "<div id='toc'><ul>{}</ul></div>".format(ret)
            else:
                ret = "<div id='toc'>{}</div>".format(ret)

        return ret
def init_default_config(pelican):
    """Fill in the plugin's default options in the pelican settings.

    The ``TOC`` entry of pelican's ``DEFAULT_CONFIG`` and, when ``pelican``
    is truthy, of ``pelican.settings`` is replaced by a copy of
    ``TOC_DEFAULT`` updated with whatever the entry already contained, so
    existing values win over the defaults.
    """
    from pelican.settings import DEFAULT_CONFIG

    def with_toc_defaults(config):
        merged = dict(TOC_DEFAULT)
        if TOC_KEY in config:
            merged.update(config[TOC_KEY])
        config[TOC_KEY] = merged
        return config

    # DEFAULT_CONFIG is modified in place; the return value is not needed.
    with_toc_defaults(DEFAULT_CONFIG)
    if pelican:
        pelican.settings = with_toc_defaults(pelican.settings)
def generate_toc(content):
    """Build the table of contents for ``content`` and store it on it.

    The options are read from ``content.settings['TOC']``; each one can be
    overridden by the metadata fields ``toc_run``, ``toc_include_title`` and
    ``toc_headers``. Nothing happens for static content or when ``toc_run``
    is not ``'true'``.

    When at least one tag matches the header expression, ``content.toc`` is
    set to the rendered toc and ``content._content`` is replaced by the
    re-serialised document, in which every matched header carries an id.
    Otherwise ``content`` is left untouched.

    Raises:
        re.error: the header expression is not a valid regular expression.
    """
    if isinstance(content, contents.Static):
        return

    toc_settings = content.settings[TOC_KEY]

    if content.metadata.get('toc_run', toc_settings['TOC_RUN']) != 'true':
        return

    include_title = content.metadata.get(
        'toc_include_title', toc_settings['TOC_INCLUDE_TITLE']) == 'true'

    # Resolve the pattern first, so that the error message reports the value
    # that was actually used (it may come from the metadata, not the
    # settings).
    header_pattern = content.metadata.get(
        'toc_headers', toc_settings['TOC_HEADERS'])
    try:
        header_re = re.compile(header_pattern)
    except re.error as e:
        logger.error("TOC_HEADERS '%s' is not a valid re\n%s",
                     header_pattern, e)
        raise

    all_ids = set()
    title = content.metadata.get('title', 'Title')
    # the title is the root of the tree, one level above every real header
    tree = node = HtmlTreeNode(None, title, 'h0', '', include_title)
    soup = BeautifulSoup(content._content, 'html.parser')

    headers = soup.find_all(header_re)
    if not headers:
        return

    for header in headers:
        node, new_header = node.add(header, all_ids)
        header.replace_with(new_header)  # to get our ids back into soup

    # Round-trip the rendered tree through BeautifulSoup so that the toc is
    # serialised with the same formatter as the content.
    tree_soup = BeautifulSoup('{}'.format(tree), 'html.parser')
    content.toc = tree_soup.decode(formatter='html')
    content._content = soup.decode(formatter='html')
def register():
    """Connect the plugin's callbacks to the pelican signals.

    ``init_default_config`` is connected to ``initialized`` and
    ``generate_toc`` to ``content_object_init``.
    """
    hooks = (
        (signals.initialized, init_default_config),
        (signals.content_object_init, generate_toc),
    )
    for signal, callback in hooks:
        signal.connect(callback)
[tox]
skipsdist = true
envlist = py{27,33,34,35,36}-pelican{34,35,36,37,dev}
[testenv]
basepython =
py27: python2.7
py33: python3.3
py34: python3.4
py35: python3.5
py36: python3.6
deps =
pelican34: git+https://github.com/getpelican/pelican.git@3.4.0#egg=pelican
pelican35: git+https://github.com/getpelican/pelican.git@3.5.0#egg=pelican
pelican36: git+https://github.com/getpelican/pelican.git@3.6.3#egg=pelican
pelican37: git+https://github.com/getpelican/pelican.git@3.7.1#egg=pelican
pelicandev: git+https://github.com/getpelican/pelican.git#egg=pelican
beautifulsoup4
-rdev_requirements.txt
passenv = *
install_command= pip install {opts} -e {packages}
commands =
{envpython} --version
pelican --version
{envpython} test_toc.py
[flake8]
application-import-names=toc
import-order-style=cryptography
[testenv:flake8]
basepython = python2.7
deps =
flake8
flake8-import-order
install_command= pip install {opts} {packages}
commands =
flake8 --version
flake8 toc.py