Skip to content
Snippets Groups Projects
Commit 0a232029 authored by Bauke, committed by Andrew Shu
Browse files

Hide the inner <details> text from comment excerpts but include the <summary> text.

parent 1d0144c3
Branches
No related tags found
No related merge requests found
......@@ -69,7 +69,9 @@ class TopicMetadataGenerator(EventStreamConsumer):
if not topic.rendered_html:
return {}
extracted_text = extract_text_from_html(topic.rendered_html)
extracted_text = extract_text_from_html(
topic.rendered_html, exclude_details_include_summary=True
)
# create a short excerpt by truncating the extracted string
excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")
......
......@@ -154,6 +154,16 @@ def test_comment_excerpt_excludes_del(topic, session_user):
assert comment.excerpt == "I really love it."
def test_comment_excerpt_excludes_details(topic, session_user):
    """Check how a <details> block is reflected in a comment's excerpt.

    The text of the inner <summary> must appear in the excerpt, while the rest of
    the <details> content must be left out.
    """
    spoiler_markdown = (
        "<details>\n<summary>Spoilers!</summary>\n\nHide me!\n</details>"
    )
    spoiler_comment = Comment(topic, session_user, spoiler_markdown)

    # Only the summary text is expected to survive into the excerpt.
    assert spoiler_comment.excerpt == "Spoilers!"
def test_comment_tree(db, topic, session_user):
"""Ensure that building and pruning a comment tree works."""
all_comments = []
......
......@@ -7,6 +7,7 @@ from tildes.lib.string import (
truncate_string,
truncate_string_at_char,
word_count,
extract_text_from_html,
)
......@@ -152,3 +153,23 @@ def test_basic_camelcase_to_snakecase():
def test_camelcase_to_snakecase_with_acronym():
    """Check that CamelCase->snake_case keeps an embedded acronym as one word."""
    converted = camelcase_to_snakecase("SomeHTTPThing")

    assert converted == "some_http_thing"
def test_extract_text_from_html_include_details():
    """Check that extract_text_from_html keeps <details> content by default."""
    # Case 1: a <summary> followed by body text -- both are expected in the output.
    summary_and_body = (
        "<details><summary>Spoilers!</summary> <p>Don't hide me!</p></details>"
    )
    assert extract_text_from_html(summary_and_body) == "Spoilers! Don't hide me!"

    # Case 2: no <summary> at all -- the body text is still expected.
    body_only = "<details><p>Don't hide me!</p></details>"
    assert extract_text_from_html(body_only) == "Don't hide me!"
def test_extract_text_from_html_exclude_details():
    """Check that extract_text_from_html hides <details> content when asked to.

    With exclude_details_include_summary=True, only the <summary> text is expected;
    when there is no <summary>, the placeholder text "Details" is expected instead.
    """
    # Case 1: the <summary> text replaces the whole <details> block.
    summary_and_body = (
        "<details><summary>Spoilers!</summary> <p>Hide me!</p></details>"
    )
    assert (
        extract_text_from_html(summary_and_body, exclude_details_include_summary=True)
        == "Spoilers!"
    )

    # Case 2: without a <summary>, the placeholder stands in for the block.
    body_only = "<details><p>Hide me!</p></details>"
    assert (
        extract_text_from_html(body_only, exclude_details_include_summary=True)
        == "Details"
    )
......@@ -226,7 +226,11 @@ def separate_string(original: str, separator: str, segment_size: int) -> str:
return separated
def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) -> str:
def extract_text_from_html(
html: str,
skip_tags: Optional[list[str]] = None,
exclude_details_include_summary: bool = False,
) -> str:
"""Extract plain text content from the elements inside an HTML string."""
def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
......@@ -242,6 +246,14 @@ def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) ->
if element.tag in skip_tags:
return
if element.tag == "details" and exclude_details_include_summary:
for subelement in element:
if subelement.tag == "summary":
yield from extract_text(subelement, skip_tags)
return
yield "Details"
return
if element.text:
yield element.text
......
......@@ -138,7 +138,9 @@ class Comment(DatabaseModel):
self.rendered_html = convert_markdown_to_safe_html(new_markdown)
extracted_text = extract_text_from_html(
self.rendered_html, skip_tags=["blockquote", "del"]
self.rendered_html,
skip_tags=["blockquote", "del"],
exclude_details_include_summary=True,
)
self.excerpt = truncate_string(
extracted_text, length=200, truncate_at_chars=" "
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment