Hide the inner <details> text from comment excerpts but include the <summary> text.

Closes tildes-community/tildes-cf#4. See merge request tildes-community/tildes-cf!13.

Hide the inner <details> text from comment excerpts but include the <summary> text.
0a232029 · Bauke · Andrew Shu · 1d0144c3 · 0a232029 · 0a232029
Commit 0a232029, authored 2 months ago by Bauke; committed by Andrew Shu 2 months ago.
--- a/tildes/consumers/topic_metadata_generator.py
+++ b/tildes/consumers/topic_metadata_generator.py
@@ -69,7 +69,9 @@ class TopicMetadataGenerator(EventStreamConsumer):
        if not topic.rendered_html:
            return {}

-        extracted_text = extract_text_from_html(topic.rendered_html)
+        extracted_text = extract_text_from_html(
+            topic.rendered_html, exclude_details_include_summary=True
+        )

        # create a short excerpt by truncating the extracted string
        excerpt = truncate_string(extracted_text, length=200, truncate_at_chars=" ")

--- a/tildes/tests/test_comment.py
+++ b/tildes/tests/test_comment.py
@@ -154,6 +154,16 @@ def test_comment_excerpt_excludes_del(topic, session_user):
    assert comment.excerpt == "I really love it."


+def test_comment_excerpt_excludes_details(topic, session_user):
+    """Ensure that comment excerpts don't include text from <details> elements.
+
+    But ensure that the inner <summary> text *is* included.
+    """
+    markdown = "<details>\n<summary>Spoilers!</summary>\n\nHide me!\n</details>"
+    comment = Comment(topic, session_user, markdown)
+    assert comment.excerpt == "Spoilers!"
+
+
 def test_comment_tree(db, topic, session_user):
    """Ensure that building and pruning a comment tree works."""
    all_comments = []

--- a/tildes/tests/test_string.py
+++ b/tildes/tests/test_string.py
@@ -7,6 +7,7 @@ from tildes.lib.string import (
    truncate_string,
    truncate_string_at_char,
    word_count,
+    extract_text_from_html,
 )


@@ -152,3 +153,23 @@ def test_basic_camelcase_to_snakecase():
 def test_camelcase_to_snakecase_with_acronym():
    """Ensure CamelCase->snake_case works as expected with an acronym."""
    assert camelcase_to_snakecase("SomeHTTPThing") == "some_http_thing"
+
+
+def test_extract_text_from_html_include_details():
+    """Ensure extract_text_from_html behavior includes <details> elements by default."""
+    html = "<details><summary>Spoilers!</summary> <p>Don't hide me!</p></details>"
+    assert extract_text_from_html(html) == "Spoilers! Don't hide me!"
+
+    html = "<details><p>Don't hide me!</p></details>"
+    assert extract_text_from_html(html) == "Don't hide me!"
+
+
+def test_extract_text_from_html_exclude_details():
+    """Ensure extract_text_from_html behavior excludes <details> elements when specified."""
+    html = "<details><summary>Spoilers!</summary> <p>Hide me!</p></details>"
+    text = extract_text_from_html(html, exclude_details_include_summary=True)
+    assert text == "Spoilers!"
+
+    html = "<details><p>Hide me!</p></details>"
+    text = extract_text_from_html(html, exclude_details_include_summary=True)
+    assert text == "Details"
--- a/tildes/tildes/lib/string.py
+++ b/tildes/tildes/lib/string.py
@@ -226,7 +226,11 @@ def separate_string(original: str, separator: str, segment_size: int) -> str:
    return separated


-def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) -> str:
+def extract_text_from_html(
+    html: str,
+    skip_tags: Optional[list[str]] = None,
+    exclude_details_include_summary: bool = False,
+) -> str:
    """Extract plain text content from the elements inside an HTML string."""

    def extract_text(element: Element, skip_tags: list[str]) -> Iterator[str]:
@@ -242,6 +246,14 @@ def extract_text_from_html(html: str, skip_tags: Optional[list[str]] = None) ->
        if element.tag in skip_tags:
            return

+        if element.tag == "details" and exclude_details_include_summary:
+            for subelement in element:
+                if subelement.tag == "summary":
+                    yield from extract_text(subelement, skip_tags)
+                    return
+            yield "Details"
+            return
+
        if element.text:
            yield element.text


--- a/tildes/tildes/models/comment/comment.py
+++ b/tildes/tildes/models/comment/comment.py
@@ -138,7 +138,9 @@ class Comment(DatabaseModel):
        self.rendered_html = convert_markdown_to_safe_html(new_markdown)

        extracted_text = extract_text_from_html(
-            self.rendered_html, skip_tags=["blockquote", "del"]
+            self.rendered_html,
+            skip_tags=["blockquote", "del"],
+            exclude_details_include_summary=True,
        )
        self.excerpt = truncate_string(
            extracted_text, length=200, truncate_at_chars=" "