From 2e8e7657269717465b107f225312427cf37f5ba2 Mon Sep 17 00:00:00 2001 From: fluffy Date: Sun, 20 Mar 2022 12:43:23 -0700 Subject: [PATCH] Properly strip tables when extracting an entry summary --- publ/html_entry.py | 10 ++++++++-- publ/markdown.py | 2 +- .../cards/table in summary paragraph.md | 20 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 tests/content/cards/table in summary paragraph.md diff --git a/publ/html_entry.py b/publ/html_entry.py index 2eadb65..3c11e06 100644 --- a/publ/html_entry.py +++ b/publ/html_entry.py @@ -204,6 +204,9 @@ def __init__(self): self._tag_stack = [] def handle_starttag(self, tag, attrs): + if tag.lower() == 'table': + self._consume = False + if tag.lower() == 'p': if self._found: self._consume = False @@ -219,6 +222,10 @@ def handle_endtag(self, tag): if self._consume: self.append(f'') + + if tag.lower() == 'table' and not self._found: + self._consume = True + if (not self._tag_stack or tag.lower() == 'p') and self._found: self._consume = False @@ -227,9 +234,8 @@ def handle_startendtag(self, tag, attrs): self.append(utils.make_tag(tag, attrs, True)) def handle_data(self, data): - if data.strip(): + if self._consume and data.strip(): self._found = True - if self._consume: self.append(data) diff --git a/publ/markdown.py b/publ/markdown.py index c8f0ce6..4b2211e 100644 --- a/publ/markdown.py +++ b/publ/markdown.py @@ -31,7 +31,7 @@ 'del', 'add', 'mark') # Remove these tags from plaintext-style conversions -PLAINTEXT_REMOVE_ELEMENTS = ('del', 's') +PLAINTEXT_REMOVE_ELEMENTS = ('del', 's', 'table') class ItemCounter(misaka.BaseRenderer): diff --git a/tests/content/cards/table in summary paragraph.md b/tests/content/cards/table in summary paragraph.md new file mode 100644 index 0000000..1cae671 --- /dev/null +++ b/tests/content/cards/table in summary paragraph.md @@ -0,0 +1,20 @@ +Title: Table in summary paragraph +Date: 2022-03-20 12:34:00-07:00 +Entry-ID: 1711 +UUID: e5fe5b31-039a-5838-b481-bde0001e28e1 + +| Name | Value | +|------|-------| +| foo | 1 | +| bar | 2 | +| baz | 3 | + +This entry starts with a table. This paragraph should be the summary. + +| Name | Value | +|------|-------| +| foo | 1 | +| bar | 2 | +| baz | 3 | + +This second paragraph shouldn't appear in the summary. \ No newline at end of file