diff --git a/mkdocs_rss_plugin/hacky_fix_links.py b/mkdocs_rss_plugin/hacky_fix_links.py
new file mode 100644
index 0000000..5915d20
--- /dev/null
+++ b/mkdocs_rss_plugin/hacky_fix_links.py
@@ -0,0 +1,33 @@
+import re
+from urllib.parse import urljoin
+
+HREF_MATCH_PATTERN = re.compile(r'href="(.*?)"')
+SRC_MATCH_PATTERN = re.compile(r'src="(.*?)"')
+
+
+def relative_links_resolve_to_page(page_html, page_url):
+    """Resolve every ``href="..."`` and ``src="..."`` value against ``page_url``.
+
+    Each double-quoted attribute value matched by ``HREF_MATCH_PATTERN`` or
+    ``SRC_MATCH_PATTERN`` is replaced by ``urljoin(page_url, value)``.
+
+    The rewrite happens inside the matched attribute only. A plain
+    ``str.replace`` of the link text over the whole document would also alter
+    identical text in the page body, rewrite a short link again wherever it
+    occurs inside a longer one, and, for an empty ``href=""``, insert the
+    page URL between every character of the page.
+
+    Args:
+        page_html: HTML of the page.
+        page_url: URL of the page, used as the base for relative links.
+
+    Returns:
+        The HTML with the matched attribute values rewritten.
+    """
+
+    def resolve_attribute(match):
+        # Splice the resolved URL over group 1 so the attribute name and the
+        # quotes of the original match are kept exactly as written.
+        matched_text = match.group(0)
+        value_start = match.start(1) - match.start(0)
+        value_end = match.end(1) - match.start(0)
+        return (
+            matched_text[:value_start]
+            + urljoin(page_url, match.group(1))
+            + matched_text[value_end:]
+        )
+
+    for pattern in (HREF_MATCH_PATTERN, SRC_MATCH_PATTERN):
+        page_html = pattern.sub(resolve_attribute, page_html)
+    return page_html
+
+
+# Compiled patterns consumed by remove_wrappers(), which replaces every match
+# with the text captured by the pattern's first group.
+# NOTE(review): the two pattern literals below look truncated in this copy of
+# the patch (the second one is split over three lines, which is not a valid
+# Python string literal) - confirm them against the committed file.
+WRAPPER_PATTERNS = [
+ re.compile(p, flags=re.DOTALL)  # DOTALL: "." also matches newlines
+ for p in [
+ '(.*?)',
+ '
(.*?)
',
+ ]
+]
+
+
+def remove_wrappers(page_html):
+    """Apply every pattern of ``WRAPPER_PATTERNS`` to ``page_html`` in turn.
+
+    The patterns are applied one after another, in list order; each match is
+    replaced by the text captured in its first group.
+
+    Args:
+        page_html: HTML to process.
+
+    Returns:
+        The HTML after all substitutions.
+    """
+    unwrapped = page_html
+    for pattern in WRAPPER_PATTERNS:
+        unwrapped = pattern.sub(r"\1", unwrapped)
+    return unwrapped
diff --git a/mkdocs_rss_plugin/plugin.py b/mkdocs_rss_plugin/plugin.py
index a9e7beb..c6307b1 100644
--- a/mkdocs_rss_plugin/plugin.py
+++ b/mkdocs_rss_plugin/plugin.py
@@ -323,6 +323,7 @@ def on_page_content(
created=page_dates[0],
description=self.util.get_description_or_abstract(
in_page=page,
+ html=html,
chars_count=self.config.abstract_chars_count,
abstract_delimiter=self.config.abstract_delimiter,
),
diff --git a/mkdocs_rss_plugin/util.py b/mkdocs_rss_plugin/util.py
index 1c14ab3..848b268 100644
--- a/mkdocs_rss_plugin/util.py
+++ b/mkdocs_rss_plugin/util.py
@@ -41,6 +41,10 @@
REMOTE_REQUEST_HEADERS,
)
from mkdocs_rss_plugin.git_manager.ci import CiHandler
+from mkdocs_rss_plugin.hacky_fix_links import (
+ relative_links_resolve_to_page,
+ remove_wrappers,
+)
from mkdocs_rss_plugin.integrations.theme_material_social_plugin import (
IntegrationMaterialSocialCards,
)
@@ -475,6 +479,7 @@ def get_date_from_meta(
def get_description_or_abstract(
self,
in_page: Page,
+ html: str,
chars_count: int = 160,
abstract_delimiter: Optional[str] = None,
) -> str:
@@ -509,15 +514,12 @@ def get_description_or_abstract(
# If the abstract is cut by the delimiter
elif (
abstract_delimiter
- and (
- excerpt_separator_position := in_page.markdown.find(abstract_delimiter)
- )
- > -1
+ and (excerpt_separator_position := html.find(abstract_delimiter)) > -1
):
- return markdown.markdown(
- in_page.markdown[:excerpt_separator_position],
- output_format="html5",
- )
+ replaced_links = relative_links_resolve_to_page(html, in_page.canonical_url)
+ removed_wrappers = remove_wrappers(replaced_links)
+ return removed_wrappers[: removed_wrappers.find(abstract_delimiter)]
+
# Use first chars_count from the markdown
elif chars_count > 0 and in_page.markdown:
if len(in_page.markdown) <= chars_count:
diff --git a/tests/fixtures/docs/blog/posts/assets/example_image.webp b/tests/fixtures/docs/blog/posts/assets/example_image.webp
new file mode 100644
index 0000000..e44b857
Binary files /dev/null and b/tests/fixtures/docs/blog/posts/assets/example_image.webp differ
diff --git a/tests/fixtures/docs/blog/posts/sample_blog_post_internal_links.md b/tests/fixtures/docs/blog/posts/sample_blog_post_internal_links.md
new file mode 100644
index 0000000..f7af380
--- /dev/null
+++ b/tests/fixtures/docs/blog/posts/sample_blog_post_internal_links.md
@@ -0,0 +1,24 @@
+---
+date: 2023-02-12
+authors: [guts]
+categories:
+ - Blog
+---
+
+# Blog sample with internal links
+
+I'm a really short intro.
+
+![here's an internal image](./assets/example_image.webp)
+
+[Here's an internal link](./sample_blog_post.md)
+and another
+[Another link](../../index.md)
+
+<!-- more -->
+
+## This part won't show up in RSS feed
+
+### What is Lorem Ipsum?
+
+Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
diff --git a/tests/test_build.py b/tests/test_build.py
index 7f07e47..4d015d4 100644
--- a/tests/test_build.py
+++ b/tests/test_build.py
@@ -427,7 +427,7 @@ def test_simple_build_item_delimiter(self):
for feed_item in feed_parsed.entries:
if feed_item.title in ("Page without meta with early delimiter",):
- self.assertLess(len(feed_item.description), 50, feed_item.title)
+ self.assertLess(len(feed_item.description), 100, feed_item.title)
def test_simple_build_item_delimiter_empty(self):
with tempfile.TemporaryDirectory() as tmpdirname:
@@ -904,6 +904,41 @@ def test_not_git_repo(self):
# restore name
git_dir_tmp.replace(git_dir)
+ def test_abstract_with_internal_links(self):
+     """Check that links and images in a feed abstract are absolute URLs.
+
+     Builds the ``docs`` fixture project with ``mkdocs_minimal.yml``, parses
+     the created and updated feeds, and inspects the summary of every entry
+     titled "Blog sample with internal links".
+     """
+     expected_attributes = (
+         'href="https://guts.github.io/mkdocs-rss-plugin/blog/posts/sample_blog_post/"',
+         'href="https://guts.github.io/mkdocs-rss-plugin/"',
+         'src="https://guts.github.io/mkdocs-rss-plugin/blog/posts/assets/example_image.webp"',
+     )
+
+     with tempfile.TemporaryDirectory() as tmpdirname:
+         cli_result = self.build_docs_setup(
+             testproject_path="docs",
+             mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"),
+             output_path=tmpdirname,
+             strict=True,
+         )
+         self.assertEqual(cli_result.exit_code, 0)
+         self.assertIsNone(cli_result.exception)
+
+         feed_rss_created = feedparser.parse(
+             Path(tmpdirname) / OUTPUT_RSS_FEED_CREATED
+         )
+         feed_rss_updated = feedparser.parse(
+             Path(tmpdirname) / OUTPUT_RSS_FEED_UPDATED
+         )
+
+         matching_entries = [
+             entry
+             for entry in feed_rss_created.entries + feed_rss_updated.entries
+             if entry.title == "Blog sample with internal links"
+         ]
+
+         # Guard against a vacuous pass: if the post is missing from both
+         # feeds, the loop below would not run a single assertion.
+         self.assertTrue(
+             matching_entries,
+             "'Blog sample with internal links' not found in any feed",
+         )
+
+         for entry in matching_entries:
+             for expected in expected_attributes:
+                 self.assertIn(expected, entry.summary)
+
+
# ##############################################################################
# ##### Stand alone program ########