first commit
Build Docker Image / docker (push) Successful in 44s

This commit is contained in:
2026-04-09 10:42:10 -07:00
commit ead872a0a5
19 changed files with 2783 additions and 0 deletions
+91
View File
@@ -0,0 +1,91 @@
from __future__ import annotations
from email.utils import format_datetime
from io import StringIO
from xml.sax.saxutils import escape
import datetime as dt
from page_importer.dates import parse_datetime
from page_importer.models import ScrapedPost
def build_wxr(posts: list[ScrapedPost], channel_title: str = "Imported Content") -> str:
    """Render *posts* as a WXR 1.2 (WordPress export RSS) document.

    Args:
        posts: Scraped posts to serialize; each becomes one ``<item>``,
            emitted in the order given.
        channel_title: Text for the channel ``<title>``; it is XML-escaped.

    Returns:
        The complete XML document as a single string.
    """
    # One timestamp serves as the channel pubDate and as the per-item
    # pubDate fallback when a post's own date cannot be resolved.
    generated_at = dt.datetime.now(dt.timezone.utc)

    rss_open_tag = (
        '<rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/" '
        'xmlns:content="http://purl.org/rss/1.0/modules/content/" '
        'xmlns:wfw="http://wellformedweb.org/CommentAPI/" '
        'xmlns:dc="http://purl.org/dc/elements/1.1/" '
        'xmlns:wp="http://wordpress.org/export/1.2/">\n'
    )

    # Channel header.
    chunks = [
        '<?xml version="1.0" encoding="UTF-8" ?>\n',
        rss_open_tag,
        "<channel>\n",
        f"<title>{escape(channel_title)}</title>\n",
        "<link>http://localhost/</link>\n",
        "<description>Generated by Page Importer</description>\n",
        f"<pubDate>{format_datetime(generated_at)}</pubDate>\n",
        "<language>en-US</language>\n",
        "<wp:wxr_version>1.2</wp:wxr_version>\n",
    ]

    for entry in posts:
        post_date, post_date_gmt, published = _resolve_post_dates(
            entry.publish_date, generated_at
        )
        # Plain-text fields are XML-escaped; free-form values go through
        # cdata() so embedded markup survives verbatim.
        chunks += [
            "<item>\n",
            f"<title>{escape(entry.title)}</title>\n",
            f"<link>{escape(entry.source_url)}</link>\n",
            f"<pubDate>{format_datetime(published)}</pubDate>\n",
            f"<dc:creator>{cdata(entry.author or 'importer')}</dc:creator>\n",
            f'<guid isPermaLink="false">{escape(entry.source_url)}</guid>\n',
            "<description></description>\n",
            f"<content:encoded>{cdata(entry.body_html)}</content:encoded>\n",
            f"<excerpt:encoded>{cdata('')}</excerpt:encoded>\n",
            f"<wp:post_date>{cdata(post_date)}</wp:post_date>\n",
            f"<wp:post_date_gmt>{cdata(post_date_gmt)}</wp:post_date_gmt>\n",
            "<wp:comment_status><![CDATA[closed]]></wp:comment_status>\n",
            "<wp:ping_status><![CDATA[closed]]></wp:ping_status>\n",
            "<wp:post_name><![CDATA[]]></wp:post_name>\n",
            f"<wp:status>{cdata(entry.status)}</wp:status>\n",
            "<wp:post_parent>0</wp:post_parent>\n",
            "<wp:menu_order>0</wp:menu_order>\n",
            f"<wp:post_type>{cdata(entry.post_type or 'post')}</wp:post_type>\n",
            "<wp:post_password><![CDATA[]]></wp:post_password>\n",
            "<wp:is_sticky>0</wp:is_sticky>\n",
        ]
        # Categories and tags share the <category> element and differ only
        # in the "domain" attribute.
        chunks.extend(
            f'<category domain="category" nicename="{escape(slugify(name))}">{cdata(name)}</category>\n'
            for name in entry.categories
        )
        chunks.extend(
            f'<category domain="post_tag" nicename="{escape(slugify(label))}">{cdata(label)}</category>\n'
            for label in entry.tags
        )
        chunks.append("</item>\n")

    chunks.append("</channel>\n</rss>\n")
    return "".join(chunks)
def slugify(value: str) -> str:
    """Turn free text into a lowercase, hyphen-separated slug.

    Alphanumeric characters are kept (lowercased). Every maximal run of
    other characters becomes a single ``-``, and no hyphen is left at
    either end, so ``"Foo & Bar"`` yields ``"foo-bar"`` rather than
    ``"foo---bar"``.

    Args:
        value: Text to slugify. An empty or ``None`` value yields ``""``.

    Returns:
        The slug, or ``""`` when *value* has no alphanumeric characters.
    """
    # Map separators to spaces so str.split() both collapses runs and trims
    # the ends in one pass.
    spaced = "".join(ch.lower() if ch.isalnum() else " " for ch in (value or ""))
    return "-".join(spaced.split())
def cdata(value: str) -> str:
    """Wrap *value* in an XML CDATA section.

    A falsy value (``None`` or ``""``) produces an empty section. Any
    ``]]>`` inside the text would end the section early, so each
    occurrence is split across two adjacent CDATA sections.

    Args:
        value: Text to wrap.

    Returns:
        The text enclosed in ``<![CDATA[`` ... ``]]>``.
    """
    text = value if value else ""
    # Close the section after "]]" and reopen it before ">".
    protected = "]]]]><![CDATA[>".join(text.split("]]>"))
    return "<![CDATA[" + protected + "]]>"
def _resolve_post_dates(value: str, fallback: dt.datetime) -> tuple[str, str, dt.datetime]:
    """Derive the local date, GMT date and pubDate for one post.

    Args:
        value: Raw publish date, handed to ``parse_datetime``.
        fallback: Datetime returned as the pubDate when ``parse_datetime``
            returns ``None``.

    Returns:
        A ``(local_date, gmt_date, pub_date)`` tuple. Both strings are
        empty when the date could not be parsed. For a datetime without a
        usable UTC offset the wall-clock value is treated as UTC, so both
        strings are identical. Otherwise ``gmt_date`` and ``pub_date`` are
        the UTC conversion of the parsed value.
    """
    moment = parse_datetime(value)
    if moment is None:
        return "", "", fallback

    # The local date is always the parsed wall-clock time as written.
    wall_clock = _format_wp_date(moment)

    has_offset = moment.tzinfo is not None and moment.utcoffset() is not None
    if has_offset:
        as_utc = moment.astimezone(dt.timezone.utc)
        return wall_clock, _format_wp_date(as_utc), as_utc

    # No offset information: label the wall-clock time as UTC without shifting it.
    return wall_clock, wall_clock, moment.replace(tzinfo=dt.timezone.utc)
def _format_wp_date(value: dt.datetime) -> str:
    """Format *value* as ``YYYY-MM-DD HH:MM:SS``.

    Any timezone info is dropped without converting, so the string shows
    the datetime's own wall-clock fields.

    Args:
        value: Naive or aware datetime to format.

    Returns:
        The formatted timestamp string.
    """
    wall_clock = value.replace(tzinfo=None)
    return wall_clock.strftime("%Y-%m-%d %H:%M:%S")