from __future__ import annotations from email.utils import format_datetime from io import StringIO from xml.sax.saxutils import escape import datetime as dt from page_importer.dates import parse_datetime from page_importer.models import ScrapedPost def build_wxr(posts: list[ScrapedPost], channel_title: str = "Imported Content") -> str: now = dt.datetime.now(dt.timezone.utc) out = StringIO() out.write('\n') out.write( '\n' ) out.write("\n") out.write(f"{escape(channel_title)}\n") out.write("http://localhost/\n") out.write("Generated by Page Importer\n") out.write(f"{format_datetime(now)}\n") out.write("en-US\n") out.write("1.2\n") for post in posts: local_date, gmt_date, item_pub_date = _resolve_post_dates(post.publish_date, now) out.write("\n") out.write(f"{escape(post.title)}\n") out.write(f"{escape(post.source_url)}\n") out.write(f"{format_datetime(item_pub_date)}\n") out.write(f"{cdata(post.author or 'importer')}\n") out.write(f"{escape(post.source_url)}\n") out.write("\n") out.write(f"{cdata(post.body_html)}\n") out.write(f"{cdata('')}\n") out.write(f"{cdata(local_date)}\n") out.write(f"{cdata(gmt_date)}\n") out.write("\n") out.write("\n") out.write("\n") out.write(f"{cdata(post.status)}\n") out.write("0\n") out.write("0\n") out.write(f"{cdata(post.post_type or 'post')}\n") out.write("\n") out.write("0\n") for category in post.categories: out.write( f'{cdata(category)}\n' ) for tag in post.tags: out.write( f'{cdata(tag)}\n' ) out.write("\n") out.write("\n\n") return out.getvalue() def slugify(value: str) -> str: return "".join(ch.lower() if ch.isalnum() else "-" for ch in value).strip("-") def cdata(value: str) -> str: return f"', ']]]]>')}]]>" def _resolve_post_dates(value: str, fallback: dt.datetime) -> tuple[str, str, dt.datetime]: parsed = parse_datetime(value) if parsed is None: return "", "", fallback if parsed.tzinfo is None or parsed.utcoffset() is None: local_date = _format_wp_date(parsed) assumed_utc = parsed.replace(tzinfo=dt.timezone.utc) return local_date, local_date, assumed_utc local_date = _format_wp_date(parsed) gmt_value = parsed.astimezone(dt.timezone.utc) return local_date, _format_wp_date(gmt_value), gmt_value def _format_wp_date(value: dt.datetime) -> str: return value.replace(tzinfo=None).strftime("%Y-%m-%d %H:%M:%S")