Edge Case fixes, bug fixes, and UI Cleanup.
Build Docker Image / docker (push) Successful in 6s

This commit is contained in:
2026-04-09 11:21:23 -07:00
parent 8667f547e6
commit 0e410a1f6c
5 changed files with 256 additions and 6 deletions
+24 -2
View File
@@ -2,6 +2,7 @@ from __future__ import annotations
import csv
import datetime as dt
import hashlib
import io
import re
from dataclasses import replace
@@ -13,6 +14,7 @@ from page_importer.models import ScrapeOptions, ScrapedPost
from page_importer.scraper import Scraper
from page_importer.wxr import build_wxr
def load_csv(file_data: bytes) -> tuple[list[str], list[dict[str, str]]]:
text = file_data.decode("utf-8-sig", errors="replace")
reader = csv.DictReader(io.StringIO(text))
@@ -20,6 +22,20 @@ def load_csv(file_data: bytes) -> tuple[list[str], list[dict[str, str]]]:
return reader.fieldnames or [], rows
def build_upload_fingerprint(file_data: bytes) -> str:
    """Return the SHA-256 hex digest of an uploaded file's raw bytes.

    Identical byte content always produces the same 64-character lowercase
    hexadecimal string, so two fingerprints can be compared to tell whether
    the uploaded content has changed.

    Args:
        file_data: Raw bytes of the uploaded file.

    Returns:
        The hexadecimal SHA-256 digest of ``file_data``.
    """
    return hashlib.sha256(file_data).hexdigest()
def sync_uploaded_file_state(session_state: dict[str, object], upload_fingerprint: str) -> None:
    """Discard cached scrape state when the uploaded file has changed.

    The fingerprint stored under ``"uploaded_csv_fingerprint"`` is compared
    with ``upload_fingerprint``. When they are equal the mapping is left
    untouched. Otherwise the ``"results"``, ``"input_rows"``,
    ``"input_headers"`` and ``"scrape_context"`` entries are removed (keys
    that are absent are ignored) and the new fingerprint is recorded.

    Args:
        session_state: Mutable mapping holding per-session values; it is
            modified in place.
        upload_fingerprint: Fingerprint of the currently uploaded file.
    """
    if session_state.get("uploaded_csv_fingerprint") == upload_fingerprint:
        # Same file as on the previous run: keep whatever is cached.
        return

    # A different (or first) upload: drop everything derived from the old file
    # so stale results are never shown or exported against the new input.
    for stale_key in ("results", "input_rows", "input_headers", "scrape_context"):
        session_state.pop(stale_key, None)
    session_state["uploaded_csv_fingerprint"] = upload_fingerprint
def render_app() -> None:
st.title("Page Importer")
st.caption("Scrape blog posts from CSV URLs and export a WordPress WXR file.")
@@ -47,7 +63,10 @@ def render_app() -> None:
st.info("Upload a CSV to begin.")
return
headers, rows = load_csv(uploaded.getvalue())
uploaded_bytes = uploaded.getvalue()
sync_uploaded_file_state(st.session_state, build_upload_fingerprint(uploaded_bytes))
headers, rows = load_csv(uploaded_bytes)
if not rows:
st.error("The CSV did not contain any rows.")
return
@@ -88,6 +107,7 @@ def render_app() -> None:
results = scrape_rows(rows, context, phase_label="Scraping")
st.session_state["results"] = results
st.session_state["input_rows"] = rows
st.session_state["input_headers"] = headers
st.session_state["scrape_context"] = context
results = st.session_state.get("results", [])
@@ -176,7 +196,9 @@ def render_app() -> None:
st.write(f"**Author:** {selected.author or '(missing)'}")
st.write(f"**Post Type:** {selected.post_type}")
st.write(selected.body_html, unsafe_allow_html=True)
render_export_sidebar(successful, rows, headers)
stored_rows = st.session_state.get("input_rows", rows)
stored_headers = st.session_state.get("input_headers", headers)
render_export_sidebar(successful, stored_rows, stored_headers)
def build_scrape_context(