Edge-case fixes, bug fixes, and UI cleanup.
Build Docker Image / docker (push) Successful in 6s

This commit is contained in:
2026-04-09 11:21:23 -07:00
parent 8667f547e6
commit 0e410a1f6c
5 changed files with 256 additions and 6 deletions
+16
View File
@@ -175,6 +175,15 @@ def get_log_path(output_path: Path) -> Path:
return output_path.with_suffix(output_path.suffix + DEFAULT_LOG_SUFFIX)
def cleanup_run_files(output_path: Path) -> list[Path]:
    """Delete the files belonging to one crawl run and report what was removed.

    The candidates are ``output_path`` itself plus the paths that
    ``get_state_path`` and ``get_log_path`` derive from it.

    Args:
        output_path: Location of the run's output file. It is passed through
            ``Path`` so a path-like value that is not already a ``Path`` is
            accepted as well.

    Returns:
        The paths that were actually deleted, in the order output, state,
        log. Candidates that were not present are omitted.

    Raises:
        OSError: If a candidate is present but cannot be unlinked (for
            example it is a directory, or permissions forbid removal).
    """
    base = Path(output_path)
    removed_paths: list[Path] = []
    for path in (base, get_state_path(base), get_log_path(base)):
        try:
            path.unlink()
        except FileNotFoundError:
            # Nothing to remove. Attempting the unlink directly (instead of
            # checking exists() first) avoids failing when the file vanishes
            # between the check and the delete.
            continue
        removed_paths.append(path)
    return removed_paths
def log_message(log_path: Path, message: str) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
@@ -599,6 +608,7 @@ def crawl_site(
break
current = resolve_alias(state.queue.popleft(), state.alias_to_canonical)
state.queued.discard(current)
if current in state.visited:
continue
@@ -647,6 +657,7 @@ def crawl_site(
while state.queue and len(pending) < workers and len(state.visited) < max_pages:
current = resolve_alias(state.queue.popleft(), state.alias_to_canonical)
state.queued.discard(current)
if current in state.visited:
continue
@@ -790,6 +801,11 @@ def run_crawl(
"The saved crawl state uses a different document setting. "
"Keep the same choice or start a fresh crawl."
)
if state.include_subdomains != include_subdomains:
raise ValueError(
"The saved crawl state uses a different subdomain setting. "
"Keep the same choice or start a fresh crawl."
)
else:
state = initialize_state(normalized_start, include_subdomains, include_documents)