Update UI, and update the Sitemap tool to detect the proper thread count
Build Docker Image / docker (push) Successful in 6s

This commit is contained in:
2026-04-09 11:27:13 -07:00
parent 0e410a1f6c
commit 287566716f
5 changed files with 32 additions and 5 deletions
+18 -1
View File
@@ -28,7 +28,6 @@ DEFAULT_LOG_SUFFIX = ".crawl.log"
# Module-level defaults. NOTE(review): the meanings noted below are inferred
# from the names only -- confirm against the code that reads each constant.
DEFAULT_MAX_PAGES = 10000  # presumably a cap on pages per crawl -- TODO confirm
DEFAULT_RESUME_PAGE_INCREMENT = 10000  # presumably extra pages granted on resume -- TODO confirm
DEFAULT_SAVE_EVERY = 25  # presumably a save interval; unit not shown here -- TODO confirm
# NOTE(review): DEFAULT_WORKERS is assigned again further down from
# detect_default_workers(); the hunk header (-28,7 +28,6) suggests this literal
# is the line removed by this commit.
DEFAULT_WORKERS = 8
SCRIPT_DIR = Path(__file__).resolve().parent  # resolved directory containing this file
DOCUMENT_EXTENSIONS = {
".pdf",
@@ -47,6 +46,24 @@ DOCUMENT_EXTENSIONS = {
}
def detect_default_workers() -> int:
    """Return the default number of workers for this process.

    The size of the process's CPU affinity set (``os.sched_getaffinity(0)``)
    is used when that function exists and the call succeeds with a non-empty
    set. Otherwise the result is ``os.cpu_count()``, or 1 when that returns
    nothing usable.

    Returns:
        A positive integer (never less than 1).
    """
    usable: int | None
    # Looked up with getattr because the attribute is not guaranteed to exist
    # on the os module.
    affinity_fn = getattr(os, "sched_getaffinity", None)
    if not callable(affinity_fn):
        usable = None
    else:
        try:
            usable = len(affinity_fn(0))
        except OSError:
            # Affinity query failed; fall through to the plain CPU count.
            usable = None

    total = os.cpu_count() or 1
    # A missing or zero affinity count defers to the overall CPU count.
    chosen = usable if usable else total
    return max(chosen, 1)
# Evaluated once, when this module-level assignment runs; the value comes from
# detect_default_workers() rather than a hard-coded number.
DEFAULT_WORKERS = detect_default_workers()
@dataclass
class CrawlResult:
url: str