Update UI and update Sitemap tool to get proper thread count
Build Docker Image / docker (push) Successful in 6s
Build Docker Image / docker (push) Successful in 6s
This commit is contained in:
@@ -28,7 +28,6 @@ DEFAULT_LOG_SUFFIX = ".crawl.log"
|
||||
DEFAULT_MAX_PAGES = 10000
|
||||
DEFAULT_RESUME_PAGE_INCREMENT = 10000
|
||||
DEFAULT_SAVE_EVERY = 25
|
||||
DEFAULT_WORKERS = 8
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
DOCUMENT_EXTENSIONS = {
|
||||
".pdf",
|
||||
@@ -47,6 +46,24 @@ DOCUMENT_EXTENSIONS = {
|
||||
}
|
||||
|
||||
|
||||
def detect_default_workers() -> int:
    """Return the default number of workers for this process.

    The count of CPUs this process is allowed to run on is preferred when
    the platform exposes ``os.sched_getaffinity``; otherwise the value of
    ``os.cpu_count()`` is used.

    Returns:
        A positive integer; never less than 1.
    """
    # ``sched_getaffinity`` is not defined on every platform, so look it up
    # dynamically instead of referencing the attribute directly.
    get_affinity = getattr(os, "sched_getaffinity", None)
    if callable(get_affinity):
        try:
            affinity_count = len(get_affinity(0))
        except OSError:
            # Affinity query failed; fall through to the CPU count.
            affinity_count = 0
        if affinity_count > 0:
            return affinity_count

    # ``os.cpu_count()`` may return None when the count is undetermined.
    return max(os.cpu_count() or 1, 1)
|
||||
|
||||
|
||||
DEFAULT_WORKERS = detect_default_workers()
|
||||
|
||||
|
||||
@dataclass
|
||||
class CrawlResult:
|
||||
url: str
|
||||
|
||||
Reference in New Issue
Block a user