35 lines
860 B
Python
35 lines
860 B
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
|
@dataclass
class ScrapeOptions:
    """Bundle of settings for a scrape; every field has a default.

    Constructing ``ScrapeOptions()`` with no arguments yields the stock
    configuration spelled out below.
    """

    # Switches for optional metadata.
    # NOTE(review): presumably each one enables extraction of the matching
    # ScrapedPost field -- confirm against the scraper that reads them.
    include_author: bool = True
    include_categories: bool = True
    include_tags: bool = True

    # Off by default.
    # NOTE(review): the name suggests bypassing CMS-specific parsing in
    # favour of generic heuristics -- confirm against the consumer.
    force_heuristics: bool = False

    # NOTE(review): unit is not stated here; presumably seconds -- confirm.
    request_timeout: int = 20

    # Default User-Agent string (a desktop Chrome-style identifier). The
    # adjacent literals are joined by the compiler into a single string.
    user_agent: str = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/135.0 Safari/537.36"
    )
|
|
|
|
|
|
@dataclass
class ScrapedPost:
    """Record describing one scraped post.

    Only ``source_url`` is required. Every other field starts from the
    default shown below, so a freshly built record reads as an
    unsuccessful, empty draft until its fields are filled in.
    """

    # --- where the record came from ---
    source_url: str
    # NOTE(review): presumably the row index in an input sheet -- confirm.
    row_number: int = 0
    cms: str = "unknown"

    # --- extracted content (empty until populated) ---
    title: str = ""
    publish_date: str = ""
    author: str = ""
    body_html: str = ""
    # default_factory gives each instance its own list rather than a
    # single list shared by every record.
    categories: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)

    # --- publishing attributes ---
    status: str = "draft"
    post_type: str = "post"

    # --- outcome of the scrape ---
    success: bool = False
    error: str = ""
    error_details: str = ""
|