40 lines
1.0 KiB
Python
40 lines
1.0 KiB
Python
import argparse
|
|
from pathlib import Path
|
|
from src.logger import setup_logging
|
|
from src.load_config import load_config_from_file
|
|
from src.spiegel_scraper import SpiegelScraper
|
|
|
|
|
|
def main() -> None:
    """
    Entry point of the Spiegel scraper command-line application.

    Reads the single positional ``config_file`` argument, calls
    ``setup_logging()``, loads the configuration from the given file,
    builds a ``SpiegelScraper`` from the loaded values and finally calls
    ``scrap_article_data()`` on it.
    """
    cli = argparse.ArgumentParser(description="Run Spiegel scraper.")
    cli.add_argument(
        "config_file",
        type=str,
        help="Path to the configuration JSON file",
    )
    cli_args = cli.parse_args()

    setup_logging()

    settings: dict = load_config_from_file(Path(cli_args.config_file))

    # Configuration keys, listed in the order their values are handed to
    # the SpiegelScraper constructor as positional arguments.
    scraper_keys = (
        "start_date",
        "end_date",
        "exclusions",
        "output_path",
        "target",
    )
    scraper_args = [settings[key] for key in scraper_keys]

    SpiegelScraper(*scraper_args).scrap_article_data()
|
|
|
|
|
|
# Run the scraper only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|