# HMC_preprocessing.py
"""Command-line entry point for the survey data preprocessing pipeline.

Running this module loads ``settings.yaml``, loads the survey data for all
waves, preprocesses it, generates the database API reference, and populates
the results database (with optional CSV / Excel exports controlled by the
``output`` section of the settings).
"""

import logging

from src.utils.database_documentation_generator import generate_db_api_reference
from src.utils.settings_loader import load_settings
from src.utils.data_loader import DataLoader
from src.process_all_waves import DataPreprocessingAllWaves
from src.utils.database_populator import populate_database
from src.utils.logging_config import setup_logging


def main() -> int:
    """Run the preprocessing pipeline from settings to populated database.

    Any exception raised by a pipeline step is logged with its traceback and
    is not propagated to the caller.

    Returns:
        ``0`` when every step completed, ``1`` when a step raised. The value
        is meant to be used as the process exit status.
    """
    setup_logging()
    logger = logging.getLogger("preprocessing")

    try:
        logger.info("Starting data preprocessing pipeline.")

        settings = load_settings("settings.yaml")

        data_loader = DataLoader(settings)
        data_all_waves = data_loader.load_all_survey_data()

        data_preprocessor = DataPreprocessingAllWaves(
            data_all_waves, settings, logger
        )
        preprocessed_data_all_waves = data_preprocessor.preprocess_data()

        # Called after preprocess_data() because it reads the
        # cronbachs_alphas attribute from the preprocessor.
        # NOTE(review): presumably that attribute is filled in during
        # preprocessing - confirm against DataPreprocessingAllWaves.
        generate_db_api_reference(
            settings, logger, cronbachs_alphas=data_preprocessor.cronbachs_alphas
        )

        # `or {}` also covers an "output" key that is present but empty
        # (None), which would otherwise break the .get() calls below.
        output_settings: dict = settings.get("output") or {}
        populate_database(
            preprocessed_data_all_waves,
            database_path=output_settings.get(
                "database_path", "results/study_results.sqlite"
            ),
            export_csv=output_settings.get("export_csv", False),
            export_excel=output_settings.get("export_excel", False),
            csv_output_directory=output_settings.get(
                "csv_output_directory", "results"
            ),
            excel_output_directory=output_settings.get(
                "excel_output_directory", "results"
            ),
        )

        logger.info(
            "Data preprocessing and database population completed successfully."
        )
    except Exception as e:
        # Top-level boundary: log the traceback, then report failure through
        # the return value instead of letting the run look successful.
        logger.exception("An error occurred in the preprocessing pipeline: %s", e)
        return 1

    return 0


if __name__ == "__main__":
    # Propagate the pipeline outcome as the process exit status.
    raise SystemExit(main())