preprocessing/HMC_preprocessing.py
2025-12-15 13:47:28 +01:00

55 lines
1.8 KiB
Python

# HMC_preprocessing.py
from src.utils.database_documentation_generator import generate_db_api_reference
from src.utils.settings_loader import load_settings
from src.utils.data_loader import DataLoader
from src.process_all_waves import DataPreprocessingAllWaves
from src.utils.database_populator import populate_database
from src.utils.logging_config import setup_logging
import logging
def main() -> int:
    """Run the data preprocessing pipeline from settings to database.

    The steps run in this order: configure logging, load ``settings.yaml``,
    load the survey data through ``DataLoader``, preprocess it with
    ``DataPreprocessingAllWaves``, generate the database API reference
    (passing along the preprocessor's ``cronbachs_alphas``), and finally
    hand the preprocessed data to ``populate_database``.

    Output options are read from the ``output`` section of the settings.
    Missing entries fall back to: database at
    ``results/study_results.sqlite``, CSV and Excel export disabled, and
    ``results`` as the directory for both export formats.

    Returns:
        0 when every step completed, 1 when any step inside the pipeline
        raised. The exception is logged with its traceback and is not
        propagated to the caller.
    """
    setup_logging()
    logger = logging.getLogger("preprocessing")

    try:
        logger.info("Starting data preprocessing pipeline.")

        settings = load_settings("settings.yaml")
        data_loader = DataLoader(settings)
        data_all_waves = data_loader.load_all_survey_data()

        data_preprocessor = DataPreprocessingAllWaves(data_all_waves, settings, logger)
        preprocessed_data_all_waves = data_preprocessor.preprocess_data()

        generate_db_api_reference(
            settings, logger, cronbachs_alphas=data_preprocessor.cronbachs_alphas
        )

        # ``or {}`` also covers an ``output`` key that is present but null;
        # a plain ``.get("output", {})`` would return None in that case and
        # the option lookups below would fail.
        output_settings: dict = settings.get("output") or {}
        populate_database(
            preprocessed_data_all_waves,
            database_path=output_settings.get(
                "database_path", "results/study_results.sqlite"
            ),
            export_csv=output_settings.get("export_csv", False),
            export_excel=output_settings.get("export_excel", False),
            csv_output_directory=output_settings.get("csv_output_directory", "results"),
            excel_output_directory=output_settings.get(
                "excel_output_directory", "results"
            ),
        )

        logger.info(
            "Data preprocessing and database population completed successfully."
        )
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure
        # through the return value instead of ending as if nothing happened.
        logger.exception("An error occurred in the preprocessing pipeline: %s", e)
        return 1

    return 0


if __name__ == "__main__":
    # Use the pipeline result as the process exit status so that callers of
    # the script (shell, scheduler, CI) can tell a failed run from a good one.
    raise SystemExit(main())