55 lines
1.8 KiB
Python
55 lines
1.8 KiB
Python
# HMC_preprocessing.py
|
|
from src.utils.database_documentation_generator import generate_db_api_reference
|
|
from src.utils.settings_loader import load_settings
|
|
from src.utils.data_loader import DataLoader
|
|
from src.process_all_waves import DataPreprocessingAllWaves
|
|
from src.utils.database_populator import populate_database
|
|
from src.utils.logging_config import setup_logging
|
|
import logging
|
|
|
|
|
|
def main() -> int:
    """Run the data preprocessing pipeline from settings to database.

    Steps, in order: configure logging, load ``settings.yaml``, load the
    survey data for all waves, preprocess it, generate the database API
    reference, and populate the results database. Export flags and output
    locations are read from the ``output`` section of the settings, with
    defaults applied for any missing key.

    Returns:
        ``0`` if every step completed, ``1`` if any step raised. The
        exception is logged with its traceback and not propagated, so
        callers must check the return value to detect a failure.
    """
    setup_logging()
    logger = logging.getLogger("preprocessing")

    try:
        logger.info("Starting data preprocessing pipeline.")

        settings = load_settings("settings.yaml")

        data_loader = DataLoader(settings)
        data_all_waves = data_loader.load_all_survey_data()

        data_preprocessor = DataPreprocessingAllWaves(data_all_waves, settings, logger)
        preprocessed_data_all_waves = data_preprocessor.preprocess_data()

        generate_db_api_reference(
            settings, logger, cronbachs_alphas=data_preprocessor.cronbachs_alphas
        )

        # An `output:` key that is present but empty in the YAML loads as
        # None; fall back to an empty dict so the defaults below still apply.
        output_settings: dict = settings.get("output") or {}
        populate_database(
            preprocessed_data_all_waves,
            database_path=output_settings.get(
                "database_path", "results/study_results.sqlite"
            ),
            export_csv=output_settings.get("export_csv", False),
            export_excel=output_settings.get("export_excel", False),
            csv_output_directory=output_settings.get("csv_output_directory", "results"),
            excel_output_directory=output_settings.get(
                "excel_output_directory", "results"
            ),
        )

        logger.info(
            "Data preprocessing and database population completed successfully."
        )
    except Exception as e:
        # Top-level boundary: record the full traceback, then report failure
        # through the return value instead of silently finishing.
        logger.exception(f"An error occurred in the preprocessing pipeline: {e}")
        return 1

    return 0


if __name__ == "__main__":
    # Use main()'s status as the process exit code so that shells, CI jobs
    # and schedulers can tell a failed run from a successful one.
    raise SystemExit(main())
|