uncongeniality_preprocessing/main.py

41 lines
1.4 KiB
Python

import pandas as pd
from src.settings_loader import SettingsLoader
from src.article_data_preprocessor import ArticleDataPreprocessor
from src.comment_data_preprocessor import CommentsDataPreprocessor
from src.create_analysis_dataset import AnalysisDatasetCreator
def main() -> None:
    """
    Run the preprocessing pipeline end to end.

    Steps, in order:
      1. Instantiate ``SettingsLoader`` and use its raw-data directory
         settings to construct the article and comment preprocessors.
      2. Load the article data, then the comment data, through each
         preprocessor's ``get_all_data()``.
      3. Pass both datasets, plus two option values read from the settings,
         to ``AnalysisDatasetCreator`` and trigger dataset creation.

    Progress messages are written to stdout via ``print``.
    """
    settings = SettingsLoader()

    # Both preprocessors are built before any data is loaded.
    article_preprocessor = ArticleDataPreprocessor(
        data_folder_article_data=settings.directory_raw_article_data
    )
    comment_preprocessor = CommentsDataPreprocessor(
        data_folder_comment_data=settings.directory_raw_comment_data
    )

    print("processing article data...")
    articles: pd.DataFrame = article_preprocessor.get_all_data()

    print("processing comment data...")
    comments: pd.DataFrame = comment_preprocessor.get_all_data()

    # The creator takes comments first, then articles (positional order).
    dataset_creator = AnalysisDatasetCreator(
        comments,
        articles,
        add_extend_article_information=settings.add_extend_article_information,
        add_comments_text_body=settings.add_comment_text_body,
    )
    # NOTE(review): the closing message states the dataset is saved, so this
    # call presumably persists the result itself -- confirm in
    # src/create_analysis_dataset.py.
    dataset_creator.process_comments_and_articles_into_analysis_dataset()

    print("Data processing completed and final dataset has been saved.")
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger any processing.
if __name__ == "__main__":
    main()