41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
import pandas as pd
|
|
|
|
from src.settings_loader import SettingsLoader
|
|
from src.article_data_preprocessor import ArticleDataPreprocessor
|
|
from src.comment_data_preprocessor import CommentsDataPreprocessor
|
|
from src.create_analysis_dataset import AnalysisDatasetCreator
|
|
|
|
|
|
def main():
    """
    Run the pipeline that combines article and comment data into the
    analysis dataset.

    Steps, in order:

    1. Instantiate ``SettingsLoader`` to obtain the raw-data directories
       and the two dataset options.
    2. Construct ``ArticleDataPreprocessor`` and
       ``CommentsDataPreprocessor`` with their respective directories.
    3. Fetch the article data, then the comment data, through each
       preprocessor's ``get_all_data()``.
    4. Pass both results to ``AnalysisDatasetCreator`` and call
       ``process_comments_and_articles_into_analysis_dataset()``.

    Progress messages are written to stdout with ``print``. The function
    takes no arguments and returns nothing.
    """
    settings = SettingsLoader()

    # Both preprocessors are created up front, before any data is fetched.
    articles_preprocessor = ArticleDataPreprocessor(
        data_folder_article_data=settings.directory_raw_article_data
    )
    comments_preprocessor = CommentsDataPreprocessor(
        data_folder_comment_data=settings.directory_raw_comment_data
    )

    print("processing article data...")
    articles: pd.DataFrame = articles_preprocessor.get_all_data()

    print("processing comment data...")
    comments: pd.DataFrame = comments_preprocessor.get_all_data()

    # Positional order matters here: comments first, then articles.
    dataset_creator = AnalysisDatasetCreator(
        comments,
        articles,
        add_extend_article_information=settings.add_extend_article_information,
        add_comments_text_body=settings.add_comment_text_body,
    )

    # NOTE(review): the closing message says the dataset "has been saved";
    # presumably the call below performs that write -- confirm in
    # AnalysisDatasetCreator.
    dataset_creator.process_comments_and_articles_into_analysis_dataset()

    print("Data processing completed and final dataset has been saved.")
|
|
|
|
|
|
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger any processing.
if __name__ == "__main__":
    main()
|