# Source: uncongeniality_preprocessing/src/article_data_preprocessor.py
# (file-viewer header at extraction time: 55 lines, 1.6 KiB, Python)

"""
The `article_data_preprocessor.py` module defines `ArticleDataPreprocessor`, a
concrete subclass of the `DataPreprocessor` abstract base class that preprocesses
article data.
"""
from pathlib import Path
import pandas as pd
from src.data_preprocessor import DataPreprocessor
from src.utils import load_json_file
class ArticleDataPreprocessor(DataPreprocessor):
    """
    A class used to preprocess article data.

    Attributes
    ----------
    Inherits all attributes from the abstract base class DataPreprocessor.

    Methods
    -------
    preprocess(article_file_path: Path) -> pd.DataFrame
        Preprocess an article data file and return it as a DataFrame.
    """

    def __init__(self, data_folder_article_data: Path):
        """
        Construct the ArticleDataPreprocessor object.

        Parameters
        ----------
        data_folder_article_data : Path
            Path object representing the directory where the article data
            files are located.
        """
        # The split word is fixed for article data; how it is used is decided
        # by DataPreprocessor (defined in src.data_preprocessor, not shown here).
        super().__init__(data_folder_article_data, split_word="SPON_article")

    def preprocess(self, article_file_path: Path) -> pd.DataFrame:
        """
        Preprocess an article data file and return it as a DataFrame.

        Parameters
        ----------
        article_file_path : pathlib.Path
            The path object representing the article data file to be
            preprocessed.

        Returns
        -------
        pd.DataFrame
            A pandas DataFrame containing the preprocessed article data.
        """
        # NOTE(review): load_json_file is assumed to return the parsed content
        # of one article as a dict -- confirm against src.utils.
        single_article_dict: dict = load_json_file(article_file_path)

        # Wrapping the record in a one-element list makes pandas build exactly
        # one row, with one column per top-level key of the dict.
        return pd.DataFrame([single_article_dict])