import os
import pandas as pd
import numpy as np
from scipy import stats as stats
from scipy.stats import ttest_ind, ttest_rel
from pathlib import Path

from src.data_loading_and_saving.print_and_save_results import print_and_save_result


class TTest:
    """
    Run independent and paired T-tests on two columns of a DataFrame.

    Each test builds a plain-text summary and hands it to
    ``print_and_save_result`` together with this object's settings.

    Attributes
    ----------
    print_result: bool
        Flag forwarded to ``print_and_save_result`` (intended to control printing).
    save_result: bool
        Flag forwarded to ``print_and_save_result`` (intended to control saving).
    filepath: str
        Directory forwarded to ``print_and_save_result`` as the output location.
        It is created on construction if it does not exist yet.

    Methods
    -------
    perform_ttest(data, first_group_name, second_group_name, name_save_file) -> None:
        Performs an independent samples T-test for the two specified columns.
    perform_paired_ttest(data, first_group_name, second_group_name, name_save_file) -> tuple[float, float, float]:
        Performs a paired samples T-test on the two specified columns and
        returns the mean difference with its 95% confidence interval.
    """

    def __init__(
        self,
        print_result: bool = True,
        save_result: bool = True,
        filepath: str = "results/",
    ):
        """
        Store the reporting settings and make sure the output directory exists.

        Parameters
        ----------
        print_result: bool, optional
            Flag forwarded to ``print_and_save_result``. Default is True.
        save_result: bool, optional
            Flag forwarded to ``print_and_save_result``. Default is True.
        filepath: str, optional
            Directory for result files. Default is "results/". An empty string
            is accepted and no directory is created for it.

        Raises
        ------
        OSError
            If the directory cannot be created (e.g. the path exists as a file).
        """
        self.print_result: bool = print_result
        self.save_result: bool = save_result
        self.filepath: str = filepath
        # ``exist_ok=True`` avoids the race between checking for the directory
        # and creating it. ``os.makedirs("")`` raises, so an empty path is skipped.
        if filepath:
            os.makedirs(filepath, exist_ok=True)

    @staticmethod
    def _build_summary(
        first_group_name: str,
        second_group_name: str,
        mean_group_1: float,
        mean_group_2: float,
        standard_deviation_group_1: float,
        standard_deviation_group_2: float,
        degrees_of_freedom: int,
        t_statistic: float,
        p_value: float,
        cohens_d=None,
    ) -> str:
        """Format the test statistics as the newline-separated report text.

        The "Cohen's d" line is only emitted when ``cohens_d`` is not None.
        """
        lines: list[str] = [
            f"Mean of {first_group_name}: {mean_group_1}",
            f"Mean of {second_group_name}: {mean_group_2}",
            f"Standard Deviation of {first_group_name}: {standard_deviation_group_1}",
            f"Standard Deviation of {second_group_name}: {standard_deviation_group_2}",
            f"Degrees of Freedom: {degrees_of_freedom}",
        ]
        if cohens_d is not None:
            lines.append(f"Cohen's d: {cohens_d}")
        lines.append(f"T-statistic: {t_statistic}")
        lines.append(f"P-value: {p_value}")
        return "\n".join(lines)

    def _report(self, summary: str, name_save_file: Path) -> None:
        """Forward a finished summary to ``print_and_save_result`` with this object's settings."""
        print_and_save_result(
            self.print_result,
            self.save_result,
            self.filepath,
            summary,
            name_save_file,
        )

    def perform_ttest(
        self,
        data: pd.DataFrame,
        first_group_name: str,
        second_group_name: str,
        name_save_file: Path,
    ) -> None:
        """
        Perform a T-test for the means of two independent samples.

        Missing values are dropped from each column separately, so the two
        groups may end up with different sizes. The test is run with
        ``scipy.stats.ttest_ind`` using its default settings, and the summary
        is passed to ``print_and_save_result``.

        Parameters
        ----------
        data: pandas.DataFrame
            The input dataframe which contains the data.
        first_group_name: str
            The name of the first column to be used in the t-test.
        second_group_name: str
            The name of the second column to be used in the t-test.
        name_save_file: Path
            File name forwarded unchanged to ``print_and_save_result``.

        Returns
        -------
        None

        Raises
        ------
        KeyError
            If either column name is not present in ``data``.
        """
        group_1: pd.Series = data[first_group_name].dropna()
        group_2: pd.Series = data[second_group_name].dropna()

        # Degrees of freedom of the pooled-variance test: n1 + n2 - 2.
        degrees_of_freedom: int = len(group_1) + len(group_2) - 2

        t_statistic, p_value = ttest_ind(group_1, group_2)

        ttest_summary: str = self._build_summary(
            first_group_name,
            second_group_name,
            group_1.mean(),
            group_2.mean(),
            group_1.std(),
            group_2.std(),
            degrees_of_freedom,
            t_statistic,
            p_value,
        )
        self._report(ttest_summary, name_save_file)

    def perform_paired_ttest(
        self,
        data: pd.DataFrame,
        first_group_name: str,
        second_group_name: str,
        name_save_file: Path,
    ) -> tuple[float, float, float]:
        """
        Perform a paired sample t-test and compute Cohen's d for two columns.

        Rows with a missing value in either column are dropped, so only
        complete pairs are analysed. Cohen's d divides the mean difference by
        the square root of the average of the two sample variances. The
        confidence interval comes from the t distribution with ``n - 1``
        degrees of freedom, centred on the mean difference and scaled by the
        standard error of the pairwise differences.

        Parameters
        ----------
        data : pd.DataFrame
            The DataFrame containing the data of two related groups to be compared
        first_group_name : str
            The name of the first group (column)
        second_group_name : str
            The name of the second group (column)
        name_save_file: Path
            File name forwarded unchanged to ``print_and_save_result``.

        Returns
        -------
        mean_difference : float
            The mean of the first column minus the mean of the second column
        confidence_interval[0] : float
            The lower bound of the 95% confidence interval
        confidence_interval[1] : float
            The upper bound of the 95% confidence interval

        Raises
        ------
        KeyError
            If either column name is not present in ``data``.
        """
        # Use a separate name instead of rebinding the ``data`` parameter.
        complete_pairs: pd.DataFrame = data[
            [first_group_name, second_group_name]
        ].dropna()
        group_1: pd.Series = complete_pairs[first_group_name]
        group_2: pd.Series = complete_pairs[second_group_name]
        number_of_pairs: int = len(group_1)
        degrees_of_freedom: int = number_of_pairs - 1

        t_statistic, p_value = ttest_rel(group_1, group_2)

        mean_group_1: float = group_1.mean()
        mean_group_2: float = group_2.mean()
        standard_deviation_group_1: float = group_1.std()
        standard_deviation_group_2: float = group_2.std()

        mean_difference: float = mean_group_1 - mean_group_2
        pooled_standard_deviation: float = np.sqrt(
            (standard_deviation_group_1**2 + standard_deviation_group_2**2) / 2
        )
        cohens_d: float = mean_difference / pooled_standard_deviation

        # Sample standard deviation (ddof=1) of the pairwise differences.
        standard_error_difference: float = (group_1 - group_2).std(
            ddof=1
        ) / np.sqrt(number_of_pairs)

        # The confidence level is passed positionally because its keyword
        # name differs between scipy versions.
        confidence_interval: tuple[float, float] = stats.t.interval(
            0.95,
            degrees_of_freedom,
            loc=mean_difference,
            scale=standard_error_difference,
        )

        paired_ttest_summary: str = self._build_summary(
            first_group_name,
            second_group_name,
            mean_group_1,
            mean_group_2,
            standard_deviation_group_1,
            standard_deviation_group_2,
            degrees_of_freedom,
            t_statistic,
            p_value,
            cohens_d=cohens_d,
        )
        self._report(paired_ttest_summary, name_save_file)

        return mean_difference, confidence_interval[0], confidence_interval[1]