import pytest
import pandas as pd
import numpy as np

from src.scale_processor import ScaleProcessor


class TestScaleProcessor:
    """Tests for ScaleProcessor construction, column checks, masks and scoring.

    NOTE(review): the test methods below are not prefixed with ``test_``.
    With pytest's default ``python_functions`` pattern they would not be
    collected -- confirm the project's pytest configuration covers these names.
    """

    @staticmethod
    def initializes_with_basic_scale_config():
        """A config with only name and items yields the expected defaults."""
        config = {"name": "test_scale", "items": [{"id": "item1"}, {"id": "item2"}]}
        processor = ScaleProcessor(config)

        assert processor.name == "test_scale"
        assert processor.items == [{"id": "item1"}, {"id": "item2"}]
        assert processor.calculation == "mean"
        assert processor.score_min == 1
        assert processor.score_max == 5
        assert processor.output == "test_scale"
        assert processor.subgroup is None

    @staticmethod
    def initializes_with_custom_configuration():
        """Explicit config values and the subgroup argument are stored as given."""
        config = {
            "name": "custom_scale",
            "items": [{"id": "q1"}],
            "calculation": "sum",
            "score_range": (0, 10),
            "response_options": {"1": "Yes", "2": "No"},
            "output": "custom_output",
        }
        processor = ScaleProcessor(config, "group1")

        assert processor.calculation == "sum"
        assert processor.score_min == 0
        assert processor.score_max == 10
        assert processor.response_options == {"1": "Yes", "2": "No"}
        assert processor.output == "custom_output"
        assert processor.subgroup == "group1"

    @staticmethod
    def check_items_passes_when_all_columns_present():
        """check_items does not raise when every item column exists in the frame."""
        config = {"name": "test", "items": [{"id": "col1"}, {"id": "col2"}]}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]})

        # Passing means no exception; the extra column "col3" must be tolerated.
        processor.check_items(df)

    @staticmethod
    def check_items_raises_error_when_columns_missing():
        """check_items raises ValueError naming the missing item column."""
        config = {"name": "test", "items": [{"id": "col1"}, {"id": "missing"}]}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

        # `match` is a regex, so the literal brackets are escaped.
        with pytest.raises(
            ValueError, match=r"Missing columns in data: \['missing'\]"
        ):
            processor.check_items(df)

    @staticmethod
    def get_subgroup_mask_returns_all_true_when_no_subgroup():
        """Without a subgroup the mask selects every row."""
        config = {"name": "test", "items": [{"id": "col1"}]}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"col1": [1, 2, 3]})

        mask = processor.get_subgroup_mask(df)

        assert mask.all()
        assert len(mask) == 3

    @staticmethod
    def get_subgroup_mask_returns_all_true_when_subgroup_is_all():
        """The subgroup value "all" selects every row."""
        config = {"name": "test", "items": [{"id": "col1"}]}
        processor = ScaleProcessor(config, "all")
        df = pd.DataFrame({"col1": [1, 2, 3]})

        mask = processor.get_subgroup_mask(df)

        assert mask.all()

    @staticmethod
    def get_subgroup_mask_filters_by_subgroup_column():
        """The mask mirrors the boolean values of the subgroup column."""
        config = {"name": "test", "items": [{"id": "col1"}]}
        processor = ScaleProcessor(config, "group")
        df = pd.DataFrame({"col1": [1, 2, 3], "group": [True, False, True]})

        mask = processor.get_subgroup_mask(df)

        # Compare by value, not identity: scalars read from a bool-dtype Series
        # are numpy.bool_ objects, so `mask.iloc[i] is True` is always False
        # even when the mask is correct. tolist() yields plain Python values.
        assert mask.tolist() == [True, False, True]

    @staticmethod
    def get_subgroup_mask_returns_all_true_when_subgroup_column_missing():
        """A subgroup name with no matching column selects every row."""
        config = {"name": "test", "items": [{"id": "col1"}]}
        processor = ScaleProcessor(config, "nonexistent")
        df = pd.DataFrame({"col1": [1, 2, 3]})

        mask = processor.get_subgroup_mask(df)

        assert mask.all()

    @staticmethod
    def process_calculates_mean_by_default():
        """With no calculation configured, process returns the row-wise mean."""
        config = {"name": "test", "items": [{"id": "q1"}, {"id": "q2"}]}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [2, 4, 6], "q2": [4, 6, 8]})

        result = processor.process(df)

        assert result.columns[0] == "test"
        assert result["test"].iloc[0] == 3.0
        assert result["test"].iloc[1] == 5.0
        assert result["test"].iloc[2] == 7.0

    @staticmethod
    def process_calculates_sum_when_specified():
        """calculation "sum" returns the row-wise sum of the items."""
        config = {
            "name": "sum_scale",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "sum",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2, 3], "q2": [4, 5, 6]})

        result = processor.process(df)

        assert result["sum_scale"].iloc[0] == 5
        assert result["sum_scale"].iloc[1] == 7
        assert result["sum_scale"].iloc[2] == 9

    @staticmethod
    def process_handles_item_inversion():
        """Items flagged "inverse" are reflected within score_range before averaging."""
        config = {
            "name": "inverted",
            "items": [{"id": "q1", "inverse": True}, {"id": "q2"}],
            "score_range": (1, 5),
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 5], "q2": [3, 3]})

        result = processor.process(df)

        assert result["inverted"].iloc[0] == 4.0  # (5+1-1+3)/2 = 4
        assert result["inverted"].iloc[1] == 2.0  # (5+1-5+3)/2 = 2

    @staticmethod
    def process_handles_categorical_calculation_single_item():
        """calculation "categorical" maps numeric answers to their option labels."""
        config = {
            "name": "category",
            "items": [{"id": "q1"}],
            "calculation": "categorical",
            "response_options": {"1": "Option A", "2": "Option B", "3": "Option C"},
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2, 3, 1]})

        result = processor.process(df)

        assert result["category"].iloc[0] == "Option A"
        assert result["category"].iloc[1] == "Option B"
        assert result["category"].iloc[2] == "Option C"
        assert result["category"].iloc[3] == "Option A"

    @staticmethod
    def process_raises_error_for_categorical_with_multiple_items():
        """calculation "categorical" is rejected for a scale with two items."""
        config = {
            "name": "category",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "categorical",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})

        with pytest.raises(
            ValueError, match="calculation 'categorical' is only for single-item scales"
        ):
            processor.process(df)

    @staticmethod
    def process_handles_categorical_with_open_ended_other_option():
        """Open-ended text is exposed in "<name>_other_text"; empty text becomes NA."""
        config = {
            "name": "category",
            "items": [{"id": "q1", "open_ended_id": "q1_other"}],
            "calculation": "categorical",
            "response_options": {"1": "Option A", "10": "Other"},
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame(
            {
                "q1": [1, 10, 1, 10],
                "q1_other": ["", "Custom text", "", "Another custom"],
            }
        )

        result = processor.process(df)

        assert result["category"].iloc[0] == "Option A"
        assert result["category"].iloc[1] == "Other"
        assert pd.isna(result["category_other_text"].iloc[0])
        assert result["category_other_text"].iloc[1] == "Custom text"
        assert pd.isna(result["category_other_text"].iloc[2])
        assert result["category_other_text"].iloc[3] == "Another custom"

    @staticmethod
    def process_handles_ordinal_calculation_single_item():
        """calculation "ordinal" maps numeric answers to their option labels."""
        config = {
            "name": "ordinal",
            "items": [{"id": "q1"}],
            "calculation": "ordinal",
            "response_options": {1: "Low", 2: "Medium", 3: "High"},
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2, 3, 2]})

        result = processor.process(df)

        assert result["ordinal"].iloc[0] == "Low"
        assert result["ordinal"].iloc[1] == "Medium"
        assert result["ordinal"].iloc[2] == "High"
        assert result["ordinal"].iloc[3] == "Medium"

    @staticmethod
    def process_raises_error_for_ordinal_with_multiple_items():
        """calculation "ordinal" is rejected for a scale with two items."""
        config = {
            "name": "ordinal",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "ordinal",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})

        with pytest.raises(
            ValueError,
            match="calculation 'ordinal' only allowed with single-item scales",
        ):
            processor.process(df)

    @staticmethod
    def process_handles_response_calculation_single_item():
        """calculation "response" passes the single item's values through unchanged."""
        config = {
            "name": "response",
            "items": [{"id": "q1"}],
            "calculation": "response",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1.5, 2.7, 3.9]})

        result = processor.process(df)

        assert result["response"].iloc[0] == 1.5
        assert result["response"].iloc[1] == 2.7
        assert result["response"].iloc[2] == 3.9

    @staticmethod
    def process_raises_error_for_response_with_multiple_items():
        """calculation "response" is rejected for a scale with two items."""
        config = {
            "name": "response",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "response",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})

        with pytest.raises(
            ValueError,
            match="calculation 'response' can only be used with single-item scales!",
        ):
            processor.process(df)

    @staticmethod
    def process_handles_sum_correct_calculation():
        """calculation "sum_correct" counts answers equal to each item's "correct"."""
        config = {
            "name": "correct_sum",
            "items": [
                {"id": "q1", "correct": 2},
                {"id": "q2", "correct": 1},
                {"id": "q3", "correct": 3},
            ],
            "calculation": "sum_correct",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame(
            {
                "q1": [2, 1, 2],  # correct, wrong, correct
                "q2": [1, 1, 2],  # correct, correct, wrong
                "q3": [3, 2, 3],  # correct, wrong, correct
            }
        )

        result = processor.process(df)

        assert result["correct_sum"].iloc[0] == 3  # all correct
        assert result["correct_sum"].iloc[1] == 1  # one correct
        assert result["correct_sum"].iloc[2] == 2  # two correct

    @staticmethod
    def process_handles_mean_correct_calculation():
        """calculation "mean_correct" returns the share of correctly answered items."""
        config = {
            "name": "correct_mean",
            "items": [{"id": "q1", "correct": 1}, {"id": "q2", "correct": 2}],
            "calculation": "mean_correct",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame(
            {
                "q1": [1, 1, 2],  # correct, correct, wrong
                "q2": [2, 1, 2],  # correct, wrong, correct
            }
        )

        result = processor.process(df)

        assert result["correct_mean"].iloc[0] == 1.0  # 2/2 = 1.0
        assert result["correct_mean"].iloc[1] == 0.5  # 1/2 = 0.5
        assert result["correct_mean"].iloc[2] == 0.5  # 1/2 = 0.5

    @staticmethod
    def process_raises_error_for_unknown_correct_calculation():
        """An unknown calculation on items with "correct" keys raises ValueError."""
        config = {
            "name": "test",
            "items": [{"id": "q1", "correct": 1}],
            "calculation": "unknown_correct",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2]})

        with pytest.raises(
            ValueError, match="Unknown calculation for objective items: unknown_correct"
        ):
            processor.process(df)

    @staticmethod
    def process_raises_error_for_unknown_calculation_type():
        """An unrecognised calculation name raises ValueError."""
        config = {"name": "test", "items": [{"id": "q1"}], "calculation": "unknown"}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2]})

        with pytest.raises(ValueError, match="Unknown calculation: unknown"):
            processor.process(df)

    @staticmethod
    def process_applies_subgroup_filtering():
        """Rows outside the subgroup come back as NA in the output column."""
        config = {
            "name": "filtered",
            "items": [{"id": "q1"}],
            "calculation": "response",
        }
        processor = ScaleProcessor(config, "group")
        df = pd.DataFrame({"q1": [10, 20, 30], "group": [True, False, True]})

        result = processor.process(df)

        assert result["filtered"].iloc[0] == 10
        assert pd.isna(result["filtered"].iloc[1])
        assert result["filtered"].iloc[2] == 30

    @staticmethod
    def process_handles_missing_values_in_mean_calculation():
        """The mean is taken over the non-missing items of each row."""
        config = {"name": "with_na", "items": [{"id": "q1"}, {"id": "q2"}]}
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, np.nan, 3], "q2": [2, 4, np.nan]})

        result = processor.process(df)

        assert result["with_na"].iloc[0] == 1.5  # (1+2)/2
        assert result["with_na"].iloc[1] == 4.0  # only q2 value
        assert result["with_na"].iloc[2] == 3.0  # only q1 value

    @staticmethod
    def process_handles_missing_values_in_categorical_calculation():
        """A missing answer stays NA instead of being mapped to a label."""
        config = {
            "name": "category_na",
            "items": [{"id": "q1"}],
            "calculation": "categorical",
            "response_options": {"1": "Yes", "2": "No"},
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, np.nan, 2]})

        result = processor.process(df)

        assert result["category_na"].iloc[0] == "Yes"
        assert pd.isna(result["category_na"].iloc[1])
        assert result["category_na"].iloc[2] == "No"

    @staticmethod
    def process_uses_custom_output_name():
        """The result column is named after "output", not after "name"."""
        config = {
            "name": "original_name",
            "items": [{"id": "q1"}],
            "output": "custom_output",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2, 3]})

        result = processor.process(df)

        assert "custom_output" in result.columns
        assert "original_name" not in result.columns

    @staticmethod
    def process_raises_error_for_ordinal_without_response_options_dict():
        """calculation "ordinal" rejects response_options given as a list."""
        config = {
            "name": "ordinal",
            "items": [{"id": "q1"}],
            "calculation": "ordinal",
            "response_options": ["Not a dict"],
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2]})

        with pytest.raises(
            ValueError,
            match="For calculation 'ordinal', response_options must be a dict mapping",
        ):
            processor.process(df)

    @staticmethod
    def process_raises_error_for_categorical_without_response_options_dict():
        """calculation "categorical" rejects response_options given as a string."""
        config = {
            "name": "categorical",
            "items": [{"id": "q1"}],
            "calculation": "categorical",
            "response_options": "Not a dict",
        }
        processor = ScaleProcessor(config)
        df = pd.DataFrame({"q1": [1, 2]})

        with pytest.raises(
            ValueError,
            match="response_options must be a dict for calculation 'categorical'",
        ):
            processor.process(df)