"""Tests for the loading helpers exposed by ``src.utils.data_loader``."""

import os
from unittest.mock import mock_open, patch

import pandas as pd
import pytest

from src.utils.data_loader import (
    DataLoader,
    assemble_wave_info,
    load_questionnaire_scales,
    load_yaml,
)


# ---------------------------------------------------------------------------
# Shared builders.  Each one returns a fresh object per call so that no test
# can leak mutations into another one.
# ---------------------------------------------------------------------------


def _wave_settings(wave_count, data_directory="/data"):
    """Build ``DataLoader`` settings for waves ``1..wave_count``.

    Wave ``N`` is mapped to ``waveN.csv`` and ``configN.yaml``.
    """
    waves = range(1, wave_count + 1)
    return {
        "data_directory": data_directory,
        "data_file_for_each_wave": {wave: f"wave{wave}.csv" for wave in waves},
        "config_file_for_each_wave": {wave: f"config{wave}.yaml" for wave in waves},
    }


def _questionnaire_settings():
    """Return the settings dict handed to ``assemble_wave_info`` in these tests."""
    return {"questionnaire_directory": "/base"}


def _wave_config(questionnaires=None, **extra_sections):
    """Build the dict the patched ``load_yaml`` returns for a wave config.

    Without arguments the config lists the single questionnaire ``q1`` at
    ``/path/q1.yaml``; ``extra_sections`` are appended as further top-level
    keys (e.g. ``subgroup_scales`` or ``composite_scales``).
    """
    if questionnaires is None:
        questionnaires = [{"name": "q1", "path": "/path/q1.yaml"}]
    return {"questionnaires": questionnaires, **extra_sections}


def _scales_for(*scale_names, questionnaire="q1"):
    """Build a scale dictionary whose entries only name their questionnaire."""
    return {name: {"questionnaire": questionnaire} for name in scale_names}


def _load_yaml_text(text):
    """Run ``load_yaml`` while ``builtins.open`` serves ``text`` as the file."""
    with patch("builtins.open", mock_open(read_data=text)):
        return load_yaml("test.yaml")


def _load_scales(parsed_yaml, file_text=""):
    """Run ``load_questionnaire_scales`` with file access and YAML parsing mocked.

    ``yaml.safe_load`` is patched to return ``parsed_yaml``; ``file_text`` is
    only what the mocked file handle would yield if it were read.
    """
    with patch("builtins.open", mock_open(read_data=file_text)), patch(
        "yaml.safe_load", return_value=parsed_yaml
    ):
        return load_questionnaire_scales("test.yaml", questionnaire_name="q1")


class TestLoadYaml:
    """Tests for ``load_yaml``."""

    def test_yaml_file_loads_correctly(self):
        """Nested YAML text is returned as nested dicts."""
        result = _load_yaml_text("key1: value1\nkey2:\n nested: value2")
        assert result == {"key1": "value1", "key2": {"nested": "value2"}}

    def test_yaml_file_not_found_raises_exception(self):
        """A path that does not exist surfaces as ``FileNotFoundError``."""
        # ``open`` is deliberately not patched here.
        with pytest.raises(FileNotFoundError):
            load_yaml("nonexistent.yaml")

    def test_yaml_file_with_empty_content(self):
        """An empty file yields ``None``."""
        assert _load_yaml_text("") is None

    def test_yaml_file_with_invalid_syntax_raises_exception(self):
        """Malformed YAML text makes ``load_yaml`` raise."""
        # NOTE(review): ``Exception`` is very broad; the concrete type raised
        # by ``load_yaml`` is not visible from this file -- narrow it once
        # confirmed.
        with pytest.raises(Exception):
            _load_yaml_text("invalid: yaml: content: [")


class TestDataLoader:
    """Tests for ``DataLoader`` construction and ``load_all_survey_data``."""

    def test_dataloader_initializes_with_all_waves_when_none_specified(self):
        """Without an explicit wave list every configured wave is selected."""
        settings = _wave_settings(2)

        loader = DataLoader(settings)

        # NOTE(review): the expected value is a dict key view; this would not
        # equal a plain list -- confirm the type DataLoader is meant to store.
        assert loader.waves_to_process == settings["data_file_for_each_wave"].keys()

    def test_dataloader_initializes_with_specified_waves(self):
        """An explicit wave list is stored as given."""
        loader = DataLoader(_wave_settings(3), [1, 3])

        assert loader.waves_to_process == [1, 3]

    def test_dataloader_stores_settings_correctly(self):
        """The three settings entries are exposed as attributes."""
        settings = {
            "data_directory": "/test/data",
            "data_file_for_each_wave": {1: "test.csv"},
            "config_file_for_each_wave": {1: "test.yaml"},
        }

        loader = DataLoader(settings)

        assert loader.data_directory == "/test/data"
        assert loader.data_file_for_each_wave == {1: "test.csv"}
        assert loader.config_file_for_each_wave == {1: "test.yaml"}

    @patch("pandas.read_csv")
    def test_dataloader_loads_survey_data_for_specified_waves(self, mock_read_csv):
        """Only the requested wave is read, from ``<data_directory>/<file>``."""
        mock_read_csv.return_value = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
        loader = DataLoader(_wave_settings(2), [1])

        result = loader.load_all_survey_data()

        assert 1 in result
        assert "data" in result[1]
        assert "config_path" in result[1]
        assert result[1]["config_path"] == "config1.yaml"
        mock_read_csv.assert_called_once_with(os.path.join("/data", "wave1.csv"))

    @patch("pandas.read_csv")
    def test_dataloader_loads_multiple_waves(self, mock_read_csv):
        """Each requested wave gets its own entry and config path."""
        # One frame per expected ``read_csv`` call, in call order.
        mock_read_csv.side_effect = [
            pd.DataFrame({"wave1_col": [1, 2]}),
            pd.DataFrame({"wave2_col": [3, 4]}),
        ]
        loader = DataLoader(_wave_settings(2), [1, 2])

        result = loader.load_all_survey_data()

        assert len(result) == 2
        assert 1 in result and 2 in result
        assert result[1]["config_path"] == "config1.yaml"
        assert result[2]["config_path"] == "config2.yaml"

    @patch("pandas.read_csv")
    def test_dataloader_handles_csv_read_error(self, mock_read_csv):
        """A ``FileNotFoundError`` from ``read_csv`` propagates to the caller."""
        mock_read_csv.side_effect = FileNotFoundError("CSV file not found")
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": {1: "nonexistent.csv"},
            "config_file_for_each_wave": {1: "config1.yaml"},
        }
        loader = DataLoader(settings, [1])

        with pytest.raises(FileNotFoundError):
            loader.load_all_survey_data()


class TestLoadQuestionnaireScales:
    """Tests for ``load_questionnaire_scales`` with YAML parsing mocked out."""

    def test_questionnaire_scales_loads_from_valid_yaml(self):
        """Scales are returned keyed by their ``name``."""
        # ``yaml.safe_load`` is patched, so this text only feeds the mocked
        # file handle; the parsed structure below is what the loader sees.
        yaml_content = """
        scales:
          - name: scale1
            items: [item1, item2]
          - name: scale2
            items: [item3, item4]
        """
        parsed = {
            "scales": [
                {"name": "scale1", "items": ["item1", "item2"]},
                {"name": "scale2", "items": ["item3", "item4"]},
            ]
        }

        result = _load_scales(parsed, file_text=yaml_content)

        assert "scale1" in result
        assert "scale2" in result
        assert result["scale1"]["items"] == ["item1", "item2"]

    def test_questionnaire_scales_handles_empty_scales_list(self):
        """An empty ``scales`` list produces an empty dict."""
        yaml_content = """
        scales: []
        """

        result = _load_scales({"scales": []}, file_text=yaml_content)

        assert result == {}

    def test_questionnaire_scales_loads_complex_structure(self):
        """Additional per-scale fields are kept on the returned entry."""
        parsed = {
            "questionnaire": "test_questionnaire",
            "scales": [
                {
                    "name": "choice_favorite_ai_user",
                    "label": "Choice of favorite AI system",
                    "calculation": "categorical",
                    "response_options": {"1": "ChatGPT", "2": "Claude"},
                    "output": "choice_favorite_ai_user",
                }
            ],
        }

        result = _load_scales(parsed)

        assert "choice_favorite_ai_user" in result
        assert result["choice_favorite_ai_user"]["calculation"] == "categorical"

    def test_questionnaire_scales_handles_missing_scales_key(self):
        """A document without a ``scales`` key raises ``KeyError``."""
        with pytest.raises(KeyError):
            _load_scales({"questionnaire": "test"})


class TestAssembleWaveInfo:
    """Tests for ``assemble_wave_info``.

    The returned tuple is indexed positionally throughout:
    ``[0]`` scale dictionary, ``[1]`` final subgroup scales,
    ``[2]`` excluded scales, ``[3]`` composite scales.

    ``patch`` decorators apply bottom-up, so the mock arguments of each test
    are listed in the reverse order of its decorators.
    """

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("os.path.isabs")
    @patch("os.path.normpath")
    @patch("os.path.join")
    def test_wave_info_assembles_with_absolute_questionnaire_paths(
        self, mock_join, mock_normpath, mock_isabs, mock_load_scales, mock_load_yaml
    ):
        """An absolute questionnaire path is handed to the scale loader as-is."""
        mock_isabs.return_value = True
        mock_load_yaml.return_value = _wave_config(
            [{"name": "q1", "path": "/absolute/path/q1.yaml"}]
        )
        mock_load_scales.return_value = {
            "scale1": {"items": ["item1", "item2"], "questionnaire": "q1"},
            "scale2": {"items": ["item3", "item4"], "questionnaire": "q1"},
        }

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        mock_load_scales.assert_called_once_with("/absolute/path/q1.yaml", "q1")
        assert "scale1" in result[0]

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("os.path.isabs")
    @patch("os.path.normpath")
    @patch("os.path.join")
    def test_wave_info_assembles_with_relative_questionnaire_paths(
        self, mock_join, mock_normpath, mock_isabs, mock_load_scales, mock_load_yaml
    ):
        """A relative path is joined onto ``questionnaire_directory`` first."""
        resolved_path = "/base/relative/q1.yaml"
        mock_isabs.return_value = False
        mock_join.return_value = resolved_path
        mock_normpath.return_value = resolved_path
        mock_load_yaml.return_value = _wave_config(
            [{"name": "q1", "path": "relative/q1.yaml"}]
        )
        mock_load_scales.return_value = _scales_for("scale1")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert "scale1" in result[0]
        mock_join.assert_called_once_with("/base", "relative/q1.yaml")
        mock_load_scales.assert_called_once_with("/base/relative/q1.yaml", "q1")

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("logging.info")
    def test_wave_info_assigns_all_subgroup_to_scales_without_subgroup(
        self, mock_log, mock_load_scales, mock_load_yaml
    ):
        """Without ``subgroup_scales`` a scale gets subgroup ``"all"`` and one log call."""
        mock_load_yaml.return_value = _wave_config()
        mock_load_scales.return_value = _scales_for("scale1")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert result[1]["scale1"] == "all"
        mock_log.assert_called_once()

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_subgroup_scales_by_questionnaire_name(
        self, mock_load_scales, mock_load_yaml
    ):
        """A subgroup keyed by questionnaire name applies to all of its scales."""
        mock_load_yaml.return_value = _wave_config(subgroup_scales={"q1": "group1"})
        mock_load_scales.return_value = _scales_for("scale1", "scale2")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert result[1]["scale1"] == "group1"
        assert result[1]["scale2"] == "group1"

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_subgroup_scales_by_scale_name(
        self, mock_load_scales, mock_load_yaml
    ):
        """A subgroup keyed by scale name is assigned to that scale."""
        mock_load_yaml.return_value = _wave_config(
            subgroup_scales={"scale1": "specific_group"}
        )
        mock_load_scales.return_value = _scales_for("scale1", "scale2")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert result[1]["scale1"] == "specific_group"

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_raises_error_for_invalid_subgroup_entry(
        self, mock_load_scales, mock_load_yaml
    ):
        """A subgroup key matching no scale or questionnaire raises ``ValueError``."""
        mock_load_yaml.return_value = _wave_config(
            subgroup_scales={"nonexistent": "group1"}
        )
        mock_load_scales.return_value = _scales_for("scale1")

        with pytest.raises(
            ValueError,
            match="Entry 'nonexistent' in subgroup_scales is not a loaded scale or questionnaire name",
        ):
            assemble_wave_info("wave_config.yaml", _questionnaire_settings())

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_composite_scales_when_present(
        self, mock_load_scales, mock_load_yaml
    ):
        """``composite_scales`` from the wave config is returned at index 3."""
        mock_load_yaml.return_value = _wave_config(
            composite_scales={"composite1": {"items": ["scale1", "scale2"]}}
        )
        mock_load_scales.return_value = _scales_for("scale1")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert "composite1" in result[3]
        assert result[3]["composite1"]["items"] == ["scale1", "scale2"]

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_empty_composite_scales_when_absent(
        self, mock_load_scales, mock_load_yaml
    ):
        """Without a ``composite_scales`` section index 3 is an empty dict."""
        mock_load_yaml.return_value = _wave_config()
        mock_load_scales.return_value = _scales_for("scale1")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert result[3] == {}

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_multiple_questionnaires(
        self, mock_load_scales, mock_load_yaml
    ):
        """Scales from every listed questionnaire end up in one dictionary."""
        mock_load_yaml.return_value = _wave_config(
            [
                {"name": "q1", "path": "/path/q1.yaml"},
                {"name": "q2", "path": "/path/q2.yaml"},
            ]
        )
        # One return value per questionnaire, in call order.
        mock_load_scales.side_effect = [
            _scales_for("scale1", questionnaire="q1"),
            _scales_for("scale2", questionnaire="q2"),
        ]

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert "scale1" in result[0]
        assert "scale2" in result[0]
        assert len(result[0]) == 2

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_correct_tuple_structure(
        self, mock_load_scales, mock_load_yaml
    ):
        """The result is a 4-tuple of ``(dict, dict, set, dict)``."""
        mock_load_yaml.return_value = _wave_config()
        mock_load_scales.return_value = _scales_for("scale1")

        result = assemble_wave_info("wave_config.yaml", _questionnaire_settings())

        assert isinstance(result, tuple)
        assert len(result) == 4
        assert isinstance(result[0], dict)  # scale_dictionary
        assert isinstance(result[1], dict)  # final_subgroup_scales
        assert isinstance(result[2], set)  # excluded_scales
        assert isinstance(result[3], dict)  # composite_scales