Submission scientific data
This commit is contained in:
@@ -0,0 +1,382 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
from unittest.mock import patch, mock_open
|
||||
from src.utils.data_loader import (
|
||||
load_yaml,
|
||||
DataLoader,
|
||||
load_questionnaire_scales,
|
||||
assemble_wave_info,
|
||||
)
|
||||
|
||||
|
||||
class TestLoadYaml:
    """Unit tests for ``load_yaml``; file access is mocked except where noted."""

    def test_yaml_file_loads_correctly(self):
        """A document with a nested mapping comes back as the matching nested dict."""
        document = "key1: value1\nkey2:\n nested: value2"
        fake_open = mock_open(read_data=document)

        with patch("builtins.open", fake_open):
            parsed = load_yaml("test.yaml")

        assert parsed == {"key1": "value1", "key2": {"nested": "value2"}}

    def test_yaml_file_not_found_raises_exception(self):
        """A missing file surfaces as ``FileNotFoundError``."""
        # ``open`` is not mocked here, so this relies on "nonexistent.yaml"
        # being absent from the current working directory.
        with pytest.raises(FileNotFoundError):
            load_yaml("nonexistent.yaml")

    def test_yaml_file_with_empty_content(self):
        """An empty document loads as ``None``."""
        fake_open = mock_open(read_data="")

        with patch("builtins.open", fake_open):
            parsed = load_yaml("test.yaml")

        assert parsed is None

    def test_yaml_file_with_invalid_syntax_raises_exception(self):
        """Malformed YAML makes ``load_yaml`` raise."""
        broken_document = "invalid: yaml: content: ["
        fake_open = mock_open(read_data=broken_document)

        with patch("builtins.open", fake_open), pytest.raises(Exception):
            load_yaml("test.yaml")
|
||||
|
||||
|
||||
class TestDataLoader:
    """Unit tests for ``DataLoader``: constructor attributes and per-wave CSV loading."""

    def test_dataloader_initializes_with_all_waves_when_none_specified(self):
        """Omitting the wave list selects the keys of ``data_file_for_each_wave``."""
        wave_files = {1: "wave1.csv", 2: "wave2.csv"}
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": wave_files,
            "config_file_for_each_wave": {1: "config1.yaml", 2: "config2.yaml"},
        }

        loader = DataLoader(settings)

        assert loader.waves_to_process == wave_files.keys()

    def test_dataloader_initializes_with_specified_waves(self):
        """An explicit wave list is stored unchanged."""
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": {
                1: "wave1.csv",
                2: "wave2.csv",
                3: "wave3.csv",
            },
            "config_file_for_each_wave": {
                1: "config1.yaml",
                2: "config2.yaml",
                3: "config3.yaml",
            },
        }

        loader = DataLoader(settings, [1, 3])

        assert loader.waves_to_process == [1, 3]

    def test_dataloader_stores_settings_correctly(self):
        """The three settings entries are exposed as same-named attributes."""
        settings = {
            "data_directory": "/test/data",
            "data_file_for_each_wave": {1: "test.csv"},
            "config_file_for_each_wave": {1: "test.yaml"},
        }

        loader = DataLoader(settings)

        assert loader.data_directory == "/test/data"
        assert loader.data_file_for_each_wave == {1: "test.csv"}
        assert loader.config_file_for_each_wave == {1: "test.yaml"}

    @patch("pandas.read_csv")
    def test_dataloader_loads_survey_data_for_specified_waves(self, mock_read_csv):
        """Only the requested wave is read, from ``<data_directory>/<file>``."""
        import os

        mock_read_csv.return_value = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": {1: "wave1.csv", 2: "wave2.csv"},
            "config_file_for_each_wave": {1: "config1.yaml", 2: "config2.yaml"},
        }

        loaded = DataLoader(settings, [1]).load_all_survey_data()

        assert 1 in loaded
        wave_entry = loaded[1]
        assert "data" in wave_entry
        assert "config_path" in wave_entry
        assert wave_entry["config_path"] == "config1.yaml"

        mock_read_csv.assert_called_once_with(os.path.join("/data", "wave1.csv"))

    @patch("pandas.read_csv")
    def test_dataloader_loads_multiple_waves(self, mock_read_csv):
        """Each requested wave gets its own entry with the matching config path."""
        first_frame = pd.DataFrame({"wave1_col": [1, 2]})
        second_frame = pd.DataFrame({"wave2_col": [3, 4]})
        # One frame per read_csv call, in call order.
        mock_read_csv.side_effect = [first_frame, second_frame]
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": {1: "wave1.csv", 2: "wave2.csv"},
            "config_file_for_each_wave": {1: "config1.yaml", 2: "config2.yaml"},
        }

        loaded = DataLoader(settings, [1, 2]).load_all_survey_data()

        assert len(loaded) == 2
        assert 1 in loaded
        assert 2 in loaded
        assert loaded[1]["config_path"] == "config1.yaml"
        assert loaded[2]["config_path"] == "config2.yaml"

    @patch("pandas.read_csv")
    def test_dataloader_handles_csv_read_error(self, mock_read_csv):
        """A ``FileNotFoundError`` from ``read_csv`` propagates to the caller."""
        mock_read_csv.side_effect = FileNotFoundError("CSV file not found")
        settings = {
            "data_directory": "/data",
            "data_file_for_each_wave": {1: "nonexistent.csv"},
            "config_file_for_each_wave": {1: "config1.yaml"},
        }
        loader = DataLoader(settings, [1])

        with pytest.raises(FileNotFoundError):
            loader.load_all_survey_data()
|
||||
|
||||
|
||||
class TestLoadQuestionnaireScales:
    """Unit tests for ``load_questionnaire_scales``.

    Every test patches both ``builtins.open`` and ``yaml.safe_load``, so the
    parsed structure always comes from the mock's ``return_value``; the text
    handed to ``mock_open`` is never parsed.
    """

    def test_questionnaire_scales_loads_from_valid_yaml(self):
        """Each entry of ``scales`` is returned keyed by its ``name``."""
        # Kept well-formed for readability even though safe_load is mocked.
        yaml_content = (
            "\n"
            "scales:\n"
            "  - name: scale1\n"
            "    items: [item1, item2]\n"
            "  - name: scale2\n"
            "    items: [item3, item4]\n"
        )
        with patch("builtins.open", mock_open(read_data=yaml_content)), patch(
            "yaml.safe_load"
        ) as mock_yaml:
            mock_yaml.return_value = {
                "scales": [
                    {"name": "scale1", "items": ["item1", "item2"]},
                    {"name": "scale2", "items": ["item3", "item4"]},
                ]
            }
            result = load_questionnaire_scales("test.yaml", questionnaire_name="q1")

        assert "scale1" in result
        assert "scale2" in result
        assert result["scale1"]["items"] == ["item1", "item2"]

    def test_questionnaire_scales_handles_empty_scales_list(self):
        """An empty ``scales`` list produces an empty dict."""
        yaml_content = "\nscales: []\n"
        with patch("builtins.open", mock_open(read_data=yaml_content)), patch(
            "yaml.safe_load"
        ) as mock_yaml:
            mock_yaml.return_value = {"scales": []}
            result = load_questionnaire_scales("test.yaml", questionnaire_name="q1")

        assert result == {}

    def test_questionnaire_scales_loads_complex_structure(self):
        """Extra per-scale fields such as ``calculation`` are carried through."""
        with patch("builtins.open", mock_open()), patch("yaml.safe_load") as mock_yaml:
            mock_yaml.return_value = {
                "questionnaire": "test_questionnaire",
                "scales": [
                    {
                        "name": "choice_favorite_ai_user",
                        "label": "Choice of favorite AI system",
                        "calculation": "categorical",
                        "response_options": {"1": "ChatGPT", "2": "Claude"},
                        "output": "choice_favorite_ai_user",
                    }
                ],
            }
            result = load_questionnaire_scales("test.yaml", questionnaire_name="q1")

        assert "choice_favorite_ai_user" in result
        assert result["choice_favorite_ai_user"]["calculation"] == "categorical"

    def test_questionnaire_scales_handles_missing_scales_key(self):
        """A document without a ``scales`` key raises ``KeyError``."""
        with patch("builtins.open", mock_open()), patch("yaml.safe_load") as mock_yaml:
            mock_yaml.return_value = {"questionnaire": "test"}
            with pytest.raises(KeyError):
                load_questionnaire_scales("test.yaml", questionnaire_name="q1")
|
||||
|
||||
|
||||
class TestAssembleWaveInfo:
    """Unit tests for ``assemble_wave_info`` with its YAML and scale loaders mocked.

    The result is indexed positionally throughout: 0 = scale dictionary,
    1 = subgroup assignment per scale, 2 = excluded scales, 3 = composite scales.
    """

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("os.path.isabs")
    @patch("os.path.normpath")
    @patch("os.path.join")
    def test_wave_info_assembles_with_absolute_questionnaire_paths(
        self, mock_join, mock_normpath, mock_isabs, mock_load_scales, mock_load_yaml
    ):
        """An absolute questionnaire path reaches the scale loader unchanged."""
        mock_isabs.return_value = True
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/absolute/path/q1.yaml"}]
        }
        mock_load_scales.return_value = {
            "scale1": {"items": ["item1", "item2"], "questionnaire": "q1"},
            "scale2": {"items": ["item3", "item4"], "questionnaire": "q1"},
        }
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        mock_load_scales.assert_called_once_with("/absolute/path/q1.yaml", "q1")
        assert "scale1" in wave_info[0]

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("os.path.isabs")
    @patch("os.path.normpath")
    @patch("os.path.join")
    def test_wave_info_assembles_with_relative_questionnaire_paths(
        self, mock_join, mock_normpath, mock_isabs, mock_load_scales, mock_load_yaml
    ):
        """A relative path is joined onto ``questionnaire_directory`` before loading."""
        resolved_path = "/base/relative/q1.yaml"
        mock_isabs.return_value = False
        mock_join.return_value = resolved_path
        mock_normpath.return_value = resolved_path
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "relative/q1.yaml"}]
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        assert "scale1" in wave_info[0]
        mock_join.assert_called_once_with("/base", "relative/q1.yaml")
        mock_load_scales.assert_called_once_with("/base/relative/q1.yaml", "q1")

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    @patch("logging.info")
    def test_wave_info_assigns_all_subgroup_to_scales_without_subgroup(
        self, mock_log, mock_load_scales, mock_load_yaml
    ):
        """With no ``subgroup_scales`` entry a scale maps to "all" and one info log is emitted."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}]
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        assert wave_info[1]["scale1"] == "all"
        mock_log.assert_called_once()

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_subgroup_scales_by_questionnaire_name(
        self, mock_load_scales, mock_load_yaml
    ):
        """A subgroup keyed by questionnaire name applies to all of its scales."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}],
            "subgroup_scales": {"q1": "group1"},
        }
        mock_load_scales.return_value = {
            "scale1": {"questionnaire": "q1"},
            "scale2": {"questionnaire": "q1"},
        }
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        subgroup_by_scale = wave_info[1]
        assert subgroup_by_scale["scale1"] == "group1"
        assert subgroup_by_scale["scale2"] == "group1"

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_subgroup_scales_by_scale_name(
        self, mock_load_scales, mock_load_yaml
    ):
        """A subgroup keyed by scale name is assigned to that scale."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}],
            "subgroup_scales": {"scale1": "specific_group"},
        }
        mock_load_scales.return_value = {
            "scale1": {"questionnaire": "q1"},
            "scale2": {"questionnaire": "q1"},
        }
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        assert wave_info[1]["scale1"] == "specific_group"

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_raises_error_for_invalid_subgroup_entry(
        self, mock_load_scales, mock_load_yaml
    ):
        """A ``subgroup_scales`` key matching no scale or questionnaire raises ``ValueError``."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}],
            "subgroup_scales": {"nonexistent": "group1"},
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}
        expected_message = "Entry 'nonexistent' in subgroup_scales is not a loaded scale or questionnaire name"

        with pytest.raises(ValueError, match=expected_message):
            assemble_wave_info("wave_config.yaml", wave_settings)

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_composite_scales_when_present(
        self, mock_load_scales, mock_load_yaml
    ):
        """``composite_scales`` from the wave config is returned at index 3."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}],
            "composite_scales": {"composite1": {"items": ["scale1", "scale2"]}},
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        composites = wave_info[3]
        assert "composite1" in composites
        assert composites["composite1"]["items"] == ["scale1", "scale2"]

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_empty_composite_scales_when_absent(
        self, mock_load_scales, mock_load_yaml
    ):
        """Without a ``composite_scales`` key, index 3 is an empty dict."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}]
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        assert wave_info[3] == {}

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_handles_multiple_questionnaires(
        self, mock_load_scales, mock_load_yaml
    ):
        """Scales from several questionnaires are merged into one dictionary."""
        mock_load_yaml.return_value = {
            "questionnaires": [
                {"name": "q1", "path": "/path/q1.yaml"},
                {"name": "q2", "path": "/path/q2.yaml"},
            ]
        }
        # One return value per questionnaire, in call order.
        mock_load_scales.side_effect = [
            {"scale1": {"questionnaire": "q1"}},
            {"scale2": {"questionnaire": "q2"}},
        ]
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        merged_scales = wave_info[0]
        assert "scale1" in merged_scales
        assert "scale2" in merged_scales
        assert len(merged_scales) == 2

    @patch("src.utils.data_loader.load_yaml")
    @patch("src.utils.data_loader.load_questionnaire_scales")
    def test_wave_info_returns_correct_tuple_structure(
        self, mock_load_scales, mock_load_yaml
    ):
        """The result is a 4-tuple of (dict, dict, set, dict)."""
        mock_load_yaml.return_value = {
            "questionnaires": [{"name": "q1", "path": "/path/q1.yaml"}]
        }
        mock_load_scales.return_value = {"scale1": {"questionnaire": "q1"}}
        wave_settings = {"questionnaire_directory": "/base"}

        wave_info = assemble_wave_info("wave_config.yaml", wave_settings)

        assert isinstance(wave_info, tuple)
        assert len(wave_info) == 4
        # scale_dictionary, final_subgroup_scales, excluded_scales, composite_scales
        for position, expected_type in enumerate((dict, dict, set, dict)):
            assert isinstance(wave_info[position], expected_type)
|
||||
@@ -0,0 +1,266 @@
|
||||
import pytest
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
import tempfile
|
||||
import os
|
||||
from unittest.mock import patch, MagicMock
|
||||
from src.utils.database_populator import populate_database
|
||||
|
||||
|
||||
class TestPopulateDatabase:
    """Tests for ``populate_database`` against temporary SQLite files and mocked connections.

    NOTE(review): the test methods below do not carry the ``test_`` prefix, so
    pytest's default discovery rules would not collect them -- confirm the
    project overrides ``python_functions`` or rename them.
    """

    _LIST_TABLES = "SELECT name FROM sqlite_master WHERE type='table'"

    @staticmethod
    def _new_db_path():
        """Create an empty, already-closed temporary ``.sqlite`` file and return its path."""
        with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp_file:
            return tmp_file.name

    @staticmethod
    def _fetch_all(db_path, sql):
        """Run *sql* against the database at *db_path* and return all rows.

        The connection is closed in ``finally`` so a failing query cannot leave
        the file open when the calling test unlinks it.
        """
        conn = sqlite3.connect(db_path)
        try:
            return conn.execute(sql).fetchall()
        finally:
            conn.close()

    @staticmethod
    def single_wave_data_creates_correct_table():
        """One wave yields a ``wave1`` table holding the frame's rows in order."""
        test_data = {1: pd.DataFrame({"col1": [1, 2], "col2": ["a", "b"]})}
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            tables = TestPopulateDatabase._fetch_all(
                db_path, TestPopulateDatabase._LIST_TABLES
            )
            assert ("wave1",) in tables

            rows = TestPopulateDatabase._fetch_all(db_path, "SELECT * FROM wave1")
            assert len(rows) == 2
            assert rows[0] == (1, "a")
            assert rows[1] == (2, "b")
        finally:
            os.unlink(db_path)

    @staticmethod
    def multiple_waves_create_separate_tables():
        """Three waves yield exactly the tables ``wave1``, ``wave2`` and ``wave3``."""
        test_data = {
            1: pd.DataFrame({"wave1_col": [1, 2]}),
            2: pd.DataFrame({"wave2_col": [3, 4]}),
            3: pd.DataFrame({"wave3_col": [5, 6]}),
        }
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            rows = TestPopulateDatabase._fetch_all(
                db_path, TestPopulateDatabase._LIST_TABLES
            )
            tables = [row[0] for row in rows]

            assert "wave1" in tables
            assert "wave2" in tables
            assert "wave3" in tables
            assert len(tables) == 3
        finally:
            os.unlink(db_path)

    @staticmethod
    def empty_dataframe_creates_table_with_no_rows():
        """An empty frame still creates its table and column, with zero rows."""
        test_data = {1: pd.DataFrame({"empty_col": []})}
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            count_rows = TestPopulateDatabase._fetch_all(
                db_path, "SELECT COUNT(*) FROM wave1"
            )
            assert count_rows[0][0] == 0

            columns = TestPopulateDatabase._fetch_all(
                db_path, "PRAGMA table_info(wave1)"
            )
            assert len(columns) == 1
            # PRAGMA table_info rows are (cid, name, type, ...); index 1 is the name.
            assert columns[0][1] == "empty_col"
        finally:
            os.unlink(db_path)

    @staticmethod
    def empty_dictionary_creates_no_tables():
        """With no waves supplied, the database contains no tables."""
        test_data = {}
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            tables = TestPopulateDatabase._fetch_all(
                db_path, TestPopulateDatabase._LIST_TABLES
            )
            assert len(tables) == 0
        finally:
            os.unlink(db_path)

    @staticmethod
    def existing_database_tables_are_replaced():
        """A pre-existing ``wave1`` table is replaced, schema and rows alike."""
        test_data = {1: pd.DataFrame({"col": [1, 2]})}
        db_path = TestPopulateDatabase._new_db_path()
        try:
            # Seed the file with a table of the same name but a different schema.
            seed_conn = sqlite3.connect(db_path)
            try:
                seed_conn.execute("CREATE TABLE wave1 (old_col INTEGER)")
                seed_conn.execute("INSERT INTO wave1 VALUES (999)")
                seed_conn.commit()
            finally:
                seed_conn.close()

            populate_database(test_data, db_path)

            rows = TestPopulateDatabase._fetch_all(db_path, "SELECT * FROM wave1")
            assert len(rows) == 2
            assert rows[0] == (1,)
            assert rows[1] == (2,)

            columns = TestPopulateDatabase._fetch_all(
                db_path, "PRAGMA table_info(wave1)"
            )
            assert len(columns) == 1
            assert columns[0][1] == "col"
        finally:
            os.unlink(db_path)

    @staticmethod
    def database_uses_default_path_when_not_specified():
        """Without a path argument, ``sqlite3.connect`` receives the default path."""
        test_data = {1: pd.DataFrame({"col": [1]})}
        default_path = "results/study_results.sqlite"

        with patch("sqlite3.connect") as mock_connect:
            mock_connection = MagicMock()
            mock_connect.return_value = mock_connection

            populate_database(test_data)

            mock_connect.assert_called_once_with(default_path)
            mock_connection.close.assert_called_once()

    @staticmethod
    def dataframe_with_various_data_types_preserved():
        """Int, float, str and bool columns round-trip with names and order intact."""
        test_data = {
            1: pd.DataFrame(
                {
                    "int_col": [1, 2],
                    "float_col": [1.5, 2.7],
                    "str_col": ["text1", "text2"],
                    "bool_col": [True, False],
                }
            )
        }
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            conn = sqlite3.connect(db_path)
            try:
                df_result = pd.read_sql_query("SELECT * FROM wave1", conn)
            finally:
                conn.close()

            assert len(df_result) == 2
            assert list(df_result.columns) == [
                "int_col",
                "float_col",
                "str_col",
                "bool_col",
            ]
            assert df_result["int_col"].iloc[0] == 1
            assert df_result["str_col"].iloc[1] == "text2"
        finally:
            os.unlink(db_path)

    @patch("sqlite3.connect")
    def connection_closed_even_when_exception_occurs(self, mock_connect):
        """The connection's ``close`` is still called once when ``to_sql`` raises."""
        mock_connection = MagicMock()
        mock_connect.return_value = mock_connection
        mock_connection.__enter__ = MagicMock(return_value=mock_connection)
        mock_connection.__exit__ = MagicMock(return_value=False)

        test_dataframe = pd.DataFrame({"col": [1, 2]})
        # Force the write step to fail on this particular frame.
        test_dataframe.to_sql = MagicMock(side_effect=Exception("SQL Error"))

        test_data = {1: test_dataframe}

        with pytest.raises(Exception, match="SQL Error"):
            populate_database(test_data, "test.db")

        mock_connection.close.assert_called_once()

    @staticmethod
    def wave_numbers_create_correct_table_names():
        """Table names are ``wave<N>`` for each wave key, including multi-digit ones."""
        test_data = {
            10: pd.DataFrame({"col": [1]}),
            99: pd.DataFrame({"col": [2]}),
            1: pd.DataFrame({"col": [3]}),
        }
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            rows = TestPopulateDatabase._fetch_all(
                db_path,
                "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name",
            )
            tables = [row[0] for row in rows]

            expected_tables = ["wave1", "wave10", "wave99"]
            assert tables == expected_tables
        finally:
            os.unlink(db_path)

    @staticmethod
    def dataframe_index_not_stored_in_database():
        """A custom frame index does not appear as a column in the table."""
        df_with_custom_index = pd.DataFrame({"col": [1, 2]})
        df_with_custom_index.index = ["row1", "row2"]
        test_data = {1: df_with_custom_index}
        db_path = TestPopulateDatabase._new_db_path()
        try:
            populate_database(test_data, db_path)

            rows = TestPopulateDatabase._fetch_all(
                db_path, "PRAGMA table_info(wave1)"
            )
            columns = [row[1] for row in rows]

            assert "col" in columns
            assert "index" not in columns
            assert len(columns) == 1
        finally:
            os.unlink(db_path)
|
||||
@@ -0,0 +1,433 @@
|
||||
import pytest
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from src.scale_processor import ScaleProcessor
|
||||
|
||||
|
||||
class TestScaleProcessor:
|
||||
@staticmethod
def initializes_with_basic_scale_config():
    """A config holding only ``name`` and ``items`` gets the expected defaults."""
    scale = ScaleProcessor(
        {"name": "test_scale", "items": [{"id": "item1"}, {"id": "item2"}]}
    )

    assert scale.name == "test_scale"
    assert scale.items == [{"id": "item1"}, {"id": "item2"}]
    # Everything below was not supplied in the config.
    expected_defaults = {
        "calculation": "mean",
        "score_min": 1,
        "score_max": 5,
        "output": "test_scale",
    }
    for attribute, expected in expected_defaults.items():
        assert getattr(scale, attribute) == expected
    assert scale.subgroup is None
|
||||
|
||||
@staticmethod
def initializes_with_custom_configuration():
    """Explicit config values and the subgroup argument are stored on the processor."""
    scale_config = {
        "name": "custom_scale",
        "items": [{"id": "q1"}],
        "calculation": "sum",
        "score_range": (0, 10),
        "response_options": {"1": "Yes", "2": "No"},
        "output": "custom_output",
    }

    scale = ScaleProcessor(scale_config, "group1")

    expected_attributes = {
        "calculation": "sum",
        "score_min": 0,  # first element of score_range
        "score_max": 10,  # second element of score_range
        "response_options": {"1": "Yes", "2": "No"},
        "output": "custom_output",
        "subgroup": "group1",
    }
    for attribute, expected in expected_attributes.items():
        assert getattr(scale, attribute) == expected
|
||||
|
||||
@staticmethod
def check_items_passes_when_all_columns_present():
    """``check_items`` does not raise when every item id is a column of the frame."""
    scale = ScaleProcessor({"name": "test", "items": [{"id": "col1"}, {"id": "col2"}]})
    # "col3" is an extra column that no item refers to.
    frame = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]})

    scale.check_items(frame)
|
||||
|
||||
@staticmethod
def check_items_raises_error_when_columns_missing():
    """``check_items`` raises ``ValueError`` naming the item id absent from the frame."""
    scale = ScaleProcessor(
        {"name": "test", "items": [{"id": "col1"}, {"id": "missing"}]}
    )
    frame = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
    # Brackets are escaped because ``match`` is applied as a regular expression.
    expected_message = "Missing columns in data: \\['missing'\\]"

    with pytest.raises(ValueError, match=expected_message):
        scale.check_items(frame)
|
||||
|
||||
@staticmethod
def get_subgroup_mask_returns_all_true_when_no_subgroup():
    """Without a subgroup the mask has one entry per row and all are truthy."""
    scale = ScaleProcessor({"name": "test", "items": [{"id": "col1"}]})
    frame = pd.DataFrame({"col1": [1, 2, 3]})

    row_mask = scale.get_subgroup_mask(frame)

    assert row_mask.all()
    assert len(row_mask) == 3
|
||||
|
||||
@staticmethod
def get_subgroup_mask_returns_all_true_when_subgroup_is_all():
    """The subgroup value "all" produces a mask whose entries are all truthy."""
    scale = ScaleProcessor({"name": "test", "items": [{"id": "col1"}]}, "all")
    frame = pd.DataFrame({"col1": [1, 2, 3]})

    row_mask = scale.get_subgroup_mask(frame)

    assert row_mask.all()
|
||||
|
||||
@staticmethod
def get_subgroup_mask_filters_by_subgroup_column():
    """The mask follows the boolean column named by the subgroup, row for row."""
    config = {"name": "test", "items": [{"id": "col1"}]}
    processor = ScaleProcessor(config, "group")
    df = pd.DataFrame({"col1": [1, 2, 3], "group": [True, False, True]})

    mask = processor.get_subgroup_mask(df)

    # Compare by value, not identity: elements of a bool-dtype Series are
    # numpy.bool_ objects, so ``mask.iloc[i] is True`` is False even for a
    # true entry. ``tolist()`` converts them to built-in bools.
    assert mask.tolist() == [True, False, True]
|
||||
|
||||
@staticmethod
def get_subgroup_mask_returns_all_true_when_subgroup_column_missing():
    """A subgroup naming a column absent from the frame gives an all-truthy mask."""
    scale = ScaleProcessor({"name": "test", "items": [{"id": "col1"}]}, "nonexistent")
    frame = pd.DataFrame({"col1": [1, 2, 3]})

    row_mask = scale.get_subgroup_mask(frame)

    assert row_mask.all()
|
||||
|
||||
@staticmethod
def process_calculates_mean_by_default():
    """With no ``calculation`` given, each row's score is the mean of its items."""
    scale = ScaleProcessor({"name": "test", "items": [{"id": "q1"}, {"id": "q2"}]})
    frame = pd.DataFrame({"q1": [2, 4, 6], "q2": [4, 6, 8]})

    scored = scale.process(frame)

    assert scored.columns[0] == "test"
    for row, expected_mean in enumerate([3.0, 5.0, 7.0]):
        assert scored["test"].iloc[row] == expected_mean
|
||||
|
||||
@staticmethod
def process_calculates_sum_when_specified():
    """``calculation: sum`` adds the item values of each row."""
    scale = ScaleProcessor(
        {
            "name": "sum_scale",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "sum",
        }
    )
    frame = pd.DataFrame({"q1": [1, 2, 3], "q2": [4, 5, 6]})

    scored = scale.process(frame)

    for row, expected_sum in enumerate([5, 7, 9]):
        assert scored["sum_scale"].iloc[row] == expected_sum
|
||||
|
||||
@staticmethod
def process_handles_item_inversion():
    """An item flagged ``inverse`` is reverse-scored before the mean is taken."""
    scale = ScaleProcessor(
        {
            "name": "inverted",
            "items": [{"id": "q1", "inverse": True}, {"id": "q2"}],
            "score_range": (1, 5),
        }
    )
    frame = pd.DataFrame({"q1": [1, 5], "q2": [3, 3]})

    scored = scale.process(frame)

    # Expected values treat inverted q1 as (max + min - q1):
    # row 0: (5+1-1+3)/2 = 4, row 1: (5+1-5+3)/2 = 2
    for row, expected_mean in enumerate([4.0, 2.0]):
        assert scored["inverted"].iloc[row] == expected_mean
|
||||
|
||||
@staticmethod
def process_handles_categorical_calculation_single_item():
    """Categorical scoring turns each numeric code into its response label."""
    scale = ScaleProcessor(
        {
            "name": "category",
            "items": [{"id": "q1"}],
            "calculation": "categorical",
            "response_options": {"1": "Option A", "2": "Option B", "3": "Option C"},
        }
    )
    frame = pd.DataFrame({"q1": [1, 2, 3, 1]})

    scored = scale.process(frame)

    expected_labels = ["Option A", "Option B", "Option C", "Option A"]
    for row, label in enumerate(expected_labels):
        assert scored["category"].iloc[row] == label
|
||||
|
||||
@staticmethod
def process_raises_error_for_categorical_with_multiple_items():
    """Categorical scoring of a two-item scale raises ``ValueError``."""
    scale = ScaleProcessor(
        {
            "name": "category",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "categorical",
        }
    )
    frame = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})
    expected_message = "calculation 'categorical' is only for single-item scales"

    with pytest.raises(ValueError, match=expected_message):
        scale.process(frame)
|
||||
|
||||
@staticmethod
def process_handles_categorical_with_open_ended_other_option():
    """Free text is kept in ``category_other_text`` for the "Other" rows and is NA for the rest."""
    scale = ScaleProcessor(
        {
            "name": "category",
            "items": [{"id": "q1", "open_ended_id": "q1_other"}],
            "calculation": "categorical",
            "response_options": {"1": "Option A", "10": "Other"},
        }
    )
    frame = pd.DataFrame(
        {
            "q1": [1, 10, 1, 10],
            "q1_other": ["", "Custom text", "", "Another custom"],
        }
    )

    scored = scale.process(frame)

    labels = scored["category"]
    assert labels.iloc[0] == "Option A"
    assert labels.iloc[1] == "Other"

    other_text = scored["category_other_text"]
    assert pd.isna(other_text.iloc[0])
    assert other_text.iloc[1] == "Custom text"
    assert pd.isna(other_text.iloc[2])
    assert other_text.iloc[3] == "Another custom"
|
||||
|
||||
@staticmethod
def process_handles_ordinal_calculation_single_item():
    """Ordinal scoring turns each numeric code into its response label."""
    scale = ScaleProcessor(
        {
            "name": "ordinal",
            "items": [{"id": "q1"}],
            "calculation": "ordinal",
            # Integer keys here, unlike the string keys in the categorical tests.
            "response_options": {1: "Low", 2: "Medium", 3: "High"},
        }
    )
    frame = pd.DataFrame({"q1": [1, 2, 3, 2]})

    scored = scale.process(frame)

    for row, label in enumerate(["Low", "Medium", "High", "Medium"]):
        assert scored["ordinal"].iloc[row] == label
|
||||
|
||||
@staticmethod
def process_raises_error_for_ordinal_with_multiple_items():
    """Ordinal scoring of a two-item scale raises ``ValueError``."""
    scale = ScaleProcessor(
        {
            "name": "ordinal",
            "items": [{"id": "q1"}, {"id": "q2"}],
            "calculation": "ordinal",
        }
    )
    frame = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})
    expected_message = "calculation 'ordinal' only allowed with single-item scales"

    with pytest.raises(ValueError, match=expected_message):
        scale.process(frame)
|
||||
|
||||
@staticmethod
def process_handles_response_calculation_single_item():
    """``calculation: response`` returns the single item's values unchanged."""
    scale = ScaleProcessor(
        {
            "name": "response",
            "items": [{"id": "q1"}],
            "calculation": "response",
        }
    )
    raw_values = [1.5, 2.7, 3.9]
    frame = pd.DataFrame({"q1": [1.5, 2.7, 3.9]})

    scored = scale.process(frame)

    for row, expected in enumerate(raw_values):
        assert scored["response"].iloc[row] == expected
|
||||
|
||||
@staticmethod
|
||||
def process_raises_error_for_response_with_multiple_items():
|
||||
config = {
|
||||
"name": "response",
|
||||
"items": [{"id": "q1"}, {"id": "q2"}],
|
||||
"calculation": "response",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2], "q2": [1, 2]})
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="calculation 'response' can only be used with single-item scales!",
|
||||
):
|
||||
processor.process(df)
|
||||
|
||||
@staticmethod
|
||||
def process_handles_sum_correct_calculation():
|
||||
config = {
|
||||
"name": "correct_sum",
|
||||
"items": [
|
||||
{"id": "q1", "correct": 2},
|
||||
{"id": "q2", "correct": 1},
|
||||
{"id": "q3", "correct": 3},
|
||||
],
|
||||
"calculation": "sum_correct",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"q1": [2, 1, 2], # correct, wrong, correct
|
||||
"q2": [1, 1, 2], # correct, correct, wrong
|
||||
"q3": [3, 2, 3], # correct, wrong, correct
|
||||
}
|
||||
)
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert result["correct_sum"].iloc[0] == 3 # all correct
|
||||
assert result["correct_sum"].iloc[1] == 1 # one correct
|
||||
assert result["correct_sum"].iloc[2] == 2 # two correct
|
||||
|
||||
@staticmethod
|
||||
def process_handles_mean_correct_calculation():
|
||||
config = {
|
||||
"name": "correct_mean",
|
||||
"items": [{"id": "q1", "correct": 1}, {"id": "q2", "correct": 2}],
|
||||
"calculation": "mean_correct",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame(
|
||||
{
|
||||
"q1": [1, 1, 2], # correct, correct, wrong
|
||||
"q2": [2, 1, 2], # correct, wrong, correct
|
||||
}
|
||||
)
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert result["correct_mean"].iloc[0] == 1.0 # 2/2 = 1.0
|
||||
assert result["correct_mean"].iloc[1] == 0.5 # 1/2 = 0.5
|
||||
assert result["correct_mean"].iloc[2] == 0.5 # 1/2 = 0.5
|
||||
|
||||
@staticmethod
|
||||
def process_raises_error_for_unknown_correct_calculation():
|
||||
config = {
|
||||
"name": "test",
|
||||
"items": [{"id": "q1", "correct": 1}],
|
||||
"calculation": "unknown_correct",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2]})
|
||||
|
||||
with pytest.raises(
|
||||
ValueError, match="Unknown calculation for objective items: unknown_correct"
|
||||
):
|
||||
processor.process(df)
|
||||
|
||||
@staticmethod
|
||||
def process_raises_error_for_unknown_calculation_type():
|
||||
config = {"name": "test", "items": [{"id": "q1"}], "calculation": "unknown"}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2]})
|
||||
|
||||
with pytest.raises(ValueError, match="Unknown calculation: unknown"):
|
||||
processor.process(df)
|
||||
|
||||
@staticmethod
|
||||
def process_applies_subgroup_filtering():
|
||||
config = {
|
||||
"name": "filtered",
|
||||
"items": [{"id": "q1"}],
|
||||
"calculation": "response",
|
||||
}
|
||||
processor = ScaleProcessor(config, "group")
|
||||
df = pd.DataFrame({"q1": [10, 20, 30], "group": [True, False, True]})
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert result["filtered"].iloc[0] == 10
|
||||
assert pd.isna(result["filtered"].iloc[1])
|
||||
assert result["filtered"].iloc[2] == 30
|
||||
|
||||
@staticmethod
|
||||
def process_handles_missing_values_in_mean_calculation():
|
||||
config = {"name": "with_na", "items": [{"id": "q1"}, {"id": "q2"}]}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, np.nan, 3], "q2": [2, 4, np.nan]})
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert result["with_na"].iloc[0] == 1.5 # (1+2)/2
|
||||
assert result["with_na"].iloc[1] == 4.0 # only q2 value
|
||||
assert result["with_na"].iloc[2] == 3.0 # only q1 value
|
||||
|
||||
@staticmethod
|
||||
def process_handles_missing_values_in_categorical_calculation():
|
||||
config = {
|
||||
"name": "category_na",
|
||||
"items": [{"id": "q1"}],
|
||||
"calculation": "categorical",
|
||||
"response_options": {"1": "Yes", "2": "No"},
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, np.nan, 2]})
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert result["category_na"].iloc[0] == "Yes"
|
||||
assert pd.isna(result["category_na"].iloc[1])
|
||||
assert result["category_na"].iloc[2] == "No"
|
||||
|
||||
@staticmethod
|
||||
def process_uses_custom_output_name():
|
||||
config = {
|
||||
"name": "original_name",
|
||||
"items": [{"id": "q1"}],
|
||||
"output": "custom_output",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2, 3]})
|
||||
|
||||
result = processor.process(df)
|
||||
|
||||
assert "custom_output" in result.columns
|
||||
assert "original_name" not in result.columns
|
||||
|
||||
@staticmethod
|
||||
def process_raises_error_for_ordinal_without_response_options_dict():
|
||||
config = {
|
||||
"name": "ordinal",
|
||||
"items": [{"id": "q1"}],
|
||||
"calculation": "ordinal",
|
||||
"response_options": ["Not a dict"],
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2]})
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="For calculation 'ordinal', response_options must be a dict mapping",
|
||||
):
|
||||
processor.process(df)
|
||||
|
||||
@staticmethod
|
||||
def process_raises_error_for_categorical_without_response_options_dict():
|
||||
config = {
|
||||
"name": "categorical",
|
||||
"items": [{"id": "q1"}],
|
||||
"calculation": "categorical",
|
||||
"response_options": "Not a dict",
|
||||
}
|
||||
processor = ScaleProcessor(config)
|
||||
df = pd.DataFrame({"q1": [1, 2]})
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match="response_options must be a dict for calculation 'categorical'",
|
||||
):
|
||||
processor.process(df)
|
||||
Reference in New Issue
Block a user