diff options
Diffstat (limited to 'tests/unit/test_csvcmp.py')
-rw-r--r-- | tests/unit/test_csvcmp.py | 170 |
1 files changed, 170 insertions, 0 deletions
"""Tests for gn3.csvcmp"""
import pytest

from gn3.csvcmp import clean_csv_text
from gn3.csvcmp import csv_diff
from gn3.csvcmp import extract_invalid_csv_headers
from gn3.csvcmp import extract_strain_name
from gn3.csvcmp import fill_csv
from gn3.csvcmp import get_allowable_sampledata_headers
from gn3.csvcmp import remove_insignificant_edits


# Four-column sample-data CSV, wrapped in a leading and a trailing newline.
# It is the "base" side of the csv_diff tests whose delta adds a "Sex" column.
_BASE_FOUR_COLUMN_CSV = (
    "\n"
    "Strain Name,Value,SE,Count\n"
    "BXD1,18,x,0\n"
    "BXD12,16,x,x\n"
    "BXD14,15,x,x\n"
    "BXD15,14,x,x\n"
)


@pytest.mark.unit_test
def test_fill_csv():
    """Rows are padded with the fill value until each has `width` fields."""
    # Rows end either with an empty fifth field or stop after four fields.
    ragged_csv = (
        "\n"
        "Strain Name,Value,SE,Count,Sex\n"
        "BXD1,18,x,0,\n"
        "BXD12,16,x,x,\n"
        "BXD14,15,x,x,\n"
        "BXD15,14,x,x\n"
    )
    filled_csv = (
        "Strain Name,Value,SE,Count,Sex\n"
        "BXD1,18,x,0,x\n"
        "BXD12,16,x,x,x\n"
        "BXD14,15,x,x,x\n"
        "BXD15,14,x,x,x"
    )

    result = fill_csv(ragged_csv, width=5, value="x")

    assert result == filled_csv


@pytest.mark.unit_test
def test_remove_insignificant_data():
    """Modifications that only differ by a negligible amount are dropped.

    The first two entries differ by 0.000001 only and are expected to
    disappear; the third keeps its real change (2 -> 3) while its
    0.000001 difference is expected to be normalised away.
    """
    modifications = [
        {"Current": "1.000001,3", "Original": "1,3"},
        {"Current": "1,3", "Original": "1.000001,3"},
        {"Current": "2.000001,3", "Original": "2,2"},
        {"Current": "1.01,3", "Original": "1,2"},
    ]
    significant_modifications = [
        {"Current": "2,3", "Original": "2,2"},
        {"Current": "1.01,3", "Original": "1,2"},
    ]
    diff_data = {
        "Additions": [],
        "Deletions": [],
        "Modifications": modifications,
    }
    expected_json = {
        "Additions": [],
        "Deletions": [],
        "Modifications": significant_modifications,
    }

    assert remove_insignificant_edits(diff_data) == expected_json


@pytest.mark.unit_test
def test_csv_diff_same_columns():
    """Diff two csv strings that share the same header."""
    expected_diff = {
        "Additions": [],
        "Deletions": [],
        "Columns": "",
        "Modifications": [{"Current": "1,3", "Original": "1,2"}],
    }

    actual_diff = csv_diff(base_csv="a,b \n1,2\n", delta_csv="a,b\n1,3")

    assert actual_diff == expected_diff


@pytest.mark.unit_test
def test_csv_diff_different_columns():
    """Diff csv strings where the delta has one more column than the base.

    The wider header is expected under "Columns", and the base row that
    lacks the new cell is expected to show "x" in its place.
    """
    delta_csv = (
        "Strain Name,Value,SE,Count,Sex\n"
        "BXD1,18,x,0\n"
        "BXD12,16,x,x,1\n"
        "BXD14,15,x,x\n"
        "BXD15,14,x,x"
    )
    expected_diff = {
        "Additions": [],
        "Columns": "Strain Name,Value,SE,Count,Sex",
        "Deletions": [],
        "Modifications": [
            {"Current": "BXD12,16,x,x,1", "Original": "BXD12,16,x,x,x"},
        ],
    }

    actual_diff = csv_diff(base_csv=_BASE_FOUR_COLUMN_CSV, delta_csv=delta_csv)

    assert actual_diff == expected_diff


@pytest.mark.unit_test
def test_csv_diff_only_column_change():
    """Diff csv strings whose only difference is an extra header column.

    No additions, deletions or modifications are expected in the result.
    """
    delta_csv = (
        "Strain Name,Value,SE,Count,Sex\n"
        "BXD1,18,x,0\n"
        "BXD12,16,x,x\n"
        "BXD14,15,x,x\n"
        "BXD15,14,x,x\n"
    )
    expected_diff = {
        "Additions": [],
        "Deletions": [],
        "Modifications": [],
    }

    actual_diff = csv_diff(base_csv=_BASE_FOUR_COLUMN_CSV, delta_csv=delta_csv)

    assert actual_diff == expected_diff


@pytest.mark.unit_test
def test_extract_strain_name():
    """The strain name is pulled out of a data row given its csv header."""
    strain = extract_strain_name(
        csv_header="Strain Name,Value,SE,Count",
        data="BXD1,18,x,0",
    )

    assert strain == "BXD1"


@pytest.mark.unit_test
def test_get_allowable_csv_headers(mocker):
    """Allowed headers are fetched with a single CaseAttribute query.

    The expected list is the four fixed sample-data headers followed by
    the names returned by the mocked cursor.
    """
    mock_conn = mocker.MagicMock()
    # Single-column rows handed back by the mocked cursor's fetchall().
    case_attribute_rows = (
        ('Condition',), ('Tissue',), ('Sex',),
        ('Age',), ('Ethn.',), ('PMI (hrs)',), ('pH',), ('Color',))
    expected_values = [
        "Strain Name", "Value", "SE", "Count",
        "Condition", "Tissue", "Sex", "Age",
        "Ethn.", "PMI (hrs)", "pH", "Color",
    ]

    with mock_conn.cursor() as cursor:
        cursor.fetchall.return_value = case_attribute_rows
        fetched_headers = get_allowable_sampledata_headers(mock_conn)
        assert fetched_headers == expected_values
        cursor.execute.assert_called_once_with(
            "SELECT Name from CaseAttribute")


@pytest.mark.unit_test
def test_extract_invalid_csv_headers_with_some_wrong_headers():
    """Headers missing from the allowed list are reported as invalid."""
    allowed_headers = [
        "Strain Name", "Value", "SE", "Count",
        "Condition", "Tissue", "Sex", "Age",
        "Ethn.", "PMI (hrs)", "pH", "Color",
    ]
    # "Colour" is the only header here that is absent from allowed_headers.
    csv_text = "Strain Name, Value, SE, Colour"

    invalid_headers = extract_invalid_csv_headers(allowed_headers, csv_text)

    assert invalid_headers == ["Colour"]


@pytest.mark.unit_test
def test_clean_csv():
    """Stray spaces around cells and surrounding newlines are stripped."""
    csv_text = (
        "\n"
        "Strain Name,Value,SE,Count\n"
        "BXD1,18,x ,0\n"
        "BXD12, 16,x,x\n"
        "BXD14,15 ,x,x\n"
        "BXD15,14,x,\n"
    )
    expected_csv = (
        "Strain Name,Value,SE,Count\n"
        "BXD1,18,x,0\n"
        "BXD12,16,x,x\n"
        "BXD14,15,x,x\n"
        "BXD15,14,x,"
    )

    assert clean_csv_text(csv_text) == expected_csv
    assert clean_csv_text("a,b \n1,2\n") == "a,b\n1,2"