diff options
-rw-r--r-- | gn3/db/traits.py | 28 | ||||
-rw-r--r-- | tests/unit/db/test_traits.py | 144 |
2 files changed, 170 insertions, 2 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 5013844..3827c34 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -99,12 +99,36 @@ def get_trait_csv_sample_data(conn: Any, "cxref.StrainId = st.Id) " "LEFT JOIN CaseAttribute ca ON ca.Id = cxref.CaseAttributeId" "WHERE px.Id = %s AND px.PhenotypeId = %s ORDER BY st.Name") + case_attr_columns = set() + csv_data = {} with conn.cursor() as cursor: cursor.execute(__query, (trait_name, phenotype_id)) - return ("Strain Name,Value,SE,Count\n" + - "\n".join(map(lambda x:x[0], cursor.fetchall()))) + for data in cursor.fetchall(): + if data[1] == "x": + csv_data[data[0]] = None + else: + sample, case_attr, value = data[0], data[1], data[2] + if not csv_data.get(sample): + csv_data[sample] = {} + csv_data[sample][case_attr] = None if value == "x" else value + case_attr_columns.add(case_attr) + if not case_attr_columns: + return ("Strain Name,Value,SE,Count\n" + + "\n".join(csv_data.keys())) + else: + columns = sorted(case_attr_columns) + csv = ("Strain Name,Value,SE,Count," + + ",".join(columns) + "\n") + for key, value in csv_data.items(): + if not value: + csv += (key + (len(case_attr_columns) * ",x") + "\n") + else: + vals = [str(value.get(column, "x")) for column in columns] + csv += (key + "," + ",".join(vals) + "\n") + return csv return "No Sample Data Found" + def update_sample_data(conn: Any, #pylint: disable=[R0913] trait_name: str, strain_name: str, diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index de65348..a1c1136 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -5,6 +5,7 @@ from gn3.db.traits import ( build_trait_name, export_trait_data, export_informative, + get_trait_csv_sample_data, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -431,3 +432,146 @@ class TestTraitsDBFunctions(TestCase): with self.subTest(trait_data=trait_data): self.assertEqual( export_informative(trait_data, inc_var), expected) + + +class TestRetrieveCsvData(TestCase): + "Test cases for retrieving csv data" + def test_get_trait_csv_sample_data_with_case_attributes(self): + """Test that correct csv is returned when all samples have case attribute +values""" + mock_conn = mock.MagicMock() + with mock_conn.cursor() as cursor: + cursor.fetchall.return_value = [ + ['H1044,25.900000,x,1', 'Strain', 'BXD29'], + ['H1044,25.900000,x,1', 'Condition', 'CD'], + ['H1044,25.900000,x,1', 'EarTag', 1044], + ['H1044,25.900000,x,1', 'Age', 724], + ['H1096,23.700001,x,1', 'EarTag', 1096], + ['H1096,23.700001,x,1', 'Age', 732], + ['H1096,23.700001,x,1', 'Strain', 'BXD90'], + ['H1096,23.700001,x,1', 'Condition', 'CD'], + ['H1163,95.000000,x,1', 'Strain', 'D2B6F1'], + ['H1163,95.000000,x,1', 'Condition', 'HF'], + ['H1163,95.000000,x,1', 'EarTag', 1163], + ['H1163,95.000000,x,1', 'Age', 1114], + ['H1230,88.099998,x,1', 'EarTag', 1230], + ['H1230,88.099998,x,1', 'Age', 726], + ['H1230,88.099998,x,1', 'Strain', 'B6D2F1'], + ['H1230,88.099998,x,1', 'Condition', 'HF'], + ['H1251,24.100000,x,1', 'EarTag', 1251], + ['H1251,24.100000,x,1', 'Age', 711], + ['H1251,24.100000,x,1', 'Strain', 'C57BL/6J'], + ['H1251,24.100000,x,1', 'Condition', 'CD'], + ['H1292,24.900000,x,1', 'Strain', 'C57BL/6J'], + ['H1292,24.900000,x,1', 'Condition', 'CD'], + ['H1292,24.900000,x,1', 'EarTag', 1292], + ['H1292,24.900000,x,1', 'Age', 705], + ] + csv = get_trait_csv_sample_data(conn=mock_conn, + trait_name=10006, + phenotype_id=28409) + self.assertEqual(csv, """Strain Name,Value,SE,Count,Age,Condition,EarTag,Strain +H1044,25.900000,x,1,724,CD,1044,BXD29 +H1096,23.700001,x,1,732,CD,1096,BXD90 +H1163,95.000000,x,1,1114,HF,1163,D2B6F1 +H1230,88.099998,x,1,726,HF,1230,B6D2F1 +H1251,24.100000,x,1,711,CD,1251,C57BL/6J +H1292,24.900000,x,1,705,CD,1292,C57BL/6J +""") + + def test_get_trait_csv_sample_data_with_missing_case_attributes(self): + """Test that the correct csv is returned when some case attributes are not +present from some samples""" + mock_conn = mock.MagicMock() + with mock_conn.cursor() as cursor: + cursor.fetchall.return_value = [ + ['H1044,25.900000,x,1', 'Strain', 'BXD29'], + ['H1044,25.900000,x,1', 'Condition', 'CD'], + ['H1044,25.900000,x,1', 'Age', 724], + ['H1096,23.700001,x,1', 'Age', 732], + ['H1096,23.700001,x,1', 'Strain', 'BXD90'], + ['H1096,23.700001,x,1', 'Condition', 'CD'], + ['H1163,95.000000,x,1', 'Condition', 'HF'], + ['H1163,95.000000,x,1', 'EarTag', 1163], + ['H1163,95.000000,x,1', 'Age', 1114], + ['H1230,88.099998,x,1', 'EarTag', 1230], + ['H1230,88.099998,x,1', 'Age', 726], + ['H1230,88.099998,x,1', 'Strain', 'B6D2F1'], + ['H1230,88.099998,x,1', 'Condition', 'HF'], + ['H1251,24.100000,x,1', 'EarTag', 1251], + ['H1251,24.100000,x,1', 'Age', 711], + ['H1251,24.100000,x,1', 'Strain', 'C57BL/6J'], + ['H1251,24.100000,x,1', 'Condition', 'CD'], + ['H1292,24.900000,x,1', 'Strain', 'C57BL/6J'], + ['H1292,24.900000,x,1', 'Condition', 'CD'], + ['H1292,24.900000,x,1', 'EarTag', 1292], + ] + csv = get_trait_csv_sample_data(conn=mock_conn, + trait_name=10006, + phenotype_id=28409) + self.assertEqual(csv, """Strain Name,Value,SE,Count,Age,Condition,EarTag,Strain +H1044,25.900000,x,1,724,CD,x,BXD29 +H1096,23.700001,x,1,732,CD,x,BXD90 +H1163,95.000000,x,1,1114,HF,1163,x +H1230,88.099998,x,1,726,HF,1230,B6D2F1 +H1251,24.100000,x,1,711,CD,1251,C57BL/6J +H1292,24.900000,x,1,x,CD,1292,C57BL/6J +""") + + def test_get_trait_csv_sample_data_with_varying_case_attributes(self): + """Test that the correct csv is returned when case attributes are entirely +missing from same samples, and only one sample has a case attribute that is +not present in all other samples""" + mock_conn = mock.MagicMock() + with mock_conn.cursor() as cursor: + cursor.fetchall.return_value = [ + ['H1044,25.900000,x,1', 'x', 'x'], + ['H1096,23.700001,x,1', 'x', 'x'], + ['H1163,95.000000,x,1', 'x', 'x'], + ['H1230,88.099998,x,1', 'x', 'x'], + ['H1251,24.100000,x,1', 'x', 'x'], + ['H1292,24.900000,x,1', 'x', 'x'], + ] + csv = get_trait_csv_sample_data(conn=mock_conn, + trait_name=10006, + phenotype_id=28409) + self.assertEqual(csv, """Strain Name,Value,SE,Count +H1044,25.900000,x,1 +H1096,23.700001,x,1 +H1163,95.000000,x,1 +H1230,88.099998,x,1 +H1251,24.100000,x,1 +H1292,24.900000,x,1""") + + def test_get_trait_csv_sample_data_with_empty_case_attributes(self): + mock_conn = mock.MagicMock() + with mock_conn.cursor() as cursor: + cursor.fetchall.return_value = [ + ['H1044,25.900000,x,1', 'x', 'x'], + ['H1096,23.700001,x,1', 'Age', 732], + ['H1096,23.700001,x,1', 'Sex', "M"], + ['H1096,23.700001,x,1', 'Strain', 'BXD90'], + ['H1096,23.700001,x,1', 'Condition', 'CD'], + ['H1163,95.000000,x,1', 'Condition', 'HF'], + ['H1163,95.000000,x,1', 'EarTag', 1163], + ['H1163,95.000000,x,1', 'Age', 1114], + ['H1230,88.099998,x,1', 'EarTag', 1230], + ['H1230,88.099998,x,1', 'Age', 726], + ['H1230,88.099998,x,1', 'Strain', 'B6D2F1'], + ['H1230,88.099998,x,1', 'Condition', 'HF'], + ['H1251,24.100000,x,1', 'x', 'x'], + ['H1292,24.900000,x,1', 'Strain', 'C57BL/6J'], + ['H1292,24.900000,x,1', 'Condition', 'CD'], + ['H1292,24.900000,x,1', 'EarTag', 1292], + ] + csv = get_trait_csv_sample_data(conn=mock_conn, + trait_name=10006, + phenotype_id=28409) + self.assertEqual(csv, """Strain Name,Value,SE,Count,Age,Condition,EarTag,Sex,Strain +H1044,25.900000,x,1,x,x,x,x,x +H1096,23.700001,x,1,732,CD,x,M,BXD90 +H1163,95.000000,x,1,1114,HF,1163,x,x +H1230,88.099998,x,1,726,HF,1230,x,B6D2F1 +H1251,24.100000,x,1,x,x,x,x,x +H1292,24.900000,x,1,x,CD,1292,x,C57BL/6J +""") |