From 76ba5296c66e131301a9fdb692c3b2623f3331ed Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 5 Aug 2021 08:40:49 +0300 Subject: Build up trait_name items from full name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * The full name of the traits from search contains multiple parts to it, and as such, we use it to retrieve the appropriate data and set it up in the final trait_info dictionary that is produced. --- gn3/db/traits.py | 16 +++++++++++++-- tests/unit/db/test_traits.py | 46 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 29c91a6..9f89510 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -341,8 +341,18 @@ def set_riset_fields(trait_info, conn): **trait_info, "risetid": riid, "riset": "BXD" if riset == "BXD300" else riset} +def build_trait_name(trait_fullname): + name_parts = trait_fullname.split("::") + assert len(name_parts) >= 2, "Name format error" + return { + "trait_db": name_parts[0], + "trait_fullname": trait_fullname, + "trait_name": name_parts[1], + "cellid": name_parts[2] if len(name_parts) == 3 else "" + } + def retrieve_trait_info( - trait_type: str, trait_name: str, trait_dataset_id: int, + trait_type: str, trait_full_name: str, trait_dataset_id: int, trait_dataset_name: str, conn: Any, qtl=None): """Retrieves the trait information. @@ -351,6 +361,7 @@ def retrieve_trait_info( This function, or the dependent functions, might be incomplete as they are currently.""" # pylint: disable=[R0913] + trait = build_trait_name(trait_full_name) trait_info_function_table = { "Publish": retrieve_publish_trait_info, "ProbeSet": retrieve_probeset_trait_info, @@ -362,6 +373,7 @@ def retrieve_trait_info( lambda ti: set_riset_fields(ti, conn), lambda ti: set_homologene_id_field(ti, conn), lambda ti: {"type": trait_type, **ti}, + lambda ti: {**ti, **trait}, set_haveinfo_field) trait_post_processing_functions_table = { @@ -377,7 +389,7 @@ def retrieve_trait_info( return retrieve_info( { - "trait_name": trait_name, + "trait_name": trait["trait_name"], "trait_dataset_id": trait_dataset_id, "trait_dataset_name":trait_dataset_name }, diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 1c481a2..39d7a31 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,6 +1,7 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( + build_trait_name, set_riset_fields, set_haveinfo_field, update_sample_data, @@ -155,18 +156,47 @@ class TestTraitsDBFunctions(TestCase): "SELECT name, description FROM Temp WHERE Name = %(trait_name)s", trait_source) + def test_build_trait_name_with_good_fullnames(self): + for fullname, expected in [ + ["testdb::testname", + {"trait_db": "testdb", "trait_name": "testname", "cellid": "", + "trait_fullname": "testdb::testname"}], + ["testdb::testname::testcell", + {"trait_db": "testdb", "trait_name": "testname", + "cellid": "testcell", + "trait_fullname": "testdb::testname::testcell"}]]: + with self.subTest(fullname=fullname): + self.assertEqual(build_trait_name(fullname), expected) + + def test_build_trait_name_with_bad_fullnames(self): + for fullname in ["", "test", "test:test"]: + with self.subTest(fullname=fullname): + with self.assertRaises(AssertionError, msg="Name format error"): + build_trait_name(fullname) + def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" for trait_type, trait_name, trait_dataset_id, trait_dataset_name, expected in [ - ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName", + ["Publish", "pubDb::PublishTraitName::pubCell", 1, + "PublishDatasetTraitName", {"haveinfo": 0, "homologeneid": None, "type": "Publish", - "confidential": 0}], - ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet"}], - ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Geno"}], - ["Temp", "TempTraitName", 4, "TempDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Temp"}]]: + "confidential": 0, "trait_db": "pubDb", + "trait_name": "PublishTraitName", "cellid": "pubCell", + "trait_fullname": "pubDb::PublishTraitName::pubCell"}], + ["ProbeSet", "prbDb::ProbeSetTraitName::prbCell", 2, + "ProbeSetDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet", + "trait_fullname": "prbDb::ProbeSetTraitName::prbCell", + "trait_db": "prbDb", "trait_name": "ProbeSetTraitName", + "cellid": "prbCell"}], + ["Geno", "genDb::GenoTraitName", 3, "GenoDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Geno", + "trait_fullname": "genDb::GenoTraitName", "trait_db": "genDb", + "trait_name": "GenoTraitName", "cellid": ""}], + ["Temp", "tmpDb::TempTraitName", 4, "TempDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Temp", + "trait_fullname": "tmpDb::TempTraitName", "trait_db": "tmpDb", + "trait_name": "TempTraitName", "cellid": ""}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: -- cgit v1.2.3