diff options
author | Muriithi Frederick Muriuki | 2021-08-05 08:40:49 +0300 |
---|---|---|
committer | Muriithi Frederick Muriuki | 2021-08-05 08:40:49 +0300 |
commit | 76ba5296c66e131301a9fdb692c3b2623f3331ed (patch) | |
tree | c450cf976687dfffce5cb7e07e557a87fa591dc0 | |
parent | f712da630c1a3642cb44b62c4b2b857373cd78d7 (diff) | |
download | genenetwork3-76ba5296c66e131301a9fdb692c3b2623f3331ed.tar.gz |
Build up trait_name items from full name
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
* The full name of a trait returned from search is made up of multiple
parts separated by "::". Split the full name into those parts, use them to
retrieve the appropriate data, and include them in the final trait_info
dictionary that is produced.
-rw-r--r-- | gn3/db/traits.py | 16 | ||||
-rw-r--r-- | tests/unit/db/test_traits.py | 46 |
2 files changed, 52 insertions, 10 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 29c91a6..9f89510 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -341,8 +341,18 @@ def set_riset_fields(trait_info, conn): **trait_info, "risetid": riid, "riset": "BXD" if riset == "BXD300" else riset} +def build_trait_name(trait_fullname): + name_parts = trait_fullname.split("::") + assert len(name_parts) >= 2, "Name format error" + return { + "trait_db": name_parts[0], + "trait_fullname": trait_fullname, + "trait_name": name_parts[1], + "cellid": name_parts[2] if len(name_parts) == 3 else "" + } + def retrieve_trait_info( - trait_type: str, trait_name: str, trait_dataset_id: int, + trait_type: str, trait_full_name: str, trait_dataset_id: int, trait_dataset_name: str, conn: Any, qtl=None): """Retrieves the trait information. @@ -351,6 +361,7 @@ def retrieve_trait_info( This function, or the dependent functions, might be incomplete as they are currently.""" # pylint: disable=[R0913] + trait = build_trait_name(trait_full_name) trait_info_function_table = { "Publish": retrieve_publish_trait_info, "ProbeSet": retrieve_probeset_trait_info, @@ -362,6 +373,7 @@ def retrieve_trait_info( lambda ti: set_riset_fields(ti, conn), lambda ti: set_homologene_id_field(ti, conn), lambda ti: {"type": trait_type, **ti}, + lambda ti: {**ti, **trait}, set_haveinfo_field) trait_post_processing_functions_table = { @@ -377,7 +389,7 @@ def retrieve_trait_info( return retrieve_info( { - "trait_name": trait_name, + "trait_name": trait["trait_name"], "trait_dataset_id": trait_dataset_id, "trait_dataset_name":trait_dataset_name }, diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 1c481a2..39d7a31 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,6 +1,7 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase from gn3.db.traits import ( + build_trait_name, set_riset_fields, set_haveinfo_field, update_sample_data, @@ -155,18 +156,47 @@ class 
TestTraitsDBFunctions(TestCase): "SELECT name, description FROM Temp WHERE Name = %(trait_name)s", trait_source) + def test_build_trait_name_with_good_fullnames(self): + for fullname, expected in [ + ["testdb::testname", + {"trait_db": "testdb", "trait_name": "testname", "cellid": "", + "trait_fullname": "testdb::testname"}], + ["testdb::testname::testcell", + {"trait_db": "testdb", "trait_name": "testname", + "cellid": "testcell", + "trait_fullname": "testdb::testname::testcell"}]]: + with self.subTest(fullname=fullname): + self.assertEqual(build_trait_name(fullname), expected) + + def test_build_trait_name_with_bad_fullnames(self): + for fullname in ["", "test", "test:test"]: + with self.subTest(fullname=fullname): + with self.assertRaises(AssertionError, msg="Name format error"): + build_trait_name(fullname) + def test_retrieve_trait_info(self): """Test that information on traits is retrieved as appropriate.""" for trait_type, trait_name, trait_dataset_id, trait_dataset_name, expected in [ - ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName", + ["Publish", "pubDb::PublishTraitName::pubCell", 1, + "PublishDatasetTraitName", {"haveinfo": 0, "homologeneid": None, "type": "Publish", - "confidential": 0}], - ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet"}], - ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Geno"}], - ["Temp", "TempTraitName", 4, "TempDatasetTraitName", - {"haveinfo": 0, "homologeneid": None, "type": "Temp"}]]: + "confidential": 0, "trait_db": "pubDb", + "trait_name": "PublishTraitName", "cellid": "pubCell", + "trait_fullname": "pubDb::PublishTraitName::pubCell"}], + ["ProbeSet", "prbDb::ProbeSetTraitName::prbCell", 2, + "ProbeSetDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "ProbeSet", + "trait_fullname": "prbDb::ProbeSetTraitName::prbCell", + "trait_db": "prbDb", "trait_name": 
"ProbeSetTraitName", + "cellid": "prbCell"}], + ["Geno", "genDb::GenoTraitName", 3, "GenoDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Geno", + "trait_fullname": "genDb::GenoTraitName", "trait_db": "genDb", + "trait_name": "GenoTraitName", "cellid": ""}], + ["Temp", "tmpDb::TempTraitName", 4, "TempDatasetTraitName", + {"haveinfo": 0, "homologeneid": None, "type": "Temp", + "trait_fullname": "tmpDb::TempTraitName", "trait_db": "tmpDb", + "trait_name": "TempTraitName", "cellid": ""}]]: db_mock = mock.MagicMock() with self.subTest(trait_type=trait_type): with db_mock.cursor() as cursor: |