From b88d63ac06f157a97cc88bee0ea702949a5a0c64 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 30 Mar 2021 23:58:02 +0300 Subject: refactor retrieve trait sample data and tests --- gn3/api/traits.py | 2 +- gn3/computations/datasets.py | 62 +++++++++++++++++++++++++++----- tests/unit/computations/test_datasets.py | 8 ++++- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/gn3/api/traits.py b/gn3/api/traits.py index b3ddcc0..a9ff906 100644 --- a/gn3/api/traits.py +++ b/gn3/api/traits.py @@ -20,7 +20,7 @@ def home(): def create_trait(trait_name, dataset_name): """endpoints for creating trait first should\ call the endpoint for creating the trait only\ - trait data is the sample data""" + also acts as endpoints for fetching trait data""" trait_dataset = mock.Mock() # xtodo should replace this with calling epoints trait_dataset.name = dataset_name diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py index 92a7bdf..6df5777 100644 --- a/gn3/computations/datasets.py +++ b/gn3/computations/datasets.py @@ -5,24 +5,30 @@ from typing import Optional from typing import List -def retrieve_trait_sample_data(dataset_id, dataset_type: str, trait_name: str) -> List: +def retrieve_trait_sample_data(dataset, + trait_name: str, + group_species_id=None,) -> List: """given the dataset id and trait_name fetch the\ sample_name,value from the dataset""" # should pass the db as arg all do a setup - _func_args = (dataset_id, dataset_type, trait_name) + (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get( + "id"), dataset.get("type")) + dataset_query = get_query_for_dataset_sample(dataset_type) + results = [] + sample_query_values = { + "Publish": (trait_name, dataset_id), + "Geno": (group_species_id, trait_name, dataset_name), + "ProbeSet": (trait_name, dataset_name) + } if dataset_query: - if dataset_type == "Publish": - formatted_query = dataset_query % (trait_name, dataset_id) - results = fetch_from_db_sample_data(formatted_query, mock.Mock()) - return results - - return [] + formatted_query = dataset_query % sample_query_values[dataset_type] + results = fetch_from_db_sample_data(formatted_query, mock.Mock()) - return [] + return results def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List: @@ -60,7 +66,45 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]: Order BY Strain.Name """ + geno_query = """ + SELECT + Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 + FROM + (GenoData, GenoFreeze, Strain, Geno, GenoXRef) + left join GenoSE on + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.Name = %s AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id + Order BY + Strain.Name + """ + + probeset_query = """ + SELECT + Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 + FROM + (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + left join ProbeSetSE on + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + left join NStrain on + (NStrain.DataId = ProbeSetData.Id AND + NStrain.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '%s' AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + Order BY + Strain.Name + """ dataset_query["Publish"] = pheno_query + dataset_query["Geno"] = geno_query + dataset_query["ProbeSet"] = probeset_query return dataset_query.get(dataset_type) diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py index 2cd58b2..408f13b 100644 --- a/tests/unit/computations/test_datasets.py +++ b/tests/unit/computations/test_datasets.py @@ -18,12 +18,18 @@ class TestDatasets(TestCase): dataset_id = "HC_M2_0606_P&" dataset_type = "Publish" + dataset = { + "id": dataset_id, + "type": dataset_type, + "name": dataset_id + } + fetch_results = [('BXD32', 8.001, None, None, 'BXD32')] mock_fetch_sample_results.return_value = fetch_results results = retrieve_trait_sample_data( - dataset_id, dataset_type, trait_name) + dataset, trait_name) self.assertEqual(mock_fetch_sample_results.call_count, 1) self.assertEqual(results, fetch_results) -- cgit v1.2.3