From f6c6851504f14a1a163b6eeb5e3653a5ec3f5ceb Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 31 Mar 2021 22:29:37 +0300 Subject: add datasets functions and endpoints --- gn3/app.py | 2 + gn3/computations/datasets.py | 98 +++++++++++++++++++++++++++++++- tests/unit/computations/test_datasets.py | 83 +++++++++++++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) diff --git a/gn3/app.py b/gn3/app.py index c862f29..f0f35f9 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -8,6 +8,7 @@ from gn3.api.gemma import gemma from gn3.api.general import general from gn3.api.correlation import correlation from gn3.api.traits import trait +from gn3.api.datasets import dataset def create_app(config: Union[Dict, str, None] = None) -> Flask: @@ -30,4 +31,5 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(gemma, url_prefix="/api/gemma") app.register_blueprint(correlation, url_prefix="/api/correlation") app.register_blueprint(trait, url_prefix="/api/trait") + app.register_blueprint(dataset, url_prefix="/api/dataset") return app diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py index 6df5777..8e9d743 100644 --- a/gn3/computations/datasets.py +++ b/gn3/computations/datasets.py @@ -1,9 +1,16 @@ """module contains the code all related to datasets""" +import json from unittest import mock from typing import Optional from typing import List +from dataclasses import dataclass +import requests + +from gn3.experimental_db import database_connector +from gn3.settings import GN2_BASE_URL + def retrieve_trait_sample_data(dataset, trait_name: str, @@ -35,6 +42,10 @@ def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List: """this is the function that does the actual fetching of\ results from the database""" cursor = database_instance.cursor() + _conn = database_connector + # conn, cursor = database_connector() + # cursor = conn.cursor() + cursor.execute(formatted_query) results = cursor.fetchall() @@ -87,7 +98,8 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]: SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 FROM - (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + (ProbeSetData, ProbeSetFreeze, + Strain, ProbeSet, ProbeSetXRef) left join ProbeSetSE on (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) left join NStrain on @@ -108,3 +120,87 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]: dataset_query["ProbeSet"] = probeset_query return dataset_query.get(dataset_type) + + +@dataclass +class Dataset: + """class for creating datasets""" + name: Optional[str] = None + dataset_type: Optional[str] = None + dataset_id: int = -1 + + +def create_mrna_tissue_dataset(dataset_name, dataset_type): + """an mrna assay is a quantitative assessment(assay) associated\ + with an mrna trait.This used to be called probeset,but that term\ + only referes specifically to the afffymetrix platform and is\ + far too speficified""" + + return Dataset(name=dataset_name, dataset_type=dataset_type) + + +def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]: + """given the dataset name fetch the type\ + of the dataset this in turn enables fetching\ + the creation of the correct object could utilize\ + redis for the case""" + + results = redis_instance.get(dataset_name, None) + + if results: + return results + + return fetch_dataset_type_from_gn2_api(dataset_name) + + +def fetch_dataset_type_from_gn2_api(dataset_name): + """this function is only called when the\ + the redis is empty and does have the specificied\ + dataset_type""" + # should only run once + + dataset_structure = {} + + map_dataset_to_new_type = { + "Phenotypes": "Publish", + "Genotypes": "Geno", + "MrnaTypes": "ProbeSet" + } + + data = json.loads(requests.get( + GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content) + _name = dataset_name + for species in data['datasets']: + for group in data['datasets'][species]: + for dataset_type in data['datasets'][species][group]: + for dataset in data['datasets'][species][group][dataset_type]: + # assumes the first is dataset_short_name + short_dataset_name = next( + item for item in dataset if item != "None" and item is not None) + + dataset_structure[short_dataset_name] = map_dataset_to_new_type.get( + dataset_type, "MrnaTypes") + return dataset_structure + + +def dataset_creator_store(dataset_type): + """function contains key value pairs for\ + the function need to be called to create\ + each dataset_type""" + + dataset_obj = { + "ProbeSet": create_mrna_tissue_dataset + } + + return dataset_obj[dataset_type] + + +def create_dataset(dataset_type=None, dataset_name: str = None): + """function for creating new dataset temp not implemented""" + if dataset_type is None: + dataset_type = dataset_type_getter(dataset_name) + + dataset_creator = dataset_creator_store(dataset_type) + results = dataset_creator( + dataset_name=dataset_name, dataset_type=dataset_type) + return results diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py index 408f13b..7135041 100644 --- a/tests/unit/computations/test_datasets.py +++ b/tests/unit/computations/test_datasets.py @@ -1,10 +1,18 @@ """module contains tests from datasets""" +import json + from unittest import TestCase from unittest import mock +from collections import namedtuple + from gn3.computations.datasets import retrieve_trait_sample_data from gn3.computations.datasets import get_query_for_dataset_sample from gn3.computations.datasets import fetch_from_db_sample_data +from gn3.computations.datasets import create_dataset +from gn3.computations.datasets import dataset_creator_store +from gn3.computations.datasets import dataset_type_getter +from gn3.computations.datasets import fetch_dataset_type_from_gn2_api class TestDatasets(TestCase): @@ -74,3 +82,78 @@ class TestDatasets(TestCase): fetch_results = fetch_from_db_sample_data(mock_pheno_query, database) self.assertEqual(fetch_results, database_results) + + @mock.patch("gn3.computations.datasets.dataset_creator_store") + @mock.patch("gn3.computations.datasets.dataset_type_getter") + def test_create_dataset(self, mock_dataset_type, mock_store): + """test function that creates/fetches required dataset\ + can either be published phenotype,genotype,Microarray or\ + user defined ->Temp""" + probe_name = "HC_M2_0606_P" + probe_type = "ProbeSet" + + mock_dataset_creator = namedtuple( + 'ProbeSet', ["dataset_name", "dataset_type"]) + + mock_store.return_value = mock_dataset_creator + mock_dataset_type.return_value = probe_type + dataset = create_dataset( + dataset_type=None, dataset_name=probe_name) + + self.assertEqual(dataset.dataset_name, probe_name) + self.assertEqual(dataset.dataset_type, probe_type) + + def test_dataset_creator_store(self): + """test for functions that actual + function to create differerent \ + datasets""" + results = dataset_creator_store("ProbeSet") + + self.assertTrue(results) + + def test_dataset_type_getter(self): + """test for fetching type of dataset given\ + the dataset name""" + + redis_instance = mock.Mock() + # found in redis + redis_instance.get.return_value = "ProbeSet" + results = dataset_type_getter("HC_M2_0_P", redis_instance) + self.assertEqual(results, "ProbeSet") + + @mock.patch("gn3.computations.datasets.requests") + def test_fetch_dataset_type_from_gn2_api(self, mock_request): + """test for function that test fetching\ + all datasets from gn2 api in order to store\ + in redis""" + + expected_json_results = {"datasets": { + "arabidopsis": { + "BayXSha": { + "Genotypes": [ + [ + "None", + "BayXShaGeno", + "BayXSha Genotypes" + ] + ], + "Phenotypes": [ + [ + "642", + "BayXShaPublish", + "BayXSha Published Phenotypes" + ] + ] + } + } + }} + + request_results = json.dumps(expected_json_results) + mock_request.get.return_value.content = request_results + results = fetch_dataset_type_from_gn2_api("HC_M2_0_P") + expected_results = { + "BayXShaGeno": "Geno", + "642": "Publish" + } + + self.assertEqual(expected_results, results) -- cgit v1.2.3