aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gn3/app.py2
-rw-r--r--gn3/computations/datasets.py98
-rw-r--r--tests/unit/computations/test_datasets.py83
3 files changed, 182 insertions, 1 deletions
diff --git a/gn3/app.py b/gn3/app.py
index c862f29..f0f35f9 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -8,6 +8,7 @@ from gn3.api.gemma import gemma
from gn3.api.general import general
from gn3.api.correlation import correlation
from gn3.api.traits import trait
+from gn3.api.datasets import dataset
def create_app(config: Union[Dict, str, None] = None) -> Flask:
@@ -30,4 +31,5 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
app.register_blueprint(gemma, url_prefix="/api/gemma")
app.register_blueprint(correlation, url_prefix="/api/correlation")
app.register_blueprint(trait, url_prefix="/api/trait")
+ app.register_blueprint(dataset, url_prefix="/api/dataset")
return app
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
index 6df5777..8e9d743 100644
--- a/gn3/computations/datasets.py
+++ b/gn3/computations/datasets.py
@@ -1,9 +1,16 @@
"""module contains the code all related to datasets"""
+import json
from unittest import mock
from typing import Optional
from typing import List
+from dataclasses import dataclass
+import requests
+
+from gn3.experimental_db import database_connector
+from gn3.settings import GN2_BASE_URL
+
def retrieve_trait_sample_data(dataset,
trait_name: str,
@@ -35,6 +42,10 @@ def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
"""this is the function that does the actual fetching of\
results from the database"""
cursor = database_instance.cursor()
+ _conn = database_connector
+ # conn, cursor = database_connector()
+ # cursor = conn.cursor()
+
cursor.execute(formatted_query)
results = cursor.fetchall()
@@ -87,7 +98,8 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
SELECT
Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
FROM
- (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+ (ProbeSetData, ProbeSetFreeze,
+ Strain, ProbeSet, ProbeSetXRef)
left join ProbeSetSE on
(ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
left join NStrain on
@@ -108,3 +120,87 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
dataset_query["ProbeSet"] = probeset_query
return dataset_query.get(dataset_type)
+
+
+@dataclass
+class Dataset:
+    """Plain record holding the identifying details of a dataset.
+
+    Every field has a default, so ``Dataset()`` is valid: ``name`` and
+    ``dataset_type`` default to ``None`` and ``dataset_id`` to -1.
+    """
+    name: Optional[str] = None
+    dataset_type: Optional[str] = None
+    dataset_id: int = -1
+
+
+def create_mrna_tissue_dataset(dataset_name, dataset_type):
+    """Build the ``Dataset`` record for an mRNA assay dataset.
+
+    An mRNA assay is a quantitative assessment (assay) associated with an
+    mRNA trait.  This used to be called a "probeset", but that term refers
+    specifically to the Affymetrix platform and is far too specific.
+
+    The given name and type are stored on the returned ``Dataset``; its
+    ``dataset_id`` is left at the class default.
+    """
+    mrna_dataset = Dataset(dataset_type=dataset_type, name=dataset_name)
+    return mrna_dataset
+
+
+def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]:
+    """Return the dataset type (e.g. "ProbeSet") for ``dataset_name``.
+
+    The cache is consulted first when one is supplied.  On a miss, or when
+    no cache is given, the name is looked up in the mapping built by
+    ``fetch_dataset_type_from_gn2_api``.
+
+    :param dataset_name: short name of the dataset to look up.
+    :param redis_instance: optional cache object exposing ``get(key)``
+        (presumably a redis client -- confirm against the caller).
+    :return: the dataset type, or ``None`` when the name is unknown.
+    """
+    if redis_instance is not None:
+        # single-argument ``get`` is accepted by a redis client and by a
+        # plain dict alike; both yield ``None`` on a miss
+        cached_type = redis_instance.get(dataset_name)
+        if cached_type:
+            return cached_type
+
+    # the GN2 helper returns a {short_name: type} mapping covering every
+    # dataset, so pick out the requested entry rather than returning it all
+    return fetch_dataset_type_from_gn2_api(dataset_name).get(dataset_name)
+
+
+def fetch_dataset_type_from_gn2_api(dataset_name):  # pylint: disable=unused-argument
+    """Build a ``{dataset_short_name: dataset_type}`` mapping from GN2.
+
+    Requests ``/api/v_pre1/gen_dropdown`` under ``GN2_BASE_URL`` and walks
+    the species -> group -> type -> dataset nesting of its ``datasets``
+    entry.  Intended as the fallback for when redis does not hold the
+    dataset type, so it should only need to run once.
+
+    :param dataset_name: currently unused -- the mapping covers every
+        dataset in the menu.  Kept so existing callers keep working.
+    :return: dict keyed by the first usable item of each dataset entry,
+        with values "Publish", "Geno" or "ProbeSet".
+    """
+    # GN2 menu labels -> dataset type names used by this module
+    map_dataset_to_new_type = {
+        "Phenotypes": "Publish",
+        "Genotypes": "Geno",
+        "MrnaTypes": "ProbeSet"
+    }
+
+    response = requests.get(
+        GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5)
+    menu = json.loads(response.content)
+
+    dataset_structure = {}
+    for groups in menu['datasets'].values():
+        for dataset_types in groups.values():
+            for dataset_type, datasets in dataset_types.items():
+                # NOTE(review): labels outside the map are presumably mRNA
+                # tissue names, hence the "ProbeSet" fallback.  The old
+                # fallback "MrnaTypes" is a menu label, not a type that
+                # dataset_creator_store can resolve -- confirm.
+                new_type = map_dataset_to_new_type.get(
+                    dataset_type, "ProbeSet")
+                for dataset in datasets:
+                    # assumes the first usable item is dataset_short_name
+                    short_dataset_name = next(
+                        (item for item in dataset
+                         if item != "None" and item is not None),
+                        None)
+                    if short_dataset_name is None:
+                        # nothing to key this entry on; skip it instead of
+                        # letting next() raise StopIteration
+                        continue
+                    dataset_structure[short_dataset_name] = new_type
+    return dataset_structure
+
+
+def dataset_creator_store(dataset_type):
+    """Look up the function that creates datasets of ``dataset_type``.
+
+    Only "ProbeSet" is registered; any other type raises ``KeyError``.
+    """
+    creators = dict(ProbeSet=create_mrna_tissue_dataset)
+    return creators[dataset_type]
+
+
+def create_dataset(dataset_type=None, dataset_name: str = None):
+    """Create the dataset object for ``dataset_name``.
+
+    When ``dataset_type`` is not given it is resolved from the name via
+    ``dataset_type_getter``.  The creator that ``dataset_creator_store``
+    returns for that type is then called to build the dataset.
+    """
+    resolved_type = (
+        dataset_type_getter(dataset_name) if dataset_type is None
+        else dataset_type)
+    build_dataset = dataset_creator_store(resolved_type)
+    return build_dataset(
+        dataset_name=dataset_name, dataset_type=resolved_type)
diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py
index 408f13b..7135041 100644
--- a/tests/unit/computations/test_datasets.py
+++ b/tests/unit/computations/test_datasets.py
@@ -1,10 +1,18 @@
"""module contains tests from datasets"""
+import json
+
from unittest import TestCase
from unittest import mock
+from collections import namedtuple
+
from gn3.computations.datasets import retrieve_trait_sample_data
from gn3.computations.datasets import get_query_for_dataset_sample
from gn3.computations.datasets import fetch_from_db_sample_data
+from gn3.computations.datasets import create_dataset
+from gn3.computations.datasets import dataset_creator_store
+from gn3.computations.datasets import dataset_type_getter
+from gn3.computations.datasets import fetch_dataset_type_from_gn2_api
class TestDatasets(TestCase):
@@ -74,3 +82,78 @@ class TestDatasets(TestCase):
fetch_results = fetch_from_db_sample_data(mock_pheno_query, database)
self.assertEqual(fetch_results, database_results)
+
+    @mock.patch("gn3.computations.datasets.dataset_creator_store")
+    @mock.patch("gn3.computations.datasets.dataset_type_getter")
+    def test_create_dataset(self, mock_dataset_type, mock_store):
+        """create_dataset resolves the type from the name when none is
+        given, then builds the dataset with the creator from the store.
+        The dataset may be a published phenotype, genotype, microarray or
+        user-defined (temp) one; only the ProbeSet path is exercised here"""
+        expected_name, expected_type = "HC_M2_0606_P", "ProbeSet"
+        # a namedtuple stands in for the real creator: it accepts the same
+        # keyword arguments and exposes them back as attributes
+        mock_store.return_value = namedtuple(
+            'ProbeSet', ["dataset_name", "dataset_type"])
+        mock_dataset_type.return_value = expected_type
+
+        created = create_dataset(
+            dataset_type=None, dataset_name=expected_name)
+
+        self.assertEqual(created.dataset_name, expected_name)
+        self.assertEqual(created.dataset_type, expected_type)
+
+    def test_dataset_creator_store(self):
+        """the store returns the function used to create datasets of the
+        requested type"""
+        creator = dataset_creator_store("ProbeSet")
+
+        # a bare truthiness check would accept any non-empty value; the
+        # store's contract is to hand back something that can be called
+        self.assertTrue(callable(creator))
+
+    def test_dataset_type_getter(self):
+        """the type held in redis for a dataset name is returned as-is
+        when redis has an entry for that name"""
+        cache = mock.Mock()
+        cache.get.return_value = "ProbeSet"  # simulate a redis hit
+
+        self.assertEqual(
+            dataset_type_getter("HC_M2_0_P", cache), "ProbeSet")
+
+    @mock.patch("gn3.computations.datasets.requests")
+    def test_fetch_dataset_type_from_gn2_api(self, mock_request):
+        """the GN2 dropdown menu is flattened into a mapping of dataset
+        short name to dataset type (the data later stored in redis)"""
+        # trimmed-down copy of the payload shape the function walks:
+        # datasets -> species -> group -> type -> list of dataset entries
+        gn2_menu = {"datasets": {"arabidopsis": {"BayXSha": {
+            "Genotypes": [
+                ["None", "BayXShaGeno", "BayXSha Genotypes"]
+            ],
+            "Phenotypes": [
+                ["642", "BayXShaPublish", "BayXSha Published Phenotypes"]
+            ]
+        }}}}
+        mock_request.get.return_value.content = json.dumps(gn2_menu)
+
+        fetched = fetch_dataset_type_from_gn2_api("HC_M2_0_P")
+
+        self.assertEqual({"BayXShaGeno": "Geno", "642": "Publish"}, fetched)