about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Kabui 2021-03-31 22:29:37 +0300
committer Alexander Kabui 2021-03-31 22:29:37 +0300
commit f6c6851504f14a1a163b6eeb5e3653a5ec3f5ceb (patch)
tree f4a4bfc24055065357b7bb59e73ded19f5a97b35
parent b88d63ac06f157a97cc88bee0ea702949a5a0c64 (diff)
download genenetwork3-f6c6851504f14a1a163b6eeb5e3653a5ec3f5ceb.tar.gz
add datasets functions and endpoints
-rw-r--r-- gn3/app.py 2
-rw-r--r-- gn3/computations/datasets.py 98
-rw-r--r-- tests/unit/computations/test_datasets.py 83
3 files changed, 182 insertions, 1 deletions
diff --git a/gn3/app.py b/gn3/app.py
index c862f29..f0f35f9 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -8,6 +8,7 @@ from gn3.api.gemma import gemma
 from gn3.api.general import general
 from gn3.api.correlation import correlation
 from gn3.api.traits import trait
+from gn3.api.datasets import dataset
 
 
 def create_app(config: Union[Dict, str, None] = None) -> Flask:
@@ -30,4 +31,5 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
     app.register_blueprint(gemma, url_prefix="/api/gemma")
     app.register_blueprint(correlation, url_prefix="/api/correlation")
     app.register_blueprint(trait, url_prefix="/api/trait")
+    app.register_blueprint(dataset, url_prefix="/api/dataset")
     return app
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
index 6df5777..8e9d743 100644
--- a/gn3/computations/datasets.py
+++ b/gn3/computations/datasets.py
@@ -1,9 +1,16 @@
 """module contains the code all related to datasets"""
+import json
 from unittest import mock
 
 from typing import Optional
 from typing import List
 
+from dataclasses import dataclass
+import requests
+
+from gn3.experimental_db import database_connector
+from gn3.settings import GN2_BASE_URL
+
 
 def retrieve_trait_sample_data(dataset,
                                trait_name: str,
@@ -35,6 +42,10 @@ def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
     """this is the function that does the actual fetching of\
     results from the database"""
     cursor = database_instance.cursor()
+    _conn = database_connector
+    # conn, cursor = database_connector()
+    # cursor = conn.cursor()
+
     cursor.execute(formatted_query)
     results = cursor.fetchall()
 
@@ -87,7 +98,8 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
                 SELECT
                         Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                 FROM
-                        (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                        (ProbeSetData, ProbeSetFreeze,
+                         Strain, ProbeSet, ProbeSetXRef)
                 left join ProbeSetSE on
                         (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
                 left join NStrain on
@@ -108,3 +120,87 @@ def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
     dataset_query["ProbeSet"] = probeset_query
 
     return dataset_query.get(dataset_type)
+
+
+@dataclass
+class Dataset:
+    """Record of a dataset's name, type and id (the id defaults to -1)."""
+    name: Optional[str] = None
+    dataset_type: Optional[str] = None
+    dataset_id: int = -1
+
+
+def create_mrna_tissue_dataset(dataset_name, dataset_type):
+    """Build a `Dataset` for an mRNA assay, i.e. a quantitative assessment\
+    (assay) associated with an mRNA trait. This used to be called a\
+    probeset, but that term refers specifically to the Affymetrix\
+    platform and is far too specific."""
+
+    return Dataset(name=dataset_name, dataset_type=dataset_type)
+
+
+def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]:
+    """Return the dataset type for `dataset_name`, or None if it is unknown.
+
+    `redis_instance`, when supplied, is consulted first as a cache; on a
+    miss (or when no cache is given) the name-to-type mapping is fetched
+    from the GN2 api and the entry for `dataset_name` is looked up."""
+    if redis_instance is not None:
+        cached_type = redis_instance.get(dataset_name)
+        if cached_type:
+            return cached_type
+
+    return fetch_dataset_type_from_gn2_api(dataset_name).get(dataset_name)
+
+
+def fetch_dataset_type_from_gn2_api(dataset_name):
+    """Fetch the GN2 dropdown listing and return a dict mapping each\
+    dataset's first non-"None" field to its dataset type. Meant as the\
+    fallback for when redis does not hold the requested dataset type."""
+    # should only run once; NOTE(review): dataset_name never filters the result
+
+    dataset_structure = {}
+
+    map_dataset_to_new_type = {
+        "Phenotypes": "Publish",
+        "Genotypes": "Geno",
+        "MrnaTypes": "ProbeSet"
+    }
+
+    data = json.loads(requests.get(
+        GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content)
+    _name = dataset_name
+    for species in data['datasets']:
+        for group in data['datasets'][species]:
+            for dataset_type in data['datasets'][species][group]:
+                for dataset in data['datasets'][species][group][dataset_type]:
+                    # key is the first entry that is neither "None" nor None
+                    short_dataset_name = next(
+                        item for item in dataset if item != "None" and item is not None)
+                    # NOTE(review): unmapped types fall back to "MrnaTypes" - confirm
+                    dataset_structure[short_dataset_name] = map_dataset_to_new_type.get(
+                        dataset_type, "MrnaTypes")
+    return dataset_structure
+
+
+def dataset_creator_store(dataset_type):
+    """Return the function used to create a dataset of the given\
+    `dataset_type`. Only "ProbeSet" is registered; any other type\
+    raises KeyError."""
+
+    dataset_obj = {
+        "ProbeSet": create_mrna_tissue_dataset
+    }
+
+    return dataset_obj[dataset_type]
+
+
+def create_dataset(dataset_type=None, dataset_name: str = None):
+    """Create a dataset, looking the type up by name when none is given."""
+    if dataset_type is None:
+        dataset_type = dataset_type_getter(dataset_name)
+
+    dataset_creator = dataset_creator_store(dataset_type)
+    results = dataset_creator(
+        dataset_name=dataset_name, dataset_type=dataset_type)
+    return results
diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py
index 408f13b..7135041 100644
--- a/tests/unit/computations/test_datasets.py
+++ b/tests/unit/computations/test_datasets.py
@@ -1,10 +1,18 @@
 """module contains tests from datasets"""
+import json
+
 from unittest import TestCase
 from unittest import mock
 
+from collections import namedtuple
+
 from gn3.computations.datasets import retrieve_trait_sample_data
 from gn3.computations.datasets import get_query_for_dataset_sample
 from gn3.computations.datasets import fetch_from_db_sample_data
+from gn3.computations.datasets import create_dataset
+from gn3.computations.datasets import dataset_creator_store
+from gn3.computations.datasets import dataset_type_getter
+from gn3.computations.datasets import fetch_dataset_type_from_gn2_api
 
 
 class TestDatasets(TestCase):
@@ -74,3 +82,78 @@ class TestDatasets(TestCase):
         fetch_results = fetch_from_db_sample_data(mock_pheno_query, database)
 
         self.assertEqual(fetch_results, database_results)
+
+    @mock.patch("gn3.computations.datasets.dataset_creator_store")
+    @mock.patch("gn3.computations.datasets.dataset_type_getter")
+    def test_create_dataset(self, mock_dataset_type, mock_store):
+        """test that create_dataset builds the required dataset, which\
+        can be a published phenotype, genotype, microarray or\
+        user-defined one -> temp"""
+        probe_name = "HC_M2_0606_P"
+        probe_type = "ProbeSet"
+
+        mock_dataset_creator = namedtuple(
+            'ProbeSet', ["dataset_name", "dataset_type"])
+
+        mock_store.return_value = mock_dataset_creator
+        mock_dataset_type.return_value = probe_type
+        dataset = create_dataset(
+            dataset_type=None, dataset_name=probe_name)
+
+        self.assertEqual(dataset.dataset_name, probe_name)
+        self.assertEqual(dataset.dataset_type, probe_type)
+
+    def test_dataset_creator_store(self):
+        """test that the store returns the\
+        function used to create a given\
+        dataset type"""
+        results = dataset_creator_store("ProbeSet")
+
+        self.assertTrue(results)
+
+    def test_dataset_type_getter(self):
+        """test for fetching the type of a dataset given\
+        the dataset name when it is cached in redis"""
+
+        redis_instance = mock.Mock()
+        # found in redis
+        redis_instance.get.return_value = "ProbeSet"
+        results = dataset_type_getter("HC_M2_0_P", redis_instance)
+        self.assertEqual(results, "ProbeSet")
+
+    @mock.patch("gn3.computations.datasets.requests")
+    def test_fetch_dataset_type_from_gn2_api(self, mock_request):
+        """test that fetching all datasets from the gn2 api\
+        maps each dataset to its new type so that it can be\
+        stored in redis"""
+
+        expected_json_results = {"datasets": {
+            "arabidopsis": {
+                "BayXSha": {
+                    "Genotypes": [
+                        [
+                            "None",
+                            "BayXShaGeno",
+                            "BayXSha Genotypes"
+                        ]
+                    ],
+                    "Phenotypes": [
+                        [
+                            "642",
+                            "BayXShaPublish",
+                            "BayXSha Published Phenotypes"
+                        ]
+                    ]
+                }
+            }
+        }}
+
+        request_results = json.dumps(expected_json_results)
+        mock_request.get.return_value.content = request_results
+        results = fetch_dataset_type_from_gn2_api("HC_M2_0_P")
+        expected_results = {
+            "BayXShaGeno": "Geno",
+            "642": "Publish"
+        }
+
+        self.assertEqual(expected_results, results)