aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Kabui2021-05-02 23:54:56 +0300
committerAlexander Kabui2021-05-02 23:54:56 +0300
commit9f24b15064bcebcda9cf2164ab7b7e89644e3103 (patch)
tree7c36e4fd5b08720cfd0805352d56ca7cb1fc757b
parentf0ccff2a90d760fc0b268e715e0c6c673ff64e15 (diff)
downloadgenenetwork3-9f24b15064bcebcda9cf2164ab7b7e89644e3103.tar.gz
delete dataset and trait files
-rw-r--r--gn3/api/datasets.py64
-rw-r--r--gn3/api/traits.py53
-rw-r--r--gn3/computations/datasets.py371
-rw-r--r--gn3/computations/traits.py56
-rw-r--r--tests/integration/test_datasets.py41
-rw-r--r--tests/integration/test_traits.py72
-rw-r--r--tests/unit/computations/test_datasets.py219
-rw-r--r--tests/unit/computations/test_trait.py84
8 files changed, 0 insertions, 960 deletions
diff --git a/gn3/api/datasets.py b/gn3/api/datasets.py
deleted file mode 100644
index 2d13120..0000000
--- a/gn3/api/datasets.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""this module contains code for creating datasets"""
-from flask import Blueprint
-from flask import jsonify
-
-from gn3.computations.datasets import create_dataset
-from gn3.computations.datasets import get_traits_data
-from gn3.computations.datasets import get_probeset_trait_data
-
-from gn3.db_utils import database_connector
-
-
-dataset = Blueprint("dataset", __name__)
-
-
-@dataset.route("/create/<dataset_name>/")
-@dataset.route("/create/<dataset_name>/<dataset_type>")
-def create_dataset_api(dataset_name, dataset_type=None):
- """Endpoint of creating dataset"""
-
- new_dataset = create_dataset(
- dataset_type=dataset_type, dataset_name=dataset_name)
-
- results = {
- "dataset": new_dataset
- }
- return jsonify(results)
-
-
-@dataset.route("/fetch_traits_data/<dataset_name>/<dataset_type>")
-def fetch_traits_data(dataset_name, dataset_type):
- """Endpoint for fetching Trait data"""
- # should fetch this(temp)
- trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15,
- 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31,
- 35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115,
- 116, 117, 118, 119, 120, 919, 147,
- 121, 40, 41, 124, 125, 128, 135, 129, 130, 131,
- 132, 134, 138, 139, 140, 141, 142, 144,
- 145, 148, 149, 920, 922, 2, 3, 1, 1100]
-
- conn, _cursor = database_connector()
- results = get_traits_data(sample_ids=trait_sample_ids, database_instance=conn,
- dataset_name=dataset_name, dataset_type=dataset_type)
- conn.close()
-
- return jsonify({"results": results})
-
-
-@dataset.route("/fetch_probeset_data/<dataset_name>")
-def fetch_probeset_data(dataset_name):
- """Endpoint for fetching probeset trait data"""
- trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15,
- 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31,
- 35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115,
- 116, 117, 118, 119, 120, 919, 147,
- 121, 40, 41, 124, 125, 128, 135, 129, 130, 131,
- 132, 134, 138, 139, 140, 141, 142, 144,
- 145, 148, 149, 920, 922, 2, 3, 1, 1100]
-
- conn, _cursor = database_connector()
-
- results = get_probeset_trait_data(trait_sample_ids, conn, dataset_name)
-
- return jsonify({"results": results})
diff --git a/gn3/api/traits.py b/gn3/api/traits.py
deleted file mode 100644
index 002a281..0000000
--- a/gn3/api/traits.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""this module contains the all endpoints for traits"""
-from unittest import mock
-
-from flask import Blueprint
-from flask import jsonify
-from flask import request
-
-from gn3.computations.traits import fetch_trait
-from gn3.computations.traits import get_trait_info_data
-from gn3.db_utils import database_connector
-
-trait = Blueprint("trait", __name__)
-
-
-@trait.route("/<string:trait_name>/<string:dataset_name>")
-def create_trait(trait_name, dataset_name):
- """Endpoint for creating trait and fetching strain\
- values"""
-
- # xtodo replace the object at most this endpoint
- # requires dataset_type,dataset_name ,dataset_id
- trait_dataset = {
- "name": dataset_name,
- "id": 12,
- "type": "ProbeSet" # temp values
- }
- conn, _cursor = database_connector()
-
- trait_results = fetch_trait(dataset=trait_dataset,
- trait_name=trait_name,
- database=conn)
-
- conn.close()
-
- return jsonify(trait_results)
-
-
-@trait.route("/trait_info/<string:trait_name>", methods=["POST"])
-def fetch_trait_info(trait_name):
- """Api endpoint for fetching the trait info \
- expects the trait and trait dataset to have\
- been created """
- data = request.get_json()
-
- trait_dataset = data["trait_dataset"]
- trait_data = data["trait"]
- _trait_name = trait_name # should be used as key to return results
-
- database_instance = mock.Mock()
-
- results = get_trait_info_data(trait_dataset, trait_data, database_instance)
-
- return jsonify(results)
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
deleted file mode 100644
index b69583e..0000000
--- a/gn3/computations/datasets.py
+++ /dev/null
@@ -1,371 +0,0 @@
-"""module contains the code all related to datasets"""
-import json
-from math import ceil
-from collections import defaultdict
-
-from typing import Optional
-from typing import List
-
-from dataclasses import dataclass
-from MySQLdb import escape_string # type: ignore
-
-import requests
-from gn3.settings import GN2_BASE_URL
-
-
-def retrieve_trait_sample_data(dataset,
- trait_name: str,
- database,
- group_species_id=None) -> List:
- """given the dataset id and trait_name fetch the\
- sample_name,value from the dataset"""
-
- # should pass the db as arg all do a setup
-
- (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get(
- "id"), dataset.get("type"))
-
- dataset_query = get_query_for_dataset_sample(dataset_type)
- results = []
- sample_query_values = {
- "Publish": (trait_name, dataset_id),
- "Geno": (group_species_id, trait_name, dataset_name),
- "ProbeSet": (trait_name, dataset_name)
- }
-
- if dataset_query:
- formatted_query = dataset_query % sample_query_values[dataset_type]
-
- results = fetch_from_db_sample_data(formatted_query, database)
-
- return results
-
-
-def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
- """this is the function that does the actual fetching of\
- results from the database"""
- try:
- cursor = database_instance.cursor()
- cursor.execute(formatted_query)
- results = cursor.fetchall()
-
- except Exception as error:
- raise error
-
- cursor.close()
-
- return results
-
-
-def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
- """this functions contains querys for\
- getting sample data from the db depending in
- dataset"""
- dataset_query = {}
-
- pheno_query = """
- SELECT
- Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
- FROM
- (PublishData, Strain, PublishXRef, PublishFreeze)
- left join PublishSE on
- (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
- left join NStrain on
- (NStrain.DataId = PublishData.Id AND
- NStrain.StrainId = PublishData.StrainId)
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- geno_query = """
- SELECT
- Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
- FROM
- (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
- left join GenoSE on
- (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
- WHERE
- Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
- GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = %s AND
- GenoXRef.DataId = GenoData.Id AND
- GenoData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
-
- probeset_query = """
- SELECT
- Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
- FROM
- (ProbeSetData, ProbeSetFreeze,
- Strain, ProbeSet, ProbeSetXRef)
- left join ProbeSetSE on
- (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
- left join NStrain on
- (NStrain.DataId = ProbeSetData.Id AND
- NStrain.StrainId = ProbeSetData.StrainId)
- WHERE
- ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
- ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
- ProbeSetFreeze.Name = '%s' AND
- ProbeSetXRef.DataId = ProbeSetData.Id AND
- ProbeSetData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
-
- dataset_query["Publish"] = pheno_query
- dataset_query["Geno"] = geno_query
- dataset_query["ProbeSet"] = probeset_query
-
- return dataset_query.get(dataset_type)
-
-
-@dataclass
-class Dataset:
- """class for creating datasets"""
- name: Optional[str] = None
- dataset_type: Optional[str] = None
- dataset_id: int = -1
-
-
-def create_mrna_tissue_dataset(dataset_name, dataset_type):
- """an mrna assay is a quantitative assessment(assay) associated\
- with an mrna trait.This used to be called probeset,but that term\
- only referes specifically to the afffymetrix platform and is\
- far too speficified"""
-
- return Dataset(name=dataset_name, dataset_type=dataset_type)
-
-
-def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]:
- """given the dataset name fetch the type\
- of the dataset this in turn enables fetching\
- the creation of the correct object could utilize\
- redis for the case"""
-
- results = redis_instance.get(dataset_name, None)
-
- if results:
- return results
-
- return fetch_dataset_type_from_gn2_api(dataset_name)
-
-
-def fetch_dataset_type_from_gn2_api(dataset_name):
- """this function is only called when the\
- the redis is empty and does have the specificied\
- dataset_type"""
- # should only run once
-
- dataset_structure = {}
-
- map_dataset_to_new_type = {
- "Phenotypes": "Publish",
- "Genotypes": "Geno",
- "MrnaTypes": "ProbeSet"
- }
-
- data = json.loads(requests.get(
- GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content)
- _name = dataset_name
- for species in data['datasets']:
- for group in data['datasets'][species]:
- for dataset_type in data['datasets'][species][group]:
- for dataset in data['datasets'][species][group][dataset_type]:
- # assumes the first is dataset_short_name
- short_dataset_name = next(
- item for item in dataset if item != "None" and item is not None)
-
- dataset_structure[short_dataset_name] = map_dataset_to_new_type.get(
- dataset_type, "MrnaTypes")
- return dataset_structure
-
-
-def dataset_creator_store(dataset_type):
- """function contains key value pairs for\
- the function need to be called to create\
- each dataset_type"""
-
- dataset_obj = {
- "ProbeSet": create_mrna_tissue_dataset
- }
-
- return dataset_obj[dataset_type]
-
-
-def create_dataset(dataset_type=None, dataset_name: str = None):
- """function for creating new dataset temp not implemented"""
- if dataset_type is None:
- dataset_type = dataset_type_getter(dataset_name)
-
- dataset_creator = dataset_creator_store(dataset_type)
- results = dataset_creator(
- dataset_name=dataset_name, dataset_type=dataset_type)
- return results
-
-
-def fetch_dataset_sample_id(samplelist: List, database, species: str) -> dict:
- """fetch the strain ids from the db only if\
- it is in the samplelist"""
- # xtodo create an in clause for samplelist
-
- strain_query = """
- SELECT Strain.Name, Strain.Id FROM Strain, Species
- WHERE Strain.Name IN {}
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """
-
- database_cursor = database.cursor()
- database_cursor.execute(strain_query.format(samplelist, species))
-
- results = database_cursor.fetchall()
-
- return dict(results)
-
-
-def divide_into_chunks(the_list, number_chunks):
- """Divides a list into approximately number_chunks
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]"""
-
- length = len(the_list)
- if length == 0:
- return [[]]
-
- if length <= number_chunks:
- number_chunks = length
- chunk_size = int(ceil(length/number_chunks))
- chunks = []
-
- for counter in range(0, length, chunk_size):
- chunks.append(the_list[counter:counter+chunk_size])
- return chunks
-
-
-def escape(string_):
- """function escape sql value"""
- return escape_string(string_).decode('utf8')
-
-
-def mescape(*items) -> List:
- """multiple escape for query values"""
-
- return [escape_string(str(item)).decode('utf8') for item in items]
-
-
-def get_traits_data(sample_ids, database_instance, dataset_name, dataset_type):
- """function to fetch trait data"""
- # MySQL limits the number of tables that can be used in a join to 61,
- # so we break the sample ids into smaller chunks
- # Postgres doesn't have that limit, so we can get rid of this after we transition
-
- _trait_data = defaultdict(list)
- chunk_size = 61
- number_chunks = int(ceil(len(sample_ids) / chunk_size))
- for sample_ids_step in divide_into_chunks(sample_ids, number_chunks):
- if dataset_type == "Publish":
- full_dataset_type = "Phenotype"
- else:
- full_dataset_type = dataset_type
- temp = ['T%s.value' % item for item in sample_ids_step]
-
- if dataset_type == "Publish":
- query = "SELECT {}XRef.Id,".format(escape(dataset_type))
-
- else:
- query = "SELECT {}.Name,".format(escape(full_dataset_type))
-
- query += ', '.join(temp)
- query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(full_dataset_type,
- dataset_type,
- dataset_type))
- for item in sample_ids_step:
-
- query += """
- left join {}Data as T{} on T{}.Id = {}XRef.DataId
- and T{}.StrainId={}\n
- """.format(*mescape(dataset_type, item,
- item, dataset_type, item, item))
-
- if dataset_type == "Publish":
- query += """
- WHERE {}XRef.{}FreezeId = {}Freeze.Id
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(dataset_type, dataset_type,
- dataset_type, dataset_type,
- dataset_name, full_dataset_type,
- dataset_type, dataset_type,
- full_dataset_type))
-
- else:
- query += """
- WHERE {}XRef.{}FreezeId = {}Freeze.Id
- and {}Freeze.Name = '{}'
- and {}.Id = {}XRef.{}Id
- order by {}.Id
- """.format(*mescape(dataset_type, dataset_type,
- dataset_type, dataset_type,
- dataset_name, dataset_type,
- dataset_type, dataset_type,
- full_dataset_type))
-
- # print(query)
-
- _results = fetch_from_db_sample_data(query, database_instance)
-
- return []
-
-
-def get_probeset_trait_data(strain_ids: List, conn, dataset_name) -> dict:
- """function for getting trait data\
- for probeset data type similar to\
- get trait data only difference is that\
- it uses sub queries"""
-
- trait_data: dict = {}
-
- trait_id_name = {}
-
- traits_query = """
- SELECT ProbeSetXRef.DataId,ProbeSet.Name FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
- WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Name = '{}'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- order by ProbeSet.Id
- """.format(dataset_name)
-
- query = """
- SELECT * from ProbeSetData
- where StrainID in ({})
- and id in (SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
- WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
- and ProbeSetFreeze.Name = '{}'
- and ProbeSet.Id = ProbeSetXRef.ProbeSetId
- order by ProbeSet.Id)
- """.format(",".join(str(strain_id) for strain_id in strain_ids), dataset_name)
-
- with conn:
- cursor = conn.cursor()
- cursor.execute(query)
- _results = cursor.fetchall()
- cursor.execute(traits_query)
- trait_id_name = dict(cursor.fetchall())
-
- for trait_id, _strain_id, strain_value in _results:
- trait_name = trait_id_name[trait_id]
- if trait_data.get(trait_name):
- trait_data[trait_name].append(strain_value)
- else:
- trait_data[trait_name] = []
-
- trait_data[trait_name].append(strain_value)
-
- return trait_data
diff --git a/gn3/computations/traits.py b/gn3/computations/traits.py
deleted file mode 100644
index 1aa2970..0000000
--- a/gn3/computations/traits.py
+++ /dev/null
@@ -1,56 +0,0 @@
-"""module contains all operating related to traits"""
-from gn3.computations.datasets import retrieve_trait_sample_data
-
-
-def fetch_trait(dataset, trait_name: str, database) -> dict:
- """this method creates a trait by\
- fetching required data given the\
- dataset and trait_name"""
-
- created_trait = {
- "dataset": dataset,
- "trait_name": trait_name
- }
-
- trait_data = get_trait_sample_data(dataset, trait_name, database)
-
- created_trait["trait_data"] = trait_data
-
- return created_trait
-
-
-def get_trait_sample_data(trait_dataset, trait_name, database) -> dict:
- """first try to fetch the traits sample data from redis if that\
- try to fetch from the traits dataset redis is only used for\
- temp dataset type which is not used in this case """
-
- sample_results = retrieve_trait_sample_data(
- trait_dataset, trait_name, database)
-
- trait_data = {}
-
- for (name, sample_value, _variance, _numcase, _name2) in sample_results:
-
- trait_data[name] = sample_value
- return trait_data
-
-
-def get_trait_info_data(trait_dataset,
- trait_name: str,
- database_instance,
- get_qtl_info: bool = False) -> dict:
- """given a dataset and trait_name return a dict containing all info\
- regarding the get trait"""
-
- _temp_var_holder = (trait_dataset, trait_name,
- database_instance, get_qtl_info)
- trait_info_data = {
- "description": "",
- "chr": "",
- "locus": "",
- "mb": "",
- "abbreviation": "",
- "trait_display_name": ""
-
- }
- return trait_info_data
diff --git a/tests/integration/test_datasets.py b/tests/integration/test_datasets.py
deleted file mode 100644
index f97d970..0000000
--- a/tests/integration/test_datasets.py
+++ /dev/null
@@ -1,41 +0,0 @@
-"""This module contains integration tests for datasets"""
-from unittest import TestCase
-from unittest import mock
-
-from collections import namedtuple
-from gn3.app import create_app
-
-
-class DatasetIntegrationTests(TestCase):
- """class contains integration tests for datasets"""
-
- def setUp(self):
- self.app = create_app().test_client()
-
- @mock.patch("gn3.api.datasets.create_dataset")
- def test_create_dataset(self, mock_dataset):
- """Test for creating dataset object"""
- mock_dataset_creator = namedtuple(
- 'ProbeSet', ["dataset_name", "dataset_type"])
- new_dataset = mock_dataset_creator("HC_M2_0606_P", "ProbeSet")
- mock_dataset.return_value = new_dataset
- response = self.app.get(
- "/api/dataset/create/HC_M2_0606_P/", follow_redirects=True)
- mock_dataset.assert_called_once_with(
- dataset_type=None, dataset_name="HC_M2_0606_P")
- results = response.get_json()["dataset"]
- self.assertEqual(results[1], "ProbeSet")
- self.assertEqual(response.status_code, 200)
-
- @mock.patch("gn3.api.datasets.get_traits_data")
- @mock.patch("gn3.api.datasets.database_connector")
- def test_fetch_traits_data(self, mock_db, mock_get_trait_data):
- """Test api/dataset/fetch_traits_data/d_name/d_type"""
-
- mock_get_trait_data.return_value = {}
- mock_db.return_value = (mock.Mock(), mock.Mock())
- response = self.app.get(
- "/api/dataset/fetch_traits_data/Aging-Brain-UCIPublish/Publish", follow_redirects=True)
-
- self.assertEqual(response.status_code, 200)
- self.assertEqual(response.get_json(), {"results": {}})
diff --git a/tests/integration/test_traits.py b/tests/integration/test_traits.py
deleted file mode 100644
index 410ba22..0000000
--- a/tests/integration/test_traits.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""module contains integration tests for trait endpoints"""
-from unittest import TestCase
-from unittest import mock
-
-from gn3.app import create_app
-
-
-class TraitIntegrationTest(TestCase):
- """class contains integration tests for\
- traits"""
-
- def setUp(self):
- self.app = create_app().test_client()
-
- @mock.patch("gn3.api.traits.fetch_trait")
- @mock.patch("gn3.api.traits.database_connector")
- def test_create_trait(self, mock_database, mock_fetch_trait):
- """test the endpoint for creating traits\
- endpoint requires trait name and dataset name"""
- mock_database.return_value = (mock.Mock(), mock.Mock())
- trait_results = {
- "dataset": None,
- "trait_name": "1449593_at",
- "trait_data": {
- "BXD11": 8.464,
- "BXD12": 8.414,
- "BXD13": 8.753,
- "BXD15": 8.5,
- "BXD16": 8.832
- }
-
- }
- mock_fetch_trait.return_value = trait_results
-
- results = self.app.get(
- "/api/trait/1449593_at/HC_M2_0606_P", follow_redirects=True)
-
- trait_data = results.get_json()
-
- self.assertEqual(mock_database.call_count, 1)
- self.assertEqual(results.status_code, 200)
- self.assertEqual(trait_data, trait_results)
-
- @mock.patch("gn3.api.traits.get_trait_info_data")
- def test_retrieve_trait_info(self, mock_get_trait_info):
- """integration test for endpoints for retrieving\
- trait info expects the dataset of trait to have been
- created"""
-
- trait_post_data = {
- "trait": {"trait_name": ""},
- "trait_dataset": {"dataset_name": ""}
- }
-
- expected_api_results = {
- "description": "trait description",
- "chr": "",
- "locus": "",
- "mb": "",
- "abbreviation": "trait_abbreviation",
- "trait_display_name": "trait_name"
-
- }
- mock_get_trait_info.return_value = expected_api_results
-
- trait_info = self.app.post(
- "/api/trait/trait_info/144_at", json=trait_post_data, follow_redirects=True)
-
- trait_info_results = trait_info.get_json()
-
- self.assertEqual(trait_info.status_code, 200)
- self.assertEqual(trait_info_results, expected_api_results)
diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py
deleted file mode 100644
index f9e9c2b..0000000
--- a/tests/unit/computations/test_datasets.py
+++ /dev/null
@@ -1,219 +0,0 @@
-"""Module contains tests from datasets"""
-import json
-
-from unittest import TestCase
-from unittest import mock
-
-from collections import namedtuple
-
-from gn3.computations.datasets import retrieve_trait_sample_data
-from gn3.computations.datasets import get_query_for_dataset_sample
-from gn3.computations.datasets import fetch_from_db_sample_data
-from gn3.computations.datasets import create_dataset
-from gn3.computations.datasets import dataset_creator_store
-from gn3.computations.datasets import dataset_type_getter
-from gn3.computations.datasets import fetch_dataset_type_from_gn2_api
-from gn3.computations.datasets import fetch_dataset_sample_id
-from gn3.computations.datasets import divide_into_chunks
-from gn3.computations.datasets import get_traits_data
-
-
-class TestDatasets(TestCase):
- """Class contains tests for datasets"""
-
- @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
- def test_retrieve_trait_sample_data(self, mock_fetch_sample_results):
- """Test retrieving sample data\
- for trait from the dataset
- """
- trait_name = "1419792_at"
- dataset_id = "HC_M2_0606_P&"
- dataset_type = "Publish"
-
- database = mock.Mock()
-
- dataset = {
- "id": dataset_id,
- "type": dataset_type,
- "name": dataset_id
- }
-
- fetch_results = [('BXD32', 8.001, None, None, 'BXD32')]
-
- mock_fetch_sample_results.return_value = fetch_results
-
- results = retrieve_trait_sample_data(
- dataset, trait_name, database)
- self.assertEqual(mock_fetch_sample_results.call_count, 1)
- self.assertEqual(results, fetch_results)
-
- def test_query_for_dataset_sample(self):
- """Test for getting query for sample data"""
-
- no_results = get_query_for_dataset_sample("does not exists")
-
- query_exists = get_query_for_dataset_sample("Publish")
-
- self.assertEqual(no_results, None)
- self.assertIsInstance(query_exists, str)
-
- def test_fetch_from_db_sample_data(self):
- """Test for function that fetches sample\
- results from the database
- """
-
- database_results = [('BXD31', 8.001, None, None, 'BXD31'),
- ('BXD32', 7.884, None, None, 'BXD32'),
- ('BXD42', 7.682, None, None, 'BXD42'),
- ('BXD42', 7.682, None, None, 'BXD42'),
- ('BXD40', 7.945, None, None, 'BXD40'),
- ('BXD43', 7.873, None, None, 'BXD43')
- ]
-
- database = mock.Mock()
- db_cursor = mock.Mock()
- db_cursor.execute.return_value = 6
- db_cursor.fetchall.return_value = database_results
- database.cursor.return_value = db_cursor
-
- mock_pheno_query = """
- SELECT
- Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
- WHERE
- PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
- PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND
- PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id
- Order BY
- Strain.Name
- """
- fetch_results = fetch_from_db_sample_data(mock_pheno_query, database)
-
- self.assertEqual(fetch_results, database_results)
-
- @mock.patch("gn3.computations.datasets.dataset_creator_store")
- @mock.patch("gn3.computations.datasets.dataset_type_getter")
- def test_create_dataset(self, mock_dataset_type, mock_store):
- """Test function that creates/fetches required dataset\
- can either be published phenotype,genotype,Microarray or\
- user defined ->Temp
- """
- probe_name = "HC_M2_0606_P"
- probe_type = "ProbeSet"
-
- mock_dataset_creator = namedtuple(
- 'ProbeSet', ["dataset_name", "dataset_type"])
-
- mock_store.return_value = mock_dataset_creator
- mock_dataset_type.return_value = probe_type
- dataset = create_dataset(
- dataset_type=None, dataset_name=probe_name)
-
- self.assertEqual(dataset.dataset_name, probe_name)
- self.assertEqual(dataset.dataset_type, probe_type)
-
- def test_dataset_creator_store(self):
- """Test for functions that actual
- function to create differerent \
- datasets
- """
- results = dataset_creator_store("ProbeSet")
-
- self.assertTrue(results)
-
- def test_dataset_type_getter(self):
- """Test for fetching type of dataset given\
- the dataset name
- """
-
- redis_instance = mock.Mock()
- # fetched in redis
- redis_instance.get.return_value = "ProbeSet"
- results = dataset_type_getter("HC_M2_0_P", redis_instance)
- self.assertEqual(results, "ProbeSet")
-
- @mock.patch("gn3.computations.datasets.requests")
- def test_fetch_dataset_type_from_gn2_api(self, mock_request):
- """Test for function that test fetching\
- all datasets from gn2 api in order to store\
- in redis
- """
-
- expected_json_results = {"datasets": {
- "arabidopsis": {
- "BayXSha": {
- "Genotypes": [
- [
- "None",
- "BayXShaGeno",
- "BayXSha Genotypes"
- ]
- ],
- "Phenotypes": [
- [
- "642",
- "BayXShaPublish",
- "BayXSha Published Phenotypes"
- ]
- ]
- }
- }
- }}
-
- request_results = json.dumps(expected_json_results)
- mock_request.get.return_value.content = request_results
- results = fetch_dataset_type_from_gn2_api("HC_M2_0_P")
- expected_results = {
- "BayXShaGeno": "Geno",
- "642": "Publish"
- }
-
- self.assertEqual(expected_results, results)
-
- def test_fetch_dataset_sample_id(self):
- """Get from the database the sample\
- id if only in the samplelists
- """
-
- expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10,
- "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15}
-
- database_instance = mock.Mock()
- database_cursor = mock.Mock()
-
- database_cursor.execute.return_value = 5
- database_cursor.fetchall.return_value = list(expected_results.items())
- database_instance.cursor.return_value = database_cursor
- strain_list = ["B6D2F1", "BXD1", "BXD11",
- "BXD12", "BXD13", "BXD16", "BXD15"]
-
- results = fetch_dataset_sample_id(
- samplelist=strain_list, database=database_instance, species="mouse")
-
- self.assertEqual(results, expected_results)
-
- @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data")
- @mock.patch("gn3.computations.datasets.divide_into_chunks")
- def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples):
- """Test for for function to get data\
- of traits in dataset
- """
- _expected_results = {'AT_DSAFDS': [
- 12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]}
- database = mock.Mock()
- sample_id = [1, 2, 7, 3, 22, 8]
- mock_divide_into_chunks.return_value = [
- [1, 2, 7], [3, 22, 8], [5, 22, 333]]
- mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23)
- results = get_traits_data(sample_id, database, "HC_M2", "Publish")
-
- self.assertEqual({}, dict(results))
-
- def test_divide_into_chunks(self):
- """Test for dividing a list into given number of\
- chunks for example
- """
- results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
-
- expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
-
- self.assertEqual(results, expected_results)
diff --git a/tests/unit/computations/test_trait.py b/tests/unit/computations/test_trait.py
deleted file mode 100644
index feb97c6..0000000
--- a/tests/unit/computations/test_trait.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""Module contains tests for creating traits"""
-from unittest import TestCase
-from unittest import mock
-
-from gn3.computations.traits import fetch_trait
-from gn3.computations.traits import get_trait_sample_data
-from gn3.computations.traits import get_trait_info_data
-
-
-class TestTrait(TestCase):
- """Class contains tests for creating traits"""
-
- @mock.patch("gn3.computations.traits.get_trait_sample_data")
- def test_fetch_trait(self, get_sample_data):
- """Test for creating/fetching trait"""
-
- expected_sample_data = {
- "A/Y": 12.3,
- "WQC": 11.1
- }
-
- database = mock.Mock()
-
- get_sample_data.return_value = expected_sample_data
-
- expected_trait = {
- "trait_name": "AXFDSF_AT",
- "dataset": None,
- "trait_data": expected_sample_data
- }
- results = fetch_trait(dataset=None,
- trait_name="AXFDSF_AT",
- database=database)
-
- self.assertEqual(results, expected_trait)
- self.assertEqual(get_sample_data.call_count, 1)
-
- @mock.patch("gn3.computations.traits.retrieve_trait_sample_data")
- def test_get_trait_sample_data(self, mock_retrieve_sample_data):
- """Test for getting sample data from either\
- the trait's dataset or form redis
- """
-
- trait_dataset = mock.Mock()
- dataset_trait_sample_data = [
- ('129S1/SvImJ', 7.433, None, None, '129S1/SvImJ'),
- ('A/J', 7.596, None, None, 'A/J'),
- ('AKR/J', 7.774, None, None, 'AKR/J'),
- ('B6D2F1', 7.707, None, None, 'B6D2F1')]
- mock_retrieve_sample_data.return_value = dataset_trait_sample_data
-
- trait_name = "1426679_at"
-
- database = mock.Mock()
-
- results = get_trait_sample_data(
- trait_dataset, trait_name, database)
-
- expected_results = {
- "129S1/SvImJ": 7.433,
- "A/J": 7.596,
- "AKR/J": 7.774,
- "B6D2F1": 7.707
- }
-
- self.assertEqual(results, expected_results)
-
- def test_get_trait_info_data(self):
- """Test for getting info data related\
- to trait
- """
-
- results = get_trait_info_data(
- trait_name="AXSF_AT", trait_dataset=mock.Mock(), database_instance=None)
- expected_trait_info = {
- "description": "",
- "trait_display_name": "",
- "abbreviation": "",
- "chr": "",
- "mb": "",
- "locus": ""
- }
-
- self.assertEqual(results, expected_trait_info)