-rw-r--r-- | gn3/api/datasets.py                      |  64
-rw-r--r-- | gn3/api/traits.py                        |  53
-rw-r--r-- | gn3/computations/datasets.py             | 371
-rw-r--r-- | gn3/computations/traits.py               |  56
-rw-r--r-- | tests/integration/test_datasets.py       |  41
-rw-r--r-- | tests/integration/test_traits.py         |  72
-rw-r--r-- | tests/unit/computations/test_datasets.py | 219
-rw-r--r-- | tests/unit/computations/test_trait.py    |  84
8 files changed, 0 insertions, 960 deletions
diff --git a/gn3/api/datasets.py b/gn3/api/datasets.py
deleted file mode 100644
index 2d13120..0000000
--- a/gn3/api/datasets.py
+++ /dev/null
@@ -1,64 +0,0 @@
-"""this module contains code for creating datasets"""
-from flask import Blueprint
-from flask import jsonify
-
-from gn3.computations.datasets import create_dataset
-from gn3.computations.datasets import get_traits_data
-from gn3.computations.datasets import get_probeset_trait_data
-
-from gn3.db_utils import database_connector
-
-
-dataset = Blueprint("dataset", __name__)
-
-
-@dataset.route("/create/<dataset_name>/")
-@dataset.route("/create/<dataset_name>/<dataset_type>")
-def create_dataset_api(dataset_name, dataset_type=None):
-    """Endpoint of creating dataset"""
-
-    new_dataset = create_dataset(
-        dataset_type=dataset_type, dataset_name=dataset_name)
-
-    results = {
-        "dataset": new_dataset
-    }
-    return jsonify(results)
-
-
-@dataset.route("/fetch_traits_data/<dataset_name>/<dataset_type>")
-def fetch_traits_data(dataset_name, dataset_type):
-    """Endpoint for fetching Trait data"""
-    # should fetch this(temp)
-    trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15,
-                        17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31,
-                        35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115,
-                        116, 117, 118, 119, 120, 919, 147,
-                        121, 40, 41, 124, 125, 128, 135, 129, 130, 131,
-                        132, 134, 138, 139, 140, 141, 142, 144,
-                        145, 148, 149, 920, 922, 2, 3, 1, 1100]
-
-    conn, _cursor = database_connector()
-    results = get_traits_data(sample_ids=trait_sample_ids, database_instance=conn,
-                              dataset_name=dataset_name, dataset_type=dataset_type)
-    conn.close()
-
-    return jsonify({"results": results})
-
-
-@dataset.route("/fetch_probeset_data/<dataset_name>")
-def fetch_probeset_data(dataset_name):
-    """Endpoint for fetching probeset trait data"""
-    trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15,
-                        17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31,
-                        35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115,
-                        116, 117, 118, 119, 120, 919, 147,
-                        121, 40, 41, 124, 125, 128, 135, 129, 130, 131,
-                        132, 134, 138, 139, 140, 141, 142, 144,
-                        145, 148, 149, 920, 922, 2, 3, 1, 1100]
-
-    conn, _cursor = database_connector()
-
-    results = get_probeset_trait_data(trait_sample_ids, conn, dataset_name)
-
-    return jsonify({"results": results})
diff --git a/gn3/api/traits.py b/gn3/api/traits.py
deleted file mode 100644
index 002a281..0000000
--- a/gn3/api/traits.py
+++ /dev/null
@@ -1,53 +0,0 @@
-"""this module contains the all endpoints for traits"""
-from unittest import mock
-
-from flask import Blueprint
-from flask import jsonify
-from flask import request
-
-from gn3.computations.traits import fetch_trait
-from gn3.computations.traits import get_trait_info_data
-from gn3.db_utils import database_connector
-
-trait = Blueprint("trait", __name__)
-
-
-@trait.route("/<string:trait_name>/<string:dataset_name>")
-def create_trait(trait_name, dataset_name):
-    """Endpoint for creating trait and fetching strain\
-    values"""
-
-    # xtodo replace the object at most this endpoint
-    # requires dataset_type,dataset_name ,dataset_id
-    trait_dataset = {
-        "name": dataset_name,
-        "id": 12,
-        "type": "ProbeSet"  # temp values
-    }
-    conn, _cursor = database_connector()
-
-    trait_results = fetch_trait(dataset=trait_dataset,
-                                trait_name=trait_name,
-                                database=conn)
-
-    conn.close()
-
-    return jsonify(trait_results)
-
-
-@trait.route("/trait_info/<string:trait_name>", methods=["POST"])
-def fetch_trait_info(trait_name):
-    """Api endpoint for fetching the trait info \
-    expects the trait and trait dataset to have\
-    been created """
-    data = request.get_json()
-
-    trait_dataset = data["trait_dataset"]
-    trait_data = data["trait"]
-    _trait_name = trait_name  # should be used as key to return results
-
-    database_instance = mock.Mock()
-
-    results = get_trait_info_data(trait_dataset, trait_data, database_instance)
-
-    return jsonify(results)
diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py
deleted file mode 100644
index b69583e..0000000
--- a/gn3/computations/datasets.py
+++ /dev/null
@@ -1,371 +0,0 @@
-"""module contains the code all related to datasets"""
-import json
-from math import ceil
-from collections import defaultdict
-
-from typing import Optional
-from typing import List
-
-from dataclasses import dataclass
-from MySQLdb import escape_string  # type: ignore
-
-import requests
-from gn3.settings import GN2_BASE_URL
-
-
-def retrieve_trait_sample_data(dataset,
-                               trait_name: str,
-                               database,
-                               group_species_id=None) -> List:
-    """given the dataset id and trait_name fetch the\
-    sample_name,value from the dataset"""
-
-    # should pass the db as arg all do a setup
-
-    (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get(
-        "id"), dataset.get("type"))
-
-    dataset_query = get_query_for_dataset_sample(dataset_type)
-    results = []
-    sample_query_values = {
-        "Publish": (trait_name, dataset_id),
-        "Geno": (group_species_id, trait_name, dataset_name),
-        "ProbeSet": (trait_name, dataset_name)
-    }
-
-    if dataset_query:
-        formatted_query = dataset_query % sample_query_values[dataset_type]
-
-        results = fetch_from_db_sample_data(formatted_query, database)
-
-    return results
-
-
-def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List:
-    """this is the function that does the actual fetching of\
-    results from the database"""
-    try:
-        cursor = database_instance.cursor()
-        cursor.execute(formatted_query)
-        results = cursor.fetchall()
-
-    except Exception as error:
-        raise error
-
-    cursor.close()
-
-    return results
-
-
-def get_query_for_dataset_sample(dataset_type) -> Optional[str]:
-    """this functions contains querys for\
-    getting sample data from the db depending in
-    dataset"""
-    dataset_query = {}
-
-    pheno_query = """
-        SELECT
-            Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2
-        FROM
-            (PublishData, Strain, PublishXRef, PublishFreeze)
-        left join PublishSE on
-            (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId)
-        left join NStrain on
-            (NStrain.DataId = PublishData.Id AND
-             NStrain.StrainId = PublishData.StrainId)
-        WHERE
-            PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
-            PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
-            PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
-        Order BY
-            Strain.Name
-        """
-    geno_query = """
-        SELECT
-            Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2
-        FROM
-            (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
-        left join GenoSE on
-            (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
-        WHERE
-            Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
-            GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-            GenoFreeze.Name = %s AND
-            GenoXRef.DataId = GenoData.Id AND
-            GenoData.StrainId = Strain.Id
-        Order BY
-            Strain.Name
-        """
-
-    probeset_query = """
-        SELECT
-            Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
-        FROM
-            (ProbeSetData, ProbeSetFreeze,
-             Strain, ProbeSet, ProbeSetXRef)
-        left join ProbeSetSE
on - (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) - left join NStrain on - (NStrain.DataId = ProbeSetData.Id AND - NStrain.StrainId = ProbeSetData.StrainId) - WHERE - ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND - ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND - ProbeSetFreeze.Name = '%s' AND - ProbeSetXRef.DataId = ProbeSetData.Id AND - ProbeSetData.StrainId = Strain.Id - Order BY - Strain.Name - """ - - dataset_query["Publish"] = pheno_query - dataset_query["Geno"] = geno_query - dataset_query["ProbeSet"] = probeset_query - - return dataset_query.get(dataset_type) - - -@dataclass -class Dataset: - """class for creating datasets""" - name: Optional[str] = None - dataset_type: Optional[str] = None - dataset_id: int = -1 - - -def create_mrna_tissue_dataset(dataset_name, dataset_type): - """an mrna assay is a quantitative assessment(assay) associated\ - with an mrna trait.This used to be called probeset,but that term\ - only referes specifically to the afffymetrix platform and is\ - far too speficified""" - - return Dataset(name=dataset_name, dataset_type=dataset_type) - - -def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]: - """given the dataset name fetch the type\ - of the dataset this in turn enables fetching\ - the creation of the correct object could utilize\ - redis for the case""" - - results = redis_instance.get(dataset_name, None) - - if results: - return results - - return fetch_dataset_type_from_gn2_api(dataset_name) - - -def fetch_dataset_type_from_gn2_api(dataset_name): - """this function is only called when the\ - the redis is empty and does have the specificied\ - dataset_type""" - # should only run once - - dataset_structure = {} - - map_dataset_to_new_type = { - "Phenotypes": "Publish", - "Genotypes": "Geno", - "MrnaTypes": "ProbeSet" - } - - data = json.loads(requests.get( - GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content) - _name = dataset_name - for species in data['datasets']: - for group in data['datasets'][species]: - for dataset_type in data['datasets'][species][group]: - for dataset in data['datasets'][species][group][dataset_type]: - # assumes the first is dataset_short_name - short_dataset_name = next( - item for item in dataset if item != "None" and item is not None) - - dataset_structure[short_dataset_name] = map_dataset_to_new_type.get( - dataset_type, "MrnaTypes") - return dataset_structure - - -def dataset_creator_store(dataset_type): - """function contains key value pairs for\ - the function need to be called to create\ - each dataset_type""" - - dataset_obj = { - "ProbeSet": create_mrna_tissue_dataset - } - - return dataset_obj[dataset_type] - - -def create_dataset(dataset_type=None, dataset_name: str = None): - """function for creating new dataset temp not implemented""" - if dataset_type is None: - dataset_type = dataset_type_getter(dataset_name) - - dataset_creator = dataset_creator_store(dataset_type) - results = dataset_creator( - dataset_name=dataset_name, dataset_type=dataset_type) - return results - - -def fetch_dataset_sample_id(samplelist: List, database, species: str) -> dict: - """fetch the strain ids from the db only if\ - it is in the samplelist""" - # xtodo create an in clause for samplelist - - strain_query = """ - SELECT Strain.Name, Strain.Id FROM Strain, Species - WHERE Strain.Name IN {} - and Strain.SpeciesId=Species.Id - and Species.name = '{}' - """ - - database_cursor = database.cursor() - 
database_cursor.execute(strain_query.format(samplelist, species)) - - results = database_cursor.fetchall() - - return dict(results) - - -def divide_into_chunks(the_list, number_chunks): - """Divides a list into approximately number_chunks - >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) - [[1, 2, 7], [3, 22, 8], [5, 22, 333]]""" - - length = len(the_list) - if length == 0: - return [[]] - - if length <= number_chunks: - number_chunks = length - chunk_size = int(ceil(length/number_chunks)) - chunks = [] - - for counter in range(0, length, chunk_size): - chunks.append(the_list[counter:counter+chunk_size]) - return chunks - - -def escape(string_): - """function escape sql value""" - return escape_string(string_).decode('utf8') - - -def mescape(*items) -> List: - """multiple escape for query values""" - - return [escape_string(str(item)).decode('utf8') for item in items] - - -def get_traits_data(sample_ids, database_instance, dataset_name, dataset_type): - """function to fetch trait data""" - # MySQL limits the number of tables that can be used in a join to 61, - # so we break the sample ids into smaller chunks - # Postgres doesn't have that limit, so we can get rid of this after we transition - - _trait_data = defaultdict(list) - chunk_size = 61 - number_chunks = int(ceil(len(sample_ids) / chunk_size)) - for sample_ids_step in divide_into_chunks(sample_ids, number_chunks): - if dataset_type == "Publish": - full_dataset_type = "Phenotype" - else: - full_dataset_type = dataset_type - temp = ['T%s.value' % item for item in sample_ids_step] - - if dataset_type == "Publish": - query = "SELECT {}XRef.Id,".format(escape(dataset_type)) - - else: - query = "SELECT {}.Name,".format(escape(full_dataset_type)) - - query += ', '.join(temp) - query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(full_dataset_type, - dataset_type, - dataset_type)) - for item in sample_ids_step: - - query += """ - left join {}Data as T{} on T{}.Id = {}XRef.DataId - and T{}.StrainId={}\n - """.format(*mescape(dataset_type, item, - item, dataset_type, item, item)) - - if dataset_type == "Publish": - query += """ - WHERE {}XRef.{}FreezeId = {}Freeze.Id - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(dataset_type, dataset_type, - dataset_type, dataset_type, - dataset_name, full_dataset_type, - dataset_type, dataset_type, - full_dataset_type)) - - else: - query += """ - WHERE {}XRef.{}FreezeId = {}Freeze.Id - and {}Freeze.Name = '{}' - and {}.Id = {}XRef.{}Id - order by {}.Id - """.format(*mescape(dataset_type, dataset_type, - dataset_type, dataset_type, - dataset_name, dataset_type, - dataset_type, dataset_type, - full_dataset_type)) - - # print(query) - - _results = fetch_from_db_sample_data(query, database_instance) - - return [] - - -def get_probeset_trait_data(strain_ids: List, conn, dataset_name) -> dict: - """function for getting trait data\ - for probeset data type similar to\ - get trait data only difference is that\ - it uses sub queries""" - - trait_data: dict = {} - - trait_id_name = {} - - traits_query = """ - SELECT ProbeSetXRef.DataId,ProbeSet.Name FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - order by ProbeSet.Id - """.format(dataset_name) - - query = """ - SELECT * from ProbeSetData - where StrainID in ({}) - and id in (SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) - WHERE 
ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id - and ProbeSetFreeze.Name = '{}' - and ProbeSet.Id = ProbeSetXRef.ProbeSetId - order by ProbeSet.Id) - """.format(",".join(str(strain_id) for strain_id in strain_ids), dataset_name) - - with conn: - cursor = conn.cursor() - cursor.execute(query) - _results = cursor.fetchall() - cursor.execute(traits_query) - trait_id_name = dict(cursor.fetchall()) - - for trait_id, _strain_id, strain_value in _results: - trait_name = trait_id_name[trait_id] - if trait_data.get(trait_name): - trait_data[trait_name].append(strain_value) - else: - trait_data[trait_name] = [] - - trait_data[trait_name].append(strain_value) - - return trait_data diff --git a/gn3/computations/traits.py b/gn3/computations/traits.py deleted file mode 100644 index 1aa2970..0000000 --- a/gn3/computations/traits.py +++ /dev/null @@ -1,56 +0,0 @@ -"""module contains all operating related to traits""" -from gn3.computations.datasets import retrieve_trait_sample_data - - -def fetch_trait(dataset, trait_name: str, database) -> dict: - """this method creates a trait by\ - fetching required data given the\ - dataset and trait_name""" - - created_trait = { - "dataset": dataset, - "trait_name": trait_name - } - - trait_data = get_trait_sample_data(dataset, trait_name, database) - - created_trait["trait_data"] = trait_data - - return created_trait - - -def get_trait_sample_data(trait_dataset, trait_name, database) -> dict: - """first try to fetch the traits sample data from redis if that\ - try to fetch from the traits dataset redis is only used for\ - temp dataset type which is not used in this case """ - - sample_results = retrieve_trait_sample_data( - trait_dataset, trait_name, database) - - trait_data = {} - - for (name, sample_value, _variance, _numcase, _name2) in sample_results: - - trait_data[name] = sample_value - return trait_data - - -def get_trait_info_data(trait_dataset, - trait_name: str, - database_instance, - get_qtl_info: bool = False) -> dict: - """given a dataset and trait_name return a dict containing all info\ - regarding the get trait""" - - _temp_var_holder = (trait_dataset, trait_name, - database_instance, get_qtl_info) - trait_info_data = { - "description": "", - "chr": "", - "locus": "", - "mb": "", - "abbreviation": "", - "trait_display_name": "" - - } - return trait_info_data diff --git a/tests/integration/test_datasets.py b/tests/integration/test_datasets.py deleted file mode 100644 index f97d970..0000000 --- a/tests/integration/test_datasets.py +++ /dev/null @@ -1,41 +0,0 @@ -"""This module contains integration tests for datasets""" -from unittest import TestCase -from unittest import mock - -from collections import namedtuple -from gn3.app import create_app - - -class DatasetIntegrationTests(TestCase): - """class contains integration tests for datasets""" - - def setUp(self): - self.app = create_app().test_client() - - @mock.patch("gn3.api.datasets.create_dataset") - def test_create_dataset(self, mock_dataset): - """Test for creating dataset object""" - mock_dataset_creator = namedtuple( - 'ProbeSet', ["dataset_name", "dataset_type"]) - new_dataset = mock_dataset_creator("HC_M2_0606_P", "ProbeSet") - mock_dataset.return_value = new_dataset - response = self.app.get( - "/api/dataset/create/HC_M2_0606_P/", follow_redirects=True) - mock_dataset.assert_called_once_with( - dataset_type=None, dataset_name="HC_M2_0606_P") - results = response.get_json()["dataset"] - self.assertEqual(results[1], "ProbeSet") - self.assertEqual(response.status_code, 200) - - 
@mock.patch("gn3.api.datasets.get_traits_data") - @mock.patch("gn3.api.datasets.database_connector") - def test_fetch_traits_data(self, mock_db, mock_get_trait_data): - """Test api/dataset/fetch_traits_data/d_name/d_type""" - - mock_get_trait_data.return_value = {} - mock_db.return_value = (mock.Mock(), mock.Mock()) - response = self.app.get( - "/api/dataset/fetch_traits_data/Aging-Brain-UCIPublish/Publish", follow_redirects=True) - - self.assertEqual(response.status_code, 200) - self.assertEqual(response.get_json(), {"results": {}}) diff --git a/tests/integration/test_traits.py b/tests/integration/test_traits.py deleted file mode 100644 index 410ba22..0000000 --- a/tests/integration/test_traits.py +++ /dev/null @@ -1,72 +0,0 @@ -"""module contains integration tests for trait endpoints""" -from unittest import TestCase -from unittest import mock - -from gn3.app import create_app - - -class TraitIntegrationTest(TestCase): - """class contains integration tests for\ - traits""" - - def setUp(self): - self.app = create_app().test_client() - - @mock.patch("gn3.api.traits.fetch_trait") - @mock.patch("gn3.api.traits.database_connector") - def test_create_trait(self, mock_database, mock_fetch_trait): - """test the endpoint for creating traits\ - endpoint requires trait name and dataset name""" - mock_database.return_value = (mock.Mock(), mock.Mock()) - trait_results = { - "dataset": None, - "trait_name": "1449593_at", - "trait_data": { - "BXD11": 8.464, - "BXD12": 8.414, - "BXD13": 8.753, - "BXD15": 8.5, - "BXD16": 8.832 - } - - } - mock_fetch_trait.return_value = trait_results - - results = self.app.get( - "/api/trait/1449593_at/HC_M2_0606_P", follow_redirects=True) - - trait_data = results.get_json() - - self.assertEqual(mock_database.call_count, 1) - self.assertEqual(results.status_code, 200) - self.assertEqual(trait_data, trait_results) - - @mock.patch("gn3.api.traits.get_trait_info_data") - def test_retrieve_trait_info(self, mock_get_trait_info): - """integration test for endpoints for retrieving\ - trait info expects the dataset of trait to have been - created""" - - trait_post_data = { - "trait": {"trait_name": ""}, - "trait_dataset": {"dataset_name": ""} - } - - expected_api_results = { - "description": "trait description", - "chr": "", - "locus": "", - "mb": "", - "abbreviation": "trait_abbreviation", - "trait_display_name": "trait_name" - - } - mock_get_trait_info.return_value = expected_api_results - - trait_info = self.app.post( - "/api/trait/trait_info/144_at", json=trait_post_data, follow_redirects=True) - - trait_info_results = trait_info.get_json() - - self.assertEqual(trait_info.status_code, 200) - self.assertEqual(trait_info_results, expected_api_results) diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py deleted file mode 100644 index f9e9c2b..0000000 --- a/tests/unit/computations/test_datasets.py +++ /dev/null @@ -1,219 +0,0 @@ -"""Module contains tests from datasets""" -import json - -from unittest import TestCase -from unittest import mock - -from collections import namedtuple - -from gn3.computations.datasets import retrieve_trait_sample_data -from gn3.computations.datasets import get_query_for_dataset_sample -from gn3.computations.datasets import fetch_from_db_sample_data -from gn3.computations.datasets import create_dataset -from gn3.computations.datasets import dataset_creator_store -from gn3.computations.datasets import dataset_type_getter -from gn3.computations.datasets import fetch_dataset_type_from_gn2_api -from 
gn3.computations.datasets import fetch_dataset_sample_id -from gn3.computations.datasets import divide_into_chunks -from gn3.computations.datasets import get_traits_data - - -class TestDatasets(TestCase): - """Class contains tests for datasets""" - - @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") - def test_retrieve_trait_sample_data(self, mock_fetch_sample_results): - """Test retrieving sample data\ - for trait from the dataset - """ - trait_name = "1419792_at" - dataset_id = "HC_M2_0606_P&" - dataset_type = "Publish" - - database = mock.Mock() - - dataset = { - "id": dataset_id, - "type": dataset_type, - "name": dataset_id - } - - fetch_results = [('BXD32', 8.001, None, None, 'BXD32')] - - mock_fetch_sample_results.return_value = fetch_results - - results = retrieve_trait_sample_data( - dataset, trait_name, database) - self.assertEqual(mock_fetch_sample_results.call_count, 1) - self.assertEqual(results, fetch_results) - - def test_query_for_dataset_sample(self): - """Test for getting query for sample data""" - - no_results = get_query_for_dataset_sample("does not exists") - - query_exists = get_query_for_dataset_sample("Publish") - - self.assertEqual(no_results, None) - self.assertIsInstance(query_exists, str) - - def test_fetch_from_db_sample_data(self): - """Test for function that fetches sample\ - results from the database - """ - - database_results = [('BXD31', 8.001, None, None, 'BXD31'), - ('BXD32', 7.884, None, None, 'BXD32'), - ('BXD42', 7.682, None, None, 'BXD42'), - ('BXD42', 7.682, None, None, 'BXD42'), - ('BXD40', 7.945, None, None, 'BXD40'), - ('BXD43', 7.873, None, None, 'BXD43') - ] - - database = mock.Mock() - db_cursor = mock.Mock() - db_cursor.execute.return_value = 6 - db_cursor.fetchall.return_value = database_results - database.cursor.return_value = db_cursor - - mock_pheno_query = """ - SELECT - Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2 - WHERE - PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND - PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND - PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id - Order BY - Strain.Name - """ - fetch_results = fetch_from_db_sample_data(mock_pheno_query, database) - - self.assertEqual(fetch_results, database_results) - - @mock.patch("gn3.computations.datasets.dataset_creator_store") - @mock.patch("gn3.computations.datasets.dataset_type_getter") - def test_create_dataset(self, mock_dataset_type, mock_store): - """Test function that creates/fetches required dataset\ - can either be published phenotype,genotype,Microarray or\ - user defined ->Temp - """ - probe_name = "HC_M2_0606_P" - probe_type = "ProbeSet" - - mock_dataset_creator = namedtuple( - 'ProbeSet', ["dataset_name", "dataset_type"]) - - mock_store.return_value = mock_dataset_creator - mock_dataset_type.return_value = probe_type - dataset = create_dataset( - dataset_type=None, dataset_name=probe_name) - - self.assertEqual(dataset.dataset_name, probe_name) - self.assertEqual(dataset.dataset_type, probe_type) - - def test_dataset_creator_store(self): - """Test for functions that actual - function to create differerent \ - datasets - """ - results = dataset_creator_store("ProbeSet") - - self.assertTrue(results) - - def test_dataset_type_getter(self): - """Test for fetching type of dataset given\ - the dataset name - """ - - redis_instance = mock.Mock() - # fetched in redis - redis_instance.get.return_value = "ProbeSet" - results = dataset_type_getter("HC_M2_0_P", redis_instance) - 
self.assertEqual(results, "ProbeSet") - - @mock.patch("gn3.computations.datasets.requests") - def test_fetch_dataset_type_from_gn2_api(self, mock_request): - """Test for function that test fetching\ - all datasets from gn2 api in order to store\ - in redis - """ - - expected_json_results = {"datasets": { - "arabidopsis": { - "BayXSha": { - "Genotypes": [ - [ - "None", - "BayXShaGeno", - "BayXSha Genotypes" - ] - ], - "Phenotypes": [ - [ - "642", - "BayXShaPublish", - "BayXSha Published Phenotypes" - ] - ] - } - } - }} - - request_results = json.dumps(expected_json_results) - mock_request.get.return_value.content = request_results - results = fetch_dataset_type_from_gn2_api("HC_M2_0_P") - expected_results = { - "BayXShaGeno": "Geno", - "642": "Publish" - } - - self.assertEqual(expected_results, results) - - def test_fetch_dataset_sample_id(self): - """Get from the database the sample\ - id if only in the samplelists - """ - - expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10, - "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15} - - database_instance = mock.Mock() - database_cursor = mock.Mock() - - database_cursor.execute.return_value = 5 - database_cursor.fetchall.return_value = list(expected_results.items()) - database_instance.cursor.return_value = database_cursor - strain_list = ["B6D2F1", "BXD1", "BXD11", - "BXD12", "BXD13", "BXD16", "BXD15"] - - results = fetch_dataset_sample_id( - samplelist=strain_list, database=database_instance, species="mouse") - - self.assertEqual(results, expected_results) - - @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") - @mock.patch("gn3.computations.datasets.divide_into_chunks") - def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples): - """Test for for function to get data\ - of traits in dataset - """ - _expected_results = {'AT_DSAFDS': [ - 12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]} - database = mock.Mock() - sample_id = [1, 2, 7, 3, 22, 8] - mock_divide_into_chunks.return_value = [ - [1, 2, 7], [3, 22, 8], [5, 22, 333]] - mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23) - results = get_traits_data(sample_id, database, "HC_M2", "Publish") - - self.assertEqual({}, dict(results)) - - def test_divide_into_chunks(self): - """Test for dividing a list into given number of\ - chunks for example - """ - results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) - - expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]] - - self.assertEqual(results, expected_results) diff --git a/tests/unit/computations/test_trait.py b/tests/unit/computations/test_trait.py deleted file mode 100644 index feb97c6..0000000 --- a/tests/unit/computations/test_trait.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Module contains tests for creating traits""" -from unittest import TestCase -from unittest import mock - -from gn3.computations.traits import fetch_trait -from gn3.computations.traits import get_trait_sample_data -from gn3.computations.traits import get_trait_info_data - - -class TestTrait(TestCase): - """Class contains tests for creating traits""" - - @mock.patch("gn3.computations.traits.get_trait_sample_data") - def test_fetch_trait(self, get_sample_data): - """Test for creating/fetching trait""" - - expected_sample_data = { - "A/Y": 12.3, - "WQC": 11.1 - } - - database = mock.Mock() - - get_sample_data.return_value = expected_sample_data - - expected_trait = { - "trait_name": "AXFDSF_AT", - "dataset": None, - "trait_data": expected_sample_data - } - results = fetch_trait(dataset=None, - 
trait_name="AXFDSF_AT", - database=database) - - self.assertEqual(results, expected_trait) - self.assertEqual(get_sample_data.call_count, 1) - - @mock.patch("gn3.computations.traits.retrieve_trait_sample_data") - def test_get_trait_sample_data(self, mock_retrieve_sample_data): - """Test for getting sample data from either\ - the trait's dataset or form redis - """ - - trait_dataset = mock.Mock() - dataset_trait_sample_data = [ - ('129S1/SvImJ', 7.433, None, None, '129S1/SvImJ'), - ('A/J', 7.596, None, None, 'A/J'), - ('AKR/J', 7.774, None, None, 'AKR/J'), - ('B6D2F1', 7.707, None, None, 'B6D2F1')] - mock_retrieve_sample_data.return_value = dataset_trait_sample_data - - trait_name = "1426679_at" - - database = mock.Mock() - - results = get_trait_sample_data( - trait_dataset, trait_name, database) - - expected_results = { - "129S1/SvImJ": 7.433, - "A/J": 7.596, - "AKR/J": 7.774, - "B6D2F1": 7.707 - } - - self.assertEqual(results, expected_results) - - def test_get_trait_info_data(self): - """Test for getting info data related\ - to trait - """ - - results = get_trait_info_data( - trait_name="AXSF_AT", trait_dataset=mock.Mock(), database_instance=None) - expected_trait_info = { - "description": "", - "trait_display_name": "", - "abbreviation": "", - "chr": "", - "mb": "", - "locus": "" - } - - self.assertEqual(results, expected_trait_info) |