diff options
-rw-r--r-- | gn3/api/correlation.py | 12 | ||||
-rw-r--r-- | gn3/api/datasets.py | 44 | ||||
-rw-r--r-- | gn3/api/traits.py | 53 | ||||
-rw-r--r-- | gn3/app.py | 4 | ||||
-rw-r--r-- | gn3/computations/correlations.py | 13 | ||||
-rw-r--r-- | gn3/computations/datasets.py | 323 | ||||
-rw-r--r-- | gn3/computations/traits.py | 56 | ||||
-rw-r--r-- | gn3/experimental_db.py | 11 | ||||
-rw-r--r-- | gn3/settings.py | 3 | ||||
-rw-r--r-- | tests/integration/test_datasets.py | 41 | ||||
-rw-r--r-- | tests/integration/test_traits.py | 72 | ||||
-rw-r--r-- | tests/unit/computations/test_correlation.py | 74 | ||||
-rw-r--r-- | tests/unit/computations/test_datasets.py | 219 | ||||
-rw-r--r-- | tests/unit/computations/test_trait.py | 84 |
14 files changed, 966 insertions, 43 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index 2339088..f28e1f5 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -33,9 +33,10 @@ def compute_sample_integration(corr_method="pearson"): @correlation.route("/sample_r/<string:corr_method>", methods=["POST"]) def compute_sample_r(corr_method="pearson"): - """correlation endpoint for computing sample r correlations\ + """Correlation endpoint for computing sample r correlations\ api expects the trait data with has the trait and also the\ - target_dataset data""" + target_dataset data + """ correlation_input = request.get_json() # xtodo move code below to compute_all_sampl correlation @@ -53,9 +54,10 @@ def compute_sample_r(corr_method="pearson"): @correlation.route("/lit_corr/<string:species>/<int:gene_id>", methods=["POST"]) def compute_lit_corr(species=None, gene_id=None): - """api endpoint for doing lit correlation.results for lit correlation\ + """Api endpoint for doing lit correlation.results for lit correlation\ are fetched from the database this is the only case where the db\ - might be needed for actual computing of the correlation results""" + might be needed for actual computing of the correlation results + """ conn, _cursor_object = database_connector() target_traits_gene_ids = request.get_json() @@ -72,7 +74,7 @@ def compute_lit_corr(species=None, gene_id=None): @correlation.route("/tissue_corr/<string:corr_method>", methods=["POST"]) def compute_tissue_corr(corr_method="pearson"): - """api endpoint fr doing tissue correlation""" + """Api endpoint fr doing tissue correlation""" tissue_input_data = request.get_json() primary_tissue_dict = tissue_input_data["primary_tissue"] target_tissues_dict = tissue_input_data["target_tissues_dict"] diff --git a/gn3/api/datasets.py b/gn3/api/datasets.py new file mode 100644 index 0000000..7f08de5 --- /dev/null +++ b/gn3/api/datasets.py @@ -0,0 +1,44 @@ +"""this module contains code for creating datasets""" +from flask import Blueprint 
+from flask import jsonify + +from gn3.computations.datasets import create_dataset +from gn3.computations.datasets import get_traits_data +from gn3.experimental_db import database_connector + + +dataset = Blueprint("dataset", __name__) + + +@dataset.route("/create/<dataset_name>/") +@dataset.route("/create/<dataset_name>/<dataset_type>") +def create_dataset_api(dataset_name, dataset_type=None): + """Endpoint of creating dataset""" + + new_dataset = create_dataset( + dataset_type=dataset_type, dataset_name=dataset_name) + + results = { + "dataset": new_dataset + } + return jsonify(results) + + +@dataset.route("/fetch_traits_data/<dataset_name>/<dataset_type>") +def fetch_traits_data(dataset_name, dataset_type): + """Endpoint for fetching Trait data""" + # should fetch this(temp) + trait_sample_ids = [4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 15, + 17, 18, 19, 20, 21, 22, 24, 25, 26, 28, 29, 30, 31, + 35, 36, 37, 39, 98, 99, 100, 103, 487, 105, 106, 110, 115, + 116, 117, 118, 119, 120, 919, 147, + 121, 40, 41, 124, 125, 128, 135, 129, 130, 131, + 132, 134, 138, 139, 140, 141, 142, 144, + 145, 148, 149, 920, 922, 2, 3, 1, 1100] + + conn, _cursor = database_connector() + results = get_traits_data(sample_ids=trait_sample_ids, database_instance=conn, + dataset_name=dataset_name, dataset_type=dataset_type) + conn.close() + + return jsonify({"results": results}) diff --git a/gn3/api/traits.py b/gn3/api/traits.py new file mode 100644 index 0000000..0ac437d --- /dev/null +++ b/gn3/api/traits.py @@ -0,0 +1,53 @@ +"""this module contains the all endpoints for traits""" +from unittest import mock + +from flask import Blueprint +from flask import jsonify +from flask import request + +from gn3.computations.traits import fetch_trait +from gn3.computations.traits import get_trait_info_data +from gn3.experimental_db import database_connector + +trait = Blueprint("trait", __name__) + + +@trait.route("/<string:trait_name>/<string:dataset_name>") +def create_trait(trait_name, dataset_name): + 
"""Endpoint for creating trait and fetching strain\ + values""" + + # xtodo replace the object at most this endpoint + # requires dataset_type,dataset_name ,dataset_id + trait_dataset = { + "name": dataset_name, + "id": 12, + "type": "ProbeSet" # temp values + } + conn, _cursor = database_connector() + + trait_results = fetch_trait(dataset=trait_dataset, + trait_name=trait_name, + database=conn) + + conn.close() + + return jsonify(trait_results) + + +@trait.route("/trait_info/<string:trait_name>", methods=["POST"]) +def fetch_trait_info(trait_name): + """Api endpoint for fetching the trait info \ + expects the trait and trait dataset to have\ + been created """ + data = request.get_json() + + trait_dataset = data["trait_dataset"] + trait_data = data["trait"] + _trait_name = trait_name # should be used as key to return results + + database_instance = mock.Mock() + + results = get_trait_info_data(trait_dataset, trait_data, database_instance) + + return jsonify(results) @@ -7,6 +7,8 @@ from flask import Flask from gn3.api.gemma import gemma from gn3.api.general import general from gn3.api.correlation import correlation +from gn3.api.traits import trait +from gn3.api.datasets import dataset def create_app(config: Union[Dict, str, None] = None) -> Flask: @@ -28,4 +30,6 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(general, url_prefix="/api/") app.register_blueprint(gemma, url_prefix="/api/gemma") app.register_blueprint(correlation, url_prefix="/api/correlation") + app.register_blueprint(trait, url_prefix="/api/trait") + app.register_blueprint(dataset, url_prefix="/api/dataset") return app diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 26b7294..7fb67be 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -109,10 +109,9 @@ package :not packaged in guix def filter_shared_sample_keys(this_samplelist, target_samplelist) -> Tuple[List, List]: - """Given 
primary and target samplelist for two base and target trait select -filter the values using the shared keys - - """ + """Given primary and target samplelist\ + for two base and target trait select\ + filter the values using the shared keys""" this_vals = [] target_vals = [] for key, value in target_samplelist.items(): @@ -125,8 +124,9 @@ filter the values using the shared keys def compute_all_sample_correlation(this_trait, target_dataset, corr_method="pearson") -> List: - """Given a trait data samplelist and target__datasets compute all sample -correlation""" + """Given a trait data samplelist and\ + target__datasets compute all sample correlation + """ this_trait_samples = this_trait["trait_sample_data"] @@ -323,7 +323,6 @@ def compute_all_lit_correlation(conn, trait_lists: List, species: str, gene_id): """Function that acts as an abstraction for lit_correlation_for_trait_list""" - # xtodo to be refactored lit_results = lit_correlation_for_trait_list( conn=conn, diff --git a/gn3/computations/datasets.py b/gn3/computations/datasets.py new file mode 100644 index 0000000..57e1fe1 --- /dev/null +++ b/gn3/computations/datasets.py @@ -0,0 +1,323 @@ +"""module contains the code all related to datasets""" +import json +from math import ceil +from collections import defaultdict + +from typing import Optional +from typing import List + +from dataclasses import dataclass +from MySQLdb import escape_string # type: ignore + +import requests +from gn3.settings import GN2_BASE_URL + + +def retrieve_trait_sample_data(dataset, + trait_name: str, + database, + group_species_id=None) -> List: + """given the dataset id and trait_name fetch the\ + sample_name,value from the dataset""" + + # should pass the db as arg all do a setup + + (dataset_name, dataset_id, dataset_type) = (dataset.get("name"), dataset.get( + "id"), dataset.get("type")) + + dataset_query = get_query_for_dataset_sample(dataset_type) + results = [] + sample_query_values = { + "Publish": (trait_name, dataset_id), + 
"Geno": (group_species_id, trait_name, dataset_name), + "ProbeSet": (trait_name, dataset_name) + } + + if dataset_query: + formatted_query = dataset_query % sample_query_values[dataset_type] + + results = fetch_from_db_sample_data(formatted_query, database) + + return results + + +def fetch_from_db_sample_data(formatted_query: str, database_instance) -> List: + """this is the function that does the actual fetching of\ + results from the database""" + try: + cursor = database_instance.cursor() + cursor.execute(formatted_query) + results = cursor.fetchall() + + except Exception as error: + raise error + + cursor.close() + + return results + + +def get_query_for_dataset_sample(dataset_type) -> Optional[str]: + """this functions contains querys for\ + getting sample data from the db depending in + dataset""" + dataset_query = {} + + pheno_query = """ + SELECT + Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2 + FROM + (PublishData, Strain, PublishXRef, PublishFreeze) + left join PublishSE on + (PublishSE.DataId = PublishData.Id AND PublishSE.StrainId = PublishData.StrainId) + left join NStrain on + (NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId) + WHERE + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND + PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id + Order BY + Strain.Name + """ + geno_query = """ + SELECT + Strain.Name, GenoData.value, GenoSE.error, "N/A", Strain.Name2 + FROM + (GenoData, GenoFreeze, Strain, Geno, GenoXRef) + left join GenoSE on + (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId) + WHERE + Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND + GenoXRef.GenoFreezeId = GenoFreeze.Id AND + GenoFreeze.Name = %s AND + GenoXRef.DataId = GenoData.Id AND + GenoData.StrainId = Strain.Id + Order BY + Strain.Name + """ + + probeset_query = """ + SELECT + Strain.Name, 
ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2 + FROM + (ProbeSetData, ProbeSetFreeze, + Strain, ProbeSet, ProbeSetXRef) + left join ProbeSetSE on + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + left join NStrain on + (NStrain.DataId = ProbeSetData.Id AND + NStrain.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSet.Name = '%s' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetFreeze.Name = '%s' AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + Order BY + Strain.Name + """ + + dataset_query["Publish"] = pheno_query + dataset_query["Geno"] = geno_query + dataset_query["ProbeSet"] = probeset_query + + return dataset_query.get(dataset_type) + + +@dataclass +class Dataset: + """class for creating datasets""" + name: Optional[str] = None + dataset_type: Optional[str] = None + dataset_id: int = -1 + + +def create_mrna_tissue_dataset(dataset_name, dataset_type): + """an mrna assay is a quantitative assessment(assay) associated\ + with an mrna trait.This used to be called probeset,but that term\ + only referes specifically to the afffymetrix platform and is\ + far too speficified""" + + return Dataset(name=dataset_name, dataset_type=dataset_type) + + +def dataset_type_getter(dataset_name, redis_instance=None) -> Optional[str]: + """given the dataset name fetch the type\ + of the dataset this in turn enables fetching\ + the creation of the correct object could utilize\ + redis for the case""" + + results = redis_instance.get(dataset_name, None) + + if results: + return results + + return fetch_dataset_type_from_gn2_api(dataset_name) + + +def fetch_dataset_type_from_gn2_api(dataset_name): + """this function is only called when the\ + the redis is empty and does have the specificied\ + dataset_type""" + # should only run once + + dataset_structure = {} + + map_dataset_to_new_type = { + "Phenotypes": "Publish", + 
"Genotypes": "Geno", + "MrnaTypes": "ProbeSet" + } + + data = json.loads(requests.get( + GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content) + _name = dataset_name + for species in data['datasets']: + for group in data['datasets'][species]: + for dataset_type in data['datasets'][species][group]: + for dataset in data['datasets'][species][group][dataset_type]: + # assumes the first is dataset_short_name + short_dataset_name = next( + item for item in dataset if item != "None" and item is not None) + + dataset_structure[short_dataset_name] = map_dataset_to_new_type.get( + dataset_type, "MrnaTypes") + return dataset_structure + + +def dataset_creator_store(dataset_type): + """function contains key value pairs for\ + the function need to be called to create\ + each dataset_type""" + + dataset_obj = { + "ProbeSet": create_mrna_tissue_dataset + } + + return dataset_obj[dataset_type] + + +def create_dataset(dataset_type=None, dataset_name: str = None): + """function for creating new dataset temp not implemented""" + if dataset_type is None: + dataset_type = dataset_type_getter(dataset_name) + + dataset_creator = dataset_creator_store(dataset_type) + results = dataset_creator( + dataset_name=dataset_name, dataset_type=dataset_type) + return results + + +def fetch_dataset_sample_id(samplelist: List, database, species: str) -> dict: + """fetch the strain ids from the db only if\ + it is in the samplelist""" + # xtodo create an in clause for samplelist + + strain_query = """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """ + + database_cursor = database.cursor() + database_cursor.execute(strain_query.format(samplelist, species)) + + results = database_cursor.fetchall() + + return dict(results) + + +def divide_into_chunks(the_list, number_chunks): + """Divides a list into approximately number_chunks + >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) + [[1, 2, 
7], [3, 22, 8], [5, 22, 333]]""" + + length = len(the_list) + if length == 0: + return [[]] + + if length <= number_chunks: + number_chunks = length + chunk_size = int(ceil(length/number_chunks)) + chunks = [] + + for counter in range(0, length, chunk_size): + chunks.append(the_list[counter:counter+chunk_size]) + return chunks + + +def escape(string_): + """function escape sql value""" + return escape_string(string_).decode('utf8') + + +def mescape(*items) -> List: + """multiple escape for query values""" + + return [escape_string(str(item)).decode('utf8') for item in items] + + +def get_traits_data(sample_ids, database_instance, dataset_name, dataset_type): + """function to fetch trait data""" + # MySQL limits the number of tables that can be used in a join to 61, + # so we break the sample ids into smaller chunks + # Postgres doesn't have that limit, so we can get rid of this after we transition + + _trait_data = defaultdict(list) + chunk_size = 61 + number_chunks = int(ceil(len(sample_ids) / chunk_size)) + for sample_ids_step in divide_into_chunks(sample_ids, number_chunks): + if dataset_type == "Publish": + full_dataset_type = "Phenotype" + else: + full_dataset_type = dataset_type + temp = ['T%s.value' % item for item in sample_ids_step] + + if dataset_type == "Publish": + query = "SELECT {}XRef.Id,".format(escape(dataset_type)) + + else: + query = "SELECT {}.Name,".format(escape(full_dataset_type)) + + query += ', '.join(temp) + query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(full_dataset_type, + dataset_type, + dataset_type)) + for item in sample_ids_step: + + query += """ + left join {}Data as T{} on T{}.Id = {}XRef.DataId + and T{}.StrainId={}\n + """.format(*mescape(dataset_type, item, + item, dataset_type, item, item)) + + if dataset_type == "Publish": + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(dataset_type, dataset_type, + dataset_type, 
dataset_type, + dataset_name, full_dataset_type, + dataset_type, dataset_type, + full_dataset_type)) + + else: + query += """ + WHERE {}XRef.{}FreezeId = {}Freeze.Id + and {}Freeze.Name = '{}' + and {}.Id = {}XRef.{}Id + order by {}.Id + """.format(*mescape(dataset_type, dataset_type, + dataset_type, dataset_type, + dataset_name, dataset_type, + dataset_type, dataset_type, + full_dataset_type)) + + # print(query) + + _results = fetch_from_db_sample_data(query, database_instance) + return {} diff --git a/gn3/computations/traits.py b/gn3/computations/traits.py new file mode 100644 index 0000000..1aa2970 --- /dev/null +++ b/gn3/computations/traits.py @@ -0,0 +1,56 @@ +"""module contains all operating related to traits""" +from gn3.computations.datasets import retrieve_trait_sample_data + + +def fetch_trait(dataset, trait_name: str, database) -> dict: + """this method creates a trait by\ + fetching required data given the\ + dataset and trait_name""" + + created_trait = { + "dataset": dataset, + "trait_name": trait_name + } + + trait_data = get_trait_sample_data(dataset, trait_name, database) + + created_trait["trait_data"] = trait_data + + return created_trait + + +def get_trait_sample_data(trait_dataset, trait_name, database) -> dict: + """first try to fetch the traits sample data from redis if that\ + try to fetch from the traits dataset redis is only used for\ + temp dataset type which is not used in this case """ + + sample_results = retrieve_trait_sample_data( + trait_dataset, trait_name, database) + + trait_data = {} + + for (name, sample_value, _variance, _numcase, _name2) in sample_results: + + trait_data[name] = sample_value + return trait_data + + +def get_trait_info_data(trait_dataset, + trait_name: str, + database_instance, + get_qtl_info: bool = False) -> dict: + """given a dataset and trait_name return a dict containing all info\ + regarding the get trait""" + + _temp_var_holder = (trait_dataset, trait_name, + database_instance, get_qtl_info) + 
trait_info_data = { + "description": "", + "chr": "", + "locus": "", + "mb": "", + "abbreviation": "", + "trait_display_name": "" + + } + return trait_info_data diff --git a/gn3/experimental_db.py b/gn3/experimental_db.py new file mode 100644 index 0000000..a07aeba --- /dev/null +++ b/gn3/experimental_db.py @@ -0,0 +1,11 @@ +"""this function contains experimental db staff""" +from typing import Tuple +import MySQLdb as mdb # type: ignore + + +def database_connector()->Tuple: + """function to create db connector""" + conn = mdb.connect("localhost", "kabui", "1234", "db_webqtl") + cursor = conn.cursor() + + return (conn, cursor) diff --git a/gn3/settings.py b/gn3/settings.py index e77a977..478a041 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -15,3 +15,6 @@ TMPDIR = os.environ.get("TMPDIR", tempfile.gettempdir()) SQL_URI = os.environ.get("SQL_URI", "mysql://kabui:1234@localhost/db_webqtl") SECRET_KEY = "password" SQLALCHEMY_TRACK_MODIFICATIONS = False +# gn2 results only used in fetching dataset info + +GN2_BASE_URL = "http://www.genenetwork.org/" diff --git a/tests/integration/test_datasets.py b/tests/integration/test_datasets.py new file mode 100644 index 0000000..f97d970 --- /dev/null +++ b/tests/integration/test_datasets.py @@ -0,0 +1,41 @@ +"""This module contains integration tests for datasets""" +from unittest import TestCase +from unittest import mock + +from collections import namedtuple +from gn3.app import create_app + + +class DatasetIntegrationTests(TestCase): + """class contains integration tests for datasets""" + + def setUp(self): + self.app = create_app().test_client() + + @mock.patch("gn3.api.datasets.create_dataset") + def test_create_dataset(self, mock_dataset): + """Test for creating dataset object""" + mock_dataset_creator = namedtuple( + 'ProbeSet', ["dataset_name", "dataset_type"]) + new_dataset = mock_dataset_creator("HC_M2_0606_P", "ProbeSet") + mock_dataset.return_value = new_dataset + response = self.app.get( + 
"/api/dataset/create/HC_M2_0606_P/", follow_redirects=True) + mock_dataset.assert_called_once_with( + dataset_type=None, dataset_name="HC_M2_0606_P") + results = response.get_json()["dataset"] + self.assertEqual(results[1], "ProbeSet") + self.assertEqual(response.status_code, 200) + + @mock.patch("gn3.api.datasets.get_traits_data") + @mock.patch("gn3.api.datasets.database_connector") + def test_fetch_traits_data(self, mock_db, mock_get_trait_data): + """Test api/dataset/fetch_traits_data/d_name/d_type""" + + mock_get_trait_data.return_value = {} + mock_db.return_value = (mock.Mock(), mock.Mock()) + response = self.app.get( + "/api/dataset/fetch_traits_data/Aging-Brain-UCIPublish/Publish", follow_redirects=True) + + self.assertEqual(response.status_code, 200) + self.assertEqual(response.get_json(), {"results": {}}) diff --git a/tests/integration/test_traits.py b/tests/integration/test_traits.py new file mode 100644 index 0000000..410ba22 --- /dev/null +++ b/tests/integration/test_traits.py @@ -0,0 +1,72 @@ +"""module contains integration tests for trait endpoints""" +from unittest import TestCase +from unittest import mock + +from gn3.app import create_app + + +class TraitIntegrationTest(TestCase): + """class contains integration tests for\ + traits""" + + def setUp(self): + self.app = create_app().test_client() + + @mock.patch("gn3.api.traits.fetch_trait") + @mock.patch("gn3.api.traits.database_connector") + def test_create_trait(self, mock_database, mock_fetch_trait): + """test the endpoint for creating traits\ + endpoint requires trait name and dataset name""" + mock_database.return_value = (mock.Mock(), mock.Mock()) + trait_results = { + "dataset": None, + "trait_name": "1449593_at", + "trait_data": { + "BXD11": 8.464, + "BXD12": 8.414, + "BXD13": 8.753, + "BXD15": 8.5, + "BXD16": 8.832 + } + + } + mock_fetch_trait.return_value = trait_results + + results = self.app.get( + "/api/trait/1449593_at/HC_M2_0606_P", follow_redirects=True) + + trait_data = 
results.get_json() + + self.assertEqual(mock_database.call_count, 1) + self.assertEqual(results.status_code, 200) + self.assertEqual(trait_data, trait_results) + + @mock.patch("gn3.api.traits.get_trait_info_data") + def test_retrieve_trait_info(self, mock_get_trait_info): + """integration test for endpoints for retrieving\ + trait info expects the dataset of trait to have been + created""" + + trait_post_data = { + "trait": {"trait_name": ""}, + "trait_dataset": {"dataset_name": ""} + } + + expected_api_results = { + "description": "trait description", + "chr": "", + "locus": "", + "mb": "", + "abbreviation": "trait_abbreviation", + "trait_display_name": "trait_name" + + } + mock_get_trait_info.return_value = expected_api_results + + trait_info = self.app.post( + "/api/trait/trait_info/144_at", json=trait_post_data, follow_redirects=True) + + trait_info_results = trait_info.get_json() + + self.assertEqual(trait_info.status_code, 200) + self.assertEqual(trait_info_results, expected_api_results) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index 52d1f60..8f3ef25 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -1,4 +1,4 @@ -"""module contains the tests for correlation""" +"""Module contains the tests for correlation""" import unittest from unittest import TestCase from unittest import mock @@ -88,10 +88,10 @@ class DataBase(QueryableMixin): class TestCorrelation(TestCase): - """class for testing correlation functions""" + """Class for testing correlation functions""" def test_normalize_values(self): - """function to test normalizing values """ + """Function to test normalizing values """ results = normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1]) @@ -100,7 +100,7 @@ class TestCorrelation(TestCase): self.assertEqual(results, expected_results) def test_bicor(self): - """test for doing biweight mid correlation """ + 
"""Test for doing biweight mid correlation """ results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6]) @@ -110,8 +110,9 @@ class TestCorrelation(TestCase): @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") @mock.patch("gn3.computations.correlations.normalize_values") def test_compute_sample_r_correlation(self, norm_vals, compute_corr): - """test for doing sample correlation gets the cor\ - and p value and rho value using pearson correlation""" + """Test for doing sample correlation gets the cor\ + and p value and rho value using pearson correlation + """ primary_values = [2.3, 4.1, 5] target_values = [3.4, 6.2, 4.1] @@ -141,7 +142,7 @@ class TestCorrelation(TestCase): spearman_results, tuple, "message") def test_filter_shared_sample_keys(self): - """function to tests shared key between two dicts""" + """Function to tests shared key between two dicts""" this_samplelist = { "C57BL/6J": "6.638", @@ -170,7 +171,7 @@ class TestCorrelation(TestCase): @mock.patch("gn3.computations.correlations.compute_sample_r_correlation") @mock.patch("gn3.computations.correlations.filter_shared_sample_keys") def test_compute_all_sample(self, filter_shared_samples, sample_r_corr): - """given target dataset compute all sample r correlation""" + """Given target dataset compute all sample r correlation""" filter_shared_samples.return_value = (["1.23", "6.565", "6.456"], [ "6.266", "6.565", "6.456"]) @@ -200,7 +201,6 @@ class TestCorrelation(TestCase): sample_all_results = [{"1419792_at": {"corr_coeffient": -1.0, "p_value": 0.9, "num_overlap": 6}}] - # ?corr_method: str, trait_vals, target_samples_vals self.assertEqual(compute_all_sample_correlation( this_trait=this_trait_data, target_dataset=traits_dataset), sample_all_results) @@ -212,9 +212,10 @@ class TestCorrelation(TestCase): @unittest.skip("not implemented") def test_tissue_lit_corr_for_probe_type(self): - """tests for doing tissue and lit correlation for trait list\ + """Tests for doing tissue and lit correlation 
for trait list\ if both the dataset and target dataset are probeset runs\ - on after initial correlation has been done""" + on after initial correlation has been done + """ results = tissue_lit_corr_for_probe_type( corr_type="tissue", top_corr_results={}) @@ -223,8 +224,9 @@ class TestCorrelation(TestCase): @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") def test_tissue_correlation_for_trait_list(self, mock_compute_corr_coeff): - """test given a primary tissue values for a trait and and a list of\ - target tissues for traits do the tissue correlation for them""" + """Test given a primary tissue values for a trait and and a list of\ + target tissues for traits do the tissue correlation for them + """ primary_tissue_values = [1.1, 1.5, 2.3] target_tissues_values = [1, 2, 3] @@ -241,8 +243,9 @@ class TestCorrelation(TestCase): @mock.patch("gn3.computations.correlations.fetch_lit_correlation_data") @mock.patch("gn3.computations.correlations.map_to_mouse_gene_id") def test_lit_correlation_for_trait_list(self, mock_mouse_gene_id, fetch_lit_data): - """fetch results from db call for lit correlation given a trait list\ - after doing correlation""" + """Fetch results from db call for lit correlation given a trait list\ + after doing correlation + """ target_trait_lists = [("1426679_at", 15), ("1426702_at", 17), @@ -265,8 +268,9 @@ class TestCorrelation(TestCase): self.assertEqual(lit_results, expected_results) def test_fetch_lit_correlation_data(self): - """test for fetching lit correlation data from\ - the database where the input and mouse geneid are none""" + """Test for fetching lit correlation data from\ + the database where the input and mouse geneid are none + """ conn = DataBase() results = fetch_lit_correlation_data(conn=conn, @@ -277,8 +281,9 @@ class TestCorrelation(TestCase): self.assertEqual(results, ("1", 0)) def test_fetch_lit_correlation_data_db_query(self): - """test for fetching lit corr coefficent givent the input\ - input trait 
mouse gene id and mouse gene id""" + """Test for fetching lit corr coefficent givent the input\ + input trait mouse gene id and mouse gene id + """ expected_db_results = [namedtuple("lit_coeff", "val")(x*0.1) for x in range(1, 4)] @@ -293,9 +298,12 @@ class TestCorrelation(TestCase): self.assertEqual(expected_results, lit_results) def test_query_lit_correlation_for_db_empty(self): - """test that corr coeffient returned is 0 given the\ - db value if corr coefficient is empty""" - database_instance = DataBase() + """Test that corr coeffient returned is 0 given the\ + db value if corr coefficient is empty + """ + database_instance = mock.Mock() + database_instance.execute.return_value.fetchone.return_value = None + lit_results = fetch_lit_correlation_data(conn=database_instance, input_mouse_gene_id="12", gene_id="16", @@ -304,8 +312,9 @@ class TestCorrelation(TestCase): self.assertEqual(lit_results, ("16", 0)) def test_query_formatter(self): - """test for formatting a query given the query string and also the\ - values""" + """Test for formatting a query given the query string and also the\ + values + """ query = """ SELECT VALUE FROM LCorr @@ -330,16 +339,18 @@ class TestCorrelation(TestCase): self.assertEqual(formatted_query, expected_formatted_query) def test_query_formatter_no_query_values(self): - """test for formatting a query where there are no\ - string placeholder""" + """Test for formatting a query where there are no\ + string placeholder + """ query = """SELECT * FROM USERS""" formatted_query = query_formatter(query) self.assertEqual(formatted_query, query) def test_map_to_mouse_gene_id(self): - """test for converting a gene id to mouse geneid\ - given a species which is not mouse""" + """Test for converting a gene id to mouse geneid\ + given a species which is not mouse + """ database_instance = mock.Mock() test_data = [("Human", 14), (None, 9), ("Mouse", 15), ("Rat", 14)] @@ -361,9 +372,10 @@ class TestCorrelation(TestCase): 
@mock.patch("gn3.computations.correlations.lit_correlation_for_trait_list") def test_compute_all_lit_correlation(self, mock_lit_corr): - """test for compute all lit correlation which acts\ + """Test for compute all lit correlation which acts\ as an abstraction for lit_correlation_for_trait_list - and is used in the api/correlation/lit""" + and is used in the api/correlation/lit + """ database = mock.Mock() @@ -385,7 +397,7 @@ class TestCorrelation(TestCase): @mock.patch("gn3.computations.correlations.tissue_correlation_for_trait_list") @mock.patch("gn3.computations.correlations.process_trait_symbol_dict") def test_compute_all_tissue_correlation(self, process_trait_symbol, mock_tissue_corr): - """test for compute all tissue corelation which abstracts + """Test for compute all tissue corelation which abstracts api calling the tissue_correlation for trait_list""" primary_tissue_dict = {"trait_id": "1419792_at", diff --git a/tests/unit/computations/test_datasets.py b/tests/unit/computations/test_datasets.py new file mode 100644 index 0000000..f9e9c2b --- /dev/null +++ b/tests/unit/computations/test_datasets.py @@ -0,0 +1,219 @@ +"""Module contains tests from datasets""" +import json + +from unittest import TestCase +from unittest import mock + +from collections import namedtuple + +from gn3.computations.datasets import retrieve_trait_sample_data +from gn3.computations.datasets import get_query_for_dataset_sample +from gn3.computations.datasets import fetch_from_db_sample_data +from gn3.computations.datasets import create_dataset +from gn3.computations.datasets import dataset_creator_store +from gn3.computations.datasets import dataset_type_getter +from gn3.computations.datasets import fetch_dataset_type_from_gn2_api +from gn3.computations.datasets import fetch_dataset_sample_id +from gn3.computations.datasets import divide_into_chunks +from gn3.computations.datasets import get_traits_data + + +class TestDatasets(TestCase): + """Class contains tests for datasets""" + + 
@mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") + def test_retrieve_trait_sample_data(self, mock_fetch_sample_results): + """Test retrieving sample data\ + for trait from the dataset + """ + trait_name = "1419792_at" + dataset_id = "HC_M2_0606_P&" + dataset_type = "Publish" + + database = mock.Mock() + + dataset = { + "id": dataset_id, + "type": dataset_type, + "name": dataset_id + } + + fetch_results = [('BXD32', 8.001, None, None, 'BXD32')] + + mock_fetch_sample_results.return_value = fetch_results + + results = retrieve_trait_sample_data( + dataset, trait_name, database) + self.assertEqual(mock_fetch_sample_results.call_count, 1) + self.assertEqual(results, fetch_results) + + def test_query_for_dataset_sample(self): + """Test for getting query for sample data""" + + no_results = get_query_for_dataset_sample("does not exists") + + query_exists = get_query_for_dataset_sample("Publish") + + self.assertEqual(no_results, None) + self.assertIsInstance(query_exists, str) + + def test_fetch_from_db_sample_data(self): + """Test for function that fetches sample\ + results from the database + """ + + database_results = [('BXD31', 8.001, None, None, 'BXD31'), + ('BXD32', 7.884, None, None, 'BXD32'), + ('BXD42', 7.682, None, None, 'BXD42'), + ('BXD42', 7.682, None, None, 'BXD42'), + ('BXD40', 7.945, None, None, 'BXD40'), + ('BXD43', 7.873, None, None, 'BXD43') + ] + + database = mock.Mock() + db_cursor = mock.Mock() + db_cursor.execute.return_value = 6 + db_cursor.fetchall.return_value = database_results + database.cursor.return_value = db_cursor + + mock_pheno_query = """ + SELECT + Strain.Name, PublishData.value, PublishSE.error,NStrain.count, Strain.Name2 + WHERE + PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND + PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = 1419792_at AND + PublishFreeze.Id = '12' AND PublishData.StrainId = Strain.Id + Order BY + Strain.Name + """ + fetch_results = fetch_from_db_sample_data(mock_pheno_query, 
database) + + self.assertEqual(fetch_results, database_results) + + @mock.patch("gn3.computations.datasets.dataset_creator_store") + @mock.patch("gn3.computations.datasets.dataset_type_getter") + def test_create_dataset(self, mock_dataset_type, mock_store): + """Test function that creates/fetches required dataset\ + can either be published phenotype,genotype,Microarray or\ + user defined ->Temp + """ + probe_name = "HC_M2_0606_P" + probe_type = "ProbeSet" + + mock_dataset_creator = namedtuple( + 'ProbeSet', ["dataset_name", "dataset_type"]) + + mock_store.return_value = mock_dataset_creator + mock_dataset_type.return_value = probe_type + dataset = create_dataset( + dataset_type=None, dataset_name=probe_name) + + self.assertEqual(dataset.dataset_name, probe_name) + self.assertEqual(dataset.dataset_type, probe_type) + + def test_dataset_creator_store(self): + """Test for the store that returns the actual + function used to create the different \ + datasets + """ + results = dataset_creator_store("ProbeSet") + + self.assertTrue(results) + + def test_dataset_type_getter(self): + """Test for fetching type of dataset given\ + the dataset name + """ + + redis_instance = mock.Mock() + # fetched in redis + redis_instance.get.return_value = "ProbeSet" + results = dataset_type_getter("HC_M2_0_P", redis_instance) + self.assertEqual(results, "ProbeSet") + + @mock.patch("gn3.computations.datasets.requests") + def test_fetch_dataset_type_from_gn2_api(self, mock_request): + """Test for function that tests fetching\ + all datasets from gn2 api in order to store\ + in redis + """ + + expected_json_results = {"datasets": { + "arabidopsis": { + "BayXSha": { + "Genotypes": [ + [ + "None", + "BayXShaGeno", + "BayXSha Genotypes" + ] + ], + "Phenotypes": [ + [ + "642", + "BayXShaPublish", + "BayXSha Published Phenotypes" + ] + ] + } + } + }} + + request_results = json.dumps(expected_json_results) + mock_request.get.return_value.content = request_results + results = 
fetch_dataset_type_from_gn2_api("HC_M2_0_P") + expected_results = { + "BayXShaGeno": "Geno", + "642": "Publish" + } + + self.assertEqual(expected_results, results) + + def test_fetch_dataset_sample_id(self): + """Get from the database the sample\ + id only if in the samplelists + """ + + expected_results = {"B6D2F1": 1, "BXD1": 4, "BXD11": 10, + "BXD12": 11, "BXD13": 12, "BXD15": 14, "BXD16": 15} + + database_instance = mock.Mock() + database_cursor = mock.Mock() + + database_cursor.execute.return_value = 5 + database_cursor.fetchall.return_value = list(expected_results.items()) + database_instance.cursor.return_value = database_cursor + strain_list = ["B6D2F1", "BXD1", "BXD11", + "BXD12", "BXD13", "BXD16", "BXD15"] + + results = fetch_dataset_sample_id( + samplelist=strain_list, database=database_instance, species="mouse") + + self.assertEqual(results, expected_results) + + @mock.patch("gn3.computations.datasets.fetch_from_db_sample_data") + @mock.patch("gn3.computations.datasets.divide_into_chunks") + def test_get_traits_data(self, mock_divide_into_chunks, mock_fetch_samples): + """Test for function to get data\ + of traits in dataset + """ + _expected_results = {'AT_DSAFDS': [ + 12, 14, 13, 23, 12, 14, 13, 23, 12, 14, 13, 23]} + database = mock.Mock() + sample_id = [1, 2, 7, 3, 22, 8] + mock_divide_into_chunks.return_value = [ + [1, 2, 7], [3, 22, 8], [5, 22, 333]] + mock_fetch_samples.return_value = ("AT_DSAFDS", 12, 14, 13, 23) + results = get_traits_data(sample_id, database, "HC_M2", "Publish") + + self.assertEqual({}, dict(results)) + + def test_divide_into_chunks(self): + """Test for dividing a list into a given number of\ + chunks + """ + results = divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) + + expected_results = [[1, 2, 7], [3, 22, 8], [5, 22, 333]] + + self.assertEqual(results, expected_results) diff --git a/tests/unit/computations/test_trait.py b/tests/unit/computations/test_trait.py new file mode 100644 index 0000000..feb97c6 
--- /dev/null +++ b/tests/unit/computations/test_trait.py @@ -0,0 +1,84 @@ +"""Module contains tests for creating traits""" +from unittest import TestCase +from unittest import mock + +from gn3.computations.traits import fetch_trait +from gn3.computations.traits import get_trait_sample_data +from gn3.computations.traits import get_trait_info_data + + +class TestTrait(TestCase): + """Class contains tests for creating traits""" + + @mock.patch("gn3.computations.traits.get_trait_sample_data") + def test_fetch_trait(self, get_sample_data): + """Test for creating/fetching trait""" + + expected_sample_data = { + "A/Y": 12.3, + "WQC": 11.1 + } + + database = mock.Mock() + + get_sample_data.return_value = expected_sample_data + + expected_trait = { + "trait_name": "AXFDSF_AT", + "dataset": None, + "trait_data": expected_sample_data + } + results = fetch_trait(dataset=None, + trait_name="AXFDSF_AT", + database=database) + + self.assertEqual(results, expected_trait) + self.assertEqual(get_sample_data.call_count, 1) + + @mock.patch("gn3.computations.traits.retrieve_trait_sample_data") + def test_get_trait_sample_data(self, mock_retrieve_sample_data): + """Test for getting sample data from either\ + the trait's dataset or from redis + """ + + trait_dataset = mock.Mock() + dataset_trait_sample_data = [ + ('129S1/SvImJ', 7.433, None, None, '129S1/SvImJ'), + ('A/J', 7.596, None, None, 'A/J'), + ('AKR/J', 7.774, None, None, 'AKR/J'), + ('B6D2F1', 7.707, None, None, 'B6D2F1')] + mock_retrieve_sample_data.return_value = dataset_trait_sample_data + + trait_name = "1426679_at" + + database = mock.Mock() + + results = get_trait_sample_data( + trait_dataset, trait_name, database) + + expected_results = { + "129S1/SvImJ": 7.433, + "A/J": 7.596, + "AKR/J": 7.774, + "B6D2F1": 7.707 + } + + self.assertEqual(results, expected_results) + + def test_get_trait_info_data(self): + """Test for getting info data related\ + to trait + """ + + results = get_trait_info_data( + trait_name="AXSF_AT", 
trait_dataset=mock.Mock(), database_instance=None) + expected_trait_info = { + "description": "", + "trait_display_name": "", + "abbreviation": "", + "chr": "", + "mb": "", + "locus": "" + } + + self.assertEqual(results, expected_trait_info) |