aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Kabui2021-03-16 10:36:58 +0300
committerGitHub2021-03-16 10:36:58 +0300
commit43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c (patch)
tree73683272f32cffc860497a93b5c844c272252e67
parent995f1dbd081eb64ad177f929615a4edee01cb68f (diff)
downloadgenenetwork3-43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c.tar.gz
Refactor/clean up correlations (#4)
* initial commit for Refactor/clean-up-correlation * add python scipy dependency * initial commit for sample correlation * initial commit for sample correlation endpoint * initial commit for integration and unittest * initial commit for registering correlation blueprint * add and modify unittest and integration tests for correlation * Add compute_all_sample_corr method for correlation * add scipy to requirements.txt file * add tissue correlation for trait list * add unittest for tissue correlation * add lit correlation for trait list * add unittests for lit correlation for trait list * modify lit correlation for trait list * add unittests for lit correlation for trait list * add correlation method in dynamic url * add file format for expected structure input while doing sample correlation * modify input data structure -> add trait id * update tests for sample r correlation * add compute all lit correlation method * add endpoint for computing lit_corr * add unit and integration tests for computing lit corr * add /api/correlation/tissue_corr/{corr_method} endpoint for tissue correlation * add unittest and integration tests for tissue correlation Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>
-rw-r--r--gn3/api/correlation.py77
-rw-r--r--gn3/computations/correlations.py305
-rw-r--r--guix.scm1
-rw-r--r--requirements.txt13
-rw-r--r--tests/integration/test_correlation.py118
-rw-r--r--tests/unit/computations/correlation_test_data/target_dataset.json230
-rw-r--r--tests/unit/computations/correlation_test_data/this_trait_data.json76
-rw-r--r--tests/unit/computations/test_correlation.py399
8 files changed, 1149 insertions, 70 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 217b7ce..56b8381 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -1,44 +1,63 @@
-"""Endpoints for computing correlation"""
-import time
-from flask import Blueprint
+"""Endpoints for running correlations"""
+from unittest import mock
+
from flask import jsonify
+from flask import Blueprint
from flask import request
-from flask import g
-from sqlalchemy import create_engine
-from default_settings import SQL_URI
-from gn3.correlation.correlation_computations import compute_correlation
+from gn3.computations.correlations import compute_all_sample_correlation
+from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import compute_all_tissue_correlation
+
correlation = Blueprint("correlation", __name__)
-# xtodo implement neat db setup
-@correlation.before_request
-def connect_db():
- """add connection to db method"""
- print("@app.before_request connect_db")
- db_connection = getattr(g, '_database', None)
- if db_connection is None:
- print("Get new database connector")
- g.db = g._database = create_engine(SQL_URI, encoding="latin1")
+@correlation.route("/sample_r/<string:corr_method>", methods=["POST"])
+def compute_sample_r(corr_method="pearson"):
+    """Compute sample-r correlations for a POSTed trait and target dataset.
+
+    The JSON body must be an object with a ``this_trait`` entry (the primary
+    trait together with its sample data) and a ``target_dataset`` entry (the
+    traits to correlate against). ``corr_method`` comes from the URL and is
+    forwarded unchanged to ``compute_all_sample_correlation``.
+
+    Returns ``{"corr_results": [...]}`` as JSON, or a 400 response carrying
+    an ``error`` message when the body is missing or incomplete.
+    """
+    correlation_input = request.get_json()
+
+    # The endpoint this one replaces answered a missing body with a 400;
+    # without this guard the ``.get`` calls below raise AttributeError and
+    # the client receives a 500 instead.
+    if not isinstance(correlation_input, dict):
+        return jsonify({"error": "Bad request"}), 400
+
+    # xtodo move code below to compute_all_sample_correlation
+    this_trait_data = correlation_input.get("this_trait")
+    target_datasets = correlation_input.get("target_dataset")
+    if this_trait_data is None or target_datasets is None:
+        return jsonify({"error": "Bad request"}), 400
+
+    correlation_results = compute_all_sample_correlation(corr_method=corr_method,
+                                                         this_trait=this_trait_data,
+                                                         target_dataset=target_datasets)
+
+    return jsonify({
+        "corr_results": correlation_results
+    })
+
- g.initial_time = time.time()
+@correlation.route("/lit_corr/<string:species>/<int:gene_id>", methods=["POST"])
+def compute_lit_corr(species=None, gene_id=None):
+ """API endpoint for literature correlation.
+
+ The POSTed JSON (the target traits) is handed, together with the species
+ and gene id taken from the URL, to compute_all_lit_correlation and the
+ result is returned as JSON. Results for lit correlation are fetched from
+ the database; this is the only case where the db might be needed for
+ actual computing of the correlation results."""
+ # NOTE(review): a unittest Mock stands in for the database connection
+ # here, so no real query is executed -- replace it with a real connection.
+ database_instance = mock.Mock()
+ target_traits_gene_ids = request.get_json()
-@correlation.route("/corr_compute", methods=["POST"])
-def corr_compute_page():
- """api for doing correlation"""
+ lit_corr_results = compute_all_lit_correlation(
+ database_instance=database_instance, trait_lists=target_traits_gene_ids,
+ species=species, gene_id=gene_id)
- correlation_input = request.json
+ return jsonify(lit_corr_results)
- if correlation_input is None:
- return jsonify({"error": str("Bad request")}), 400
- try:
- corr_results = compute_correlation(
- correlation_input_data=correlation_input)
+@correlation.route("/tissue_corr/<string:corr_method>", methods=["POST"])
+def compute_tissue_corr(corr_method="pearson"):
+ """API endpoint for doing tissue correlation.
+
+ Reads the "primary_tissue" and "target_tissues" entries of the POSTed
+ JSON and returns the output of compute_all_tissue_correlation as JSON;
+ corr_method is taken from the URL."""
+ tissue_input_data = request.get_json()
+ # Both keys are required: a body without them raises KeyError here.
+ primary_tissue_dict = tissue_input_data["primary_tissue"]
+ target_tissues_dict_list = tissue_input_data["target_tissues"]
- except Exception as error: # pylint: disable=broad-except
- return jsonify({"error": str(error)})
+ results = compute_all_tissue_correlation(primary_tissue_dict=primary_tissue_dict,
+ target_tissues_dict_list=target_tissues_dict_list,
+ corr_method=corr_method)
- return {"correlation_results": corr_results}
+ return jsonify(results) \ No newline at end of file
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
new file mode 100644
index 0000000..21f5929
--- /dev/null
+++ b/gn3/computations/correlations.py
@@ -0,0 +1,305 @@
+"""module contains code for correlations"""
+from typing import List
+from typing import Tuple
+from typing import Optional
+from typing import Callable
+
+import scipy.stats # type: ignore
+
+
+def compute_sum(rhs: int, lhs: int) -> int:
+    """Add two numbers; kept from the initial test scaffolding."""
+    total = rhs + lhs
+    return total
+
+
+def normalize_values(a_values: List, b_values: List) -> Tuple[List[float], List[float], int]:
+    """
+    Trim two lists of values to contain only the values they both share
+
+    Given two lists of sample values, trim each list so that it contains
+    only the samples that contain a value in both lists. Also returns
+    the number of such samples. A sample counts as missing only when it
+    is ``None``; a value of zero is kept.
+
+    >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
+    ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
+    >>> normalize_values([0, 1.5, None], [2.0, None, 3.0])
+    ([0], [2.0], 1)
+
+    """
+    a_new = []
+    b_new = []
+    for a_val, b_val in zip(a_values, b_values):
+        # Compare both sides against None explicitly: a truthiness test on
+        # a_val would also throw away legitimate zero-valued samples.
+        if a_val is not None and b_val is not None:
+            a_new.append(a_val)
+            b_new.append(b_val)
+    return a_new, b_new, len(a_new)
+
+
+def compute_corr_coeff_p_value(primary_values: List,
+                               target_values: List,
+                               corr_method: str) -> Tuple[float, float]:
+    """Correlate two array-like inputs with the requested method.
+
+    ``corr_method`` selects "pearson", "spearman" or "bicor" (biweight mid
+    correlation); any other name falls back to spearman. The return value
+    is the pair produced by the selected function, unpacked as
+    (correlation coefficient, p value).
+    """
+    corr_mapping = {
+        "bicor": do_bicor,
+        "pearson": scipy.stats.pearsonr,
+        "spearman": scipy.stats.spearmanr
+    }
+
+    # The fallback has to be the spearman *function*: the string "spearman"
+    # is not callable and made every unknown method fail with a TypeError.
+    use_corr_method = corr_mapping.get(corr_method, scipy.stats.spearmanr)
+
+    corr_coeffient, p_val = use_corr_method(primary_values, target_values)
+
+    return (corr_coeffient, p_val)
+
+
+def compute_sample_r_correlation(corr_method: str, trait_vals, target_samples_vals) ->\
+        Optional[Tuple[float, float, int]]:
+    """Correlate a primary trait's values with a target trait's values.
+
+    Both lists are first reduced to the samples present in each of them.
+    Returns (coefficient, p value, number of shared samples), or None when
+    five or fewer samples overlap or no coefficient was produced.
+    """
+    primary_vals, target_vals, overlap = normalize_values(
+        trait_vals, target_samples_vals)
+
+    # More than five shared samples are required before correlating.
+    if overlap <= 5:
+        return None
+
+    coefficient, p_value = compute_corr_coeff_p_value(
+        primary_values=primary_vals,
+        target_values=target_vals,
+        corr_method=corr_method)
+
+    # xtodo check if corr_coefficient is None should use numpy.isNan scipy.isNan is deprecated
+    if coefficient is None:
+        return None
+
+    return (coefficient, p_value, overlap)
+
+
+def do_bicor(x_val, y_val) -> Tuple[float, float]:
+    """Placeholder for biweight mid correlation (not implemented yet).
+
+    The intended implementation uses the astropy stats package, which is
+    not packaged in guix. For now both arguments are handed back unchanged
+    as a tuple.
+    """
+    unchanged_inputs = (x_val, y_val)
+    return unchanged_inputs
+
+
+def filter_shared_sample_keys(this_samplelist, target_samplelist) -> Tuple[List, List]:
+    """Pair up the values of the samples found in both sample mappings.
+
+    Returns (values from this_samplelist, values from target_samplelist)
+    for the shared keys, in the iteration order of target_samplelist.
+    """
+    shared_keys = [key for key in target_samplelist if key in this_samplelist]
+
+    this_vals = [this_samplelist[key] for key in shared_keys]
+    target_vals = [target_samplelist[key] for key in shared_keys]
+
+    return (this_vals, target_vals)
+
+
+def compute_all_sample_correlation(this_trait, target_dataset, corr_method="pearson") -> List:
+    """Correlate one trait's sample data against every trait of a dataset.
+
+    ``this_trait`` and each entry of ``target_dataset`` must carry a
+    "trait_sample_data" mapping. Returns a list holding one
+    ``{trait_id: {"corr_coeffient", "p_value", "num_overlap"}}`` dict for
+    each target trait that produced a correlation; the others are left out.
+    """
+    primary_samples = this_trait["trait_sample_data"]
+
+    all_results = []
+
+    for target_trait in target_dataset:
+        trait_id = target_trait.get("trait_id")
+        target_samples = target_trait["trait_sample_data"]
+        primary_vals, target_vals = filter_shared_sample_keys(
+            primary_samples, target_samples)
+
+        outcome = compute_sample_r_correlation(
+            corr_method=corr_method, trait_vals=primary_vals, target_samples_vals=target_vals)
+
+        # No result for this trait: skip it.
+        if outcome is None:
+            continue
+
+        coefficient, p_value, overlap = outcome
+        all_results.append({trait_id: {"corr_coeffient": coefficient,
+                                       "p_value": p_value,
+                                       "num_overlap": overlap}})
+
+    return all_results
+
+
+def tissue_lit_corr_for_probe_type(corr_type: str, top_corr_results):
+    """Stub for the follow-up lit/tissue correlation on top results.
+
+    Always returns a dict containing ``"lit": 1``; unless ``corr_type`` is
+    "lit" or "literature" the given ``top_corr_results`` are attached under
+    "top_corr_results" as well.
+    """
+    outcome = {"lit": 1}
+
+    if corr_type in ("lit", "literature"):
+        return outcome
+
+    outcome["top_corr_results"] = top_corr_results
+    # The "tissue" and "sample" follow-up correlations on the top results
+    # are not implemented yet.
+
+    return outcome
+
+
+def tissue_correlation_for_trait_list(primary_tissue_vals: List,
+                                      target_tissues_values: List,
+                                      corr_method: str,
+                                      compute_corr_p_value: Callable =
+                                      compute_corr_coeff_p_value) -> dict:
+    """Correlate the primary tissue values with one target's tissue values.
+
+    Both inputs are arrays. Returns a dict with the correlation coefficient
+    ("tissue_corr"), the "p_value" and the "tissue_number", which is the
+    length of the primary values.
+    """
+
+    # ax :todo assert that each target tissue has the same length as the primary tissue
+
+    coefficient, p_value = compute_corr_p_value(
+        primary_values=primary_tissue_vals,
+        target_values=target_tissues_values,
+        corr_method=corr_method)
+
+    return {
+        "tissue_corr": coefficient,
+        "p_value": p_value,
+        "tissue_number": len(primary_tissue_vals)
+    }
+
+
+def fetch_lit_correlation_data(database,
+                               input_mouse_gene_id: Optional[str],
+                               gene_id: str,
+                               mouse_gene_id: Optional[str] = None) -> Tuple[str, float]:
+    """Fetch the lit correlation value for a pair of mouse gene ids.
+
+    Returns ``(gene_id, value)``; the value is 0 when ``mouse_gene_id`` is
+    None or contains ";", or when no row is found for the pair.
+    """
+    if mouse_gene_id is None or ";" in mouse_gene_id:
+        return (gene_id, 0)
+
+    # NOTE(review): the ids are interpolated straight into the SQL text;
+    # consider bound parameters if the database object supports them.
+    query = """
+ SELECT VALUE
+ FROM LCorrRamin3
+ WHERE GeneId1='%s' and
+ GeneId2='%s'
+ """
+
+    id_pair = (str(mouse_gene_id), str(input_mouse_gene_id))
+
+    row = database.execute(query_formatter(query, *id_pair)).fetchone()
+    if row is None:
+        # Nothing for (target, input): retry with the two ids swapped.
+        row = database.execute(
+            query_formatter(query, *tuple(reversed(id_pair)))).fetchone()
+
+    if row:
+        return (gene_id, row.val)
+
+    return (gene_id, 0)
+
+
+def lit_correlation_for_trait_list(database,
+                                   target_trait_lists: List,
+                                   species: Optional[str] = None,
+                                   trait_gene_id: Optional[str] = None) -> List:
+    """Fetch lit correlation results for every target trait with a gene id.
+
+    The base trait's gene id and each target's gene id are mapped to mouse
+    gene ids before the lookup. Returns a list of
+    ``{"gene_id": ..., "lit_corr": ...}`` dicts; targets whose "gene_id"
+    is missing or falsy are skipped.
+    """
+    base_mouse_gene_id = map_to_mouse_gene_id(
+        database=database, species=species, gene_id=trait_gene_id)
+
+    collected = []
+
+    for trait in target_trait_lists:
+        target_gene_id = trait.get("gene_id")
+        if not target_gene_id:
+            continue
+
+        target_mouse_gene_id = map_to_mouse_gene_id(
+            database=database, species=species, gene_id=target_gene_id)
+
+        corr_data = fetch_lit_correlation_data(
+            database=database, input_mouse_gene_id=base_mouse_gene_id,
+            gene_id=target_gene_id, mouse_gene_id=target_mouse_gene_id)
+
+        collected.append(dict(zip(("gene_id", "lit_corr"), corr_data)))
+
+    return collected
+
+
+def query_formatter(query_string: str, *query_values):
+    """Fill the %-style placeholders of ``query_string`` with ``query_values``.
+
+    Assumes the number of placeholders equals the number of query values.
+    """
+    formatted_query = query_string % query_values
+    return formatted_query
+
+
+def map_to_mouse_gene_id(database, species: Optional[str], gene_id: Optional[str]) -> Optional[str]:
+    """Map a gene id of a non-mouse species to the matching mouse gene id.
+
+    Returns None when ``species`` or ``gene_id`` is None, when ``species``
+    is not a plain identifier, or when the lookup finds no row. For
+    "mouse" the gene id is returned as is. Otherwise the ``mouse`` column
+    of the first GeneIDXRef row whose ``species`` column equals ``gene_id``
+    is returned.
+    """
+    # AK:xtodo move the None checks out of this function
+    if None in (species, gene_id):
+        return None
+    if species == "mouse":
+        return gene_id
+
+    # The species names the GeneIDXRef column to match on (presumably one
+    # column per species -- confirm against the schema), so it is spliced
+    # in as an identifier. Quoting it, as before, compared two string
+    # literals and never looked at a column. Only plain identifiers are
+    # accepted because a column name cannot be escaped like a value.
+    if not str(species).isidentifier():
+        return None
+
+    # NOTE(review): gene_id is still interpolated as text; switch to bound
+    # parameters if the database object supports them.
+    query = "SELECT mouse FROM GeneIDXRef WHERE %s = '%s'" % (species, gene_id)
+
+    results = database.execute(query).fetchone()
+
+    return results.mouse if results is not None else None
+
+
+def compute_all_lit_correlation(database_instance, trait_lists: List, species: str, gene_id):
+    """Wrap lit_correlation_for_trait_list and return ``{"lit_results": [...]}``."""
+    # xtodo to be refactored
+
+    return {
+        "lit_results": lit_correlation_for_trait_list(
+            database=database_instance,
+            target_trait_lists=trait_lists,
+            species=species,
+            trait_gene_id=gene_id)
+    }
+
+
+def compute_all_tissue_correlation(primary_tissue_dict: dict,
+                                   target_tissues_dict_list: List,
+                                   corr_method: str):
+    """Run tissue_correlation_for_trait_list for every target tissue.
+
+    ``primary_tissue_dict`` must contain "tissue_values"; each target is
+    read through its "trait_id" and "tissue_values" entries. Returns a
+    dict mapping each target's trait id to its tissue correlation result.
+    """
+    primary_tissue_vals = primary_tissue_dict["tissue_values"]
+
+    return {
+        target.get("trait_id"): tissue_correlation_for_trait_list(
+            primary_tissue_vals=primary_tissue_vals,
+            target_tissues_values=target.get("tissue_values"),
+            corr_method=corr_method)
+        for target in target_tissues_dict_list
+    }
diff --git a/guix.scm b/guix.scm
index 45bb3fa..503694c 100644
--- a/guix.scm
+++ b/guix.scm
@@ -73,6 +73,7 @@
("python-flask" ,python-flask)
("python-pylint" python-pylint)
("python-numpy" ,python-numpy)
+ ("python-scipy" ,python-scipy)
("python-mypy" ,python-mypy)
("python-mypy-extensions" ,python-mypy-extensions)
("python-redis" ,python-redis)
diff --git a/requirements.txt b/requirements.txt
index e495e19..e4dc881 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,9 +4,16 @@ Flask==1.1.2
itsdangerous==1.1.0
Jinja2==2.11.3
MarkupSafe==1.1.1
-mysqlclient==2.0.1
+mccabe==0.6.1
+mypy==0.790
+mypy-extensions==0.4.3
numpy==1.20.1
+pycparser==2.20
+pylint==2.5.3
+redis==3.5.3
scipy==1.6.0
-SQLAlchemy==1.3.20
-sqlalchemy-stubs==0.4
+six==1.15.0
+toml==0.10.2
+typed-ast==1.4.2
+typing-extensions==3.7.4.3
Werkzeug==1.0.1
diff --git a/tests/integration/test_correlation.py b/tests/integration/test_correlation.py
index 33e0de9..488a8a4 100644
--- a/tests/integration/test_correlation.py
+++ b/tests/integration/test_correlation.py
@@ -1,57 +1,99 @@
-"""Integration tests for correlation api"""
-
-import os
-import json
-import unittest
+"""module contains integration tests for correlation"""
+from unittest import TestCase
from unittest import mock
-
from gn3.app import create_app
-def file_path(relative_path):
- """getting abs path for file """
- dir_name = os.path.dirname(os.path.abspath(__file__))
- split_path = relative_path.split("/")
- new_path = os.path.join(dir_name, *split_path)
- return new_path
+class CorrelationIntegrationTest(TestCase):
+ """class for correlation integration tests"""
-
-class CorrelationAPITest(unittest.TestCase):
- # currently disable
- """Test cases for the Correlation API"""
def setUp(self):
self.app = create_app().test_client()
- with open(file_path("correlation_data.json")) as json_file:
- self.correlation_data = json.load(json_file)
+ def test_fail(self):
+ """Placeholder test; despite its name it always passes."""
+ self.assertEqual(2, 2)
+
+ @mock.patch("gn3.api.correlation.compute_all_sample_correlation")
+ def test_sample_r_correlation(self, mock_compute_samples):
+ """Test /api/correlation/sample_r/{method}"""
+ # compute_all_sample_correlation is patched out, so this only checks
+ # that the endpoint wraps its return value under "corr_results".
+ this_trait_data = {
+ "trait_id": "1455376_at",
+ "trait_sample_data": {
+ "C57BL/6J": "6.138",
+ "DBA/2J": "6.266",
+ "B6D2F1": "6.434",
+ "D2B6F1": "6.55",
+ "BXS2": "6.7"
+ }}
+
+ traits_dataset = [
+ {
+ "trait_id": "14192_at",
+ "trait_sample_data": {
+ "DBA/2J": "7.13",
+ "D2B6F1": "5.65",
+ "BXD2": "1.46"
+ }
+ }
+ ]
+
+ correlation_input_data = {"corr_method": "pearson",
+ "this_trait": this_trait_data,
+ "target_dataset": traits_dataset}
+
+ # Canned return value for the patched function; the endpoint is expected
+ # to hand it back unchanged.
+ expected_results = [
+ {
+ "sample_r": "-0.407",
+ "p_value": "6.234e-04"
+ },
+ {
+ "sample_r": "0.398",
+ "sample_p": "8.614e-04"
+ }
+ ]
+
+ mock_compute_samples.return_value = expected_results
+
+ api_response = {
+ "corr_results": expected_results
+ }
+
+ response = self.app.post("/api/correlation/sample_r/pearson",
+ json=correlation_input_data, follow_redirects=True)
- with open(file_path("expected_corr_results.json")) as results_file:
- self.correlation_results = json.load(results_file)
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.get_json(), api_response)
- def tearDown(self):
- self.correlation_data = ""
+ @mock.patch("gn3.api.correlation.compute_all_lit_correlation")
+ def test_lit_correlation(self, mock_compute_corr):
+ """Test api/correlation/lit_corr/{species}/{gene_id}"""
+ # The computation is patched out: only the call count and the status
+ # code of the response are checked.
- self.correlation_results = ""
+ mock_compute_corr.return_value = []
- @mock.patch("gn3.api.correlation.compute_correlation")
- def test_corr_compute(self, compute_corr):
- """Test that the correct response in correlation"""
+ post_data = [{"gene_id": 8, "lit_corr": 1}, {
+ "gene_id": 12, "lit_corr": 0.3}]
- compute_corr.return_value = self.correlation_results
- response = self.app.post("/api/correlation/corr_compute",
- json=self.correlation_data,
- follow_redirects=True)
+ response = self.app.post(
+ "/api/correlation/lit_corr/mouse/16", json=post_data, follow_redirects=True)
+ self.assertEqual(mock_compute_corr.call_count, 1)
self.assertEqual(response.status_code, 200)
- @mock.patch("gn3.api.correlation.compute_correlation")
- def test_corr_compute_failed_request(self, compute_corr):
- """test taht cormpute requests fails """
+ @mock.patch("gn3.api.correlation.compute_all_tissue_correlation")
+ def test_tissue_correlation(self, mock_tissue_corr):
+ """Test api/correlation/tissue_corr/{corr_method}"""
+ # The computation is patched out: only the status code is checked.
+ mock_tissue_corr.return_value = {}
- compute_corr.return_value = self.correlation_results
+ primary_dict = {"trait_id": "1449593_at", "tissue_values": [1, 2, 3]}
- response = self.app.post("/api/correlation/corr_compute",
- json=None,
- follow_redirects=True)
+ target_tissue_dict_list = [
+ {"trait_id": "1449593_at", "tissue_values": [1, 2, 3]}]
- self.assertEqual(response.status_code, 400)
+ tissue_corr_input_data = {"primary_tissue": primary_dict,
+ "target_tissues": target_tissue_dict_list}
+
+ response = self.app.post("/api/correlation/tissue_corr/spearman",
+ json=tissue_corr_input_data, follow_redirects=True)
+
+ self.assertEqual(response.status_code, 200)
diff --git a/tests/unit/computations/correlation_test_data/target_dataset.json b/tests/unit/computations/correlation_test_data/target_dataset.json
new file mode 100644
index 0000000..f6757b6
--- /dev/null
+++ b/tests/unit/computations/correlation_test_data/target_dataset.json
@@ -0,0 +1,230 @@
+[
+ {
+ "trait_id":"1425637_at",
+ "sample_data":{
+ "BXD1":7.081,
+ "BXD2":6.912,
+ "BXD5":7.153,
+ "BXD6":6.92,
+ "BXD8":6.886,
+ "BXD9":7.406,
+ "BXD11":6.917,
+ "BXD12":6.914,
+ "BXD13":6.964,
+ "BXD15":6.863,
+ "BXD16":7.06,
+ "BXD19":7.002,
+ "BXD20":7.158,
+ "BXD21":7.039,
+ "BXD22":7.036,
+ "BXD23":6.962,
+ "BXD24":6.946,
+ "BXD27":7.084,
+ "BXD28":7.154,
+ "BXD29":6.932,
+ "BXD31":6.994,
+ "BXD32":6.846,
+ "BXD33":7.078,
+ "BXD34":6.94,
+ "BXD38":6.992,
+ "BXD39":7.048,
+ "BXD40":7.14,
+ "BXD42":6.98,
+ "BXD43":7.072,
+ "BXD44":7.045,
+ "BXD45":6.739,
+ "BXD48":7.07,
+ "BXD48a":6.998,
+ "BXD50":7.053,
+ "BXD51":6.922,
+ "BXD55":6.782,
+ "BXD60":7.042,
+ "BXD61":6.887,
+ "BXD62":6.86,
+ "BXD63":6.815,
+ "BXD64":7.424,
+ "BXD65":7.216,
+ "BXD65a":6.934,
+ "BXD65b":6.893,
+ "BXD66":6.935,
+ "BXD67":6.985,
+ "BXD68":7.044,
+ "BXD69":6.908,
+ "BXD70":6.864,
+ "BXD73":7.074,
+ "BXD73a":6.986,
+ "BXD74":6.914,
+ "BXD75":6.98,
+ "BXD76":6.772,
+ "BXD77":7.121,
+ "BXD79":6.829,
+ "BXD83":7.018,
+ "BXD84":6.948,
+ "BXD85":7.112,
+ "BXD86":6.858,
+ "BXD87":6.865,
+ "BXD89":7.034,
+ "BXD90":6.901,
+ "BXD93":6.97,
+ "BXD94":7.112,
+ "BXD98":6.954,
+ "BXD99":6.912,
+ "C57BL/6J":7.121,
+ "DBA/2J":6.821,
+ "B6D2F1":6.998,
+ "D2B6F1":6.967
+ }
+ },
+ {
+ "trait_id":"1455376_at",
+ "trait_sample_data":{
+ "BXD1":10.929,
+ "BXD2":11.279,
+ "BXD5":11.941,
+ "BXD6":11.407,
+ "BXD8":12.048,
+ "BXD9":11.694,
+ "BXD11":11.534,
+ "BXD12":11.048,
+ "BXD13":12.274,
+ "BXD15":12.077,
+ "BXD16":11.91,
+ "BXD19":11.797,
+ "BXD20":11.67,
+ "BXD21":12.062,
+ "BXD22":12.49,
+ "BXD23":11.957,
+ "BXD24":11.766,
+ "BXD27":13.026,
+ "BXD28":12.184,
+ "BXD29":11.792,
+ "BXD31":12.36,
+ "BXD32":10.608,
+ "BXD33":11.817,
+ "BXD34":11.213,
+ "BXD38":11.212,
+ "BXD39":12.023,
+ "BXD40":12.892,
+ "BXD42":11.518,
+ "BXD43":12.306,
+ "BXD44":11.932,
+ "BXD45":10.982,
+ "BXD48":12.055,
+ "BXD48a":12.572,
+ "BXD50":11.696,
+ "BXD51":11.828,
+ "BXD55":10.523,
+ "BXD60":11.403,
+ "BXD61":11.378,
+ "BXD62":11.887,
+ "BXD63":11.776,
+ "BXD64":12.37,
+ "BXD65":11.122,
+ "BXD65a":10.853,
+ "BXD65b":11.46,
+ "BXD66":11.546,
+ "BXD67":12.198,
+ "BXD68":13.21,
+ "BXD69":11.581,
+ "BXD70":12.338,
+ "BXD73":11.876,
+ "BXD73a":11.75,
+ "BXD74":11.898,
+ "BXD75":11.718,
+ "BXD76":11.926,
+ "BXD77":12.326,
+ "BXD79":12.052,
+ "BXD83":11.478,
+ "BXD84":11.494,
+ "BXD85":11.435,
+ "BXD86":11.476,
+ "BXD87":11.456,
+ "BXD89":11.547,
+ "BXD90":12.452,
+ "BXD93":12.921,
+ "BXD94":11.892,
+ "BXD98":12.614,
+ "BXD99":13.142,
+ "C57BL/6J":12.138,
+ "DBA/2J":11.394,
+ "B6D2F1":11.615,
+ "D2B6F1":11.918
+ }
+ },
+ {
+ "trait_id":"1444351_at",
+ "trait_sample_data":{
+ "BXD1":17.847,
+ "BXD2":15.262,
+ "BXD5":18.054,
+ "BXD6":17.24,
+ "BXD8":15.735,
+ "BXD9":17.876,
+ "BXD11":17.359,
+ "BXD12":17.906,
+ "BXD13":16.084,
+ "BXD15":17.173,
+ "BXD16":15.941,
+ "BXD19":17.721,
+ "BXD20":17.548,
+ "BXD21":17.242,
+ "BXD22":17.012,
+ "BXD23":17.139,
+ "BXD24":17.904,
+ "BXD27":17.008,
+ "BXD28":17.441,
+ "BXD29":17.606,
+ "BXD31":17.35,
+ "BXD32":17.859,
+ "BXD33":17.453,
+ "BXD34":15.924,
+ "BXD38":17.271,
+ "BXD39":18.034,
+ "BXD40":17.844,
+ "BXD42":17.444,
+ "BXD43":17.676,
+ "BXD44":17.71,
+ "BXD45":17.059,
+ "BXD48":17.334,
+ "BXD48a":17.398,
+ "BXD50":17.343,
+ "BXD51":17.514,
+ "BXD55":14.995,
+ "BXD60":18.03,
+ "BXD61":17.628,
+ "BXD62":17.431,
+ "BXD63":16.96,
+ "BXD64":18.199,
+ "BXD65":17.593,
+ "BXD65a":17.49,
+ "BXD65b":17.268,
+ "BXD66":16.602,
+ "BXD67":17.306,
+ "BXD68":17.167,
+ "BXD69":17.706,
+ "BXD70":17.287,
+ "BXD73":17.412,
+ "BXD73a":16.224,
+ "BXD74":16.873,
+ "BXD75":17.202,
+ "BXD76":16.934,
+ "BXD77":17.926,
+ "BXD79":16.55,
+ "BXD83":17.042,
+ "BXD84":17.134,
+ "BXD85":18.021,
+ "BXD86":17.194,
+ "BXD87":17.075,
+ "BXD89":17.511,
+ "BXD90":17.168,
+ "BXD93":17.817,
+ "BXD94":18.04,
+ "BXD98":16.744,
+ "BXD99":17.304,
+ "C57BL/6J":17.084,
+ "DBA/2J":17.316,
+ "B6D2F1":16.964,
+ "D2B6F1":17.086
+ }
+ }
+] \ No newline at end of file
diff --git a/tests/unit/computations/correlation_test_data/this_trait_data.json b/tests/unit/computations/correlation_test_data/this_trait_data.json
new file mode 100644
index 0000000..7c57fdb
--- /dev/null
+++ b/tests/unit/computations/correlation_test_data/this_trait_data.json
@@ -0,0 +1,76 @@
+{
+ "trait_id":"1457784_at",
+ "trait_sample_data":{
+ "BXD1": 6.03,
+ "BXD2": 6.001,
+ "BXD5": 6.154,
+ "BXD6": 6.179,
+ "BXD8": 6.2,
+ "BXD9": 6.062,
+ "BXD11": 6.12,
+ "BXD12": 6.159,
+ "BXD13": 6.153,
+ "BXD15": 6.144,
+ "BXD16": 6.212,
+ "BXD19": 6.206,
+ "BXD20": 6.008,
+ "BXD21": 6.062,
+ "BXD22": 6.042,
+ "BXD23": 6.135,
+ "BXD24": 6.144,
+ "BXD27": 6.316,
+ "BXD28": 6.14,
+ "BXD29": 6.222,
+ "BXD31": 6.211,
+ "BXD32": 5.984,
+ "BXD33": 6.128,
+ "BXD34": 6.086,
+ "BXD38": 6.342,
+ "BXD39": 6.111,
+ "BXD40": 6.136,
+ "BXD42": 6.201,
+ "BXD43": 5.934,
+ "BXD44": 6.116,
+ "BXD45": 6.226,
+ "BXD48": 6.228,
+ "BXD48a": 6.16,
+ "BXD50": 5.92,
+ "BXD51": 6.227,
+ "BXD55": 6.137,
+ "BXD60": 5.932,
+ "BXD61": 6.18,
+ "BXD62": 6.188,
+ "BXD63": 6.134,
+ "BXD64": 6.102,
+ "BXD65": 6.258,
+ "BXD65a": 6.031,
+ "BXD65b": 6.088,
+ "BXD66": 6.07,
+ "BXD67": 6.275,
+ "BXD68": 6.116,
+ "BXD69": 6.031,
+ "BXD70": 6.14,
+ "BXD73": 6.089,
+ "BXD73a": 6.195,
+ "BXD74": 5.971,
+ "BXD75": 5.972,
+ "BXD76": 6.125,
+ "BXD77": 6.107,
+ "BXD79": 6.288,
+ "BXD83": 6.119,
+ "BXD84": 6.102,
+ "BXD85": 5.959,
+ "BXD86": 6.249,
+ "BXD87": 6.172,
+ "BXD89": 6.13,
+ "BXD90": 6.162,
+ "BXD93": 6.19,
+ "BXD94": 6.068,
+ "BXD98": 6.137,
+ "BXD99": 6.252,
+ "C57BL/6J": 6.255,
+ "DBA/2J": 6.14,
+ "B6D2F1": 6.223,
+ "D2B6F1": 6.038
+}
+} \ No newline at end of file
diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py
new file mode 100644
index 0000000..84b9330
--- /dev/null
+++ b/tests/unit/computations/test_correlation.py
@@ -0,0 +1,399 @@
+"""module contains the tests for correlation"""
+import unittest
+from unittest import TestCase
+from unittest import mock
+
+from collections import namedtuple
+
+from gn3.computations.correlations import normalize_values
+from gn3.computations.correlations import do_bicor
+from gn3.computations.correlations import compute_sample_r_correlation
+from gn3.computations.correlations import compute_all_sample_correlation
+from gn3.computations.correlations import filter_shared_sample_keys
+from gn3.computations.correlations import tissue_lit_corr_for_probe_type
+from gn3.computations.correlations import tissue_correlation_for_trait_list
+from gn3.computations.correlations import lit_correlation_for_trait_list
+from gn3.computations.correlations import fetch_lit_correlation_data
+from gn3.computations.correlations import query_formatter
+from gn3.computations.correlations import map_to_mouse_gene_id
+from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import compute_all_tissue_correlation
+
+
class QueryableMixin:
    """Interface for the database stand-ins used by these tests.

    Every method raises ``NotImplementedError``; a concrete class is
    expected to override all three.
    """

    def execute(self, query_options):
        """run a query described by ``query_options`` (must be overridden)"""
        raise NotImplementedError

    def fetchone(self):
        """return a single item of the last query (must be overridden)"""
        raise NotImplementedError

    def fetchall(self):
        """return every item of the last query (must be overridden)"""
        raise NotImplementedError
+
+
class IllegalOperationError(Exception):
    """Error signalling that a database operation is not allowed.

    The exception takes no arguments; its message is fixed.
    """

    def __init__(self):
        message = "Operation not permitted!"
        super().__init__(message)
+
+
class DataBase(QueryableMixin):
    """In-memory stand-in for a database cursor.

    ``execute`` records the query and loads a set of canned rows;
    ``fetchone`` and ``fetchall`` then serve those rows. Asking for rows
    before any have been loaded raises ``IllegalOperationError``.
    """

    # Row type of the default canned rows. Built once so that every default
    # row shares one class instead of creating a new class per row.
    _LitCoeff = namedtuple("lit_coeff", "val")

    def __init__(self):
        self.__query_options = None
        self.__results = None

    def execute(self, query_options):
        """Record ``query_options``, load the default rows and return self.

        Returning ``self`` allows chaining such as
        ``database.execute(query).fetchone()``.
        """
        self.__query_options = query_options
        self.results_generator()
        return self

    def fetchone(self):
        """Return the first loaded row, or ``None`` when there are no rows.

        Raises:
            IllegalOperationError: if no rows have been loaded yet.
        """
        if self.__results is None:
            raise IllegalOperationError()

        # An empty result set yields None (as a DB-API cursor does) rather
        # than leaking an IndexError from the list lookup.
        if not self.__results:
            return None
        return self.__results[0]

    def fetchall(self):
        """Return every loaded row.

        Raises:
            IllegalOperationError: if no rows have been loaded yet.
        """
        if self.__results is None:
            raise IllegalOperationError()
        return self.__results

    def results_generator(self, expected_results=None):
        """Load the rows that ``fetchone``/``fetchall`` will serve.

        When ``expected_results`` is ``None`` three default ``lit_coeff``
        rows are loaded, with ``val`` equal to ``x*0.1`` for x in 1, 2, 3;
        otherwise ``expected_results`` is stored as given.
        """
        if expected_results is None:
            self.__results = [self._LitCoeff(x*0.1) for x in range(1, 4)]
        else:
            self.__results = expected_results
+
+
class TestCorrelation(TestCase):
    """unit tests for the functions in gn3.computations.correlations"""

    def test_normalize_values(self):
        """positions holding None in either list are expected to be dropped\
        from both lists, with the remaining count returned as third item"""
        primary_values = [2.3, None, None, 3.2, 4.1, 5]
        target_values = [3.4, 7.2, 1.3, None, 6.2, 4.1]

        normalized = normalize_values(primary_values, target_values)

        self.assertEqual(normalized, ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3))

    def test_bicor(self):
        """do_bicor is expected to return its two inputs as a tuple"""
        bicor_output = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6])

        self.assertEqual(bicor_output, ([1, 2, 3], [4, 5, 6]))

    @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
    @mock.patch("gn3.computations.correlations.normalize_values")
    def test_compute_sample_r_correlation(self, norm_vals, compute_corr):
        """sample correlation for pearson, spearman and bicor should return\
        the mocked coefficient and p value plus the normalized sample count"""
        norm_vals.return_value = ([2.3, 4.1, 5, 4.2, 4, 1.2],
                                  [3.4, 6.2, 4, 1.1, 8, 1.1], 6)
        # consumed one per call, so the call order below matters
        compute_corr.side_effect = [(0.7, 0.3), (-1.0, 0.9), (1, 0.21)]

        primary_values = [2.3, 4.1, 5]
        target_values = [3.4, 6.2, 4.1]

        pearson_results, spearman_results, bicor_results = [
            compute_sample_r_correlation(corr_method=method,
                                         trait_vals=primary_values,
                                         target_samples_vals=target_values)
            for method in ("pearson", "spearman", "bicor")
        ]

        self.assertEqual(bicor_results, (1, 0.21, 6))
        self.assertEqual(pearson_results, (0.7, 0.3, 6))
        self.assertEqual(spearman_results, (-1.0, 0.9, 6))

        self.assertIsInstance(pearson_results, tuple, "message")
        self.assertIsInstance(spearman_results, tuple, "message")

    def test_filter_shared_sample_keys(self):
        """only the values of samples present in both dicts should be kept"""
        target_samplelist = {
            "DBA/2J": "1.23",
            "D2B6F1": "6.565",
            "BXD2": "6.456",
        }
        this_samplelist = {
            "C57BL/6J": "6.638",
            "DBA/2J": "6.266",
            "B6D2F1": "6.494",
            "D2B6F1": "6.565",
            "BXD2": "6.456",
        }

        expected_this_values = ["6.266", "6.565", "6.456"]
        expected_target_values = ["1.23", "6.565", "6.456"]

        shared = filter_shared_sample_keys(
            this_samplelist=this_samplelist,
            target_samplelist=target_samplelist)

        self.assertEqual(shared,
                         (expected_this_values, expected_target_values))

    @mock.patch("gn3.computations.correlations.compute_sample_r_correlation")
    @mock.patch("gn3.computations.correlations.filter_shared_sample_keys")
    def test_compute_all_sample(self, filter_shared_samples, sample_r_corr):
        """every trait of the target dataset should get a correlation entry\
        built from the mocked filtering and sample r correlation helpers"""
        sample_r_corr.return_value = [-1.0, 0.9, 6]
        filter_shared_samples.return_value = (
            ["1.23", "6.565", "6.456"],
            ["6.266", "6.565", "6.456"],
        )

        traits_dataset = [{
            "trait_id": "1419792_at",
            "trait_sample_data": {
                "DBA/2J": "1.23",
                "D2B6F1": "6.565",
                "BXD2": "6.456",
            },
        }]

        this_trait_data = {
            "trait_id": "1455376_at",
            "trait_sample_data": {
                "C57BL/6J": "6.638",
                "DBA/2J": "6.266",
                "B6D2F1": "6.494",
                "D2B6F1": "6.565",
                "BXD2": "6.456",
            },
        }

        expected_correlations = [{
            "1419792_at": {
                "corr_coeffient": -1.0,
                "p_value": 0.9,
                "num_overlap": 6,
            },
        }]

        computed = compute_all_sample_correlation(
            this_trait=this_trait_data, target_dataset=traits_dataset)

        self.assertEqual(computed, expected_correlations)
        sample_r_corr.assert_called_once_with(
            corr_method="pearson",
            trait_vals=["1.23", "6.565", "6.456"],
            target_samples_vals=["6.266", "6.565", "6.456"])
        filter_shared_samples.assert_called_once_with(
            this_trait_data.get("trait_sample_data"),
            traits_dataset[0].get("trait_sample_data"))

    @unittest.skip("not implemented")
    def test_tissue_lit_corr_for_probe_type(self):
        """skipped placeholder for the combined tissue and lit correlation\
        that follows the initial correlation for probeset datasets"""
        outcome = tissue_lit_corr_for_probe_type(
            corr_type="tissue", top_corr_results={})

        self.assertEqual(outcome, (None, None))

    @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value")
    def test_tissue_correlation_for_trait_list(self, mock_compute_corr_coeff):
        """tissue correlation of primary tissue values against target tissue\
        values should report the mocked coefficient, p value and tissue count"""
        mock_compute_corr_coeff.side_effect = [(0.4, 0.9), (-0.2, 0.91)]

        target_tissues_values = [1, 2, 3]
        primary_tissue_values = [1.1, 1.5, 2.3]

        tissue_results = tissue_correlation_for_trait_list(
            primary_tissue_values,
            target_tissues_values,
            corr_method="pearson",
            compute_corr_p_value=mock_compute_corr_coeff)

        self.assertEqual(
            tissue_results,
            {"tissue_corr": 0.4, "p_value": 0.9, "tissue_number": 3})

    @mock.patch("gn3.computations.correlations.fetch_lit_correlation_data")
    @mock.patch("gn3.computations.correlations.map_to_mouse_gene_id")
    def test_lit_correlation_for_trait_list(self, mock_mouse_gene_id,
                                            fetch_lit_data):
        """each trait in the list should be paired with the lit correlation\
        value produced by the mocked database fetch"""
        mock_mouse_gene_id.side_effect = [12, 11, 18, 16, 20]
        fetch_lit_data.side_effect = [(15, 9), (17, 8), (11, 12)]

        database_instance = namedtuple("database", "execute")("fetchone")
        target_trait_lists = [{"gene_id": 15},
                              {"gene_id": 17},
                              {"gene_id": 11}]

        lit_results = lit_correlation_for_trait_list(
            database=database_instance,
            target_trait_lists=target_trait_lists,
            species="rat",
            trait_gene_id="12")

        self.assertEqual(lit_results, [
            {"gene_id": 15, "lit_corr": 9},
            {"gene_id": 17, "lit_corr": 8},
            {"gene_id": 11, "lit_corr": 12},
        ])

    def test_fetch_lit_correlation_data(self):
        """with both mouse gene ids set to None the lit correlation\
        returned for the gene should be 0"""
        lit_results = fetch_lit_correlation_data(database=DataBase(),
                                                 gene_id="1",
                                                 input_mouse_gene_id=None,
                                                 mouse_gene_id=None)

        self.assertEqual(lit_results, ("1", 0))

    def test_fetch_lit_correlation_data_db_query(self):
        """with both mouse gene ids given the coefficient should be taken\
        from the first row served by the database stand-in"""
        expected_results = ("1", 0.1)

        lit_results = fetch_lit_correlation_data(database=DataBase(),
                                                 gene_id="1",
                                                 input_mouse_gene_id="20",
                                                 mouse_gene_id="15")

        self.assertEqual(expected_results, lit_results)

    def test_query_lit_correlation_for_db_empty(self):
        """a database query that yields no row should give a lit\
        correlation of 0"""
        empty_database = mock.Mock()
        empty_database.execute.return_value.fetchone.return_value = None

        lit_results = fetch_lit_correlation_data(database=empty_database,
                                                 input_mouse_gene_id="12",
                                                 gene_id="16",
                                                 mouse_gene_id="12")

        self.assertEqual(lit_results, ("16", 0))

    def test_query_formatter(self):
        """placeholders in the query string should be replaced by the\
        given values in order"""
        query = """
        SELECT VALUE
        FROM LCorr
        WHERE GeneId1='%s' and
        GeneId2='%s'
        """

        expected_formatted_query = """
        SELECT VALUE
        FROM LCorr
        WHERE GeneId1='20' and
        GeneId2='15'
        """

        query_values = ("20", "15")

        formatted_query = query_formatter(query, *query_values)

        self.assertEqual(formatted_query, expected_formatted_query)

    def test_query_formatter_no_query_values(self):
        """a query without values to substitute should come back unchanged"""
        query = """SELECT * FROM USERS"""

        self.assertEqual(query_formatter(query), query)

    def test_map_to_mouse_gene_id(self):
        """gene ids for several species values should be mapped to the\
        mouse gene ids served by the mocked database"""
        mouse_row = namedtuple("mouse_id", "mouse")
        database_instance = mock.Mock()
        # rows are handed out one per fetchone call, in this order
        database_instance.execute.return_value.fetchone.side_effect = [
            mouse_row(val) for val in range(12, 20)]

        test_data = [("Human", 14), (None, 9), ("Mouse", 15), ("Rat", 14)]

        results = [
            map_to_mouse_gene_id(database=database_instance,
                                 species=species,
                                 gene_id=gene_id)
            for species, gene_id in test_data
        ]

        self.assertEqual(results, [12, None, 13, 14])

    @mock.patch("gn3.computations.correlations.lit_correlation_for_trait_list")
    def test_compute_all_lit_correlation(self, mock_lit_corr):
        """compute_all_lit_correlation should wrap whatever the mocked\
        lit_correlation_for_trait_list returns under the lit_results key"""
        # given as side_effect, so the first call returns the first dict
        mock_lit_corr.side_effect = [{"gene_id": 11, "lit_corr": 9},
                                     {"gene_id": 17, "lit_corr": 8}]

        lit_correlation_results = compute_all_lit_correlation(
            database_instance=mock.Mock(),
            trait_lists=[{"gene_id": 11}],
            species="rat",
            gene_id=12)

        self.assertEqual(lit_correlation_results,
                         {"lit_results": {"gene_id": 11, "lit_corr": 9}})

    @mock.patch("gn3.computations.correlations.tissue_correlation_for_trait_list")
    def test_compute_all_tissue_correlation(self, mock_tissue_corr):
        """compute_all_tissue_correlation should key the mocked tissue\
        correlation of every target trait by its trait id"""
        first_corr = {"tissue_corr": -0.5, "p_value": 0.9, "tissue_number": 3}
        second_corr = {"tissue_corr": 1.11, "p_value": 0.2, "tissue_number": 3}
        mock_tissue_corr.side_effect = [dict(first_corr), dict(second_corr)]

        primary_tissue_dict = {"trait_id": "1419792_at",
                               "tissue_values": [1, 2, 3, 4, 5]}
        target_tissue_dict = [
            {"trait_id": "1418702_a_at", "tissue_values": [1, 2, 3]},
            {"trait_id": "1412_at", "tissue_values": [1, 2, 3]},
        ]

        results = compute_all_tissue_correlation(
            primary_tissue_dict=primary_tissue_dict,
            target_tissues_dict_list=target_tissue_dict,
            corr_method="pearson")

        self.assertEqual(mock_tissue_corr.call_count, 2)

        self.assertEqual(results, {"1418702_a_at": first_corr,
                                   "1412_at": second_corr})