From deca94c7b473ec79c5e5cee3d6caeb3c3885bd74 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 23 Dec 2021 13:40:31 +0530
Subject: db: Fix wrong continued indentation.
* gn3/db/datasets.py (dataset_metadata): Fix wrong continued indentation.
---
gn3/db/datasets.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'gn3')
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index c50e148..788e9cf 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -328,7 +328,7 @@ WHERE {
OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
}
""",
- """
+ """
PREFIX gn:
SELECT ?platform_name ?normalization_name ?species_name ?inbred_set_name ?tissue_name
WHERE {
@@ -341,7 +341,7 @@ WHERE {
OPTIONAL { ?dataset gn:datasetOfPlatform / gn:name ?platform_name } .
}
""",
- """
+ """
PREFIX gn:
SELECT ?specifics ?summary ?about_cases ?about_tissue ?about_platform
?about_data_processing ?notes ?experiment_design ?contributors
--
cgit v1.2.3
From 36cc8b1f837406d7002246c00d6054573687c472 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 23 Dec 2021 13:57:04 +0530
Subject: db: Fix sparql_query return type.
* gn3/db/datasets.py: Import List from typing.
(sparql_query): Set return type to List[Dict[str, Any]].
---
gn3/db/datasets.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'gn3')
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index 788e9cf..ca48156 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -3,7 +3,7 @@ This module contains functions relating to specific trait dataset manipulation
"""
import re
from string import Template
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
from SPARQLWrapper import JSON, SPARQLWrapper
from gn3.settings import SPARQL_ENDPOINT
@@ -297,7 +297,7 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
**group
}
-def sparql_query(query: str) -> Dict[str, Any]:
+def sparql_query(query: str) -> List[Dict[str, Any]]:
"""Run a SPARQL query and return the bound variables."""
sparql = SPARQLWrapper(SPARQL_ENDPOINT)
sparql.setQuery(query)
--
cgit v1.2.3
From fa227ea35710658c7d8314315ee072a641c163f9 Mon Sep 17 00:00:00 2001
From: Arun Isaac
Date: Thu, 23 Dec 2021 13:58:07 +0530
Subject: db: Assist mypy with explicit type declaration.
* gn3/db/datasets.py (dataset_metadata): Assist mypy by explicitly declaring
type of return dictionary.
---
gn3/db/datasets.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'gn3')
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index ca48156..a41e228 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -362,8 +362,8 @@ WHERE {
OPTIONAL { ?dataset gn:acknowledgment ?acknowledgment . }
}
"""]
- result = {'accession_id': accession_id,
- 'investigator': {}}
+ result: Dict[str, Any] = {'accession_id': accession_id,
+ 'investigator': {}}
query_result = {}
for query in queries:
if sparql_result := sparql_query(Template(query).substitute(accession_id=accession_id)):
--
cgit v1.2.3
From ed8ee3077211cc227089f87929a70ac8b7c4593f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:29:11 +0300
Subject: Add API endpoint for partial correlations
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* Add an API endpoint for the partial correlation.
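* gn3/api/correlation.py: Import json.
(partial_correlation): Add docstring. Add nested `OutputEncoder` class, a
`json.JSONEncoder` subclass that encodes `bytes` objects as UTF-8 strings.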
---
gn3/api/correlation.py | 13 +++++++++++++
1 file changed, 13 insertions(+)
(limited to 'gn3')
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index e936eaf..f84228a 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -1,4 +1,6 @@
"""Endpoints for running correlations"""
+import json
+
from flask import jsonify
from flask import Blueprint
from flask import request
@@ -87,9 +89,20 @@ def compute_tissue_corr(corr_method="pearson"):
@correlation.route("/partial", methods=["POST"])
def partial_correlation():
+ """API endpoint for partial correlations."""
def trait_fullname(trait):
return f"{trait['dataset']}::{trait['name']}"
+ class OutputEncoder(json.JSONEncoder):
+ """
+ Class to encode output into JSON, for objects which the default
+ json.JSONEncoder class does not have default encoding for.
+ """
+ def default(self, obj):
+ if isinstance(obj, bytes):
+ return str(obj, encoding="utf-8")
+ return json.JSONEncoder.default(self, obj)
+
args = request.get_json()
conn, _cursor_object = database_connector()
corr_results = partial_correlations_entry(
--
cgit v1.2.3
From 5abe0158daed9428484925b95bb4a8b2251adea2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Sat, 18 Dec 2021 08:09:04 +0300
Subject: Add dataset type to the results
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* The dataset type is relevant for the display of the data; this commit
therefore includes the dataset type as part of the results.
---
gn3/computations/partial_correlations.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
(limited to 'gn3')
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 13c411a..1c02533 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -717,7 +717,8 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
return {
"status": "success",
"results": {
- "primary_trait": primary_trait,
- "control_traits": cntrl_traits,
- "correlations": trait_list
+ "primary_trait": primary_trait,
+ "control_traits": cntrl_traits,
+ "correlations": trait_list,
+ "dataset_type": target_dataset["type"]
}}
--
cgit v1.2.3
From 4304e7298af6769110a251b21fca2f105bee4c06 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Sat, 18 Dec 2021 11:36:07 +0300
Subject: Reduce the total amount of data to be output
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* There is a lot of data that is not necessary in the final result. This
commit removes that data, retaining only data relevant for the display.
---
gn3/computations/partial_correlations.py | 56 ++++++++++++++++++++++++++++++--
1 file changed, 53 insertions(+), 3 deletions(-)
(limited to 'gn3')
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1c02533..9fb17f7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -526,6 +526,54 @@ def tissue_correlation_by_list(
} for trait in trait_list)
return trait_list
+def trait_for_output(trait):
+ """
+ Process a trait for output.
+
+ Removes a lot of extraneous data from the trait, that is not needed for
+ the display of partial correlation results.
+ This function also removes all key-value pairs, for which the value is
+ `None`, because it is a waste of network resources to transmit the key-value
+ pair just to indicate it does not exist.
+ """
+ trait = {
+ "trait_type": trait["trait_type"],
+ "dataset_name": trait["db"]["dataset_name"],
+ "dataset_type": trait["db"]["dataset_type"],
+ "group": trait["db"]["group"],
+ "trait_fullname": trait["trait_fullname"],
+ "trait_name": trait["trait_name"],
+ "symbol": trait.get("symbol"),
+ "description": trait.get("description"),
+ "pre_publication_description": trait.get(
+ "pre_publication_description"),
+ "post_publication_description": trait.get(
+ "post_publication_description"),
+ "original_description": trait.get(
+ "original_description"),
+ "authors": trait.get("authors"),
+ "year": trait.get("year"),
+ "probe_target_description": trait.get(
+ "probe_target_description"),
+ "chr": trait.get("chr"),
+ "mb": trait.get("mb"),
+ "geneid": trait.get("geneid"),
+ "homologeneid": trait.get("homologeneid"),
+ "noverlap": trait.get("noverlap"),
+ "partial_corr": trait.get("partial_corr"),
+ "partial_corr_p_value": trait.get("partial_corr_p_value"),
+ "corr": trait.get("corr"),
+ "corr_p_value": trait.get("corr_p_value"),
+ "rank_order": trait.get("rank_order"),
+ "delta": (
+ None if trait.get("partial_corr") is None
+ else (trait.get("partial_corr") - trait.get("corr"))),
+ "l_corr": trait.get("l_corr"),
+ "tissue_corr": trait.get("tissue_corr"),
+ "tissue_p_value": trait.get("tissue_p_value")
+ }
+ return {key: val for key, val in trait.items() if val is not None}
+
def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
conn: Any, primary_trait_name: str,
control_trait_names: Tuple[str, ...], method: str,
@@ -717,8 +765,10 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
return {
"status": "success",
"results": {
- "primary_trait": primary_trait,
- "control_traits": cntrl_traits,
- "correlations": trait_list,
+ "primary_trait": trait_for_output(primary_trait),
+ "control_traits": tuple(
+ trait_for_output(trait) for trait in cntrl_traits),
+ "correlations": tuple(
+ trait_for_output(trait) for trait in trait_list),
"dataset_type": target_dataset["type"]
}}
--
cgit v1.2.3
From c813dd68230a027b1b5acdbe9d3dba46f6bd1ad0 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:22:49 +0300
Subject: Encode the data to JSON and set the status code
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* Encode bytes objects to string
* Encode NaN values to "null"
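* gn3/api/correlation.py: Import make_response from flask.
(partial_correlation): Serialise the results with `json.dumps` using
`OutputEncoder`, respond with status 400 only when the results contain an
"error" key (200 otherwise), and set the `Content-Type` header to
`application/json`.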
---
gn3/api/correlation.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
(limited to 'gn3')
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index f84228a..c0b5806 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -1,9 +1,9 @@
"""Endpoints for running correlations"""
import json
-
from flask import jsonify
from flask import Blueprint
from flask import request
+from flask import make_response
from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import compute_all_lit_correlation
@@ -109,6 +109,8 @@ def partial_correlation():
conn, trait_fullname(args["primary_trait"]),
tuple(trait_fullname(trait) for trait in args["control_traits"]),
args["method"], int(args["criteria"]), args["target_db"])
- return make_response(
- jsonify(corr_results),
- 400)
+ response = make_response(
+ json.dumps(corr_results, cls=OutputEncoder),
+ 400 if "error" in corr_results.keys() else 200)
+ response.headers["Content-Type"] = "application/json"
+ return response
--
cgit v1.2.3
From ac8528c5847f4a517c16b5283c06d3caeae8ef5e Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 20 Dec 2021 07:19:16 +0300
Subject: Replace `NaN` with `null` in JSON string
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* `NaN` is not a valid JSON value, and leads to errors in the code. This
commit replaces all `NaN` values with `null`.
---
gn3/api/correlation.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'gn3')
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index c0b5806..1caf31f 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -110,7 +110,7 @@ def partial_correlation():
tuple(trait_fullname(trait) for trait in args["control_traits"]),
args["method"], int(args["criteria"]), args["target_db"])
response = make_response(
- json.dumps(corr_results, cls=OutputEncoder),
+ json.dumps(corr_results, cls=OutputEncoder).replace(": NaN", ": null"),
400 if "error" in corr_results.keys() else 200)
response.headers["Content-Type"] = "application/json"
return response
--
cgit v1.2.3
From 0508fc422c033cfff8bbea118f85282212d236e4 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 20 Dec 2021 09:38:17 +0300
Subject: Return the correlation method used
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* Return the correlation method used
---
gn3/computations/partial_correlations.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'gn3')
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 9fb17f7..dbcbe29 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -770,5 +770,6 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
trait_for_output(trait) for trait in cntrl_traits),
"correlations": tuple(
trait_for_output(trait) for trait in trait_list),
- "dataset_type": target_dataset["type"]
+ "dataset_type": target_dataset["type"],
+ "method": "spearman" if "spearman" in method.lower() else "pearson"
}}
--
cgit v1.2.3
From 2d3b6eae6953d5e4b00f21b5ffd683271d0f76bc Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 21 Dec 2021 13:04:51 +0300
Subject: Fix sorting
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi
* Update the sort key for literature and tissue correlations so that the
results are sorted by the correlation value first, and then by the p-value
of the partial correlation.
---
gn3/computations/partial_correlations.py | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
(limited to 'gn3')
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index dbcbe29..1e4a646 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -717,19 +717,30 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
def __make_sorter__(method):
- def __sort_6__(row):
- return row[6]
-
- def __sort_3__(row):
+ def __compare_lit_or_tiss_correlation_values_(row):
+ # Index Content
+ # 0 trait name
+ # 1 N
+ # 2 partial correlation coefficient
+ # 3 p value of partial correlation
+ # 6 literature/tissue correlation value
+ return (row[6], row[3])
+
+ def __compare_partial_correlation_p_values__(row):
+ # Index Content
+ # 0 trait name
+ # 1 partial correlation coefficient
+ # 2 N
+ # 3 p value of partial correlation
return row[3]
if "literature" in method.lower():
- return __sort_6__
+ return __compare_lit_or_tiss_correlation_values_
if "tissue" in method.lower():
- return __sort_6__
+ return __compare_lit_or_tiss_correlation_values_
- return __sort_3__
+ return __compare_partial_correlation_p_values__
sorted_correlations = sorted(
all_correlations, key=__make_sorter__(method))
--
cgit v1.2.3
From 672c1f2d43961feb59d9549557cb10c4f0b19dcf Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:13:32 +0300
Subject: Fix linting errors
---
gn3/authentication.py | 5 +++++
gn3/computations/correlations.py | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
(limited to 'gn3')
diff --git a/gn3/authentication.py b/gn3/authentication.py
index a6372c1..4aedacd 100644
--- a/gn3/authentication.py
+++ b/gn3/authentication.py
@@ -163,3 +163,8 @@ def create_group(conn: Redis, group_name: Optional[str],
}
conn.hset("groups", group_id, json.dumps(group))
return group
+ # This might break stuff, but it fixes the linting error regarding
+ # inconsistent return types.
+ # @BonfaceKilz please review this and replace with appropriate return and
+ # remove these comments.
+ return None
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index d38946e..345b8d7 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -79,7 +79,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
zip(*list(normalize_values(trait_vals, target_samples_vals))))
num_overlap = len(normalized_traits_vals)
except ValueError:
- return
+ return None
if num_overlap > 5:
--
cgit v1.2.3
From 784447b17d85a618005ac9acfc57f5b7ef8f5169 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:14:04 +0300
Subject: Fix typing errors
---
gn3/computations/correlations.py | 3 ++-
gn3/computations/partial_correlations.py | 10 ++++++----
2 files changed, 8 insertions(+), 5 deletions(-)
(limited to 'gn3')
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 345b8d7..1b4b3a4 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -7,6 +7,7 @@ from typing import List
from typing import Tuple
from typing import Optional
from typing import Callable
+from typing import Generator
import scipy.stats
import pingouin as pg
@@ -106,7 +107,7 @@ package :not packaged in guix
def filter_shared_sample_keys(this_samplelist,
- target_samplelist) -> Tuple[List, List]:
+ target_samplelist) -> Generator:
"""Given primary and target sample-list for two base and target trait select
filter the values using the shared keys
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1e4a646..984c15a 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -217,7 +217,7 @@ def good_dataset_samples_indexes(
def partial_correlations_fast(# pylint: disable=[R0913, R0914]
samples, primary_vals, control_vals, database_filename,
fetched_correlations, method: str, correlation_type: str) -> Tuple[
- float, Tuple[float, ...]]:
+ int, Tuple[float, ...]]:
"""
Computes partial correlation coefficients using data from a CSV file.
@@ -350,7 +350,9 @@ def compute_partial(
def partial_correlations_normal(# pylint: disable=R0913
primary_vals, control_vals, input_trait_gene_id, trait_database,
data_start_pos: int, db_type: str, method: str) -> Tuple[
- float, Tuple[float, ...]]:
+ int, Tuple[Union[
+ Tuple[str, int, float, float, float, float], None],
+ ...]]:#Tuple[float, ...]
"""
Computes the correlation coefficients.
@@ -485,7 +487,7 @@ def literature_correlation_by_list(
def tissue_correlation_by_list(
conn: Any, primary_trait_symbol: str, tissue_probeset_freeze_id: int,
- method: str, trait_list: Tuple[dict]) -> Tuple[dict]:
+ method: str, trait_list: Tuple[dict]) -> Tuple[dict, ...]:
"""
This is a migration of the
`web.webqtl.correlation.CorrelationPage.getTissueCorrelationByList`
@@ -508,7 +510,7 @@ def tissue_correlation_by_list(
primary_trait_value = prim_trait_symbol_value_dict[
primary_trait_symbol.lower()]
gene_symbol_list = tuple(
- trait for trait in trait_list if "symbol" in trait.keys())
+ trait["symbol"] for trait in trait_list if "symbol" in trait.keys())
symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait(
gene_symbol_list, tissue_probeset_freeze_id, conn)
return tuple(
--
cgit v1.2.3