aboutsummaryrefslogtreecommitdiff
path: root/gn3/db
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/db')
-rw-r--r--gn3/db/traits.py93
1 files changed, 93 insertions, 0 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f2673c8..1c6aaa7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,12 +1,81 @@
"""This class contains functions relating to trait data manipulation"""
import os
+from functools import reduce
from typing import Any, Dict, Union, Sequence
+
from gn3.settings import TMPDIR
from gn3.random import random_string
from gn3.function_helpers import compose
from gn3.db.datasets import retrieve_trait_dataset
+def export_trait_data(
+ trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+ var_exists: bool = False, n_exists: bool = False):
+ """
+ Export data according to `samplelist`. Mostly used in calculating
+ correlations.
+
+ DESCRIPTION:
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+ PARAMETERS
+ trait: (dict)
+ The dictionary of key-value pairs representing a trait
+ samplelist: (list)
+ A list of sample names
+ dtype: (str)
+ ... verify what this is ...
+ var_exists: (bool)
+ A flag indicating existence of variance
+ n_exists: (bool)
+ A flag indicating existence of ndata
+ """
+ def __export_all_types(tdata, sample):
+ sample_data = []
+ if tdata[sample]["value"]:
+ sample_data.append(tdata[sample]["value"])
+ if var_exists:
+ if tdata[sample]["variance"]:
+ sample_data.append(tdata[sample]["variance"])
+ else:
+ sample_data.append(None)
+ if n_exists:
+ if tdata[sample]["ndata"]:
+ sample_data.append(tdata[sample]["ndata"])
+ else:
+ sample_data.append(None)
+ else:
+ if var_exists and n_exists:
+ sample_data += [None, None, None]
+ elif var_exists or n_exists:
+ sample_data += [None, None]
+ else:
+ sample_data.append(None)
+
+ return tuple(sample_data)
+
+ def __exporter(accumulator, sample):
+ # pylint: disable=[R0911]
+ if sample in trait_data["data"]:
+ if dtype == "val":
+ return accumulator + (trait_data["data"][sample]["value"], )
+ if dtype == "var":
+ return accumulator + (trait_data["data"][sample]["variance"], )
+ if dtype == "N":
+ return accumulator + (trait_data["data"][sample]["ndata"], )
+ if dtype == "all":
+ return accumulator + __export_all_types(trait_data["data"], sample)
+ raise KeyError("Type `%s` is incorrect" % dtype)
+ if var_exists and n_exists:
+ return accumulator + (None, None, None)
+ if var_exists or n_exists:
+ return accumulator + (None, None)
+ return accumulator + (None,)
+
+ return reduce(__exporter, samplelist, tuple())
+
def get_trait_csv_sample_data(conn: Any,
trait_name: int, phenotype_id: int):
"""Fetch a trait and return it as a csv string"""
@@ -674,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR):
"""Generate a unique filename for use with generated traits files."""
return "{}/traits_test_file_{}.txt".format(
os.path.abspath(base_path), random_string(10))
+
+def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
+ """
+ Export informative strain
+
+ This is a migration of the `exportInformative` function in
+ web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
+
+ There is a chance that the original implementation has a bug, especially
+ dealing with the `inc_var` value. It the `inc_var` value is meant to control
+ the inclusion of the `variance` value, then the current implementation, and
+ that one in GN1 have a bug.
+ """
+ def __exporter__(acc, data_item):
+ if not inc_var or data_item["variance"] is not None:
+ return (
+ acc[0] + (data_item["sample_name"],),
+ acc[1] + (data_item["value"],),
+ acc[2] + (data_item["variance"],))
+ return acc
+ return reduce(
+ __exporter__,
+ filter(lambda td: td["value"] is not None, trait_data["data"].values()),
+ (tuple(), tuple(), tuple()))