aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- gn3/db/traits.py 93
-rw-r--r-- gn3/heatmaps.py 67
-rw-r--r-- gn3/partial_correlations.py 88
-rw-r--r-- tests/unit/db/test_traits.py 176
-rw-r--r-- tests/unit/test_heatmaps.py 87
-rw-r--r-- tests/unit/test_partial_correlations.py 151
6 files changed, 509 insertions, 153 deletions
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f2673c8..1c6aaa7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,12 +1,81 @@
"""This class contains functions relating to trait data manipulation"""
import os
+from functools import reduce
from typing import Any, Dict, Union, Sequence
+
from gn3.settings import TMPDIR
from gn3.random import random_string
from gn3.function_helpers import compose
from gn3.db.datasets import retrieve_trait_dataset
+def export_trait_data(
+ trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+ var_exists: bool = False, n_exists: bool = False):
+ """
+ Export data according to `samplelist`. Mostly used in calculating
+ correlations.
+
+ DESCRIPTION:
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+ PARAMETERS
+ trait_data: (dict)
+ The dictionary of key-value pairs representing a trait
+ samplelist: (list)
+ A list of sample names
+ dtype: (str)
+ The field to export per sample: one of "val", "var", "N" or "all"
+ var_exists: (bool)
+ A flag indicating existence of variance
+ n_exists: (bool)
+ A flag indicating existence of ndata
+ """
+ def __export_all_types(tdata, sample):
+ sample_data = []
+ if tdata[sample]["value"]:
+ sample_data.append(tdata[sample]["value"])
+ if var_exists:
+ if tdata[sample]["variance"]:
+ sample_data.append(tdata[sample]["variance"])
+ else:
+ sample_data.append(None)
+ if n_exists:
+ if tdata[sample]["ndata"]:
+ sample_data.append(tdata[sample]["ndata"])
+ else:
+ sample_data.append(None)
+ else:
+ if var_exists and n_exists:
+ sample_data += [None, None, None]
+ elif var_exists or n_exists:
+ sample_data += [None, None]
+ else:
+ sample_data.append(None)
+
+ return tuple(sample_data)
+
+ def __exporter(accumulator, sample):
+ # pylint: disable=[R0911]
+ if sample in trait_data["data"]:
+ if dtype == "val":
+ return accumulator + (trait_data["data"][sample]["value"], )
+ if dtype == "var":
+ return accumulator + (trait_data["data"][sample]["variance"], )
+ if dtype == "N":
+ return accumulator + (trait_data["data"][sample]["ndata"], )
+ if dtype == "all":
+ return accumulator + __export_all_types(trait_data["data"], sample)
+ raise KeyError("Type `%s` is incorrect" % dtype)
+ if var_exists and n_exists:
+ return accumulator + (None, None, None)
+ if var_exists or n_exists:
+ return accumulator + (None, None)
+ return accumulator + (None,)
+
+ return reduce(__exporter, samplelist, tuple())
+
def get_trait_csv_sample_data(conn: Any,
trait_name: int, phenotype_id: int):
"""Fetch a trait and return it as a csv string"""
@@ -674,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR):
"""Generate a unique filename for use with generated traits files."""
return "{}/traits_test_file_{}.txt".format(
os.path.abspath(base_path), random_string(10))
+
+def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
+ """
+ Export informative strain
+
+ This is a migration of the `exportInformative` function in
+ web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
+
+ There is a chance that the original implementation has a bug, especially
+ in how it handles the `inc_var` value. If `inc_var` is meant to control
+ the inclusion of the `variance` value, then both this implementation and
+ the one in GN1 have a bug.
+ """
+ def __exporter__(acc, data_item):
+ if not inc_var or data_item["variance"] is not None:
+ return (
+ acc[0] + (data_item["sample_name"],),
+ acc[1] + (data_item["value"],),
+ acc[2] + (data_item["variance"],))
+ return acc
+ return reduce(
+ __exporter__,
+ filter(lambda td: td["value"] is not None, trait_data["data"].values()),
+ (tuple(), tuple(), tuple()))
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index 2dd9d07..bf9dfd1 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -14,6 +14,7 @@ from plotly.subplots import make_subplots # type: ignore
from gn3.settings import TMPDIR
from gn3.random import random_string
from gn3.computations.slink import slink
+from gn3.db.traits import export_trait_data
from gn3.computations.correlations2 import compute_correlation
from gn3.db.genotypes import (
build_genotype_file, load_genotype_samples)
@@ -26,72 +27,6 @@ from gn3.computations.qtlreaper import (
parse_reaper_main_results,
organise_reaper_main_results)
-def export_trait_data(
- trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
- var_exists: bool = False, n_exists: bool = False):
- """
- Export data according to `samplelist`. Mostly used in calculating
- correlations.
-
- DESCRIPTION:
- Migrated from
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
- PARAMETERS
- trait: (dict)
- The dictionary of key-value pairs representing a trait
- samplelist: (list)
- A list of sample names
- dtype: (str)
- ... verify what this is ...
- var_exists: (bool)
- A flag indicating existence of variance
- n_exists: (bool)
- A flag indicating existence of ndata
- """
- def __export_all_types(tdata, sample):
- sample_data = []
- if tdata[sample]["value"]:
- sample_data.append(tdata[sample]["value"])
- if var_exists:
- if tdata[sample]["variance"]:
- sample_data.append(tdata[sample]["variance"])
- else:
- sample_data.append(None)
- if n_exists:
- if tdata[sample]["ndata"]:
- sample_data.append(tdata[sample]["ndata"])
- else:
- sample_data.append(None)
- else:
- if var_exists and n_exists:
- sample_data += [None, None, None]
- elif var_exists or n_exists:
- sample_data += [None, None]
- else:
- sample_data.append(None)
-
- return tuple(sample_data)
-
- def __exporter(accumulator, sample):
- # pylint: disable=[R0911]
- if sample in trait_data["data"]:
- if dtype == "val":
- return accumulator + (trait_data["data"][sample]["value"], )
- if dtype == "var":
- return accumulator + (trait_data["data"][sample]["variance"], )
- if dtype == "N":
- return accumulator + (trait_data["data"][sample]["ndata"], )
- if dtype == "all":
- return accumulator + __export_all_types(trait_data["data"], sample)
- raise KeyError("Type `%s` is incorrect" % dtype)
- if var_exists and n_exists:
- return accumulator + (None, None, None)
- if var_exists or n_exists:
- return accumulator + (None, None)
- return accumulator + (None,)
-
- return reduce(__exporter, samplelist, tuple())
def trait_display_name(trait: Dict):
"""
diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
new file mode 100644
index 0000000..c556d10
--- /dev/null
+++ b/gn3/partial_correlations.py
@@ -0,0 +1,88 @@
+"""
+This module deals with partial correlations.
+
+It is an attempt to migrate over the partial correlations feature from
+GeneNetwork1.
+"""
+
+from functools import reduce
+from typing import Any, Sequence
+
+def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
+ """
+ Fetches data for the control traits.
+
+ This migrates `web/webqtl/correlation/correlationFunction.controlStrain` in
+ GN1, with a few modifications to the arguments passed in.
+
+ PARAMETERS:
+ controls: A sequence of trait dicts, each holding a `data` map of sample
+ names to sample data. Equivalent to `cvals` in the GN1 source function.
+ sampleslist: A list of samples. Equivalent to `strainlst` in the
+ corresponding source function in GN1
+ """
+ def __process_control__(trait_data):
+ def __process_sample__(acc, sample):
+ if sample in trait_data["data"].keys():
+ sample_item = trait_data["data"][sample]
+ val = sample_item["value"]
+ if val is not None:
+ return (
+ acc[0] + (sample,),
+ acc[1] + (val,),
+ acc[2] + (sample_item["variance"],))
+ return acc
+ return reduce(
+ __process_sample__, sampleslist, (tuple(), tuple(), tuple()))
+
+ return reduce(
+ lambda acc, item: (
+ acc[0] + (item[0],),
+ acc[1] + (item[1],),
+ acc[2] + (item[2],),
+ acc[3] + (len(item[0]),),
+ ),
+ [__process_control__(trait_data) for trait_data in controls],
+ (tuple(), tuple(), tuple(), tuple()))
+
+def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]:
+ """
+ Build a sequence of dictionaries from a sequence of separate sequences of
+ samples, values and variances.
+
+ This is a partial migration of the
+ `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1.
+ It extracts only the part of that function that is expected to be reused
+ in more than one place.
+ """
+ return tuple(
+ {
+ sample: {"sample_name": sample, "value": val, "variance": var}
+ for sample, val, var in zip(*trait_line)
+ } for trait_line in zip(*(samples_vals_vars[0:3])))
+
+def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]:
+ """
+ Corrects sample_names, values and variance such that they all contain only
+ those samples that are common to the reference trait and all control traits.
+
+ This is a partial migration of the
+ `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1.
+ """
+ primary_samples = tuple(
+ present[0] for present in
+ ((sample, all(sample in control.keys() for control in control_traits))
+ for sample in primary_trait.keys())
+ if present[1])
+ control_vals_vars: tuple = reduce(
+ lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)),
+ ((item["value"], item["variance"])
+ for sublist in [tuple(control.values()) for control in control_traits]
+ for item in sublist),
+ (tuple(), tuple()))
+ return (
+ primary_samples,
+ tuple(primary_trait[sample]["value"] for sample in primary_samples),
+ control_vals_vars[0],
+ tuple(primary_trait[sample]["variance"] for sample in primary_samples),
+ control_vals_vars[1])
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 8af8e82..4aa9389 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -2,6 +2,8 @@
from unittest import mock, TestCase
from gn3.db.traits import (
build_trait_name,
+ export_trait_data,
+ export_informative,
set_haveinfo_field,
update_sample_data,
retrieve_trait_info,
@@ -12,6 +14,38 @@ from gn3.db.traits import (
retrieve_publish_trait_info,
retrieve_probeset_trait_info)
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+trait_data = {
+ "mysqlid": 36688172,
+ "data": {
+ "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+ "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+ "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+ "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+ "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+ "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+ "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+ "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+ "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+ "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+ "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+ "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+ "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+ "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+ "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+ "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+ "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+ "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+ "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+ "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+ "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+ "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+ "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+ "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+ "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+ "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
class TestTraitsDBFunctions(TestCase):
"Test cases for traits functions"
@@ -226,3 +260,145 @@ class TestTraitsDBFunctions(TestCase):
with self.subTest(trait_info=trait_info, expected=expected):
self.assertEqual(
set_confidential_field(trait_type, trait_info), expected)
+
+ def test_export_trait_data_dtype(self):
+ """
+ Test `export_trait_data` with different values for the `dtype` keyword
+ argument
+ """
+ for dtype, expected in [
+ ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["var", (None, None, None, None, None, None)],
+ ["N", (None, None, None, None, None, None)],
+ ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
+ with self.subTest(dtype=dtype):
+ self.assertEqual(
+ export_trait_data(trait_data, samplelist, dtype=dtype),
+ expected)
+
+ def test_export_trait_data_dtype_all_flags(self):
+ """
+ Test `export_trait_data` with different values for the `dtype` keyword
+ argument and the different flags set up
+ """
+ for dtype, vflag, nflag, expected in [
+ ["val", False, False,
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["val", False, True,
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["val", True, False,
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["val", True, True,
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["var", False, False, (None, None, None, None, None, None)],
+ ["var", False, True, (None, None, None, None, None, None)],
+ ["var", True, False, (None, None, None, None, None, None)],
+ ["var", True, True, (None, None, None, None, None, None)],
+ ["N", False, False, (None, None, None, None, None, None)],
+ ["N", False, True, (None, None, None, None, None, None)],
+ ["N", True, False, (None, None, None, None, None, None)],
+ ["N", True, True, (None, None, None, None, None, None)],
+ ["all", False, False,
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
+ ["all", False, True,
+ (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+ 8.30401, None, 7.80944, None)],
+ ["all", True, False,
+ (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
+ 8.30401, None, 7.80944, None)],
+ ["all", True, True,
+ (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
+ 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
+ ]:
+ with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
+ self.assertEqual(
+ export_trait_data(
+ trait_data, samplelist, dtype=dtype, var_exists=vflag,
+ n_exists=nflag),
+ expected)
+
+ def test_export_informative(self):
+ """Test that the function exports appropriate data."""
+ # pylint: disable=W0621
+ for trait_data, inc_var, expected in [
+ [{"data": {
+ "sample1": {
+ "sample_name": "sample1", "value": 9, "variance": None,
+ "ndata": 13
+ },
+ "sample2": {
+ "sample_name": "sample2", "value": 8, "variance": None,
+ "ndata": 13
+ },
+ "sample3": {
+ "sample_name": "sample3", "value": 7, "variance": None,
+ "ndata": 13
+ },
+ "sample4": {
+ "sample_name": "sample4", "value": 6, "variance": None,
+ "ndata": 13
+ },
+ }}, 0, (
+ ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+ (None, None, None, None))],
+ [{"data": {
+ "sample1": {
+ "sample_name": "sample1", "value": 9, "variance": None,
+ "ndata": 13
+ },
+ "sample2": {
+ "sample_name": "sample2", "value": 8, "variance": None,
+ "ndata": 13
+ },
+ "sample3": {
+ "sample_name": "sample3", "value": None, "variance": None,
+ "ndata": 13
+ },
+ "sample4": {
+ "sample_name": "sample4", "value": 6, "variance": None,
+ "ndata": 13
+ },
+ }}, 0, (
+ ("sample1", "sample2", "sample4"), (9, 8, 6),
+ (None, None, None))],
+ [{"data": {
+ "sample1": {
+ "sample_name": "sample1", "value": 9, "variance": None,
+ "ndata": 13
+ },
+ "sample2": {
+ "sample_name": "sample2", "value": 8, "variance": None,
+ "ndata": 13
+ },
+ "sample3": {
+ "sample_name": "sample3", "value": 7, "variance": None,
+ "ndata": 13
+ },
+ "sample4": {
+ "sample_name": "sample4", "value": 6, "variance": None,
+ "ndata": 13
+ },
+ }}, True, (tuple(), tuple(), tuple())],
+ [{"data": {
+ "sample1": {
+ "sample_name": "sample1", "value": 9, "variance": None,
+ "ndata": 13
+ },
+ "sample2": {
+ "sample_name": "sample2", "value": 8, "variance": 0.657,
+ "ndata": 13
+ },
+ "sample3": {
+ "sample_name": "sample3", "value": 7, "variance": None,
+ "ndata": 13
+ },
+ "sample4": {
+ "sample_name": "sample4", "value": 6, "variance": None,
+ "ndata": 13
+ },
+ }}, 0, (
+ ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+ (None, 0.657, None, None))]]:
+ with self.subTest(trait_data=trait_data):
+ self.assertEqual(
+ export_informative(trait_data, inc_var), expected)
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index 7b66688..03fd4a6 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -4,43 +4,12 @@ from gn3.heatmaps import (
cluster_traits,
get_loci_names,
get_lrs_from_chr,
- export_trait_data,
compute_traits_order,
retrieve_samples_and_values,
process_traits_data_for_heatmap)
from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
-trait_data = {
- "mysqlid": 36688172,
- "data": {
- "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
- "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
- "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
- "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
- "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
- "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
- "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
- "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
- "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
- "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
- "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
- "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
- "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
- "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
- "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
- "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
- "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
- "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
- "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
- "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
- "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
- "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
- "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
- "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
- "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
- "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
- "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
slinked = (
(((0, 2, 0.16381088984330505),
@@ -55,62 +24,6 @@ slinked = (
class TestHeatmap(TestCase):
"""Class for testing heatmap computation functions"""
- def test_export_trait_data_dtype(self):
- """
- Test `export_trait_data` with different values for the `dtype` keyword
- argument
- """
- for dtype, expected in [
- ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["var", (None, None, None, None, None, None)],
- ["N", (None, None, None, None, None, None)],
- ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
- with self.subTest(dtype=dtype):
- self.assertEqual(
- export_trait_data(trait_data, samplelist, dtype=dtype),
- expected)
-
- def test_export_trait_data_dtype_all_flags(self):
- """
- Test `export_trait_data` with different values for the `dtype` keyword
- argument and the different flags set up
- """
- for dtype, vflag, nflag, expected in [
- ["val", False, False,
- (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["val", False, True,
- (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["val", True, False,
- (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["val", True, True,
- (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["var", False, False, (None, None, None, None, None, None)],
- ["var", False, True, (None, None, None, None, None, None)],
- ["var", True, False, (None, None, None, None, None, None)],
- ["var", True, True, (None, None, None, None, None, None)],
- ["N", False, False, (None, None, None, None, None, None)],
- ["N", False, True, (None, None, None, None, None, None)],
- ["N", True, False, (None, None, None, None, None, None)],
- ["N", True, True, (None, None, None, None, None, None)],
- ["all", False, False,
- (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)],
- ["all", False, True,
- (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
- 8.30401, None, 7.80944, None)],
- ["all", True, False,
- (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None,
- 8.30401, None, 7.80944, None)],
- ["all", True, True,
- (7.51879, None, None, 7.77141, None, None, 8.39265, None, None,
- 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)]
- ]:
- with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
- self.assertEqual(
- export_trait_data(
- trait_data, samplelist, dtype=dtype, var_exists=vflag,
- n_exists=nflag),
- expected)
-
def test_cluster_traits(self):
"""
Test that the clustering is working as expected.
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
new file mode 100644
index 0000000..7631a71
--- /dev/null
+++ b/tests/unit/test_partial_correlations.py
@@ -0,0 +1,151 @@
+"""Module contains tests for gn3.partial_correlations"""
+
+from unittest import TestCase
+from gn3.partial_correlations import (
+ fix_samples,
+ control_samples,
+ dictify_by_samples)
+
+sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+control_traits = (
+ {
+ "mysqlid": 36688172,
+ "data": {
+ "B6cC3-1": {
+ "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+ "ndata": None},
+ "BXD1": {
+ "sample_name": "BXD1", "value": 7.77141, "variance": None,
+ "ndata": None},
+ "BXD12": {
+ "sample_name": "BXD12", "value": 8.39265, "variance": None,
+ "ndata": None},
+ "BXD16": {
+ "sample_name": "BXD16", "value": 8.17443, "variance": None,
+ "ndata": None},
+ "BXD19": {
+ "sample_name": "BXD19", "value": 8.30401, "variance": None,
+ "ndata": None},
+ "BXD2": {
+ "sample_name": "BXD2", "value": 7.80944, "variance": None,
+ "ndata": None}}},
+ {
+ "mysqlid": 36688172,
+ "data": {
+ "B6cC3-21": {
+ "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+ "ndata": None},
+ "BXD21": {
+ "sample_name": "BXD1", "value": 7.77141, "variance": None,
+ "ndata": None},
+ "BXD12": {
+ "sample_name": "BXD12", "value": 8.39265, "variance": None,
+ "ndata": None},
+ "BXD16": {
+ "sample_name": "BXD16", "value": 8.17443, "variance": None,
+ "ndata": None},
+ "BXD19": {
+ "sample_name": "BXD19", "value": 8.30401, "variance": None,
+ "ndata": None},
+ "BXD2": {
+ "sample_name": "BXD2", "value": 7.80944, "variance": None,
+ "ndata": None}}},
+ {
+ "mysqlid": 36688172,
+ "data": {
+ "B6cC3-1": {
+ "sample_name": "B6cC3-1", "value": 7.51879, "variance": None,
+ "ndata": None},
+ "BXD1": {
+ "sample_name": "BXD1", "value": 7.77141, "variance": None,
+ "ndata": None},
+ "BXD12": {
+ "sample_name": "BXD12", "value": None, "variance": None,
+ "ndata": None},
+ "BXD16": {
+ "sample_name": "BXD16", "value": None, "variance": None,
+ "ndata": None},
+ "BXD19": {
+ "sample_name": "BXD19", "value": None, "variance": None,
+ "ndata": None},
+ "BXD2": {
+ "sample_name": "BXD2", "value": 7.80944, "variance": None,
+ "ndata": None}}})
+
+dictified_control_samples = (
+ {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+ "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+ "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+ "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+ "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+ {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None},
+ "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None},
+ "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}},
+ {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None},
+ "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}})
+
+class TestPartialCorrelations(TestCase):
+ """Class for testing partial correlations computation functions"""
+
+ def test_control_samples(self):
+ """Test that the control_samples works as expected."""
+ self.assertEqual(
+ control_samples(control_traits, sampleslist),
+ ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
+ ("BXD12", "BXD16", "BXD19", "BXD2"),
+ ("B6cC3-1", "BXD1", "BXD2")),
+ ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+ (8.39265, 8.17443, 8.30401, 7.80944),
+ (7.51879, 7.77141, 7.80944)),
+ ((None, None, None, None, None, None), (None, None, None, None),
+ (None, None, None)),
+ (6, 4, 3)))
+
+ def test_dictify_by_samples(self):
+ """
+ Given:
+ a sequence of sequences with sample names, values and variances, as
+ in the output of `gn3.partial_correlations.control_samples` or
+ the output of `gn3.db.traits.export_informative`
+ When:
+ the sequence is passed as an argument into
+ `gn3.partial_correlations.dictify_by_samples`
+ Then:
+ return a sequence of dicts whose keys are the sample names, and
+ each of whose values is a sub-dict with the keys 'sample_name',
+ 'value' and 'variance', holding the corresponding values that
+ were passed in.
+ """
+ self.assertEqual(
+ dictify_by_samples(
+ ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"),
+ ("BXD12", "BXD16", "BXD19", "BXD2"),
+ ("B6cC3-1", "BXD1", "BXD2")),
+ ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944),
+ (8.39265, 8.17443, 8.30401, 7.80944),
+ (7.51879, 7.77141, 7.80944)),
+ ((None, None, None, None, None, None), (None, None, None, None),
+ (None, None, None)),
+ (6, 4, 3))),
+ dictified_control_samples)
+
+ def test_fix_samples(self):
+ """Test that fix_samples fixes the values"""
+ self.assertEqual(
+ fix_samples(
+ {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879,
+ "variance": None},
+ "BXD1": {"sample_name": "BXD1", "value": 7.77141,
+ "variance": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944,
+ "variance": None}},
+ dictified_control_samples),
+ (("BXD2",), (7.80944,),
+ (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944, 8.39265,
+ 8.17443, 8.30401, 7.80944, 7.51879, 7.77141, 7.80944),
+ (None,),
+ (None, None, None, None, None, None, None, None, None, None, None,
+ None, None)))