diff options
| -rw-r--r-- | gn3/db/traits.py | 93 | ||||
| -rw-r--r-- | gn3/heatmaps.py | 67 | ||||
| -rw-r--r-- | gn3/partial_correlations.py | 88 | ||||
| -rw-r--r-- | tests/unit/db/test_traits.py | 176 | ||||
| -rw-r--r-- | tests/unit/test_heatmaps.py | 87 | ||||
| -rw-r--r-- | tests/unit/test_partial_correlations.py | 151 | 
6 files changed, 509 insertions, 153 deletions
| diff --git a/gn3/db/traits.py b/gn3/db/traits.py index f2673c8..1c6aaa7 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -1,12 +1,81 @@ """This class contains functions relating to trait data manipulation""" import os +from functools import reduce from typing import Any, Dict, Union, Sequence + from gn3.settings import TMPDIR from gn3.random import random_string from gn3.function_helpers import compose from gn3.db.datasets import retrieve_trait_dataset +def export_trait_data( + trait_data: dict, samplelist: Sequence[str], dtype: str = "val", + var_exists: bool = False, n_exists: bool = False): + """ + Export data according to `samplelist`. Mostly used in calculating + correlations. + + DESCRIPTION: + Migrated from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211 + + PARAMETERS + trait: (dict) + The dictionary of key-value pairs representing a trait + samplelist: (list) + A list of sample names + dtype: (str) + ... verify what this is ... + var_exists: (bool) + A flag indicating existence of variance + n_exists: (bool) + A flag indicating existence of ndata + """ + def __export_all_types(tdata, sample): + sample_data = [] + if tdata[sample]["value"]: + sample_data.append(tdata[sample]["value"]) + if var_exists: + if tdata[sample]["variance"]: + sample_data.append(tdata[sample]["variance"]) + else: + sample_data.append(None) + if n_exists: + if tdata[sample]["ndata"]: + sample_data.append(tdata[sample]["ndata"]) + else: + sample_data.append(None) + else: + if var_exists and n_exists: + sample_data += [None, None, None] + elif var_exists or n_exists: + sample_data += [None, None] + else: + sample_data.append(None) + + return tuple(sample_data) + + def __exporter(accumulator, sample): + # pylint: disable=[R0911] + if sample in trait_data["data"]: + if dtype == "val": + return accumulator + (trait_data["data"][sample]["value"], ) + if dtype == "var": + return accumulator + (trait_data["data"][sample]["variance"], ) + if dtype == "N": + return accumulator + (trait_data["data"][sample]["ndata"], ) + if dtype == "all": + return accumulator + __export_all_types(trait_data["data"], sample) + raise KeyError("Type `%s` is incorrect" % dtype) + if var_exists and n_exists: + return accumulator + (None, None, None) + if var_exists or n_exists: + return accumulator + (None, None) + return accumulator + (None,) + + return reduce(__exporter, samplelist, tuple()) + def get_trait_csv_sample_data(conn: Any, trait_name: int, phenotype_id: int): """Fetch a trait and return it as a csv string""" @@ -674,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR): """Generate a unique filename for use with generated traits files.""" return "{}/traits_test_file_{}.txt".format( os.path.abspath(base_path), random_string(10)) + +def export_informative(trait_data: dict, inc_var: bool = False) -> tuple: + """ + Export informative strain + + This is a migration of the `exportInformative` function in + web/webqtl/base/webqtlTrait.py module in GeneNetwork1. + + There is a chance that the original implementation has a bug, especially + dealing with the `inc_var` value. It the `inc_var` value is meant to control + the inclusion of the `variance` value, then the current implementation, and + that one in GN1 have a bug. + """ + def __exporter__(acc, data_item): + if not inc_var or data_item["variance"] is not None: + return ( + acc[0] + (data_item["sample_name"],), + acc[1] + (data_item["value"],), + acc[2] + (data_item["variance"],)) + return acc + return reduce( + __exporter__, + filter(lambda td: td["value"] is not None, trait_data["data"].values()), + (tuple(), tuple(), tuple())) diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py index 2dd9d07..bf9dfd1 100644 --- a/gn3/heatmaps.py +++ b/gn3/heatmaps.py @@ -14,6 +14,7 @@ from plotly.subplots import make_subplots # type: ignore from gn3.settings import TMPDIR from gn3.random import random_string from gn3.computations.slink import slink +from gn3.db.traits import export_trait_data from gn3.computations.correlations2 import compute_correlation from gn3.db.genotypes import ( build_genotype_file, load_genotype_samples) @@ -26,72 +27,6 @@ from gn3.computations.qtlreaper import ( parse_reaper_main_results, organise_reaper_main_results) -def export_trait_data( - trait_data: dict, samplelist: Sequence[str], dtype: str = "val", - var_exists: bool = False, n_exists: bool = False): - """ - Export data according to `samplelist`. Mostly used in calculating - correlations. - - DESCRIPTION: - Migrated from - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211 - - PARAMETERS - trait: (dict) - The dictionary of key-value pairs representing a trait - samplelist: (list) - A list of sample names - dtype: (str) - ... verify what this is ... - var_exists: (bool) - A flag indicating existence of variance - n_exists: (bool) - A flag indicating existence of ndata - """ - def __export_all_types(tdata, sample): - sample_data = [] - if tdata[sample]["value"]: - sample_data.append(tdata[sample]["value"]) - if var_exists: - if tdata[sample]["variance"]: - sample_data.append(tdata[sample]["variance"]) - else: - sample_data.append(None) - if n_exists: - if tdata[sample]["ndata"]: - sample_data.append(tdata[sample]["ndata"]) - else: - sample_data.append(None) - else: - if var_exists and n_exists: - sample_data += [None, None, None] - elif var_exists or n_exists: - sample_data += [None, None] - else: - sample_data.append(None) - - return tuple(sample_data) - - def __exporter(accumulator, sample): - # pylint: disable=[R0911] - if sample in trait_data["data"]: - if dtype == "val": - return accumulator + (trait_data["data"][sample]["value"], ) - if dtype == "var": - return accumulator + (trait_data["data"][sample]["variance"], ) - if dtype == "N": - return accumulator + (trait_data["data"][sample]["ndata"], ) - if dtype == "all": - return accumulator + __export_all_types(trait_data["data"], sample) - raise KeyError("Type `%s` is incorrect" % dtype) - if var_exists and n_exists: - return accumulator + (None, None, None) - if var_exists or n_exists: - return accumulator + (None, None) - return accumulator + (None,) - - return reduce(__exporter, samplelist, tuple()) def trait_display_name(trait: Dict): """ diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py new file mode 100644 index 0000000..c556d10 --- /dev/null +++ b/gn3/partial_correlations.py @@ -0,0 +1,88 @@ +""" +This module deals with partial correlations. + +It is an attempt to migrate over the partial correlations feature from +GeneNetwork1. +""" + +from functools import reduce +from typing import Any, Sequence + +def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): + """ + Fetches data for the control traits. + + This migrates `web/webqtl/correlation/correlationFunction.controlStrain` in + GN1, with a few modifications to the arguments passed in. + + PARAMETERS: + controls: A map of sample names to trait data. Equivalent to the `cvals` + value in the corresponding source function in GN1. + sampleslist: A list of samples. Equivalent to `strainlst` in the + corresponding source function in GN1 + """ + def __process_control__(trait_data): + def __process_sample__(acc, sample): + if sample in trait_data["data"].keys(): + sample_item = trait_data["data"][sample] + val = sample_item["value"] + if val is not None: + return ( + acc[0] + (sample,), + acc[1] + (val,), + acc[2] + (sample_item["variance"],)) + return acc + return reduce( + __process_sample__, sampleslist, (tuple(), tuple(), tuple())) + + return reduce( + lambda acc, item: ( + acc[0] + (item[0],), + acc[1] + (item[1],), + acc[2] + (item[2],), + acc[3] + (len(item[0]),), + ), + [__process_control__(trait_data) for trait_data in controls], + (tuple(), tuple(), tuple(), tuple())) + +def dictify_by_samples(samples_vals_vars: Sequence[Sequence]) -> Sequence[dict]: + """ + Build a sequence of dictionaries from a sequence of separate sequences of + samples, values and variances. + + This is a partial migration of + `web.webqtl.correlation.correlationFunction.fixStrains` function in GN1. + This implementation extracts code that will find common use, and that will + find use in more than one place. + """ + return tuple( + { + sample: {"sample_name": sample, "value": val, "variance": var} + for sample, val, var in zip(*trait_line) + } for trait_line in zip(*(samples_vals_vars[0:3]))) + +def fix_samples(primary_trait: dict, control_traits: Sequence[dict]) -> Sequence[Sequence[Any]]: + """ + Corrects sample_names, values and variance such that they all contain only + those samples that are common to the reference trait and all control traits. + + This is a partial migration of the + `web.webqtl.correlation.correlationFunction.fixStrain` function in GN1. + """ + primary_samples = tuple( + present[0] for present in + ((sample, all(sample in control.keys() for control in control_traits)) + for sample in primary_trait.keys()) + if present[1]) + control_vals_vars: tuple = reduce( + lambda acc, x: (acc[0] + (x[0],), acc[1] + (x[1],)), + ((item["value"], item["variance"]) + for sublist in [tuple(control.values()) for control in control_traits] + for item in sublist), + (tuple(), tuple())) + return ( + primary_samples, + tuple(primary_trait[sample]["value"] for sample in primary_samples), + control_vals_vars[0], + tuple(primary_trait[sample]["variance"] for sample in primary_samples), + control_vals_vars[1]) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 8af8e82..4aa9389 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -2,6 +2,8 @@ from unittest import mock, TestCase from gn3.db.traits import ( build_trait_name, + export_trait_data, + export_informative, set_haveinfo_field, update_sample_data, retrieve_trait_info, @@ -12,6 +14,38 @@ from gn3.db.traits import ( retrieve_publish_trait_info, retrieve_probeset_trait_info) +samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +trait_data = { + "mysqlid": 36688172, + "data": { + "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, + "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, + "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, + "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, + "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, + "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, + "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, + "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, + "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, + "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, + "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, + "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, + "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, + "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, + "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, + "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, + "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, + "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, + "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, + "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, + "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, + "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} + class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" @@ -226,3 +260,145 @@ class TestTraitsDBFunctions(TestCase): with self.subTest(trait_info=trait_info, expected=expected): self.assertEqual( set_confidential_field(trait_type, trait_info), expected) + + def test_export_trait_data_dtype(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument + """ + for dtype, expected in [ + ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", (None, None, None, None, None, None)], + ["N", (None, None, None, None, None, None)], + ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]: + with self.subTest(dtype=dtype): + self.assertEqual( + export_trait_data(trait_data, samplelist, dtype=dtype), + expected) + + def test_export_trait_data_dtype_all_flags(self): + """ + Test `export_trait_data` with different values for the `dtype` keyword + argument and the different flags set up + """ + for dtype, vflag, nflag, expected in [ + ["val", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", False, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["val", True, True, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["var", False, False, (None, None, None, None, None, None)], + ["var", False, True, (None, None, None, None, None, None)], + ["var", True, False, (None, None, None, None, None, None)], + ["var", True, True, (None, None, None, None, None, None)], + ["N", False, False, (None, None, None, None, None, None)], + ["N", False, True, (None, None, None, None, None, None)], + ["N", True, False, (None, None, None, None, None, None)], + ["N", True, True, (None, None, None, None, None, None)], + ["all", False, False, + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], + ["all", False, True, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, False, + (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, + 8.30401, None, 7.80944, None)], + ["all", True, True, + (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, + 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] + ]: + with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): + self.assertEqual( + export_trait_data( + trait_data, samplelist, dtype=dtype, var_exists=vflag, + n_exists=nflag), + expected) + + def test_export_informative(self): + """Test that the function exports appropriate data.""" + # pylint: disable=W0621 + for trait_data, inc_var, expected in [ + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": None, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample4"), (9, 8, 6), + (None, None, None))], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": None, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, True, (tuple(), tuple(), tuple())], + [{"data": { + "sample1": { + "sample_name": "sample1", "value": 9, "variance": None, + "ndata": 13 + }, + "sample2": { + "sample_name": "sample2", "value": 8, "variance": 0.657, + "ndata": 13 + }, + "sample3": { + "sample_name": "sample3", "value": 7, "variance": None, + "ndata": 13 + }, + "sample4": { + "sample_name": "sample4", "value": 6, "variance": None, + "ndata": 13 + }, + }}, 0, ( + ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6), + (None, 0.657, None, None))]]: + with self.subTest(trait_data=trait_data): + self.assertEqual( + export_informative(trait_data, inc_var), expected) diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py index 7b66688..03fd4a6 100644 --- a/tests/unit/test_heatmaps.py +++ b/tests/unit/test_heatmaps.py @@ -4,43 +4,12 @@ from gn3.heatmaps import ( cluster_traits, get_loci_names, get_lrs_from_chr, - export_trait_data, compute_traits_order, retrieve_samples_and_values, process_traits_data_for_heatmap) from tests.unit.sample_test_data import organised_trait_1, organised_trait_2 samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] -trait_data = { - "mysqlid": 36688172, - "data": { - "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None}, - "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None}, - "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None}, - "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None}, - "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None}, - "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None}, - "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None}, - "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None}, - "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None}, - "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None}, - "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None}, - "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None}, - "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None}, - "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None}, - "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None}, - "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None}, - "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None}, - "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None}, - "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None}, - "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None}, - "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None}, - "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None}, - "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None}, - "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None}, - "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None}, - "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None}, - "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}} slinked = ( (((0, 2, 0.16381088984330505), @@ -55,62 +24,6 @@ slinked = ( class TestHeatmap(TestCase): """Class for testing heatmap computation functions""" - def test_export_trait_data_dtype(self): - """ - Test `export_trait_data` with different values for the `dtype` keyword - argument - """ - for dtype, expected in [ - ["val", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["var", (None, None, None, None, None, None)], - ["N", (None, None, None, None, None, None)], - ["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]: - with self.subTest(dtype=dtype): - self.assertEqual( - export_trait_data(trait_data, samplelist, dtype=dtype), - expected) - - def test_export_trait_data_dtype_all_flags(self): - """ - Test `export_trait_data` with different values for the `dtype` keyword - argument and the different flags set up - """ - for dtype, vflag, nflag, expected in [ - ["val", False, False, - (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", False, True, - (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, False, - (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["val", True, True, - (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["var", False, False, (None, None, None, None, None, None)], - ["var", False, True, (None, None, None, None, None, None)], - ["var", True, False, (None, None, None, None, None, None)], - ["var", True, True, (None, None, None, None, None, None)], - ["N", False, False, (None, None, None, None, None, None)], - ["N", False, True, (None, None, None, None, None, None)], - ["N", True, False, (None, None, None, None, None, None)], - ["N", True, True, (None, None, None, None, None, None)], - ["all", False, False, - (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)], - ["all", False, True, - (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, - 8.30401, None, 7.80944, None)], - ["all", True, False, - (7.51879, None, 7.77141, None, 8.39265, None, 8.17443, None, - 8.30401, None, 7.80944, None)], - ["all", True, True, - (7.51879, None, None, 7.77141, None, None, 8.39265, None, None, - 8.17443, None, None, 8.30401, None, None, 7.80944, None, None)] - ]: - with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag): - self.assertEqual( - export_trait_data( - trait_data, samplelist, dtype=dtype, var_exists=vflag, - n_exists=nflag), - expected) - def test_cluster_traits(self): """ Test that the clustering is working as expected. diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py new file mode 100644 index 0000000..7631a71 --- /dev/null +++ b/tests/unit/test_partial_correlations.py @@ -0,0 +1,151 @@ +"""Module contains tests for gn3.partial_correlations""" + +from unittest import TestCase +from gn3.partial_correlations import ( + fix_samples, + control_samples, + dictify_by_samples) + +sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] +control_traits = ( + { + "mysqlid": 36688172, + "data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD1": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": 8.39265, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": 8.17443, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": 8.30401, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}, + { + "mysqlid": 36688172, + "data": { + "B6cC3-21": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD21": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": 8.39265, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": 8.17443, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": 8.30401, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}, + { + "mysqlid": 36688172, + "data": { + "B6cC3-1": { + "sample_name": "B6cC3-1", "value": 7.51879, "variance": None, + "ndata": None}, + "BXD1": { + "sample_name": "BXD1", "value": 7.77141, "variance": None, + "ndata": None}, + "BXD12": { + "sample_name": "BXD12", "value": None, "variance": None, + "ndata": None}, + "BXD16": { + "sample_name": "BXD16", "value": None, "variance": None, + "ndata": None}, + "BXD19": { + "sample_name": "BXD19", "value": None, "variance": None, + "ndata": None}, + "BXD2": { + "sample_name": "BXD2", "value": 7.80944, "variance": None, + "ndata": None}}}) + +dictified_control_samples = ( + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, + {"BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None}, + "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None}, + "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}, + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None}}) + +class TestPartialCorrelations(TestCase): + """Class for testing partial correlations computation functions""" + + def test_control_samples(self): + """Test that the control_samples works as expected.""" + self.assertEqual( + control_samples(control_traits, sampleslist), + ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"), + ("BXD12", "BXD16", "BXD19", "BXD2"), + ("B6cC3-1", "BXD1", "BXD2")), + ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), + (8.39265, 8.17443, 8.30401, 7.80944), + (7.51879, 7.77141, 7.80944)), + ((None, None, None, None, None, None), (None, None, None, None), + (None, None, None)), + (6, 4, 3))) + + def test_dictify_by_samples(self): + """ + Given: + a sequence of sequences with sample names, values and variances, as + in the output of `gn3.partial_correlations.control_samples` or + the output of `gn3.db.traits.export_informative` + When: + the sequence is passed as an argument into the + `gn3.partial_correlations.dictify_by_sample` + Then: + return a sequence of dicts with keys being the values of the sample + names, and each of who's values being sub-dicts with the keys + 'sample_name', 'value' and 'variance' whose values correspond to the + values passed in. + """ + self.assertEqual( + dictify_by_samples( + ((("B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"), + ("BXD12", "BXD16", "BXD19", "BXD2"), + ("B6cC3-1", "BXD1", "BXD2")), + ((7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944), + (8.39265, 8.17443, 8.30401, 7.80944), + (7.51879, 7.77141, 7.80944)), + ((None, None, None, None, None, None), (None, None, None, None), + (None, None, None)), + (6, 4, 3))), + dictified_control_samples) + + def test_fix_samples(self): + """Test that fix_samples fixes the values""" + self.assertEqual( + fix_samples( + {"B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, + "variance": None}, + "BXD1": {"sample_name": "BXD1", "value": 7.77141, + "variance": None}, + "BXD2": {"sample_name": "BXD2", "value": 7.80944, + "variance": None}}, + dictified_control_samples), + (("BXD2",), (7.80944,), + (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944, 8.39265, + 8.17443, 8.30401, 7.80944, 7.51879, 7.77141, 7.80944), + (None,), + (None, None, None, None, None, None, None, None, None, None, None, + None, None))) | 
