aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md40
-rw-r--r--gn3/api/heatmaps.py36
-rw-r--r--gn3/api/wgcna.py25
-rw-r--r--gn3/app.py14
-rw-r--r--gn3/computations/heatmap.py177
-rw-r--r--gn3/computations/parsers.py10
-rw-r--r--gn3/computations/qtlreaper.py170
-rw-r--r--gn3/computations/wgcna.py51
-rw-r--r--gn3/db/datasets.py52
-rw-r--r--gn3/db/genotypes.py184
-rw-r--r--gn3/db/traits.py78
-rw-r--r--gn3/heatmaps.py424
-rw-r--r--gn3/heatmaps/heatmaps.py54
-rw-r--r--gn3/random.py11
-rw-r--r--gn3/settings.py21
-rw-r--r--guix.scm12
-rw-r--r--requirements.txt2
-rw-r--r--scripts/output.json161
-rw-r--r--scripts/wgcna_analysis.R115
-rw-r--r--scripts/wgcna_test_data.csv9
-rw-r--r--scripts/wgcna_test_data.json65
-rw-r--r--setup.py2
-rw-r--r--tests/integration/test_wgcna.py73
-rw-r--r--tests/unit/computations/data/qtlreaper/main_output_sample.txt11
-rw-r--r--tests/unit/computations/data/qtlreaper/permu_output_sample.txt27
-rw-r--r--tests/unit/computations/test_parsers.py4
-rw-r--r--tests/unit/computations/test_qtlreaper.py135
-rw-r--r--tests/unit/computations/test_wgcna.py160
-rw-r--r--tests/unit/db/data/genotypes/genotype_sample1.geno23
-rw-r--r--tests/unit/db/test_datasets.py42
-rw-r--r--tests/unit/db/test_genotypes.py170
-rw-r--r--tests/unit/db/test_traits.py16
-rw-r--r--tests/unit/sample_test_data.py111
-rw-r--r--tests/unit/test_heatmaps.py (renamed from tests/unit/computations/test_heatmap.py)152
34 files changed, 2273 insertions, 364 deletions
diff --git a/README.md b/README.md
index 750f55d..84a7a54 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ guix environment --load=guix.scm
Also, make sure you have the [guix-bioinformatics](https://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics) channel set up.
```bash
-env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment --load=guix.scm
+env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment --expose=$HOME/genotype_files/ --load=guix.scm
python3
import redis
```
@@ -32,7 +32,7 @@ python3
#### Run a Guix container
```
-env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment -C --network --load=guix.scm
+env GUIX_PACKAGE_PATH=~/guix-bioinformatics/ ~/.config/guix/current/bin/guix environment -C --network --expose=$HOME/genotype_files/ --load=guix.scm
```
@@ -157,3 +157,39 @@ guix. To freeze dependencies:
pip freeze --path venv/lib/python3.8/site-packages > requirements.txt
```
+
+## Genotype Files
+
+You can get the genotype files from http://ipfs.genenetwork.org/ipfs/QmXQy3DAUWJuYxubLHLkPMNCEVq1oV7844xWG2d1GSPFPL and save them on your host machine at, say `$HOME/genotype_files` with something like:
+
+```bash
+$ mkdir -p $HOME/genotype_files
+$ cd $HOME/genotype_files
+$ yes | 7z x genotype_files.tar.7z
+$ tar xf genotype_files.tar
+```
+
+The `genotype_files.tar.7z` file seems to only contain the **BXD.geno** genotype file.
+
+## QTLReaper (rust-qtlreaper) and Trait Files
+
+To run QTL computations, this system makes use of the [rust-qtlreaper](https://github.com/chfi/rust-qtlreaper.git) utility.
+
+To do this, the system needs to export the trait data into a tab-separated file, that can then be passed to the utility using the `--traits` option. For more information about the available options, please [see the rust-qtlreaper](https://github.com/chfi/rust-qtlreaper.git) repository.
+
+### Traits File Format
+
+The traits file begins with a header row/line with the column headers. The first column in the file has the header **"Trait"**. Every other column has a header for one of the strains in consideration.
+
+Under the **"Trait"** column, the traits are numbered from **T1** to **T<n>** where **<n>** is the count of the total number of traits in consideration.
+
+As an example, you could end up with a trait file like the following:
+
+```txt
+Trait BXD27 BXD32 DBA/2J BXD21 ...
+T1 10.5735 9.27408 9.48255 9.18253 ...
+T2 6.4471 6.7191 5.98015 6.68051 ...
+...
+```
+
+It is very important that the column header names for the strains correspond to the genotype file used.
diff --git a/gn3/api/heatmaps.py b/gn3/api/heatmaps.py
new file mode 100644
index 0000000..62ca2ad
--- /dev/null
+++ b/gn3/api/heatmaps.py
@@ -0,0 +1,36 @@
+"""
+Module to hold the entrypoint functions that generate heatmaps
+"""
+
+import io
+from flask import jsonify
+from flask import request
+from flask import Blueprint
+from gn3.heatmaps import build_heatmap
+from gn3.db_utils import database_connector
+
+heatmaps = Blueprint("heatmaps", __name__)
+
+@heatmaps.route("/clustered", methods=("POST",))
+def clustered_heatmaps():
+ """
+ Parses the incoming data and responds with the JSON-serialized plotly figure
+ representing the clustered heatmap.
+ """
+ traits_names = request.get_json().get("traits_names", tuple())
+ if len(traits_names) < 2:
+ return jsonify({
+ "message": "You need to provide at least two trait names."
+ }), 400
+ conn, _cursor = database_connector()
+ def parse_trait_fullname(trait):
+ name_parts = trait.split(":")
+ return "{dataset_name}::{trait_name}".format(
+ dataset_name=name_parts[1], trait_name=name_parts[0])
+ traits_fullnames = [parse_trait_fullname(trait) for trait in traits_names]
+
+ with io.StringIO() as io_str:
+ _filename, figure = build_heatmap(traits_fullnames, conn)
+ figure.write_json(io_str)
+ fig_json = io_str.getvalue()
+ return fig_json, 200
diff --git a/gn3/api/wgcna.py b/gn3/api/wgcna.py
new file mode 100644
index 0000000..fa044cf
--- /dev/null
+++ b/gn3/api/wgcna.py
@@ -0,0 +1,25 @@
+"""endpoint to run wgcna analysis"""
+from flask import Blueprint
+from flask import request
+from flask import current_app
+from flask import jsonify
+
+from gn3.computations.wgcna import call_wgcna_script
+
+wgcna = Blueprint("wgcna", __name__)
+
+
+@wgcna.route("/run_wgcna", methods=["POST"])
+def run_wgcna():
+ """run wgcna:output should be a json with a the data"""
+
+ wgcna_data = request.json
+
+ wgcna_script = current_app.config["WGCNA_RSCRIPT"]
+
+ results = call_wgcna_script(wgcna_script, wgcna_data)
+
+ if results.get("data") is None:
+ return jsonify(results), 401
+
+ return jsonify(results), 200
diff --git a/gn3/app.py b/gn3/app.py
index 046b5de..a25332c 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -3,13 +3,17 @@ import os
from typing import Dict
from typing import Union
+
from flask import Flask
+from flask_cors import CORS # type: ignore
+
from gn3.api.gemma import gemma
from gn3.api.rqtl import rqtl
from gn3.api.general import general
+from gn3.api.heatmaps import heatmaps
from gn3.api.correlation import correlation
from gn3.api.data_entry import data_entry
-
+from gn3.api.wgcna import wgcna
def create_app(config: Union[Dict, str, None] = None) -> Flask:
"""Create a new flask object"""
@@ -17,6 +21,12 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
# Load default configuration
app.config.from_object("gn3.settings")
+ CORS(
+ app,
+ origins=app.config["CORS_ORIGINS"],
+ allow_headers=app.config["CORS_HEADERS"],
+ supports_credentials=True, intercept_exceptions=False)
+
# Load environment configuration
if "GN3_CONF" in os.environ:
app.config.from_envvar('GN3_CONF')
@@ -30,6 +40,8 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
app.register_blueprint(general, url_prefix="/api/")
app.register_blueprint(gemma, url_prefix="/api/gemma")
app.register_blueprint(rqtl, url_prefix="/api/rqtl")
+ app.register_blueprint(heatmaps, url_prefix="/api/heatmaps")
app.register_blueprint(correlation, url_prefix="/api/correlation")
app.register_blueprint(data_entry, url_prefix="/api/dataentry")
+ app.register_blueprint(wgcna, url_prefix="/api/wgcna")
return app
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
deleted file mode 100644
index 3c35029..0000000
--- a/gn3/computations/heatmap.py
+++ /dev/null
@@ -1,177 +0,0 @@
-"""
-This module will contain functions to be used in computation of the data used to
-generate various kinds of heatmaps.
-"""
-
-from functools import reduce
-from typing import Any, Dict, Sequence
-from gn3.computations.slink import slink
-from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
-from gn3.computations.correlations2 import compute_correlation
-
-def export_trait_data(
- trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
- var_exists: bool = False, n_exists: bool = False):
- """
- Export data according to `strainlist`. Mostly used in calculating
- correlations.
-
- DESCRIPTION:
- Migrated from
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
- PARAMETERS
- trait: (dict)
- The dictionary of key-value pairs representing a trait
- strainlist: (list)
- A list of strain names
- type: (str)
- ... verify what this is ...
- var_exists: (bool)
- A flag indicating existence of variance
- n_exists: (bool)
- A flag indicating existence of ndata
- """
- def __export_all_types(tdata, strain):
- sample_data = []
- if tdata[strain]["value"]:
- sample_data.append(tdata[strain]["value"])
- if var_exists:
- if tdata[strain]["variance"]:
- sample_data.append(tdata[strain]["variance"])
- else:
- sample_data.append(None)
- if n_exists:
- if tdata[strain]["ndata"]:
- sample_data.append(tdata[strain]["ndata"])
- else:
- sample_data.append(None)
- else:
- if var_exists and n_exists:
- sample_data += [None, None, None]
- elif var_exists or n_exists:
- sample_data += [None, None]
- else:
- sample_data.append(None)
-
- return tuple(sample_data)
-
- def __exporter(accumulator, strain):
- # pylint: disable=[R0911]
- if strain in trait_data["data"]:
- if dtype == "val":
- return accumulator + (trait_data["data"][strain]["value"], )
- if dtype == "var":
- return accumulator + (trait_data["data"][strain]["variance"], )
- if dtype == "N":
- return accumulator + (trait_data["data"][strain]["ndata"], )
- if dtype == "all":
- return accumulator + __export_all_types(trait_data["data"], strain)
- raise KeyError("Type `%s` is incorrect" % dtype)
- if var_exists and n_exists:
- return accumulator + (None, None, None)
- if var_exists or n_exists:
- return accumulator + (None, None)
- return accumulator + (None,)
-
- return reduce(__exporter, strainlist, tuple())
-
-def trait_display_name(trait: Dict):
- """
- Given a trait, return a name to use to display the trait on a heatmap.
-
- DESCRIPTION
- Migrated from
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
- """
- if trait.get("db", None) and trait.get("trait_name", None):
- if trait["db"]["dataset_type"] == "Temp":
- desc = trait["description"]
- if desc.find("PCA") >= 0:
- return "%s::%s" % (
- trait["db"]["displayname"],
- desc[desc.rindex(':')+1:].strip())
- return "%s::%s" % (
- trait["db"]["displayname"],
- desc[:desc.index('entered')].strip())
- prefix = "%s::%s" % (
- trait["db"]["dataset_name"], trait["trait_name"])
- if trait["cellid"]:
- return "%s::%s" % (prefix, trait["cellid"])
- return prefix
- return trait["description"]
-
-def cluster_traits(traits_data_list: Sequence[Dict]):
- """
- Clusters the trait values.
-
- DESCRIPTION
- Attempts to replicate the clustering of the traits, as done at
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
- """
- def __compute_corr(tdata_i, tdata_j):
- if tdata_i[0] == tdata_j[0]:
- return 0.0
- corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
- corr = corr_vals[0]
- if (1 - corr) < 0:
- return 0.0
- return 1 - corr
-
- def __cluster(tdata_i):
- return tuple(
- __compute_corr(tdata_i, tdata_j)
- for tdata_j in enumerate(traits_data_list))
-
- return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
-
-def heatmap_data(formd, search_result, conn: Any):
- """
- heatmap function
-
- DESCRIPTION
- This function is an attempt to reproduce the initialisation at
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
- and also the clustering and slink computations at
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
- with the help of the `gn3.computations.heatmap.cluster_traits` function.
-
- It does not try to actually draw the heatmap image.
-
- PARAMETERS:
- TODO: Elaborate on the parameters here...
- """
- threshold = 0 # webqtlConfig.PUBLICTHRESH
- cluster_checked = formd.formdata.getvalue("clusterCheck", "")
- strainlist = [
- strain for strain in formd.strainlist if strain not in formd.parlist]
- genotype = formd.genotype
-
- def __retrieve_traitlist_and_datalist(threshold, fullname):
- trait = retrieve_trait_info(threshold, fullname, conn)
- return (
- trait,
- export_trait_data(retrieve_trait_data(trait, conn), strainlist))
-
- traits_details = [
- __retrieve_traitlist_and_datalist(threshold, fullname)
- for fullname in search_result]
- traits_list = map(lambda x: x[0], traits_details)
- traits_data_list = map(lambda x: x[1], traits_details)
-
- return {
- "target_description_checked": formd.formdata.getvalue(
- "targetDescriptionCheck", ""),
- "cluster_checked": cluster_checked,
- "slink_data": (
- slink(cluster_traits(traits_data_list))
- if cluster_checked else False),
- "sessionfile": formd.formdata.getvalue("session"),
- "genotype": genotype,
- "nLoci": sum(map(len, genotype)),
- "strainlist": strainlist,
- "ppolar": formd.ppolar,
- "mpolar":formd.mpolar,
- "traits_list": traits_list,
- "traits_data_list": traits_data_list
- }
diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
index 94387ff..1af35d6 100644
--- a/gn3/computations/parsers.py
+++ b/gn3/computations/parsers.py
@@ -14,7 +14,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
'h': 0,
'u': None,
}
- genotypes, strains = [], []
+ genotypes, samples = [], []
with open(file_path, "r") as _genofile:
for line in _genofile:
line = line.strip()
@@ -22,8 +22,8 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
continue
cells = line.split()
if line.startswith("Chr"):
- strains = cells[4:]
- strains = [strain.lower() for strain in strains]
+ samples = cells[4:]
+ samples = [sample.lower() for sample in samples]
continue
values = [__map.get(value.lower(), None) for value in cells[4:]]
genotype = {
@@ -32,7 +32,7 @@ def parse_genofile(file_path: str) -> Tuple[List[str],
"cm": cells[2],
"mb": cells[3],
"values": values,
- "dicvalues": dict(zip(strains, values)),
+ "dicvalues": dict(zip(samples, values)),
}
genotypes.append(genotype)
- return strains, genotypes
+ return samples, genotypes
diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
new file mode 100644
index 0000000..d1ff4ac
--- /dev/null
+++ b/gn3/computations/qtlreaper.py
@@ -0,0 +1,170 @@
+"""
+This module contains functions to interact with the `qtlreaper` utility for
+computation of QTLs.
+"""
+import os
+import subprocess
+from typing import Union
+
+from gn3.random import random_string
+from gn3.settings import TMPDIR, REAPER_COMMAND
+
+def generate_traits_file(samples, trait_values, traits_filename):
+ """
+ Generate a traits file for use with `qtlreaper`.
+
+ PARAMETERS:
+ samples: A list of samples to use as the headers for the various columns.
+ trait_values: A list of lists of values for each trait and sample.
+ traits_filename: The tab-separated value to put the values in for
+ computation of QTLs.
+ """
+ header = "Trait\t{}\n".format("\t".join(samples))
+ data = (
+ [header] +
+ ["{}\t{}\n".format(i+1, "\t".join([str(i) for i in t]))
+ for i, t in enumerate(trait_values[:-1])] +
+ ["{}\t{}".format(
+ len(trait_values), "\t".join([str(i) for i in t]))
+ for t in trait_values[-1:]])
+ with open(traits_filename, "w") as outfile:
+ outfile.writelines(data)
+
+def create_output_directory(path: str):
+ """Create the output directory at `path` if it does not exist."""
+ try:
+ os.mkdir(path)
+ except FileExistsError:
+ # If the directory already exists, do nothing.
+ pass
+
+def run_reaper(
+ genotype_filename: str, traits_filename: str,
+ other_options: tuple = ("--n_permutations", "1000"),
+ separate_nperm_output: bool = False,
+ output_dir: str = TMPDIR):
+ """
+ Run the QTLReaper command to compute the QTLs.
+
+ PARAMETERS:
+ genotype_filename: The complete path to a genotype file to use in the QTL
+ computation.
+ traits_filename: A path to a file previously generated with the
+ `generate_traits_file` function in this module, to be used in the QTL
+ computation.
+ other_options: Other options to pass to the `qtlreaper` command to modify
+ the QTL computations.
+ separate_nperm_output: A flag indicating whether or not to provide a
+ separate output for the permutations computation. The default is False,
+ which means by default, no separate output file is created.
+ output_dir: A path to the directory where the outputs are put
+
+ RETURNS:
+ The function returns a tuple of the main output file, and the output file
+ for the permutation computations. If the `separate_nperm_output` is `False`,
+ the second value in the tuple returned is `None`.
+
+ RAISES:
+ The function will raise a `subprocess.CalledProcessError` exception in case
+ of any errors running the `qtlreaper` command.
+ """
+ create_output_directory("{}/qtlreaper".format(output_dir))
+ output_filename = "{}/qtlreaper/main_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = ["--main_output", output_filename]
+ if separate_nperm_output:
+ permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
+ output_dir, random_string(10))
+ output_list = output_list + [
+ "--permu_output", permu_output_filename] # type: ignore[list-item]
+ else:
+ permu_output_filename = None
+
+ command_list = [
+ REAPER_COMMAND, "--geno", genotype_filename,
+ *other_options, # this splices the `other_options` list here
+ "--traits", traits_filename,
+ *output_list # this splices the `output_list` list here
+ ]
+
+ subprocess.run(command_list, check=True)
+ return (output_filename, permu_output_filename)
+
+def chromosome_sorter_key_fn(val):
+ """
+ Useful for sorting the chromosomes
+ """
+ if isinstance(val, int):
+ return val
+ return ord(val)
+
+def organise_reaper_main_results(parsed_results):
+ """
+ Provide the results of running reaper in a format that is easier to use.
+ """
+ def __organise_by_chromosome(chr_name, items):
+ chr_items = [item for item in items if item["Chr"] == chr_name]
+ return {
+ "Chr": chr_name,
+ "loci": [{
+ "Locus": locus["Locus"],
+ "cM": locus["cM"],
+ "Mb": locus["Mb"],
+ "LRS": locus["LRS"],
+ "Additive": locus["Additive"],
+ "pValue": locus["pValue"]
+ } for locus in chr_items]}
+
+ def __organise_by_id(identifier, items):
+ id_items = [item for item in items if item["ID"] == identifier]
+ unique_chromosomes = {item["Chr"] for item in id_items}
+ return {
+ "ID": identifier,
+ "chromosomes": {
+ _chr["Chr"]: _chr for _chr in [
+ __organise_by_chromosome(chromo, id_items)
+ for chromo in sorted(
+ unique_chromosomes, key=chromosome_sorter_key_fn)]}}
+
+ unique_ids = {res["ID"] for res in parsed_results}
+ return {
+ trait["ID"]: trait for trait in
+ [__organise_by_id(_id, parsed_results) for _id in sorted(unique_ids)]}
+
+def parse_reaper_main_results(results_file):
+ """
+ Parse the results file of running QTLReaper into a list of dicts.
+ """
+ with open(results_file, "r") as infile:
+ lines = infile.readlines()
+
+ def __parse_column_float_value(value):
+ # pylint: disable=W0702
+ try:
+ return float(value)
+ except:
+ return value
+
+ def __parse_column_int_value(value):
+ # pylint: disable=W0702
+ try:
+ return int(value)
+ except:
+ return value
+
+ def __parse_line(line):
+ items = line.strip().split("\t")
+ return items[0:2] + [__parse_column_int_value(items[2])] + [
+ __parse_column_float_value(item) for item in items[3:]]
+
+ header = lines[0].strip().split("\t")
+ return [dict(zip(header, __parse_line(line))) for line in lines[1:]]
+
+def parse_reaper_permutation_results(results_file):
+ """
+ Parse the results QTLReaper permutations into a list of values.
+ """
+ with open(results_file, "r") as infile:
+ lines = infile.readlines()
+
+ return [float(line.strip()) for line in lines]
diff --git a/gn3/computations/wgcna.py b/gn3/computations/wgcna.py
new file mode 100644
index 0000000..fd508fa
--- /dev/null
+++ b/gn3/computations/wgcna.py
@@ -0,0 +1,51 @@
+"""module contains code to preprocess and call wgcna script"""
+
+import os
+import json
+import uuid
+from gn3.settings import TMPDIR
+
+from gn3.commands import run_cmd
+
+
+def dump_wgcna_data(request_data: dict):
+ """function to dump request data to json file"""
+ filename = f"{str(uuid.uuid4())}.json"
+
+ temp_file_path = os.path.join(TMPDIR, filename)
+
+ with open(temp_file_path, "w") as output_file:
+ json.dump(request_data, output_file)
+
+ return temp_file_path
+
+
+def compose_wgcna_cmd(rscript_path: str, temp_file_path: str):
+ """function to componse wgcna cmd"""
+ # (todo):issue relative paths to abs paths
+ cmd = f"Rscript ./scripts/{rscript_path} {temp_file_path}"
+ return cmd
+
+
+def call_wgcna_script(rscript_path: str, request_data: dict):
+ """function to call wgcna script"""
+ generated_file = dump_wgcna_data(request_data)
+ cmd = compose_wgcna_cmd(rscript_path, generated_file)
+
+ try:
+
+ run_cmd_results = run_cmd(cmd)
+
+ with open(generated_file, "r") as outputfile:
+
+ if run_cmd_results["code"] != 0:
+ return run_cmd_results
+ return {
+ "data": json.load(outputfile),
+ **run_cmd_results
+ }
+ except FileNotFoundError:
+ # relook at handling errors gn3
+ return {
+ "output": "output file not found"
+ }
diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index 4a05499..6c328f5 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -119,9 +119,9 @@ def retrieve_dataset_name(
return fn_map[trait_type](threshold, dataset_name, conn)
-def retrieve_geno_riset_fields(name, conn):
+def retrieve_geno_group_fields(name, conn):
"""
- Retrieve the RISet, and RISetID values for various Geno trait types.
+ Retrieve the Group, and GroupID values for various Geno trait types.
"""
query = (
"SELECT InbredSet.Name, InbredSet.Id "
@@ -130,12 +130,12 @@ def retrieve_geno_riset_fields(name, conn):
"AND GenoFreeze.Name = %(name)s")
with conn.cursor() as cursor:
cursor.execute(query, {"name": name})
- return dict(zip(["riset", "risetid"], cursor.fetchone()))
+ return dict(zip(["group", "groupid"], cursor.fetchone()))
return {}
-def retrieve_publish_riset_fields(name, conn):
+def retrieve_publish_group_fields(name, conn):
"""
- Retrieve the RISet, and RISetID values for various Publish trait types.
+ Retrieve the Group, and GroupID values for various Publish trait types.
"""
query = (
"SELECT InbredSet.Name, InbredSet.Id "
@@ -144,12 +144,12 @@ def retrieve_publish_riset_fields(name, conn):
"AND PublishFreeze.Name = %(name)s")
with conn.cursor() as cursor:
cursor.execute(query, {"name": name})
- return dict(zip(["riset", "risetid"], cursor.fetchone()))
+ return dict(zip(["group", "groupid"], cursor.fetchone()))
return {}
-def retrieve_probeset_riset_fields(name, conn):
+def retrieve_probeset_group_fields(name, conn):
"""
- Retrieve the RISet, and RISetID values for various ProbeSet trait types.
+ Retrieve the Group, and GroupID values for various ProbeSet trait types.
"""
query = (
"SELECT InbredSet.Name, InbredSet.Id "
@@ -159,12 +159,12 @@ def retrieve_probeset_riset_fields(name, conn):
"AND ProbeSetFreeze.Name = %(name)s")
with conn.cursor() as cursor:
cursor.execute(query, {"name": name})
- return dict(zip(["riset", "risetid"], cursor.fetchone()))
+ return dict(zip(["group", "groupid"], cursor.fetchone()))
return {}
-def retrieve_temp_riset_fields(name, conn):
+def retrieve_temp_group_fields(name, conn):
"""
- Retrieve the RISet, and RISetID values for `Temp` trait types.
+ Retrieve the Group, and GroupID values for `Temp` trait types.
"""
query = (
"SELECT InbredSet.Name, InbredSet.Id "
@@ -173,30 +173,30 @@ def retrieve_temp_riset_fields(name, conn):
"AND Temp.Name = %(name)s")
with conn.cursor() as cursor:
cursor.execute(query, {"name": name})
- return dict(zip(["riset", "risetid"], cursor.fetchone()))
+ return dict(zip(["group", "groupid"], cursor.fetchone()))
return {}
-def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn):
+def retrieve_group_fields(trait_type, trait_name, dataset_info, conn):
"""
- Retrieve the RISet, and RISetID values for various trait types.
+ Retrieve the Group, and GroupID values for various trait types.
"""
- riset_fns_map = {
- "Geno": retrieve_geno_riset_fields,
- "Publish": retrieve_publish_riset_fields,
- "ProbeSet": retrieve_probeset_riset_fields
+ group_fns_map = {
+ "Geno": retrieve_geno_group_fields,
+ "Publish": retrieve_publish_group_fields,
+ "ProbeSet": retrieve_probeset_group_fields
}
if trait_type == "Temp":
- riset_info = retrieve_temp_riset_fields(trait_name, conn)
+ group_info = retrieve_temp_group_fields(trait_name, conn)
else:
- riset_info = riset_fns_map[trait_type](dataset_info["dataset_name"], conn)
+ group_info = group_fns_map[trait_type](dataset_info["dataset_name"], conn)
return {
**dataset_info,
- **riset_info,
- "riset": (
- "BXD" if riset_info.get("riset") == "BXD300"
- else riset_info.get("riset", ""))
+ **group_info,
+ "group": (
+ "BXD" if group_info.get("group") == "BXD300"
+ else group_info.get("group", ""))
}
def retrieve_temp_trait_dataset():
@@ -281,11 +281,11 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
trait_type, threshold, trait["trait_name"],
trait["db"]["dataset_name"], conn)
}
- riset = retrieve_riset_fields(
+ group = retrieve_group_fields(
trait_type, trait["trait_name"], dataset_name_info, conn)
return {
"display_name": dataset_name_info["dataset_name"],
**dataset_name_info,
**dataset_fns[trait_type](),
- **riset
+ **group
}
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
new file mode 100644
index 0000000..8f18cac
--- /dev/null
+++ b/gn3/db/genotypes.py
@@ -0,0 +1,184 @@
+"""Genotype utilities"""
+
+import os
+import gzip
+from typing import Union, TextIO
+
+from gn3.settings import GENOTYPE_FILES
+
+def build_genotype_file(
+ geno_name: str, base_dir: str = GENOTYPE_FILES,
+ extension: str = "geno"):
+ """Build the absolute path for the genotype file."""
+ return "{}/{}.{}".format(os.path.abspath(base_dir), geno_name, extension)
+
+def load_genotype_samples(genotype_filename: str, file_type: str = "geno"):
+ """
+ Load sample of samples from genotype files.
+
+ DESCRIPTION:
+ Traits can contain a varied number of samples, some of which do not exist in
+ certain genotypes. In order to compute QTLs, GEMMAs, etc, we need to ensure
+ to pick only those samples that exist in the genotype under consideration
+ for the traits used in the computation.
+
+ This function loads a list of samples from the genotype files for use in
+ filtering out unusable samples.
+
+
+ PARAMETERS:
+ genotype_filename: The absolute path to the genotype file to load the
+ samples from.
+ file_type: The type of file. Currently supported values are 'geno' and
+ 'plink'.
+ """
+ file_type_fns = {
+ "geno": __load_genotype_samples_from_geno,
+ "plink": __load_genotype_samples_from_plink
+ }
+ return file_type_fns[file_type](genotype_filename)
+
+def __load_genotype_samples_from_geno(genotype_filename: str):
+ """
+ Helper function for `load_genotype_samples` function.
+
+ Loads samples from '.geno' files.
+ """
+ gzipped_filename = "{}.gz".format(genotype_filename)
+ if os.path.isfile(gzipped_filename):
+ genofile: Union[TextIO, gzip.GzipFile] = gzip.open(gzipped_filename)
+ else:
+ genofile = open(genotype_filename)
+
+ for row in genofile:
+ line = row.strip()
+ if (not line) or (line.startswith(("#", "@"))): # type: ignore[arg-type]
+ continue
+ break
+
+ headers = line.split("\t") # type: ignore[arg-type]
+ if headers[3] == "Mb":
+ return headers[4:]
+ return headers[3:]
+
+def __load_genotype_samples_from_plink(genotype_filename: str):
+ """
+ Helper function for `load_genotype_samples` function.
+
+ Loads samples from '.plink' files.
+ """
+ genofile = open(genotype_filename)
+ return [line.split(" ")[1] for line in genofile]
+
+def parse_genotype_labels(lines: list):
+ """
+ Parse label lines into usable genotype values
+
+ DESCRIPTION:
+ Reworks
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L75-L93
+ """
+ acceptable_labels = ["name", "filler", "type", "mat", "pat", "het", "unk"]
+ def __parse_label(line):
+ label, value = [l.strip() for l in line[1:].split(":")]
+ if label not in acceptable_labels:
+ return None
+ if label == "name":
+ return ("group", value)
+ return (label, value)
+ return tuple(
+ item for item in (__parse_label(line) for line in lines)
+ if item is not None)
+
+def parse_genotype_header(line: str, parlist: tuple = tuple()):
+ """
+ Parse the genotype file header line
+
+ DESCRIPTION:
+ Reworks
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114
+ """
+ items = [item.strip() for item in line.split("\t")]
+ mbmap = "Mb" in items
+ prgy = ((parlist + tuple(items[4:])) if mbmap
+ else (parlist + tuple(items[3:])))
+ return (
+ ("Mbmap", mbmap),
+ ("cm_column", items.index("cM")),
+ ("mb_column", None if not mbmap else items.index("Mb")),
+ ("prgy", prgy),
+ ("nprgy", len(prgy)))
+
+def parse_genotype_marker(line: str, geno_obj: dict, parlist: tuple):
+ """
+ Parse a data line in a genotype file
+
+ DESCRIPTION:
+ Reworks
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L143-L190
+ """
+ # pylint: disable=W0702
+ marker_row = [item.strip() for item in line.split("\t")]
+ geno_table = {
+ geno_obj["mat"]: -1, geno_obj["pat"]: 1, geno_obj["het"]: 0,
+ geno_obj["unk"]: "U"
+ }
+ start_pos = 4 if geno_obj["Mbmap"] else 3
+ if len(parlist) > 0:
+ start_pos = start_pos + 2
+
+ alleles = marker_row[start_pos:]
+ genotype = tuple(
+ (geno_table[allele] if allele in geno_table.keys() else "U")
+ for allele in alleles)
+ if len(parlist) > 0:
+ genotype = (-1, 1) + genotype
+ try:
+ cm_val = float(geno_obj["cm_column"])
+ except:
+ if geno_obj["Mbmap"]:
+ cm_val = float(geno_obj["mb_column"])
+ else:
+ cm_val = 0
+ return (
+ ("chr", marker_row[0]),
+ ("name", marker_row[1]),
+ ("cM", cm_val),
+ ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None),
+ ("genotype", genotype))
+
+def build_genotype_chromosomes(geno_obj, markers):
+ """
+ Build up the chromosomes from the given markers and partially built geno
+ object
+ """
+ mrks = [dict(marker) for marker in markers]
+ chr_names = {marker["chr"] for marker in mrks}
+ return tuple((
+ ("name", chr_name), ("mb_exists", geno_obj["Mbmap"]), ("cm_column", 2),
+ ("mb_column", geno_obj["mb_column"]),
+ ("loci", tuple(marker for marker in mrks if marker["chr"] == chr_name)))
+ for chr_name in sorted(chr_names))
+
+def parse_genotype_file(filename: str, parlist: tuple = tuple()):
+ """
+ Parse the provided genotype file into a usable pytho3 data structure.
+ """
+ with open(filename, "r") as infile:
+ contents = infile.readlines()
+
+ lines = tuple(line for line in contents if
+ ((not line.strip().startswith("#")) and
+ (not line.strip() == "")))
+ labels = parse_genotype_labels(
+ [line for line in lines if line.startswith("@")])
+ data_lines = tuple(line for line in lines if not line.startswith("@"))
+ header = parse_genotype_header(data_lines[0], parlist)
+ geno_obj = dict(labels + header)
+ markers = tuple(
+ [parse_genotype_marker(line, geno_obj, parlist)
+ for line in data_lines[1:]])
+ chromosomes = tuple(
+ dict(chromosome) for chromosome in
+ build_genotype_chromosomes(geno_obj, markers))
+ return {**geno_obj, "chromosomes": chromosomes}
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 1031e44..f2673c8 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,5 +1,8 @@
"""This class contains functions relating to trait data manipulation"""
+import os
from typing import Any, Dict, Union, Sequence
+from gn3.settings import TMPDIR
+from gn3.random import random_string
from gn3.function_helpers import compose
from gn3.db.datasets import retrieve_trait_dataset
@@ -43,7 +46,7 @@ def update_sample_data(conn: Any,
count: Union[int, str]):
"""Given the right parameters, update sample-data from the relevant
table."""
- # pylint: disable=[R0913, R0914]
+ # pylint: disable=[R0913, R0914, C0103]
STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
"WHERE StrainId = %s AND Id = %s")
@@ -60,22 +63,22 @@ def update_sample_data(conn: Any,
with conn.cursor() as cursor:
# Update the Strains table
cursor.execute(STRAIN_ID_SQL, (strain_name, strain_id))
- updated_strains: int = cursor.rowcount
+ updated_strains = cursor.rowcount
# Update the PublishData table
cursor.execute(PUBLISH_DATA_SQL,
(None if value == "x" else value,
strain_id, publish_data_id))
- updated_published_data: int = cursor.rowcount
+ updated_published_data = cursor.rowcount
# Update the PublishSE table
cursor.execute(PUBLISH_SE_SQL,
(None if error == "x" else error,
strain_id, publish_data_id))
- updated_se_data: int = cursor.rowcount
+ updated_se_data = cursor.rowcount
# Update the NStrain table
cursor.execute(N_STRAIN_SQL,
(None if count == "x" else count,
strain_id, publish_data_id))
- updated_n_strains: int = cursor.rowcount
+ updated_n_strains = cursor.rowcount
return (updated_strains, updated_published_data,
updated_se_data, updated_n_strains)
@@ -223,7 +226,7 @@ def set_homologene_id_field_probeset(trait_info, conn):
"""
query = (
"SELECT HomologeneId FROM Homologene, Species, InbredSet"
- " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(riset)s"
+ " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(group)s"
" AND InbredSet.SpeciesId = Species.Id AND"
" Species.TaxonomyId = Homologene.TaxonomyId")
with conn.cursor() as cursor:
@@ -231,7 +234,7 @@ def set_homologene_id_field_probeset(trait_info, conn):
query,
{
k:v for k, v in trait_info.items()
- if k in ["geneid", "riset"]
+ if k in ["geneid", "group"]
})
res = cursor.fetchone()
if res:
@@ -419,7 +422,7 @@ def retrieve_trait_info(
if trait_info["haveinfo"]:
return {
**trait_post_processing_functions_table[trait_dataset_type](
- {**trait_info, "riset": trait_dataset["riset"]}),
+ {**trait_info, "group": trait_dataset["group"]}),
"db": {**trait["db"], **trait_dataset}
}
return trait_info
@@ -442,18 +445,18 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
query,
{"trait_name": trait_info["trait_name"]})
return [dict(zip(
- ["strain_name", "value", "se_error", "nstrain", "id"], row))
+ ["sample_name", "value", "se_error", "nstrain", "id"], row))
for row in cursor.fetchall()]
return []
-def retrieve_species_id(riset, conn: Any):
+def retrieve_species_id(group, conn: Any):
"""
- Retrieve a species id given the RISet value
+ Retrieve a species id given the Group value
"""
with conn.cursor as cursor:
cursor.execute(
- "SELECT SpeciesId from InbredSet WHERE Name = %(riset)s",
- {"riset": riset})
+ "SELECT SpeciesId from InbredSet WHERE Name = %(group)s",
+ {"group": group})
return cursor.fetchone()[0]
return None
@@ -479,9 +482,9 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
{"trait_name": trait_info["trait_name"],
"dataset_name": trait_info["db"]["dataset_name"],
"species_id": retrieve_species_id(
- trait_info["db"]["riset"], conn)})
+ trait_info["db"]["group"], conn)})
return [dict(zip(
- ["strain_name", "value", "se_error", "id"], row))
+ ["sample_name", "value", "se_error", "id"], row))
for row in cursor.fetchall()]
return []
@@ -512,7 +515,7 @@ def retrieve_publish_trait_data(trait_info: Dict, conn: Any):
{"trait_name": trait_info["trait_name"],
"dataset_id": trait_info["db"]["dataset_id"]})
return [dict(zip(
- ["strain_name", "value", "se_error", "nstrain", "id"], row))
+ ["sample_name", "value", "se_error", "nstrain", "id"], row))
for row in cursor.fetchall()]
return []
@@ -545,7 +548,7 @@ def retrieve_cellid_trait_data(trait_info: Dict, conn: Any):
"trait_name": trait_info["trait_name"],
"dataset_id": trait_info["db"]["dataset_id"]})
return [dict(zip(
- ["strain_name", "value", "se_error", "id"], row))
+ ["sample_name", "value", "se_error", "id"], row))
for row in cursor.fetchall()]
return []
@@ -574,29 +577,29 @@ def retrieve_probeset_trait_data(trait_info: Dict, conn: Any):
{"trait_name": trait_info["trait_name"],
"dataset_name": trait_info["db"]["dataset_name"]})
return [dict(zip(
- ["strain_name", "value", "se_error", "id"], row))
+ ["sample_name", "value", "se_error", "id"], row))
for row in cursor.fetchall()]
return []
-def with_strainlist_data_setup(strainlist: Sequence[str]):
+def with_samplelist_data_setup(samplelist: Sequence[str]):
"""
- Build function that computes the trait data from provided list of strains.
+ Build function that computes the trait data from provided list of samples.
PARAMETERS
- strainlist: (list)
- A list of strain names
+ samplelist: (list)
+ A list of sample names
RETURNS:
Returns a function that given some data from the database, computes the
- strain's value, variance and ndata values, only if the strain is present
- in the provided `strainlist` variable.
+ sample's value, variance and ndata values, only if the sample is present
+ in the provided `samplelist` variable.
"""
def setup_fn(tdata):
- if tdata["strain_name"] in strainlist:
+ if tdata["sample_name"] in samplelist:
val = tdata["value"]
if val is not None:
return {
- "strain_name": tdata["strain_name"],
+ "sample_name": tdata["sample_name"],
"value": val,
"variance": tdata["se_error"],
"ndata": tdata.get("nstrain", None)
@@ -604,19 +607,19 @@ def with_strainlist_data_setup(strainlist: Sequence[str]):
return None
return setup_fn
-def without_strainlist_data_setup():
+def without_samplelist_data_setup():
"""
Build function that computes the trait data.
RETURNS:
Returns a function that given some data from the database, computes the
- strain's value, variance and ndata values.
+ sample's value, variance and ndata values.
"""
def setup_fn(tdata):
val = tdata["value"]
if val is not None:
return {
- "strain_name": tdata["strain_name"],
+ "sample_name": tdata["sample_name"],
"value": val,
"variance": tdata["se_error"],
"ndata": tdata.get("nstrain", None)
@@ -624,7 +627,7 @@ def without_strainlist_data_setup():
return None
return setup_fn
-def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tuple()):
+def retrieve_trait_data(trait: dict, conn: Any, samplelist: Sequence[str] = tuple()):
"""
Retrieve trait data
@@ -647,22 +650,27 @@ def retrieve_trait_data(trait: dict, conn: Any, strainlist: Sequence[str] = tupl
if results:
# do something with mysqlid
mysqlid = results[0]["id"]
- if strainlist:
+ if samplelist:
data = [
item for item in
- map(with_strainlist_data_setup(strainlist), results)
+ map(with_samplelist_data_setup(samplelist), results)
if item is not None]
else:
data = [
item for item in
- map(without_strainlist_data_setup(), results)
+ map(without_samplelist_data_setup(), results)
if item is not None]
return {
"mysqlid": mysqlid,
"data": dict(map(
lambda x: (
- x["strain_name"],
- {k:v for k, v in x.items() if x != "strain_name"}),
+ x["sample_name"],
+ {k:v for k, v in x.items() if x != "sample_name"}),
data))}
return {}
+
+def generate_traits_filename(base_path: str = TMPDIR):
+ """Generate a unique filename for use with generated traits files."""
+ return "{}/traits_test_file_{}.txt".format(
+ os.path.abspath(base_path), random_string(10))
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
new file mode 100644
index 0000000..adbfbc6
--- /dev/null
+++ b/gn3/heatmaps.py
@@ -0,0 +1,424 @@
+"""
+This module will contain functions to be used in computation of the data used to
+generate various kinds of heatmaps.
+"""
+
+from functools import reduce
+from typing import Any, Dict, Union, Sequence
+
+import numpy as np
+import plotly.graph_objects as go # type: ignore
+import plotly.figure_factory as ff # type: ignore
+from plotly.subplots import make_subplots # type: ignore
+
+from gn3.settings import TMPDIR
+from gn3.random import random_string
+from gn3.computations.slink import slink
+from gn3.computations.correlations2 import compute_correlation
+from gn3.db.genotypes import (
+ build_genotype_file, load_genotype_samples)
+from gn3.db.traits import (
+ retrieve_trait_data, retrieve_trait_info)
+from gn3.computations.qtlreaper import (
+ run_reaper,
+ generate_traits_file,
+ chromosome_sorter_key_fn,
+ parse_reaper_main_results,
+ organise_reaper_main_results)
+
+def export_trait_data(
+ trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+ var_exists: bool = False, n_exists: bool = False):
+ """
+ Export data according to `samplelist`. Mostly used in calculating
+ correlations.
+
+ DESCRIPTION:
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+ PARAMETERS
+ trait: (dict)
+ The dictionary of key-value pairs representing a trait
+ samplelist: (list)
+ A list of sample names
+ dtype: (str)
+ ... verify what this is ...
+ var_exists: (bool)
+ A flag indicating existence of variance
+ n_exists: (bool)
+ A flag indicating existence of ndata
+ """
+ def __export_all_types(tdata, sample):
+ sample_data = []
+ if tdata[sample]["value"]:
+ sample_data.append(tdata[sample]["value"])
+ if var_exists:
+ if tdata[sample]["variance"]:
+ sample_data.append(tdata[sample]["variance"])
+ else:
+ sample_data.append(None)
+ if n_exists:
+ if tdata[sample]["ndata"]:
+ sample_data.append(tdata[sample]["ndata"])
+ else:
+ sample_data.append(None)
+ else:
+ if var_exists and n_exists:
+ sample_data += [None, None, None]
+ elif var_exists or n_exists:
+ sample_data += [None, None]
+ else:
+ sample_data.append(None)
+
+ return tuple(sample_data)
+
+ def __exporter(accumulator, sample):
+ # pylint: disable=[R0911]
+ if sample in trait_data["data"]:
+ if dtype == "val":
+ return accumulator + (trait_data["data"][sample]["value"], )
+ if dtype == "var":
+ return accumulator + (trait_data["data"][sample]["variance"], )
+ if dtype == "N":
+ return accumulator + (trait_data["data"][sample]["ndata"], )
+ if dtype == "all":
+ return accumulator + __export_all_types(trait_data["data"], sample)
+ raise KeyError("Type `%s` is incorrect" % dtype)
+ if var_exists and n_exists:
+ return accumulator + (None, None, None)
+ if var_exists or n_exists:
+ return accumulator + (None, None)
+ return accumulator + (None,)
+
+ return reduce(__exporter, samplelist, tuple())
+
+def trait_display_name(trait: Dict):
+ """
+ Given a trait, return a name to use to display the trait on a heatmap.
+
+ DESCRIPTION
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
+ """
+ if trait.get("db", None) and trait.get("trait_name", None):
+ if trait["db"]["dataset_type"] == "Temp":
+ desc = trait["description"]
+ if desc.find("PCA") >= 0:
+ return "%s::%s" % (
+ trait["db"]["displayname"],
+ desc[desc.rindex(':')+1:].strip())
+ return "%s::%s" % (
+ trait["db"]["displayname"],
+ desc[:desc.index('entered')].strip())
+ prefix = "%s::%s" % (
+ trait["db"]["dataset_name"], trait["trait_name"])
+ if trait["cellid"]:
+ return "%s::%s" % (prefix, trait["cellid"])
+ return prefix
+ return trait["description"]
+
+def cluster_traits(traits_data_list: Sequence[Dict]):
+ """
+ Clusters the trait values.
+
+ DESCRIPTION
+ Attempts to replicate the clustering of the traits, as done at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
+ """
+ def __compute_corr(tdata_i, tdata_j):
+ if tdata_i[0] == tdata_j[0]:
+ return 0.0
+ corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+ corr = corr_vals[0]
+ if (1 - corr) < 0:
+ return 0.0
+ return 1 - corr
+
+ def __cluster(tdata_i):
+ return tuple(
+ __compute_corr(tdata_i, tdata_j)
+ for tdata_j in enumerate(traits_data_list))
+
+ return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
+
+def get_loci_names(
+ organised: dict,
+ chromosome_names: Sequence[str]) -> Sequence[Sequence[str]]:
+ """
+ Get the loci names organised by the same order as the `chromosome_names`.
+ """
+ def __get_trait_loci(accumulator, trait):
+ chrs = tuple(trait["chromosomes"].keys())
+ trait_loci = {
+ _chr: tuple(
+ locus["Locus"]
+ for locus in trait["chromosomes"][_chr]["loci"]
+ ) for _chr in chrs
+ }
+ return {
+ **accumulator,
+ **{
+ _chr: tuple(sorted(set(
+ trait_loci[_chr] + accumulator.get(_chr, tuple()))))
+ for _chr in trait_loci.keys()
+ }
+ }
+ loci_dict: Dict[Union[str, int], Sequence[str]] = reduce(
+ __get_trait_loci, [v[1] for v in organised.items()], {})
+ return tuple(loci_dict[_chr] for _chr in chromosome_names)
+
+def build_heatmap(traits_names, conn: Any):
+ """
+ heatmap function
+
+ DESCRIPTION
+ This function is an attempt to reproduce the initialisation at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
+ and also the clustering and slink computations at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
+ with the help of the `gn3.computations.heatmap.cluster_traits` function.
+
+ It does not try to actually draw the heatmap image.
+
+ PARAMETERS:
+ TODO: Elaborate on the parameters here...
+ """
+ # pylint: disable=[R0914]
+ threshold = 0 # webqtlConfig.PUBLICTHRESH
+ traits = [
+ retrieve_trait_info(threshold, fullname, conn)
+ for fullname in traits_names]
+ traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
+ genotype_filename = build_genotype_file(traits[0]["group"])
+ samples = load_genotype_samples(genotype_filename)
+ exported_traits_data_list = [
+ export_trait_data(td, samples) for td in traits_data_list]
+ clustered = cluster_traits(exported_traits_data_list)
+ slinked = slink(clustered)
+ traits_order = compute_traits_order(slinked)
+ samples_and_values = retrieve_samples_and_values(
+ traits_order, samples, exported_traits_data_list)
+ traits_filename = "{}/traits_test_file_{}.txt".format(
+ TMPDIR, random_string(10))
+ generate_traits_file(
+ samples_and_values[0][1],
+ [t[2] for t in samples_and_values],
+ traits_filename)
+
+ main_output, _permutations_output = run_reaper(
+ genotype_filename, traits_filename, separate_nperm_output=True)
+
+ qtlresults = parse_reaper_main_results(main_output)
+ organised = organise_reaper_main_results(qtlresults)
+
+ traits_ids = [# sort numerically, but retain the ids as strings
+ str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
+ chromosome_names = sorted(
+ {row["Chr"] for row in qtlresults}, key=chromosome_sorter_key_fn)
+ ordered_traits_names = dict(
+ zip(traits_ids,
+ [traits[idx]["trait_fullname"] for idx in traits_order]))
+
+ return generate_clustered_heatmap(
+ process_traits_data_for_heatmap(
+ organised, traits_ids, chromosome_names),
+ clustered,
+ "single_heatmap_{}".format(random_string(10)),
+ y_axis=tuple(
+ ordered_traits_names[traits_ids[order]]
+ for order in traits_order),
+ y_label="Traits",
+ x_axis=chromosome_names,
+ x_label="Chromosomes",
+ loci_names=get_loci_names(organised, chromosome_names))
+
+def compute_traits_order(slink_data, neworder: tuple = tuple()):
+ """
+ Compute the order of the traits for clustering from `slink_data`.
+
+ This function tries to reproduce the creation and update of the `neworder`
+ variable in
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L120
+ and in the `web.webqtl.heatmap.Heatmap.draw` function in GN1
+ """
+ def __order_maker(norder, slnk_dt):
+ if isinstance(slnk_dt[0], int) and isinstance(slnk_dt[1], int):
+ return norder + (slnk_dt[0], slnk_dt[1])
+
+ if isinstance(slnk_dt[0], int):
+ return __order_maker((norder + (slnk_dt[0], )), slnk_dt[1])
+
+ if isinstance(slnk_dt[1], int):
+ return __order_maker(norder, slnk_dt[0]) + (slnk_dt[1], )
+
+ return __order_maker(__order_maker(norder, slnk_dt[0]), slnk_dt[1])
+
+ return __order_maker(neworder, slink_data)
+
+def retrieve_samples_and_values(orders, samplelist, traits_data_list):
+ """
+ Get the samples and their corresponding values from `samplelist` and
+ `traits_data_list`.
+
+ This migrates the code in
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L215-221
+ """
+ # This feels nasty! There's a lot of mutation of values here, that might
+ # indicate something untoward in the design of this function and its
+ # dependents ==> Review
+ samples = []
+ values = []
+ rets = []
+ for order in orders:
+ temp_val = traits_data_list[order]
+ for i, sample in enumerate(samplelist):
+ if temp_val[i] is not None:
+ samples.append(sample)
+ values.append(temp_val[i])
+ rets.append([order, samples[:], values[:]])
+ samples = []
+ values = []
+
+ return rets
+
+def nearest_marker_finder(genotype):
+ """
+ Returns a function to be used with `genotype` to compute the nearest marker
+ to the trait passed to the returned function.
+
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L425-434
+ """
+ def __compute_distances(chromo, trait):
+ loci = chromo.get("loci", None)
+ if not loci:
+ return None
+ return tuple(
+ {
+ "name": locus["name"],
+ "distance": abs(locus["Mb"] - trait["mb"])
+ } for locus in loci)
+
+ def __finder(trait):
+ _chrs = tuple(
+ _chr for _chr in genotype["chromosomes"]
+ if str(_chr["name"]) == str(trait["chr"]))
+ if len(_chrs) == 0:
+ return None
+ distances = tuple(
+ distance for dists in
+ filter(
+ lambda x: x is not None,
+ (__compute_distances(_chr, trait) for _chr in _chrs))
+ for distance in dists)
+ nearest = min(distances, key=lambda d: d["distance"])
+ return nearest["name"]
+ return __finder
+
+def get_nearest_marker(traits_list, genotype):
+ """
+ Retrieves the nearest marker for each of the traits in the list.
+
+ DESCRIPTION:
+ This migrates the code in
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L419-L438
+ """
+ if not genotype["Mbmap"]:
+ return [None] * len(traits_list)
+
+ marker_finder = nearest_marker_finder(genotype)
+ return [marker_finder(trait) for trait in traits_list]
+
+def get_lrs_from_chr(trait, chr_name):
+ """
+ Retrieve the LRS values for a specific chromosome in the given trait.
+ """
+ chromosome = trait["chromosomes"].get(chr_name)
+ if chromosome:
+ return [
+ locus["LRS"] for locus in
+ sorted(chromosome["loci"], key=lambda loc: loc["Locus"])]
+ return [None]
+
+def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
+ """
+ Process the traits data in a format useful for generating heatmap diagrams.
+ """
+ hdata = [
+ [get_lrs_from_chr(data[trait], chr_name) for trait in trait_names]
+ for chr_name in chromosome_names]
+ return hdata
+
+def generate_clustered_heatmap(
+ data, clustering_data, image_filename_prefix, x_axis=None,
+ x_label: str = "", y_axis=None, y_label: str = "",
+ loci_names: Sequence[Sequence[str]] = tuple(),
+ output_dir: str = TMPDIR,
+ colorscale=((0.0, '#0000FF'), (0.5, '#00FF00'), (1.0, '#FF0000'))):
+ """
+ Generate a dendrogram, and heatmaps for each chromosome, and put them all
+ into one plot.
+ """
+ # pylint: disable=[R0913, R0914]
+ num_cols = 1 + len(x_axis)
+ fig = make_subplots(
+ rows=1,
+ cols=num_cols,
+ shared_yaxes="rows",
+ horizontal_spacing=0.001,
+ subplot_titles=["distance"] + x_axis,
+ figure=ff.create_dendrogram(
+ np.array(clustering_data), orientation="right", labels=y_axis))
+ hms = [go.Heatmap(
+ name=chromo,
+ x=loci,
+ y=y_axis,
+ z=data_array,
+ showscale=False)
+ for chromo, data_array, loci
+ in zip(x_axis, data, loci_names)]
+ for i, heatmap in enumerate(hms):
+ fig.add_trace(heatmap, row=1, col=(i + 2))
+
+ fig.update_layout(
+ {
+ "width": 1500,
+ "height": 800,
+ "xaxis": {
+ "mirror": False,
+ "showgrid": True,
+ "title": x_label
+ },
+ "yaxis": {
+ "title": y_label
+ }
+ })
+
+ x_axes_layouts = {
+ "xaxis{}".format(i+1 if i > 0 else ""): {
+ "mirror": False,
+ "showticklabels": i == 0,
+ "ticks": "outside" if i == 0 else ""
+ }
+ for i in range(num_cols)}
+
+ fig.update_layout(
+ {
+ "width": 4000,
+ "height": 800,
+ "yaxis": {
+ "mirror": False,
+ "ticks": ""
+ },
+ **x_axes_layouts})
+ fig.update_traces(
+ showlegend=False,
+ colorscale=colorscale,
+ selector={"type": "heatmap"})
+ fig.update_traces(
+ showlegend=True,
+ showscale=True,
+ selector={"name": x_axis[-1]})
+ image_filename = "{}/{}.html".format(output_dir, image_filename_prefix)
+ fig.write_html(image_filename)
+ return image_filename, fig
diff --git a/gn3/heatmaps/heatmaps.py b/gn3/heatmaps/heatmaps.py
deleted file mode 100644
index 3bf7917..0000000
--- a/gn3/heatmaps/heatmaps.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import random
-import plotly.express as px
-
-#### Remove these ####
-
-heatmap_dir = "heatmap_images"
-
-def generate_random_data(data_stop: float = 2, width: int = 10, height: int = 30):
- """
- This is mostly a utility function to be used to generate random data, useful
- for development of the heatmap generation code, without access to the actual
- database data.
- """
- return [[random.uniform(0,data_stop) for i in range(0, width)]
- for j in range(0, height)]
-
-def heatmap_x_axis_names():
- return [
- "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM2260338",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM3140576",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM5670577",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM2070121",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM103990541",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM1190722",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM6590722",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
- "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
-#### END: Remove these ####
-
-# Grey + Blue + Red
-def generate_heatmap():
- rows = 20
- data = generate_random_data(height=rows)
- y = (["%s"%x for x in range(1, rows+1)][:-1] + ["X"]) #replace last item with x for now
- fig = px.imshow(
- data,
- x=heatmap_x_axis_names(),
- y=y,
- width=500)
- fig.update_traces(xtype="array")
- fig.update_traces(ytype="array")
- # fig.update_traces(xgap=10)
- fig.update_xaxes(
- visible=True,
- title_text="Traits",
- title_font_size=16)
- fig.update_layout(
- coloraxis_colorscale=[
- [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
- [0.5, '#F5DE11'], [1.0, '#FF0D00']])
-
- fig.write_html("%s/%s"%(heatmap_dir, "test_image.html"))
- return fig
diff --git a/gn3/random.py b/gn3/random.py
new file mode 100644
index 0000000..f0ba574
--- /dev/null
+++ b/gn3/random.py
@@ -0,0 +1,11 @@
+"""
+Functions to generate complex random data.
+"""
+import random
+import string
+
+def random_string(length):
+ """Generate a random string of length `length`."""
+ return "".join(
+ random.choices(
+ string.ascii_letters + string.digits, k=length))
diff --git a/gn3/settings.py b/gn3/settings.py
index f4866d5..150d96d 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -24,3 +24,24 @@ GN2_BASE_URL = "http://www.genenetwork.org/"
# biweight script
BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R"
+
+# wgcna script
+WGCNA_RSCRIPT = "wgcna_analysis.R"
+# qtlreaper command
+REAPER_COMMAND = "{}/bin/qtlreaper".format(os.environ.get("GUIX_ENVIRONMENT"))
+
+# genotype files
+GENOTYPE_FILES = os.environ.get(
+ "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME")))
+
+# CROSS-ORIGIN SETUP
+CORS_ORIGINS = [
+ "http://localhost:*",
+ "http://127.0.0.1:*"
+]
+
+CORS_HEADERS = [
+ "Content-Type",
+ "Authorization",
+ "Access-Control-Allow-Credentials"
+]
diff --git a/guix.scm b/guix.scm
index 729d089..9b8f399 100644
--- a/guix.scm
+++ b/guix.scm
@@ -43,7 +43,8 @@
(gnu packages databases)
(gnu packages statistics)
(gnu packages bioconductor)
- (gn packages golang)
+ (gnu packages golang)
+ (gn packages genenetwork)
(gnu packages python)
(gnu packages python-check)
(gnu packages python-crypto)
@@ -83,17 +84,18 @@
#:recursive? #t
#:select? git-file?))
(propagated-inputs `(("coreutils" ,coreutils)
- ("csvdiff" ,go-github-com-aswinkarthik-csvdiff)
("gemma-wrapper" ,gemma-wrapper)
("gunicorn" ,gunicorn)
("python" ,python-wrapper)
("python-bcrypt" ,python-bcrypt)
("python-flask" ,python-flask)
+ ("python-flask-cors" ,python-flask-cors)
("python-ipfshttpclient" ,python-ipfshttpclient)
("python-mypy" ,python-mypy)
("python-mypy-extensions" ,python-mypy-extensions)
("python-mysqlclient" ,python-mysqlclient)
("python-numpy" ,python-numpy)
+ ("python-plotly" ,python-plotly)
("python-pylint" ,python-pylint)
("python-redis" ,python-redis)
("python-requests" ,python-requests)
@@ -103,8 +105,12 @@
("r-optparse" ,r-optparse)
("r-qtl" ,r-qtl)
("r-stringi" ,r-stringi)
+ ("r-wgcna" ,r-wgcna)
+ ("r-rjson" ,r-rjson)
("python-plotly" ,python-plotly)
- ("python-pandas" ,python-pandas)))
+ ("python-pandas" ,python-pandas)
+ ("rust-qtlreaper" ,rust-qtlreaper)
+ ("python-flask-cors" ,python-flask-cors)))
(build-system python-build-system)
(home-page "https://github.com/genenetwork/genenetwork3")
(synopsis "GeneNetwork3 API for data science and machine learning.")
diff --git a/requirements.txt b/requirements.txt
index f94c86f..d332a96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,3 +33,5 @@ urllib3==1.26.4
varint==1.0.2
Werkzeug==1.0.1
wrapt==1.12.1
+plotly==4.14.3
+flask-cors==3.0.9
diff --git a/scripts/output.json b/scripts/output.json
new file mode 100644
index 0000000..9caf837
--- /dev/null
+++ b/scripts/output.json
@@ -0,0 +1,161 @@
+{
+ "ModEigens":{
+ "MEturquoise":[
+ 0.0646677768085351,
+ 0.137200224277058,
+ 0.63451113720732,
+ -0.544002665501479,
+ -0.489487590361863,
+ 0.197111117570427
+ ]
+ },
+ "soft_threshold":{
+ "Power":[
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 12,
+ 14,
+ 16,
+ 18,
+ 20
+ ],
+ "SFT.R.sq":[
+ 0.0181093215847335,
+ 0.00011984142485113,
+ 0.000171967046945159,
+ 0.0105462010616537,
+ 0.0341444584348834,
+ 0.0687163726151286,
+ 0.0306423506274298,
+ 0.0492567394226327,
+ 0.0789539269997996,
+ 0.0944880158122527,
+ 0.122195040270446,
+ 0.0340768567186139,
+ 0.0625860126119281,
+ 0.100082257137014,
+ 0.128277841930818
+ ],
+ "slope":[
+ 3.81011386139005,
+ -0.170826531149538,
+ 0.159161787390082,
+ -1.01047981107833,
+ -1.55943531024794,
+ -1.93420125756514,
+ -1.16799247295814,
+ -1.33414063070555,
+ -1.54984944650438,
+ -1.54715757057087,
+ -1.49931213589121,
+ -1.80460140377151,
+ -2.19055343583319,
+ -2.52135805259606,
+ -2.58599487577447
+ ],
+ "truncated.R.sq":[
+ 0.768989700952805,
+ 0.522025793450566,
+ 0.329341226670409,
+ 0.110265719555879,
+ 0.0195649645183151,
+ -0.0503253079741786,
+ 0.0507498358330625,
+ -0.0129255450167765,
+ -0.035717519210676,
+ -0.0807094793662766,
+ -0.0967803564932559,
+ 0.00172686282662393,
+ -0.0340811003657508,
+ -0.0390284600100592,
+ -0.0489269837827069
+ ],
+ "mean.k.":[
+ 4.20178789454309,
+ 3.44556816856968,
+ 2.98532344074325,
+ 2.65297323828966,
+ 2.39517682414009,
+ 2.18846935370095,
+ 2.01963258852289,
+ 1.87999059876872,
+ 1.76335304166057,
+ 1.66510080962817,
+ 1.51038968360321,
+ 1.39583176924843,
+ 1.30882729664563,
+ 1.24120316437299,
+ 1.18753154238216
+ ],
+ "median.k.":[
+ 4.65733789933094,
+ 4.13585224131512,
+ 3.75980713552836,
+ 3.43775457512904,
+ 3.15305040369031,
+ 2.89933881967523,
+ 2.67225531456956,
+ 2.46825611568646,
+ 2.28437024155601,
+ 2.118086531192,
+ 1.83011067501282,
+ 1.59073325345641,
+ 1.38991168639473,
+ 1.2201000051609,
+ 1.07553194658444
+ ],
+ "max.k.":[
+ 4.81522245318686,
+ 4.21987143583501,
+ 3.83876962723542,
+ 3.55526976885104,
+ 3.32904938849614,
+ 3.14312441404036,
+ 2.98828051379132,
+ 2.85837136671219,
+ 2.74879840851137,
+ 2.65594228759455,
+ 2.50929962297015,
+ 2.40113981571731,
+ 2.31993775805391,
+ 2.25792900175825,
+ 2.2098218130451
+ ]
+ },
+ "blockGenes":[
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "net_colors":{
+ "X1":"turquoise",
+ "X2":"turquoise",
+ "X3":"turquoise",
+ "X4":"turquoise",
+ "X5":"turquoise",
+ "X6":"turquoise",
+ "X7":"turquoise"
+ },
+ "net_unmerged":{
+ "X1":"turquoise",
+ "X2":"turquoise",
+ "X3":"turquoise",
+ "X4":"turquoise",
+ "X5":"turquoise",
+ "X6":"turquoise",
+ "X7":"turquoise"
+ },
+ "imageLoc":"/WGCNAoutput_1uujpTIpC.png"
+} \ No newline at end of file
diff --git a/scripts/wgcna_analysis.R b/scripts/wgcna_analysis.R
new file mode 100644
index 0000000..17b3537
--- /dev/null
+++ b/scripts/wgcna_analysis.R
@@ -0,0 +1,115 @@
+
+
+library(WGCNA);
+library(stringi);
+library(rjson)
+
+options(stringsAsFactors = FALSE);
+
+imgDir = Sys.getenv("GENERATED_IMAGE_DIR")
+# load expression data **assumes from json files row(traits)(columns info+samples)
+# pass the file_path as arg
+# pass the file path to read json data
+
+args = commandArgs(trailingOnly=TRUE)
+
+if (length(args)==0) {
+ stop("Argument for the file location is required", call.=FALSE)
+} else {
+ # default output file
+ json_file_path = args[1]
+}
+
+inputData <- fromJSON(file = json_file_path)
+
+
+trait_sample_data <- do.call(rbind, inputData$trait_sample_data)
+
+dataExpr <- data.frame(apply(trait_sample_data, 2, function(x) as.numeric(as.character(x))))
+# transform expressionData
+
+dataExpr <- data.frame(t(dataExpr))
+gsg = goodSamplesGenes(dataExpr, verbose = 3)
+
+if (!gsg$allOK)
+{
+if (sum(!gsg$goodGenes)>0)
+printFlush(paste("Removing genes:", paste(names(dataExpr)[!gsg$goodGenes], collapse = ", ")));
+if (sum(!gsg$goodSamples)>0)
+printFlush(paste("Removing samples:", paste(rownames(dataExpr)[!gsg$goodSamples], collapse = ", ")));
+# Remove the offending genes and samples from the data:
+dataExpr <- dataExpr[gsg$goodSamples, gsg$goodGenes]
+}
+
+## network constructions and modules
+
+names(dataExpr) = inputData$trait_names
+
+# Allow multi-threading within WGCNA
+enableWGCNAThreads()
+
+# choose softthreshhold (Calculate soft threshold)
+# xtodo allow users to pass args
+
+powers <- c(c(1:10), seq(from = 12, to=20, by=2))
+sft <- pickSoftThreshold(dataExpr, powerVector = powers, verbose = 5)
+
+# check the power estimate
+
+if (is.na(sft$powerEstimate)){
+ powerEst = 1
+}else{
+ powerEst = sft$powerEstimate
+}
+
+# pass user options
+network <- blockwiseModules(dataExpr,
+ #similarity matrix options
+ corType = inputData$corType,
+ #adjacency matrix options
+
+ power = powerEst,
+ networkType = "unsigned",
+ #TOM options
+ TOMtype = inputData$TOMtype,
+
+ #module indentification
+ verbose = 3,
+
+ minmodulesSize = inputData$minModuleSize,
+ deepSplit = 3,
+ PamRespectsDendro = FALSE
+ )
+
+
+
+genImageRandStr <- function(prefix){
+
+ randStr <- paste(prefix,stri_rand_strings(1, 9, pattern = "[A-Za-z0-9]"),sep="_")
+
+ return(paste(randStr,".png",sep=""))
+}
+
+mergedColors <- labels2colors(network$colors)
+
+imageLoc <- file.path(imgDir,genImageRandStr("WGCNAoutput"))
+png(imageLoc,width=1000,height=600,type='cairo-png')
+
+plotDendroAndColors(network$dendrograms[[1]],mergedColors[network$blockGenes[[1]]],
+"Module colors",
+dendroLabels = FALSE, hang = 0.03,
+addGuide = TRUE, guideHang = 0.05)
+
+
+json_data <- list(input = inputData,
+ output = list(ModEigens=network$MEs,
+ soft_threshold=sft$fitIndices,
+ blockGenes =network$blockGenes[[1]],
+ net_colors =network$colors,
+ power_used_for_analy=powerEst,
+ net_unmerged=network$unmergedColors,
+ imageLoc=imageLoc))
+
+json_data <- toJSON(json_data)
+
+write(json_data,file= json_file_path) \ No newline at end of file
diff --git a/scripts/wgcna_test_data.csv b/scripts/wgcna_test_data.csv
new file mode 100644
index 0000000..8db9598
--- /dev/null
+++ b/scripts/wgcna_test_data.csv
@@ -0,0 +1,9 @@
+129S1/SvImJ,A/J,AKR/J,B6D2F1,BALB/cByJ,BALB/cJ
+7.142,7.31,7.49,6.899,7.172,7.396
+7.071,7.05,7.313,6.999,7.293,7.117
+7.221,7.246,7.754,6.866,6.752,7.269
+9.221,9.246,9.954,6.866,6.952,9.269
+7.221,7.246,7.754,6.866,6.752,7.269
+7.221,7.246,7.754,6.866,6.752,7.269
+11.221,11.246,11.1154,6.866,6.1152,11.269
+
diff --git a/scripts/wgcna_test_data.json b/scripts/wgcna_test_data.json
new file mode 100644
index 0000000..1d469f4
--- /dev/null
+++ b/scripts/wgcna_test_data.json
@@ -0,0 +1,65 @@
+{
+ "trait_names":["1455537_at","1425637_at","1449593_at","1421945_a_at","1450423_s_at","1423841_at","1451144_at"],
+ "trait_sample_data":[
+ {
+ "129S1/SvImJ": 7.142,
+ "A/J": 7.31,
+ "AKR/J": 7.49,
+ "B6D2F1": 6.899,
+ "BALB/cByJ": 7.172,
+ "BALB/cJ": 7.396
+ },
+ {
+ "129S1/SvImJ": 7.071,
+ "A/J": 7.05,
+ "AKR/J": 7.313,
+ "B6D2F1": 6.999,
+ "BALB/cByJ": 7.293,
+ "BALB/cJ": 7.117
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 9.221,
+ "A/J": 9.246,
+ "AKR/J": 9.954,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.952,
+ "BALB/cJ": 9.269
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 7.221,
+ "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269
+ },
+ {
+ "129S1/SvImJ": 11.221,
+ "A/J": 11.246,
+ "AKR/J": 11.1154,
+ "B6D2F1": 6.866,
+ "BALB/cByJ": 6.1152,
+ "BALB/cJ": 11.269
+ }
+ ],
+ "TOMtype": "unsigned",
+ "minModuleSize": 30,
+ "corType": "pearson"
+
+}
diff --git a/setup.py b/setup.py
index 3f0922b..98a076f 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,8 @@ setup(author='Bonface M. K.',
"requests==2.25.1"
"scipy==1.6.0"
"sqlalchemy-stubs==0.4"
+ "plotly==4.14.3"
+ "flask-cors==3.0.9"
],
license='GPLV3',
long_description=open('README.md').read(),
diff --git a/tests/integration/test_wgcna.py b/tests/integration/test_wgcna.py
new file mode 100644
index 0000000..078449d
--- /dev/null
+++ b/tests/integration/test_wgcna.py
@@ -0,0 +1,73 @@
+"""integration tests for wgcna"""
+
+from unittest import TestCase
+from unittest import mock
+
+from gn3.app import create_app
+
+
+class WgcnaIntegrationTest(TestCase):
+ """class contains wgcna integration tests"""
+
+ def setUp(self):
+ self.app = create_app().test_client()
+
+ @mock.patch("gn3.api.wgcna.call_wgcna_script")
+ def test_wgcna_endpoint(self, mock_wgcna_script):
+ """test /api/wgcna/run_wgcna endpoint"""
+
+ wgcna_output_data = {
+ "code": 0,
+ "output": "run script successfully",
+ "data": {
+ "ModEigens": {
+ "MEturquoise": [
+ 0.0646677768085351,
+ 0.137200224277058,
+ 0.63451113720732,
+ -0.544002665501479,
+ -0.489487590361863,
+ 0.197111117570427
+ ]
+ },
+ "net_colors": {
+ "X1": "turquoise",
+ "X2": "turquoise",
+ "X3": "turquoise",
+ "X4": "turquoise"
+ },
+ "imageLoc": "/WGCNAoutput_1uujpTIpC.png"
+ }
+ }
+
+ request_data = {
+ "trait_names": [
+ "1455537_at",
+ "1425637_at"
+ ],
+ "trait_sample_data": [
+ {
+ "129S1/SvImJ": 6.142,
+ "A/J": 5.31,
+ "AKR/J": 3.49,
+ "B6D2F1": 2.899,
+ "BALB/cByJ": 1.172,
+ "BALB/cJ": 7.396
+ },
+ {
+ "129S1/SvImJ": 1.42,
+ "A/J": 2.31,
+ "AKR/J": 5.49,
+ "B6D2F1": 3.899,
+ "BALB/cByJ": 1.172,
+ "BALB/cJ": 7.396
+ }
+ ]
+ }
+ mock_wgcna_script.return_value = wgcna_output_data
+
+ response = self.app.post("/api/wgcna/run_wgcna",
+ json=request_data, follow_redirects=True)
+
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.get_json(), wgcna_output_data)
diff --git a/tests/unit/computations/data/qtlreaper/main_output_sample.txt b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
new file mode 100644
index 0000000..12b11b4
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/main_output_sample.txt
@@ -0,0 +1,11 @@
+ID Locus Chr cM Mb LRS Additive pValue
+T1 rs31443144 1 1.500 3.010 0.500 -0.074 1.000
+T1 rs6269442 1 1.500 3.492 0.500 -0.074 1.000
+T1 rs32285189 1 1.630 3.511 0.500 -0.074 1.000
+T1 rs258367496 1 1.630 3.660 0.500 -0.074 1.000
+T1 rs32430919 1 1.750 3.777 0.500 -0.074 1.000
+T1 rs36251697 1 1.880 3.812 0.500 -0.074 1.000
+T1 rs30658298 1 2.010 4.431 0.500 -0.074 1.000
+T1 rs51852623 1 2.010 4.447 0.500 -0.074 1.000
+T1 rs31879829 1 2.140 4.519 0.500 -0.074 1.000
+T1 rs36742481 1 2.140 4.776 0.500 -0.074 1.000
diff --git a/tests/unit/computations/data/qtlreaper/permu_output_sample.txt b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
new file mode 100644
index 0000000..64cff07
--- /dev/null
+++ b/tests/unit/computations/data/qtlreaper/permu_output_sample.txt
@@ -0,0 +1,27 @@
+4.44174
+5.03825
+5.08167
+5.18119
+5.18578
+5.24563
+5.24619
+5.24619
+5.27961
+5.28228
+5.43903
+5.50188
+5.51694
+5.56830
+5.63874
+5.71346
+5.71936
+5.74275
+5.76764
+5.79815
+5.81671
+5.82775
+5.89659
+5.92117
+5.93396
+5.93396
+5.94957
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
index 19c3067..b51b0bf 100644
--- a/tests/unit/computations/test_parsers.py
+++ b/tests/unit/computations/test_parsers.py
@@ -15,7 +15,7 @@ class TestParsers(unittest.TestCase):
def test_parse_genofile_with_existing_file(self):
"""Test that a genotype file is parsed correctly"""
- strains = ["bxd1", "bxd2"]
+ samples = ["bxd1", "bxd2"]
genotypes = [
{"chr": "1", "locus": "rs31443144",
"cm": "1.50", "mb": "3.010274",
@@ -51,4 +51,4 @@ class TestParsers(unittest.TestCase):
"../test_data/genotype.txt"
))
self.assertEqual(parse_genofile(
- test_genotype_file), (strains, genotypes))
+ test_genotype_file), (samples, genotypes))
diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
new file mode 100644
index 0000000..742d106
--- /dev/null
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -0,0 +1,135 @@
+"""Module contains tests for gn3.computations.qtlreaper"""
+from unittest import TestCase
+from gn3.computations.qtlreaper import (
+ parse_reaper_main_results,
+ organise_reaper_main_results,
+ parse_reaper_permutation_results)
+from tests.unit.sample_test_data import organised_trait_1
+
+class TestQTLReaper(TestCase):
+ """Class for testing qtlreaper interface functions."""
+
+ def test_parse_reaper_main_results(self):
+ """Test that the main results file is parsed correctly."""
+ self.assertEqual(
+ parse_reaper_main_results(
+ "tests/unit/computations/data/qtlreaper/main_output_sample.txt"),
+ [
+ {
+ "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+ "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+ "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+ "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+ "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+ "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+ "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+ "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs51852623", "Chr": 1, "cM": 2.010,
+ "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs31879829", "Chr": 1, "cM": 2.140,
+ "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "T1", "Locus": "rs36742481", "Chr": 1, "cM": 2.140,
+ "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ }
+ ])
+
+ def test_parse_reaper_permutation_results(self):
+ """Test that the permutations results file is parsed correctly."""
+ self.assertEqual(
+ parse_reaper_permutation_results(
+ "tests/unit/computations/data/qtlreaper/permu_output_sample.txt"),
+ [4.44174, 5.03825, 5.08167, 5.18119, 5.18578, 5.24563, 5.24619,
+ 5.24619, 5.27961, 5.28228, 5.43903, 5.50188, 5.51694, 5.56830,
+ 5.63874, 5.71346, 5.71936, 5.74275, 5.76764, 5.79815, 5.81671,
+ 5.82775, 5.89659, 5.92117, 5.93396, 5.93396, 5.94957])
+
+ def test_organise_reaper_main_results(self):
+ """Check that results are organised correctly."""
+ self.assertEqual(
+ organise_reaper_main_results([
+ {
+ "ID": "1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+ "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+ "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+ "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+ "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+ "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+ "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+ "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+ "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+ "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ },
+ {
+ "ID": "1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+ "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
+ "pValue": 1.000
+ }
+ ]),
+ organised_trait_1)
diff --git a/tests/unit/computations/test_wgcna.py b/tests/unit/computations/test_wgcna.py
new file mode 100644
index 0000000..ec81d94
--- /dev/null
+++ b/tests/unit/computations/test_wgcna.py
@@ -0,0 +1,160 @@
+"""module contains python code for wgcna"""
+from unittest import TestCase
+from unittest import mock
+
+from gn3.computations.wgcna import dump_wgcna_data
+from gn3.computations.wgcna import compose_wgcna_cmd
+from gn3.computations.wgcna import call_wgcna_script
+
+
+class TestWgcna(TestCase):
+ """test class for wgcna"""
+
+ @mock.patch("gn3.computations.wgcna.run_cmd")
+ @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd")
+ @mock.patch("gn3.computations.wgcna.dump_wgcna_data")
+ def test_call_wgcna_script(self,
+ mock_dumping_data,
+ mock_compose_wgcna,
+ mock_run_cmd):
+ """test for calling wgcna script"""
+
+ # pylint: disable = line-too-long
+ mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json"
+
+ mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json"
+
+ request_data = {
+ "trait_names": ["1455537_at", "1425637_at", "1449593_at", "1421945_a_at", "1450423_s_at", "1423841_at", "1451144_at"],
+ "trait_sample_data": [
+ {
+ "129S1/SvImJ": 7.142,
+ "A/J": 7.31,
+ "AKR/J": 7.49,
+ "B6D2F1": 6.899,
+ "BALB/cByJ": 7.172,
+ "BALB/cJ": 7.396
+ },
+ {
+ "129S1/SvImJ": 7.071,
+ "A/J": 7.05,
+ "AKR/J": 7.313,
+ "B6D2F1": 6.999,
+ "BALB/cByJ": 7.293,
+ "BALB/cJ": 7.117
+ }]}
+
+ mock_run_cmd_results = {
+
+ "code": 0,
+ "output": "Flagging genes and samples with too many missing values...\n ..step 1\nAllowing parallel execution with up to 3 working processes.\npickSoftThreshold: will use block size 7.\n pickSoftThreshold: calculating connectivity for given powers...\n ..working on genes 1 through 7 of 7\n Flagging genes and samples with too many missing values...\n ..step 1\n ..Working on block 1 .\n TOM calculation: adjacency..\n ..will not use multithreading.\nclustering..\n ....detecting modules..\n ....calculating module eigengenes..\n ....checking kME in modules..\n ..merging modules that are too close..\n mergeCloseModules: Merging modules whose distance is less than 0.15\n mergeCloseModules: less than two proper modules.\n ..color levels are turquoise\n ..there is nothing to merge.\n Calculating new MEs...\n"
+ }
+
+ json_output = "{\"inputdata\":{\"trait_sample_data \":{},\"minModuleSize\":30,\"TOMtype\":\"unsigned\"},\"outputdata\":{\"eigengenes\":[],\"colors\":[]}}"
+
+ expected_output = {
+
+ "data": {
+ "inputdata": {
+ "trait_sample_data ": {},
+ "minModuleSize": 30,
+ "TOMtype": "unsigned"
+ },
+
+ "outputdata": {
+ "eigengenes": [],
+ "colors": []
+ }
+ },
+
+ **mock_run_cmd_results
+
+ }
+
+ with mock.patch("builtins.open", mock.mock_open(read_data=json_output)):
+
+ mock_run_cmd.return_value = mock_run_cmd_results
+
+ results = call_wgcna_script(
+ "Rscript/GUIX_PATH/scripts/r_file.R", request_data)
+
+ mock_dumping_data.assert_called_once_with(request_data)
+
+ mock_compose_wgcna.assert_called_once_with(
+ "Rscript/GUIX_PATH/scripts/r_file.R",
+ "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json")
+
+ mock_run_cmd.assert_called_once_with(
+ "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json")
+
+ self.assertEqual(results, expected_output)
+
+ @mock.patch("gn3.computations.wgcna.run_cmd")
+ @mock.patch("gn3.computations.wgcna.compose_wgcna_cmd")
+ @mock.patch("gn3.computations.wgcna.dump_wgcna_data")
+ def test_call_wgcna_script_fails(self, mock_dumping_data, mock_compose_wgcna, mock_run_cmd):
+ """test for calling wgcna script\
+ fails and generates the expected error"""
+ # pylint: disable = line-too-long,
+ mock_dumping_data.return_value = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json"
+
+ mock_compose_wgcna.return_value = "Rscript/GUIX_PATH/scripts/r_file.R /tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test.json"
+
+ expected_error = {
+ "code": 2,
+ "output": "could not read the json file"
+ }
+
+ with mock.patch("builtins.open", mock.mock_open(read_data="")):
+
+ mock_run_cmd.return_value = expected_error
+ self.assertEqual(call_wgcna_script(
+ "input_file.R", ""), expected_error)
+
+ def test_compose_wgcna_cmd(self):
+ """test for composing wgcna cmd"""
+ wgcna_cmd = compose_wgcna_cmd(
+ "wgcna.r", "/tmp/wgcna.json")
+ self.assertEqual(
+ wgcna_cmd, "Rscript ./scripts/wgcna.r /tmp/wgcna.json")
+
+ @mock.patch("gn3.computations.wgcna.TMPDIR", "/tmp")
+ @mock.patch("gn3.computations.wgcna.uuid.uuid4")
+ def test_create_json_file(self, file_name_generator):
+ """test for writing the data to a csv file"""
+ # # All the traits we have data for (should not contain duplicates)
+ # All the strains we have data for (contains duplicates)
+
+ trait_sample_data = {"1425642_at": {"129S1/SvImJ": 7.142,
+ "A/J": 7.31, "AKR/J": 7.49,
+ "B6D2F1": 6.899, "BALB/cByJ": 7.172,
+ "BALB/cJ": 7.396},
+ "1457784_at": {"129S1/SvImJ": 7.071, "A/J": 7.05,
+ "AKR/J": 7.313,
+ "B6D2F1": 6.999, "BALB/cByJ": 7.293,
+ "BALB/cJ": 7.117},
+ "1444351_at": {"129S1/SvImJ": 7.221, "A/J": 7.246,
+ "AKR/J": 7.754,
+ "B6D2F1": 6.866, "BALB/cByJ": 6.752,
+ "BALB/cJ": 7.269}
+
+ }
+
+ expected_input = {
+ "trait_sample_data": trait_sample_data,
+ "TOMtype": "unsigned",
+ "minModuleSize": 30
+ }
+
+ with mock.patch("builtins.open", mock.mock_open()) as file_handler:
+
+ file_name_generator.return_value = "facb73ff-7eef-4053-b6ea-e91d3a22a00c"
+
+ results = dump_wgcna_data(
+ expected_input)
+
+ file_handler.assert_called_once_with(
+ "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json", 'w')
+
+ self.assertEqual(
+ results, "/tmp/facb73ff-7eef-4053-b6ea-e91d3a22a00c.json")
diff --git a/tests/unit/db/data/genotypes/genotype_sample1.geno b/tests/unit/db/data/genotypes/genotype_sample1.geno
new file mode 100644
index 0000000..2a55964
--- /dev/null
+++ b/tests/unit/db/data/genotypes/genotype_sample1.geno
@@ -0,0 +1,23 @@
+# File name: genotype_sample for testing
+
+# Metadata: Please retain this header information with file.
+
+
+@name: BXD
+@type: riset
+@mat: B
+@pat: D
+@het:H
+@unk: U
+
+
+
+
+
+
+Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 BXD9
+1 rs31443144 1.50 3.010274 B B D D D B
+1 rs6269442 1.50 3.492195 B B D D H Y
+1 rs32285189 1.63 3.511204 B U D D D B
+2 rs31443144 1.50 3.010274 B B D D D B
+2 rs6269442 1.50 3.492195 B B D D H Y \ No newline at end of file
diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py
index 38de0e2..39f4af9 100644
--- a/tests/unit/db/test_datasets.py
+++ b/tests/unit/db/test_datasets.py
@@ -3,10 +3,10 @@
from unittest import mock, TestCase
from gn3.db.datasets import (
retrieve_dataset_name,
- retrieve_riset_fields,
- retrieve_geno_riset_fields,
- retrieve_publish_riset_fields,
- retrieve_probeset_riset_fields)
+ retrieve_group_fields,
+ retrieve_geno_group_fields,
+ retrieve_publish_group_fields,
+ retrieve_probeset_group_fields)
class TestDatasetsDBFunctions(TestCase):
"""Test cases for datasets functions."""
@@ -40,9 +40,9 @@ class TestDatasetsDBFunctions(TestCase):
table=table, cols=columns),
{"threshold": thresh, "name": dataset_name})
- def test_retrieve_probeset_riset_fields(self):
+ def test_retrieve_probeset_group_fields(self):
"""
- Test that the `riset` and `riset_id` fields are retrieved appropriately
+ Test that the `group` and `group_id` fields are retrieved appropriately
for the 'ProbeSet' trait type.
"""
for trait_name, expected in [
@@ -52,7 +52,7 @@ class TestDatasetsDBFunctions(TestCase):
with db_mock.cursor() as cursor:
cursor.execute.return_value = ()
self.assertEqual(
- retrieve_probeset_riset_fields(trait_name, db_mock),
+ retrieve_probeset_group_fields(trait_name, db_mock),
expected)
cursor.execute.assert_called_once_with(
(
@@ -63,34 +63,34 @@ class TestDatasetsDBFunctions(TestCase):
" AND ProbeSetFreeze.Name = %(name)s"),
{"name": trait_name})
- def test_retrieve_riset_fields(self):
+ def test_retrieve_group_fields(self):
"""
- Test that the riset fields are set up correctly for the different trait
+ Test that the group fields are set up correctly for the different trait
types.
"""
for trait_type, trait_name, dataset_info, expected in [
["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"},
- {"dataset_name": "pubDBName01", "riset": ""}],
+ {"dataset_name": "pubDBName01", "group": ""}],
["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"},
- {"dataset_name": "prbDBName01", "riset": ""}],
+ {"dataset_name": "prbDBName01", "group": ""}],
["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"},
- {"dataset_name": "genoDBName01", "riset": ""}],
- ["Temp", "tempTraitName01", {}, {"riset": ""}],
+ {"dataset_name": "genoDBName01", "group": ""}],
+ ["Temp", "tempTraitName01", {}, {"group": ""}],
]:
db_mock = mock.MagicMock()
with self.subTest(
trait_type=trait_type, trait_name=trait_name,
dataset_info=dataset_info):
with db_mock.cursor() as cursor:
- cursor.execute.return_value = ("riset_name", 0)
+ cursor.execute.return_value = ("group_name", 0)
self.assertEqual(
- retrieve_riset_fields(
+ retrieve_group_fields(
trait_type, trait_name, dataset_info, db_mock),
expected)
- def test_retrieve_publish_riset_fields(self):
+ def test_retrieve_publish_group_fields(self):
"""
- Test that the `riset` and `riset_id` fields are retrieved appropriately
+ Test that the `group` and `group_id` fields are retrieved appropriately
for the 'Publish' trait type.
"""
for trait_name, expected in [
@@ -100,7 +100,7 @@ class TestDatasetsDBFunctions(TestCase):
with db_mock.cursor() as cursor:
cursor.execute.return_value = ()
self.assertEqual(
- retrieve_publish_riset_fields(trait_name, db_mock),
+ retrieve_publish_group_fields(trait_name, db_mock),
expected)
cursor.execute.assert_called_once_with(
(
@@ -110,9 +110,9 @@ class TestDatasetsDBFunctions(TestCase):
" AND PublishFreeze.Name = %(name)s"),
{"name": trait_name})
- def test_retrieve_geno_riset_fields(self):
+ def test_retrieve_geno_group_fields(self):
"""
- Test that the `riset` and `riset_id` fields are retrieved appropriately
+ Test that the `group` and `group_id` fields are retrieved appropriately
for the 'Geno' trait type.
"""
for trait_name, expected in [
@@ -122,7 +122,7 @@ class TestDatasetsDBFunctions(TestCase):
with db_mock.cursor() as cursor:
cursor.execute.return_value = ()
self.assertEqual(
- retrieve_geno_riset_fields(trait_name, db_mock),
+ retrieve_geno_group_fields(trait_name, db_mock),
expected)
cursor.execute.assert_called_once_with(
(
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
new file mode 100644
index 0000000..c125224
--- /dev/null
+++ b/tests/unit/db/test_genotypes.py
@@ -0,0 +1,170 @@
+"""Tests gn3.db.genotypes"""
+from unittest import TestCase
+from gn3.db.genotypes import (
+ parse_genotype_file,
+ parse_genotype_labels,
+ parse_genotype_header,
+ parse_genotype_marker,
+ build_genotype_chromosomes)
+
+class TestGenotypes(TestCase):
+ """Tests for functions in `gn3.db.genotypes`."""
+
+ def test_parse_genotype_labels(self):
+ """Test that the genotype labels are parsed correctly."""
+ self.assertEqual(
+ parse_genotype_labels([
+ "@name: test_group\t", "@filler: test_filler ",
+ "@type:test_type", "@mat:test_mat \t", "@pat:test_pat ",
+ "@het: test_het ", "@unk: test_unk", "@other: test_other",
+ "@brrr: test_brrr "]),
+ (("group", "test_group"), ("filler", "test_filler"),
+ ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
+ ("het", "test_het"), ("unk", "test_unk")))
+
+ def test_parse_genotype_header(self):
+ """Test that the genotype header is parsed correctly."""
+ for header, expected in [
+ [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t"
+ "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"),
+ (("Mbmap", True), ("cm_column", 2), ("mb_column", 3),
+ ("prgy",
+ ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+ "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18",
+ "BXD19")),
+ ("nprgy", 14))],
+ [("Chr\tLocus\tcM\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\tBXD11"
+ "\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18"),
+ (("Mbmap", False), ("cm_column", 2), ("mb_column", None),
+ ("prgy",
+ ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+ "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")),
+ ("nprgy", 13))]]:
+ with self.subTest(header=header):
+ self.assertEqual(parse_genotype_header(header), expected)
+
+ def test_parse_genotype_data_line(self):
+ """Test parsing of data lines."""
+ for line, geno_obj, parlist, expected in [
+ ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+ {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+ "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+ "mb_column": 3},
+ tuple(),
+ (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", 3.0),
+ ("genotype",
+ ("U", "U", "U", "U", "U", "U", "U", "U", "U", "U")))],
+ ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+ {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+ "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+ "mb_column": 3},
+ ("some", "parlist", "content"),
+ (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", 3.0),
+ ("genotype",
+ (-1, 1, "U", "U", "U", "U", "U", "U", "U", "U")))],
+ ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tH\tD\tB\tU\tD\tB\tB",
+ {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+ "cm_column": 2, "Mbmap": True, "mb_column": 3},
+ tuple(),
+ (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", 3.0),
+ ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]:
+ with self.subTest(line=line):
+ self.assertEqual(
+ parse_genotype_marker(line, geno_obj, parlist),
+ expected)
+
+ def test_build_genotype_chromosomes(self):
+ """
+ Given `markers` and `geno_obj`, test that `build_genotype_chromosomes`
+ builds a sequence of chromosomes with the given markers ordered
+ according to the `chr` value."""
+ for markers, geno_obj, expected in [
+ [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", 3.0),
+ ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1))),
+ (("chr", "2"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", 3.0),
+ ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+ {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+ "cm_column": 2, "Mbmap": True, "mb_column": 3},
+ ((("name", "1"), ("mb_exists", True), ("cm_column", 2),
+ ("mb_column", 3),
+ ("loci",
+ ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+ "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))),
+ (("name", "2"), ("mb_exists", True), ("cm_column", 2),
+ ("mb_column", 3),
+ ("loci",
+ ({"chr": "2", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+ "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))))],
+ [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+ ("Mb", None),
+ ("genotype", (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+ {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+ "cm_column": 2, "Mbmap": False, "mb_column": None},
+ ((("name", "1"), ("mb_exists", False), ("cm_column", 2),
+ ("mb_column", None),
+ ("loci",
+ ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None,
+ "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]:
+ with self.subTest(markers=markers):
+ self.assertEqual(
+ build_genotype_chromosomes(geno_obj, markers),
+ expected)
+
+ def test_parse_genotype_file(self):
+ """Test the parsing of genotype files. """
+ self.assertEqual(
+ parse_genotype_file(
+ "tests/unit/db/data/genotypes/genotype_sample1.geno"),
+ {"group": "BXD",
+ "type": "riset",
+ "mat": "B",
+ "pat": "D",
+ "het": "H",
+ "unk": "U",
+ "Mbmap": True,
+ "cm_column": 2,
+ "mb_column": 3,
+ "prgy": ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"),
+ "nprgy": 6,
+ "chromosomes": (
+ {"name": "1",
+ "mb_exists": True,
+ "cm_column": 2,
+ "mb_column": 3,
+ "loci": (
+ {"chr": "1",
+ "name": "rs31443144",
+ "cM": 2.0,
+ "Mb": 3.0,
+ "genotype": (-1, -1, 1, 1, 1, -1)
+ },
+ {"chr": "1",
+ "name": "rs6269442",
+ "cM": 2.0,
+ "Mb": 3.0,
+ "genotype": (-1, -1, 1, 1, 0, "U")},
+ {"chr": "1",
+ "name": "rs32285189",
+ "cM": 2.0,
+ "Mb": 3.0,
+ "genotype": (-1, "U", 1, 1, 1, -1)})},
+ {"name": "2",
+ "mb_exists": True,
+ "cm_column": 2,
+ "mb_column": 3,
+ "loci": (
+ {"chr": "2",
+ "name": "rs31443144",
+ "cM": 2.0,
+ "Mb": 3.0,
+ "genotype": (-1, -1, 1, 1, 1, -1)},
+ {"chr": "2",
+ "name": "rs6269442",
+ "cM": 2.0,
+ "Mb": 3.0,
+ "genotype": (-1, -1, 1, 1, 0, "U")})})})
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index ee98893..8af8e82 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -166,15 +166,19 @@ class TestTraitsDBFunctions(TestCase):
the right calls.
"""
+ # pylint: disable=C0103
db_mock = mock.MagicMock()
STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
- PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
- "WHERE StrainId = %s AND Id = %s")
- PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
- "WHERE StrainId = %s AND DataId = %s")
- N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
- "WHERE StrainId = %s AND DataId = %s")
+ PUBLISH_DATA_SQL: str = (
+ "UPDATE PublishData SET value = %s "
+ "WHERE StrainId = %s AND Id = %s")
+ PUBLISH_SE_SQL: str = (
+ "UPDATE PublishSE SET error = %s "
+ "WHERE StrainId = %s AND DataId = %s")
+ N_STRAIN_SQL: str = (
+ "UPDATE NStrain SET count = %s "
+ "WHERE StrainId = %s AND DataId = %s")
with db_mock.cursor() as cursor:
type(cursor).rowcount = 1
diff --git a/tests/unit/sample_test_data.py b/tests/unit/sample_test_data.py
new file mode 100644
index 0000000..407d074
--- /dev/null
+++ b/tests/unit/sample_test_data.py
@@ -0,0 +1,111 @@
+"""
+This module holds a collection of sample data variables, used in more than one
+ test.
+
+This is mostly to avoid the `duplicate-code` pylint error that gets raised if
+the same data is defined in more than one file. It has been found that adding
+the `# pylint: disable=R0801` or `# pylint: disable=duplicate-code` to the top
+of the file seems to not work as expected.
+
+Adding these same declarations to .pylintrc is not an option, since that,
+seemingly, would deactivate the warnings for all code in the project: We do not
+want that.
+"""
+
+organised_trait_1 = {
+ "1": {
+ "ID": "1",
+ "chromosomes": {
+ 1: {"Chr": 1,
+ "loci": [
+ {
+ "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ }]},
+ 2: {"Chr": 2,
+ "loci": [
+ {
+ "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ }]}}}}
+
+organised_trait_2 = {
+ "2": {
+ "ID": "2",
+ "chromosomes": {
+ 1: {"Chr": 1,
+ "loci": [
+ {
+ "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ }]},
+ 2: {"Chr": 2,
+ "loci": [
+ {
+ "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+ "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+ },
+ {
+ "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+ "LRS": 0.579, "Additive": -0.074, "pValue": 1.000
+ }]}}}}
diff --git a/tests/unit/computations/test_heatmap.py b/tests/unit/test_heatmaps.py
index 650cb45..7b66688 100644
--- a/tests/unit/computations/test_heatmap.py
+++ b/tests/unit/test_heatmaps.py
@@ -1,38 +1,56 @@
-"""Module contains tests for gn3.computations.heatmap"""
+"""Module contains tests for gn3.heatmaps.heatmaps"""
from unittest import TestCase
-from gn3.computations.heatmap import cluster_traits, export_trait_data
+from gn3.heatmaps import (
+ cluster_traits,
+ get_loci_names,
+ get_lrs_from_chr,
+ export_trait_data,
+ compute_traits_order,
+ retrieve_samples_and_values,
+ process_traits_data_for_heatmap)
+from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
-strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
+samplelist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
trait_data = {
"mysqlid": 36688172,
"data": {
- "B6cC3-1": {"strain_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
- "BXD1": {"strain_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
- "BXD12": {"strain_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
- "BXD16": {"strain_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
- "BXD19": {"strain_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
- "BXD2": {"strain_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
- "BXD21": {"strain_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
- "BXD24": {"strain_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
- "BXD27": {"strain_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
- "BXD28": {"strain_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
- "BXD32": {"strain_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
- "BXD39": {"strain_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
- "BXD40": {"strain_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
- "BXD42": {"strain_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
- "BXD6": {"strain_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
- "BXH14": {"strain_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
- "BXH19": {"strain_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
- "BXH2": {"strain_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
- "BXH22": {"strain_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
- "BXH4": {"strain_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
- "BXH6": {"strain_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
- "BXH7": {"strain_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
- "BXH8": {"strain_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
- "BXH9": {"strain_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
- "C3H/HeJ": {"strain_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
- "C57BL/6J": {"strain_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
- "DBA/2J": {"strain_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+ "B6cC3-1": {"sample_name": "B6cC3-1", "value": 7.51879, "variance": None, "ndata": None},
+ "BXD1": {"sample_name": "BXD1", "value": 7.77141, "variance": None, "ndata": None},
+ "BXD12": {"sample_name": "BXD12", "value": 8.39265, "variance": None, "ndata": None},
+ "BXD16": {"sample_name": "BXD16", "value": 8.17443, "variance": None, "ndata": None},
+ "BXD19": {"sample_name": "BXD19", "value": 8.30401, "variance": None, "ndata": None},
+ "BXD2": {"sample_name": "BXD2", "value": 7.80944, "variance": None, "ndata": None},
+ "BXD21": {"sample_name": "BXD21", "value": 8.93809, "variance": None, "ndata": None},
+ "BXD24": {"sample_name": "BXD24", "value": 7.99415, "variance": None, "ndata": None},
+ "BXD27": {"sample_name": "BXD27", "value": 8.12177, "variance": None, "ndata": None},
+ "BXD28": {"sample_name": "BXD28", "value": 7.67688, "variance": None, "ndata": None},
+ "BXD32": {"sample_name": "BXD32", "value": 7.79062, "variance": None, "ndata": None},
+ "BXD39": {"sample_name": "BXD39", "value": 8.27641, "variance": None, "ndata": None},
+ "BXD40": {"sample_name": "BXD40", "value": 8.18012, "variance": None, "ndata": None},
+ "BXD42": {"sample_name": "BXD42", "value": 7.82433, "variance": None, "ndata": None},
+ "BXD6": {"sample_name": "BXD6", "value": 8.09718, "variance": None, "ndata": None},
+ "BXH14": {"sample_name": "BXH14", "value": 7.97475, "variance": None, "ndata": None},
+ "BXH19": {"sample_name": "BXH19", "value": 7.67223, "variance": None, "ndata": None},
+ "BXH2": {"sample_name": "BXH2", "value": 7.93622, "variance": None, "ndata": None},
+ "BXH22": {"sample_name": "BXH22", "value": 7.43692, "variance": None, "ndata": None},
+ "BXH4": {"sample_name": "BXH4", "value": 7.96336, "variance": None, "ndata": None},
+ "BXH6": {"sample_name": "BXH6", "value": 7.75132, "variance": None, "ndata": None},
+ "BXH7": {"sample_name": "BXH7", "value": 8.12927, "variance": None, "ndata": None},
+ "BXH8": {"sample_name": "BXH8", "value": 6.77338, "variance": None, "ndata": None},
+ "BXH9": {"sample_name": "BXH9", "value": 8.03836, "variance": None, "ndata": None},
+ "C3H/HeJ": {"sample_name": "C3H/HeJ", "value": 7.42795, "variance": None, "ndata": None},
+ "C57BL/6J": {"sample_name": "C57BL/6J", "value": 7.50606, "variance": None, "ndata": None},
+ "DBA/2J": {"sample_name": "DBA/2J", "value": 7.72588, "variance": None, "ndata": None}}}
+
+slinked = (
+ (((0, 2, 0.16381088984330505),
+ ((1, 7, 0.06024619831474998), 5, 0.19179284676938602),
+ 0.20337048635536847),
+ 9,
+ 0.23451785425383564),
+ ((3, (6, 8, 0.2140799896286565), 0.25879514152086425),
+ 4, 0.8968250491499363),
+ 0.9313185954797953)
class TestHeatmap(TestCase):
"""Class for testing heatmap computation functions"""
@@ -49,7 +67,7 @@ class TestHeatmap(TestCase):
["all", (7.51879, 7.77141, 8.39265, 8.17443, 8.30401, 7.80944)]]:
with self.subTest(dtype=dtype):
self.assertEqual(
- export_trait_data(trait_data, strainlist, dtype=dtype),
+ export_trait_data(trait_data, samplelist, dtype=dtype),
expected)
def test_export_trait_data_dtype_all_flags(self):
@@ -89,7 +107,7 @@ class TestHeatmap(TestCase):
with self.subTest(dtype=dtype, vflag=vflag, nflag=nflag):
self.assertEqual(
export_trait_data(
- trait_data, strainlist, dtype=dtype, var_exists=vflag,
+ trait_data, samplelist, dtype=dtype, var_exists=vflag,
n_exists=nflag),
expected)
@@ -141,3 +159,73 @@ class TestHeatmap(TestCase):
0.9313185954797953, 1.1683723389247052, 0.23451785425383564,
1.7413442197913358, 0.33370067057028485, 1.3256191648260216,
0.0)))
+
+ def test_compute_heatmap_order(self):
+ """Test the orders."""
+ self.assertEqual(
+ compute_traits_order(slinked), (0, 2, 1, 7, 5, 9, 3, 6, 8, 4))
+
+ def test_retrieve_samples_and_values(self):
+ """Test retrieval of samples and values."""
+ for orders, slist, tdata, expected in [
+ [
+ [2],
+ ["s1", "s2", "s3", "s4"],
+ [[2, 9, 6, None, 4],
+ [7, 5, None, None, 4],
+ [9, None, 5, 4, 7],
+ [6, None, None, 4, None]],
+ [[2, ["s1", "s3", "s4"], [9, 5, 4]]]
+ ],
+ [
+ [3],
+ ["s1", "s2", "s3", "s4", "s5"],
+ [[2, 9, 6, None, 4],
+ [7, 5, None, None, 4],
+ [9, None, 5, 4, 7],
+ [6, None, None, 4, None]],
+ [[3, ["s1", "s4"], [6, 4]]]
+ ]]:
+ with self.subTest(samplelist=slist, traitdata=tdata):
+ self.assertEqual(
+ retrieve_samples_and_values(orders, slist, tdata), expected)
+
+ def test_get_lrs_from_chr(self):
+ """Check that function gets correct LRS values"""
+ for trait, chromosome, expected in [
+ [{"chromosomes": {}}, 3, [None]],
+ [{"chromosomes": {3: {"loci": [
+ {"Locus": "b", "LRS": 1.9},
+ {"Locus": "a", "LRS": 13.2},
+ {"Locus": "d", "LRS": 53.21},
+ {"Locus": "c", "LRS": 2.22}]}}},
+ 3,
+ [13.2, 1.9, 2.22, 53.21]]]:
+ with self.subTest(trait=trait, chromosome=chromosome):
+ self.assertEqual(get_lrs_from_chr(trait, chromosome), expected)
+
+ def test_process_traits_data_for_heatmap(self):
+ """Check for correct processing of data for heatmap generation."""
+ self.assertEqual(
+ process_traits_data_for_heatmap(
+ {**organised_trait_1, **organised_trait_2},
+ ["2", "1"],
+ [1, 2]),
+ [[[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]],
+ [[0.5, 0.579, 0.5],
+ [0.5, 0.5, 0.5]]])
+
+ def test_get_loci_names(self):
+ """Check that loci names are retrieved correctly."""
+ for organised, expected in (
+ (organised_trait_1,
+ (("rs258367496", "rs30658298", "rs31443144", "rs32285189",
+ "rs32430919", "rs36251697", "rs6269442"),
+ ("rs31879829", "rs36742481", "rs51852623"))),
+ ({**organised_trait_1, **organised_trait_2},
+ (("rs258367496", "rs30658298", "rs31443144", "rs32285189",
+ "rs32430919", "rs36251697", "rs6269442"),
+ ("rs31879829", "rs36742481", "rs51852623")))):
+ with self.subTest(organised=organised):
+ self.assertEqual(get_loci_names(organised, (1, 2)), expected)