aboutsummaryrefslogtreecommitdiff
path: root/gn3/heatmaps.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-12-06 14:04:59 +0300
committerFrederick Muriuki Muriithi2021-12-06 14:04:59 +0300
commit66406115f41594ba40e3fbbc6f69aace2d11800f (patch)
tree0f3de09b74a3f47918dd4a192665c8a06c508144 /gn3/heatmaps.py
parent77099cac68e8f4792bf54d8e1f7ce6f315bedfa7 (diff)
parent5d2248f1dabbc7dd04f48aafcc9f327817a9c92c (diff)
downloadgenenetwork3-66406115f41594ba40e3fbbc6f69aace2d11800f.tar.gz
Merge branch 'partial-correlations'
Diffstat (limited to 'gn3/heatmaps.py')
-rw-r--r--gn3/heatmaps.py202
1 files changed, 75 insertions, 127 deletions
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index adbfbc6..f0af409 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -14,6 +14,7 @@ from plotly.subplots import make_subplots # type: ignore
from gn3.settings import TMPDIR
from gn3.random import random_string
from gn3.computations.slink import slink
+from gn3.db.traits import export_trait_data
from gn3.computations.correlations2 import compute_correlation
from gn3.db.genotypes import (
build_genotype_file, load_genotype_samples)
@@ -26,72 +27,6 @@ from gn3.computations.qtlreaper import (
parse_reaper_main_results,
organise_reaper_main_results)
-def export_trait_data(
- trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
- var_exists: bool = False, n_exists: bool = False):
- """
- Export data according to `samplelist`. Mostly used in calculating
- correlations.
-
- DESCRIPTION:
- Migrated from
- https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
-
- PARAMETERS
- trait: (dict)
- The dictionary of key-value pairs representing a trait
- samplelist: (list)
- A list of sample names
- dtype: (str)
- ... verify what this is ...
- var_exists: (bool)
- A flag indicating existence of variance
- n_exists: (bool)
- A flag indicating existence of ndata
- """
- def __export_all_types(tdata, sample):
- sample_data = []
- if tdata[sample]["value"]:
- sample_data.append(tdata[sample]["value"])
- if var_exists:
- if tdata[sample]["variance"]:
- sample_data.append(tdata[sample]["variance"])
- else:
- sample_data.append(None)
- if n_exists:
- if tdata[sample]["ndata"]:
- sample_data.append(tdata[sample]["ndata"])
- else:
- sample_data.append(None)
- else:
- if var_exists and n_exists:
- sample_data += [None, None, None]
- elif var_exists or n_exists:
- sample_data += [None, None]
- else:
- sample_data.append(None)
-
- return tuple(sample_data)
-
- def __exporter(accumulator, sample):
- # pylint: disable=[R0911]
- if sample in trait_data["data"]:
- if dtype == "val":
- return accumulator + (trait_data["data"][sample]["value"], )
- if dtype == "var":
- return accumulator + (trait_data["data"][sample]["variance"], )
- if dtype == "N":
- return accumulator + (trait_data["data"][sample]["ndata"], )
- if dtype == "all":
- return accumulator + __export_all_types(trait_data["data"], sample)
- raise KeyError("Type `%s` is incorrect" % dtype)
- if var_exists and n_exists:
- return accumulator + (None, None, None)
- if var_exists or n_exists:
- return accumulator + (None, None)
- return accumulator + (None,)
-
- return reduce(__exporter, samplelist, tuple())
def trait_display_name(trait: Dict):
"""
@@ -129,11 +64,7 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
def __compute_corr(tdata_i, tdata_j):
if tdata_i[0] == tdata_j[0]:
return 0.0
- corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
- corr = corr_vals[0]
- if (1 - corr) < 0:
- return 0.0
- return 1 - corr
+ return 1 - compute_correlation(tdata_i[1], tdata_j[1])[0]
def __cluster(tdata_i):
return tuple(
@@ -168,7 +99,9 @@ def get_loci_names(
__get_trait_loci, [v[1] for v in organised.items()], {})
return tuple(loci_dict[_chr] for _chr in chromosome_names)
-def build_heatmap(traits_names, conn: Any):
+def build_heatmap(
+ traits_names: Sequence[str], conn: Any,
+ vertical: bool = False) -> go.Figure:
"""
heatmap function
@@ -220,17 +153,21 @@ def build_heatmap(traits_names, conn: Any):
zip(traits_ids,
[traits[idx]["trait_fullname"] for idx in traits_order]))
- return generate_clustered_heatmap(
+ return clustered_heatmap(
process_traits_data_for_heatmap(
organised, traits_ids, chromosome_names),
clustered,
- "single_heatmap_{}".format(random_string(10)),
- y_axis=tuple(
- ordered_traits_names[traits_ids[order]]
- for order in traits_order),
- y_label="Traits",
- x_axis=chromosome_names,
- x_label="Chromosomes",
+ x_axis={
+ "label": "Chromosomes",
+ "data": chromosome_names
+ },
+ y_axis={
+ "label": "Traits",
+ "data": tuple(
+ ordered_traits_names[traits_ids[order]]
+ for order in traits_order)
+ },
+ vertical=vertical,
loci_names=get_loci_names(organised, chromosome_names))
def compute_traits_order(slink_data, neworder: tuple = tuple()):
@@ -349,68 +286,81 @@ def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
for chr_name in chromosome_names]
return hdata
-def generate_clustered_heatmap(
- data, clustering_data, image_filename_prefix, x_axis=None,
- x_label: str = "", y_axis=None, y_label: str = "",
+def clustered_heatmap(
+ data: Sequence[Sequence[float]], clustering_data: Sequence[float],
+ x_axis,#: Dict[Union[str, int], Union[str, Sequence[str]]],
+ y_axis: Dict[str, Union[str, Sequence[str]]],
loci_names: Sequence[Sequence[str]] = tuple(),
- output_dir: str = TMPDIR,
- colorscale=((0.0, '#0000FF'), (0.5, '#00FF00'), (1.0, '#FF0000'))):
+ vertical: bool = False,
+ colorscale: Sequence[Sequence[Union[float, str]]] = (
+ (0.0, '#0000FF'), (0.5, '#00FF00'), (1.0, '#FF0000'))) -> go.Figure:
"""
Generate a dendrogram, and heatmaps for each chromosome, and put them all
into one plot.
"""
# pylint: disable=[R0913, R0914]
- num_cols = 1 + len(x_axis)
+ x_axis_data = x_axis["data"]
+ y_axis_data = y_axis["data"]
+ num_plots = 1 + len(x_axis_data)
fig = make_subplots(
- rows=1,
- cols=num_cols,
- shared_yaxes="rows",
+ rows=num_plots if vertical else 1,
+ cols=1 if vertical else num_plots,
+ shared_xaxes="columns" if vertical else False,
+ shared_yaxes=False if vertical else "rows",
+ vertical_spacing=0.010,
horizontal_spacing=0.001,
- subplot_titles=["distance"] + x_axis,
+ subplot_titles=["" if vertical else x_axis["label"]] + [
+ "Chromosome: {}".format(chromo) if vertical else chromo
+ for chromo in x_axis_data],#+ x_axis_data,
figure=ff.create_dendrogram(
- np.array(clustering_data), orientation="right", labels=y_axis))
+ np.array(clustering_data),
+ orientation="bottom" if vertical else "right",
+ labels=y_axis_data))
hms = [go.Heatmap(
name=chromo,
- x=loci,
- y=y_axis,
+ x=y_axis_data if vertical else loci,
+ y=loci if vertical else y_axis_data,
z=data_array,
+ transpose=vertical,
showscale=False)
for chromo, data_array, loci
- in zip(x_axis, data, loci_names)]
+ in zip(x_axis_data, data, loci_names)]
for i, heatmap in enumerate(hms):
- fig.add_trace(heatmap, row=1, col=(i + 2))
-
- fig.update_layout(
- {
- "width": 1500,
- "height": 800,
- "xaxis": {
+ fig.add_trace(
+ heatmap,
+ row=((i + 2) if vertical else 1),
+ col=(1 if vertical else (i + 2)))
+
+ axes_layouts = {
+ "{axis}axis{count}".format(
+ axis=("y" if vertical else "x"),
+ count=(i+1 if i > 0 else "")): {
"mirror": False,
- "showgrid": True,
- "title": x_label
- },
- "yaxis": {
- "title": y_label
+ "showticklabels": i == 0,
+ "ticks": "outside" if i == 0 else ""
}
- })
+ for i in range(num_plots)}
- x_axes_layouts = {
- "xaxis{}".format(i+1 if i > 0 else ""): {
- "mirror": False,
- "showticklabels": i == 0,
- "ticks": "outside" if i == 0 else ""
- }
- for i in range(num_cols)}
+ print("vertical?: {} ==> {}".format("T" if vertical else "F", axes_layouts))
- fig.update_layout(
- {
- "width": 4000,
- "height": 800,
- "yaxis": {
- "mirror": False,
- "ticks": ""
- },
- **x_axes_layouts})
+ fig.update_layout({
+ "width": 800 if vertical else 4000,
+ "height": 4000 if vertical else 800,
+ "{}axis".format("x" if vertical else "y"): {
+ "mirror": False,
+ "ticks": "",
+ "side": "top" if vertical else "left",
+ "title": y_axis["label"],
+ "tickangle": 90 if vertical else 0,
+ "ticklabelposition": "outside top" if vertical else "outside left"
+ },
+ "{}axis".format("y" if vertical else "x"): {
+ "mirror": False,
+ "showgrid": True,
+ "title": "Distance",
+ "side": "right" if vertical else "top"
+ },
+ **axes_layouts})
fig.update_traces(
showlegend=False,
colorscale=colorscale,
@@ -418,7 +368,5 @@ def generate_clustered_heatmap(
fig.update_traces(
showlegend=True,
showscale=True,
- selector={"name": x_axis[-1]})
- image_filename = "{}/{}.html".format(output_dir, image_filename_prefix)
- fig.write_html(image_filename)
- return image_filename, fig
+ selector={"name": x_axis_data[-1]})
+ return fig