aboutsummaryrefslogtreecommitdiff
path: root/qtlfilesexport.py
diff options
context:
space:
mode:
authorMuriithi Frederick Muriuki2021-09-09 10:59:31 +0300
committerMuriithi Frederick Muriuki2021-09-09 10:59:31 +0300
commit0d4e21d74d54554cf278142d0a31d35bec873ac1 (patch)
tree73f32703da47896e3dd5a0f933380f0d8c7de7db /qtlfilesexport.py
parenta718069c757bea9f7ecbaee25e23bd581750f906 (diff)
downloadgenenetwork3-0d4e21d74d54554cf278142d0a31d35bec873ac1.tar.gz
Update proof-of-concept code
* Add individual heatmaps * Add dendograms * Merge multiple heatmaps to single plot Updated the proof of concept code to provide a sample of what is needed to generate the appropriate heatmaps. To generate the sample heatmaps, one can run something like: env SQL_URI="mysql://webqtlout:webqtlout@127.0.0.1:3306/db_webqtl" \ python3 qtlfilesexport.py assuming that the database can be accessed at 127.0.0.1:3306
Diffstat (limited to 'qtlfilesexport.py')
-rw-r--r--qtlfilesexport.py199
1 files changed, 192 insertions, 7 deletions
diff --git a/qtlfilesexport.py b/qtlfilesexport.py
index 799de31..100fa75 100644
--- a/qtlfilesexport.py
+++ b/qtlfilesexport.py
@@ -7,19 +7,38 @@ Run with:
replacing the variables in the angled brackets with the appropriate values
"""
+from gn3.random import random_string
from gn3.computations.slink import slink
from gn3.db_utils import database_connector
from gn3.computations.qtlreaper import run_reaper
-from gn3.computations.heatmap import export_trait_data
from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
-from gn3.db.genotypes import build_genotype_file, load_genotype_samples
-from gn3.computations.qtlreaper import random_string, generate_traits_file
+from gn3.computations.heatmap import export_trait_data, get_nearest_marker
+from gn3.db.genotypes import (
+ build_genotype_file,
+ parse_genotype_file,
+ load_genotype_samples)
from gn3.computations.heatmap import (
cluster_traits,
- compute_heatmap_order,
+ compute_traits_order,
retrieve_strains_and_values)
+from gn3.computations.qtlreaper import (
+ generate_traits_file,
+ chromosome_sorter_key_fn,
+ parse_reaper_main_results,
+ organise_reaper_main_results,
+ parse_reaper_permutation_results)
-TMPDIR = "tmp/qtltests"
+import plotly.express as px
+
+## for dendrogram
+import numpy as np
+import plotly.graph_objects as go
+import plotly.figure_factory as ff
+
+# for single heatmap
+from plotly.subplots import make_subplots
+
+TMPDIR = "tmp/"
def trait_fullnames():
"""Return sample names for traits"""
@@ -35,6 +54,104 @@ def trait_fullnames():
"UCLA_BXDBXH_CARTILAGE_V2::ILM4200064",
"UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"]
+def get_lrs_from_chr(trait, chr_name):
+ chromosome = trait["chromosomes"].get(chr_name)
+ if chromosome:
+ return [
+ locus["LRS"] for locus in
+ sorted(chromosome["loci"], key=lambda loc: loc["Locus"])]
+ return [None]
+
+def process_traits_data_for_heatmap(data, trait_names, chromosome_names):
+ print("TRAIT_NAMES: {}".format(trait_names))
+ print("chromosome names: {}".format(chromosome_names))
+ print("data keys: {}".format(data.keys()))
+ hdata = [
+ [get_lrs_from_chr(data[trait], chr_name) for trait in trait_names]
+ for chr_name in chromosome_names]
+ # print("hdata: {}".format(hdata))
+ return hdata
+
+def generate_heatmap(
+ data, image_filename_prefix, x_axis = None, x_label: str = "",
+ y_axis = None, y_label: str = "", output_dir: str = TMPDIR):
+ """Generate single heatmap section."""
+ print("X-AXIS:({}, {}), Y-AXIS: ({}, {}), ROWS:{}, COLS:{}".format(
+ x_axis, (len(x_axis) if x_axis else 0),
+ y_axis, (len(y_axis) if y_axis else 0),
+ len(data), len(data[0])))
+ fig = px.imshow(
+ data,
+ x = x_axis,
+ y = y_axis,
+ width=1000
+ )
+ fig.update_yaxes(title=y_label)
+ fig.update_xaxes(title=x_label)
+ image_filename = "{}/{}.html".format(output_dir, image_filename_prefix)
+ fig.write_html(image_filename)
+ return image_filename, fig
+
+def generate_dendrogram(
+ data, image_filename_prefix, x_axis = None, x_label: str = "",
+ y_axis = None, y_label: str = "", output_dir: str = TMPDIR):
+ fig = ff.create_dendrogram(
+ np.array(data), orientation="right", labels=y_axis)
+
+ heatmap = go.Heatmap(
+ x=fig['layout']['xaxis']['ticktext'],
+ y=fig['layout']['yaxis']['ticktext'],
+ z=data)
+
+ # print("HEAMAP:{}".format(heatmap))
+ fig.add_trace(heatmap)
+
+ fig.update_layout({"width": 1000, "height": 500})
+ image_filename = "{}/{}.html".format(output_dir, image_filename_prefix)
+ fig.write_html(image_filename)
+ return image_filename, fig
+
+def generate_single_heatmap(
+ data, image_filename_prefix, x_axis = None, x_label: str = "",
+ y_axis = None, y_label: str = "", output_dir: str = TMPDIR):
+ """Generate single heatmap section."""
+ # fig = go.Figure({"type": "heatmap"})
+ num_cols = len(x_axis)
+ fig = make_subplots(
+ rows=1,
+ cols=num_cols,
+ shared_yaxes="rows",
+ # horizontal_spacing=(1 / (num_cols - 1)),
+ subplot_titles=x_axis
+ )
+ hms = [go.Heatmap(
+ name=chromo,
+ y = y_axis,
+ z = data_array,
+ showscale=False) for chromo, data_array in zip(x_axis, data)]
+ for col, hm in enumerate(hms):
+ fig.add_trace(hm, row=1, col=(col + 1))
+
+ fig.update_traces(
+ showlegend=False,
+ colorscale=[
+ [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
+ [0.5, '#F5DE11'], [1.0, '#FF0D00']],
+ selector={"type": "heatmap"})
+ fig.update_traces(
+ showlegend=True,
+ showscale=True,
+ selector={"name": x_axis[-1]})
+ fig.update_layout(
+ coloraxis_colorscale=[
+ [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'],
+ [0.5, '#F5DE11'], [1.0, '#FF0D00']]
+ )
+ print(fig)
+ image_filename = "{}/{}.html".format(output_dir, image_filename_prefix)
+ fig.write_html(image_filename)
+ return image_filename, fig
+
def main():
"""entrypoint function"""
conn = database_connector()[0]
@@ -44,13 +161,19 @@ def main():
for fullname in trait_fullnames()]
traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
genotype_filename = build_genotype_file(traits[0]["riset"])
+ genotype = parse_genotype_file(genotype_filename)
strains = load_genotype_samples(genotype_filename)
exported_traits_data_list = [
export_trait_data(td, strains) for td in traits_data_list]
slinked = slink(cluster_traits(exported_traits_data_list))
- orders = compute_heatmap_order(slinked)
+ print("SLINKED: {}".format(slinked))
+ traits_order = compute_traits_order(slinked)
+ print("KEYS: {}".format(traits[0].keys()))
+ ordered_traits_names = [
+ traits[idx]["trait_fullname"] for idx in traits_order]
+ print("ORDERS: {}".format(traits_order))
strains_and_values = retrieve_strains_and_values(
- orders, strains, exported_traits_data_list)
+ traits_order, strains, exported_traits_data_list)
strains_values = strains_and_values[0][1]
trait_values = [t[2] for t in strains_and_values]
traits_filename = "{}/traits_test_file_{}.txt".format(
@@ -64,5 +187,67 @@ def main():
print("Main output: {}, Permutation output: {}".format(
main_output, permutations_output))
+ qtlresults = parse_reaper_main_results(main_output)
+ permudata = parse_reaper_permutation_results(permutations_output)
+ # print("QTLRESULTS: {}".format(qtlresults))
+ # print("PERMUDATA: {}".format(permudata))
+
+ nearest = get_nearest_marker(traits, genotype)
+ print("NEAREST: {}".format(nearest))
+
+ organised = organise_reaper_main_results(qtlresults)
+
+ traits_ids = [# sort numerically, but retain the ids as strings
+ str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
+ chromosome_names = sorted(
+ {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn)
+ loci_names = sorted({row["Locus"] for row in qtlresults})
+ ordered_traits_names = {
+ res_id: trait for res_id, trait in
+ zip(traits_ids,
+ [traits[idx]["trait_fullname"] for idx in traits_order])}
+ # print("ordered:{}, original: {}".format(
+ # ordered_traits_names, [t["trait_fullname"] for t in traits]))
+ # print("chromosome_names:{}".format(chromosome_names))
+ # print("trait_ids:{}".format(traits_ids))
+ # print("loci names:{}".format(loci_names))
+ hdata = process_traits_data_for_heatmap(organised, traits_ids, chromosome_names)
+
+ # print("ZIPPED: {}".format(zip(tuple(ordered_traits_names.keys()), hdata)))
+ # print("HDATA LENGTH:{}, ORDERED TRAITS LENGTH:{}".format(len(hdata), len(ordered_traits_names.keys())))
+ heatmaps_data = [
+ generate_heatmap(
+ data,
+ "heatmap_chr{}_{}".format(chromo, random_string(10)),
+ y_axis=tuple(
+ ordered_traits_names[traits_ids[order]]
+ for order in traits_order),
+ x_label=chromo,
+ output_dir=TMPDIR)
+ for chromo, data in zip(chromosome_names, hdata)]
+ print("IMAGES FILENAMES: {}".format([img[0] for img in heatmaps_data]))
+
+ dendograms_data = [
+ generate_dendrogram(
+ data,
+ "dendo_chr{}_{}".format(chromo, random_string(10)),
+ y_axis=tuple(
+ ordered_traits_names[traits_ids[order]]
+ for order in traits_order),
+ x_label=chromo,
+ output_dir=TMPDIR)
+ for chromo, data in zip(chromosome_names, hdata)]
+
+ res = generate_single_heatmap(
+ hdata,
+ "single_heatmap_{}".format(random_string(10)),
+ y_axis=tuple(
+ ordered_traits_names[traits_ids[order]]
+ for order in traits_order),
+ y_label="Traits",
+ x_axis=[chromo for chromo in chromosome_names],
+ x_label="Chromosomes",
+ output_dir=TMPDIR)
+
if __name__ == "__main__":
main()