diff options
-rw-r--r-- | qtlfilesexport.py | 199 |
1 files changed, 192 insertions, 7 deletions
diff --git a/qtlfilesexport.py b/qtlfilesexport.py index 799de31..100fa75 100644 --- a/qtlfilesexport.py +++ b/qtlfilesexport.py @@ -7,19 +7,38 @@ Run with: replacing the variables in the angled brackets with the appropriate values """ +from gn3.random import random_string from gn3.computations.slink import slink from gn3.db_utils import database_connector from gn3.computations.qtlreaper import run_reaper -from gn3.computations.heatmap import export_trait_data from gn3.db.traits import retrieve_trait_data, retrieve_trait_info -from gn3.db.genotypes import build_genotype_file, load_genotype_samples -from gn3.computations.qtlreaper import random_string, generate_traits_file +from gn3.computations.heatmap import export_trait_data, get_nearest_marker +from gn3.db.genotypes import ( + build_genotype_file, + parse_genotype_file, + load_genotype_samples) from gn3.computations.heatmap import ( cluster_traits, - compute_heatmap_order, + compute_traits_order, retrieve_strains_and_values) +from gn3.computations.qtlreaper import ( + generate_traits_file, + chromosome_sorter_key_fn, + parse_reaper_main_results, + organise_reaper_main_results, + parse_reaper_permutation_results) -TMPDIR = "tmp/qtltests" +import plotly.express as px + +## for dendrogram +import numpy as np +import plotly.graph_objects as go +import plotly.figure_factory as ff + +# for single heatmap +from plotly.subplots import make_subplots + +TMPDIR = "tmp/" def trait_fullnames(): """Return sample names for traits""" @@ -35,6 +54,104 @@ def trait_fullnames(): "UCLA_BXDBXH_CARTILAGE_V2::ILM4200064", "UCLA_BXDBXH_CARTILAGE_V2::ILM3140463"] +def get_lrs_from_chr(trait, chr_name): + chromosome = trait["chromosomes"].get(chr_name) + if chromosome: + return [ + locus["LRS"] for locus in + sorted(chromosome["loci"], key=lambda loc: loc["Locus"])] + return [None] + +def process_traits_data_for_heatmap(data, trait_names, chromosome_names): + print("TRAIT_NAMES: {}".format(trait_names)) + print("chromosome names: {}".format(chromosome_names)) + print("data keys: {}".format(data.keys())) + hdata = [ + [get_lrs_from_chr(data[trait], chr_name) for trait in trait_names] + for chr_name in chromosome_names] + # print("hdata: {}".format(hdata)) + return hdata + +def generate_heatmap( + data, image_filename_prefix, x_axis = None, x_label: str = "", + y_axis = None, y_label: str = "", output_dir: str = TMPDIR): + """Generate single heatmap section.""" + print("X-AXIS:({}, {}), Y-AXIS: ({}, {}), ROWS:{}, COLS:{}".format( + x_axis, (len(x_axis) if x_axis else 0), + y_axis, (len(y_axis) if y_axis else 0), + len(data), len(data[0]))) + fig = px.imshow( + data, + x = x_axis, + y = y_axis, + width=1000 + ) + fig.update_yaxes(title=y_label) + fig.update_xaxes(title=x_label) + image_filename = "{}/{}.html".format(output_dir, image_filename_prefix) + fig.write_html(image_filename) + return image_filename, fig + +def generate_dendrogram( + data, image_filename_prefix, x_axis = None, x_label: str = "", + y_axis = None, y_label: str = "", output_dir: str = TMPDIR): + fig = ff.create_dendrogram( + np.array(data), orientation="right", labels=y_axis) + + heatmap = go.Heatmap( + x=fig['layout']['xaxis']['ticktext'], + y=fig['layout']['yaxis']['ticktext'], + z=data) + + # print("HEAMAP:{}".format(heatmap)) + fig.add_trace(heatmap) + + fig.update_layout({"width": 1000, "height": 500}) + image_filename = "{}/{}.html".format(output_dir, image_filename_prefix) + fig.write_html(image_filename) + return image_filename, fig + +def generate_single_heatmap( + data, image_filename_prefix, x_axis = None, x_label: str = "", + y_axis = None, y_label: str = "", output_dir: str = TMPDIR): + """Generate single heatmap section.""" + # fig = go.Figure({"type": "heatmap"}) + num_cols = len(x_axis) + fig = make_subplots( + rows=1, + cols=num_cols, + shared_yaxes="rows", + # horizontal_spacing=(1 / (num_cols - 1)), + subplot_titles=x_axis + ) + hms = [go.Heatmap( + name=chromo, + y = y_axis, + z = data_array, + showscale=False) for chromo, data_array in zip(x_axis, data)] + for col, hm in enumerate(hms): + fig.add_trace(hm, row=1, col=(col + 1)) + + fig.update_traces( + showlegend=False, + colorscale=[ + [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'], + [0.5, '#F5DE11'], [1.0, '#FF0D00']], + selector={"type": "heatmap"}) + fig.update_traces( + showlegend=True, + showscale=True, + selector={"name": x_axis[-1]}) + fig.update_layout( + coloraxis_colorscale=[ + [0.0, '#3B3B3B'], [0.4999999999999999, '#ABABAB'], + [0.5, '#F5DE11'], [1.0, '#FF0D00']] + ) + print(fig) + image_filename = "{}/{}.html".format(output_dir, image_filename_prefix) + fig.write_html(image_filename) + return image_filename, fig + def main(): """entrypoint function""" conn = database_connector()[0] @@ -44,13 +161,19 @@ def main(): for fullname in trait_fullnames()] traits_data_list = [retrieve_trait_data(t, conn) for t in traits] genotype_filename = build_genotype_file(traits[0]["riset"]) + genotype = parse_genotype_file(genotype_filename) strains = load_genotype_samples(genotype_filename) exported_traits_data_list = [ export_trait_data(td, strains) for td in traits_data_list] slinked = slink(cluster_traits(exported_traits_data_list)) - orders = compute_heatmap_order(slinked) + print("SLINKED: {}".format(slinked)) + traits_order = compute_traits_order(slinked) + print("KEYS: {}".format(traits[0].keys())) + ordered_traits_names = [ + traits[idx]["trait_fullname"] for idx in traits_order] + print("ORDERS: {}".format(traits_order)) strains_and_values = retrieve_strains_and_values( - orders, strains, exported_traits_data_list) + traits_order, strains, exported_traits_data_list) strains_values = strains_and_values[0][1] trait_values = [t[2] for t in strains_and_values] traits_filename = "{}/traits_test_file_{}.txt".format( @@ -64,5 +187,67 @@ def main(): print("Main output: {}, Permutation output: {}".format( main_output, permutations_output)) + qtlresults = parse_reaper_main_results(main_output) + permudata = parse_reaper_permutation_results(permutations_output) + # print("QTLRESULTS: {}".format(qtlresults)) + # print("PERMUDATA: {}".format(permudata)) + + nearest = get_nearest_marker(traits, genotype) + print("NEAREST: {}".format(nearest)) + + organised = organise_reaper_main_results(qtlresults) + + traits_ids = [# sort numerically, but retain the ids as strings + str(i) for i in sorted({int(row["ID"]) for row in qtlresults})] + chromosome_names = sorted( + {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn) + loci_names = sorted({row["Locus"] for row in qtlresults}) + ordered_traits_names = { + res_id: trait for res_id, trait in + zip(traits_ids, + [traits[idx]["trait_fullname"] for idx in traits_order])} + # print("ordered:{}, original: {}".format( + # ordered_traits_names, [t["trait_fullname"] for t in traits])) + # print("chromosome_names:{}".format(chromosome_names)) + # print("trait_ids:{}".format(traits_ids)) + # print("loci names:{}".format(loci_names)) + hdata = process_traits_data_for_heatmap(organised, traits_ids, chromosome_names) + + # print("ZIPPED: {}".format(zip(tuple(ordered_traits_names.keys()), hdata))) + # print("HDATA LENGTH:{}, ORDERED TRAITS LENGTH:{}".format(len(hdata), len(ordered_traits_names.keys()))) + heatmaps_data = [ + generate_heatmap( + data, + "heatmap_chr{}_{}".format(chromo, random_string(10)), + y_axis=tuple( + ordered_traits_names[traits_ids[order]] + for order in traits_order), + x_label=chromo, + output_dir=TMPDIR) + for chromo, data in zip(chromosome_names, hdata)] + print("IMAGES FILENAMES: {}".format([img[0] for img in heatmaps_data])) + + dendograms_data = [ + generate_dendrogram( + data, + "dendo_chr{}_{}".format(chromo, random_string(10)), + y_axis=tuple( + ordered_traits_names[traits_ids[order]] + for order in traits_order), + x_label=chromo, + output_dir=TMPDIR) + for chromo, data in zip(chromosome_names, hdata)] + + res = generate_single_heatmap( + hdata, + "single_heatmap_{}".format(random_string(10)), + y_axis=tuple( + ordered_traits_names[traits_ids[order]] + for order in traits_order), + y_label="Traits", + x_axis=[chromo for chromo in chromosome_names], + x_label="Chromosomes", + output_dir=TMPDIR) + if __name__ == "__main__": main() |