about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-09-15 12:42:38 +0300
committerFrederick Muriuki Muriithi2021-09-15 12:42:38 +0300
commit347301c1bc60ba5036625364ee48a5d72eeb1186 (patch)
treea32ba9a5671caa8b4d3f9da551f39564690b7a57
parente9fb4e45cfc52c5d86ef534b0e7f42ba8f4c84d3 (diff)
downloadgenenetwork3-347301c1bc60ba5036625364ee48a5d72eeb1186.tar.gz
Update entry-point function for heatmap generation
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Copy over code from the proof-of-concept implementation and clean it up a
  little for the entry-point function for heatmap generation via the API
-rw-r--r--gn3/heatmaps.py70
1 files changed, 49 insertions, 21 deletions
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index f3d7d25..4349ee0 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -131,7 +131,7 @@ def cluster_traits(traits_data_list: Sequence[Dict]):
 
     return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
 
-def heatmap_data(traits_names, conn: Any):
+def build_heatmap(traits_names, conn: Any):
     """
     heatmap function
 
@@ -148,27 +148,55 @@ def heatmap_data(traits_names, conn: Any):
     TODO: Elaborate on the parameters here...
     """
     threshold = 0 # webqtlConfig.PUBLICTHRESH
-    def __retrieve_traitlist_and_datalist(threshold, fullname):
-        trait = retrieve_trait_info(threshold, fullname, conn)
-        return (trait, retrieve_trait_data(trait, conn))
-
-    traits_details = [
-        __retrieve_traitlist_and_datalist(threshold, fullname)
-        for fullname in traits_names]
-    traits_list = tuple(x[0] for x in traits_details)
-    traits_data_list = [x[1] for x in traits_details]
-    genotype_filename = build_genotype_file(traits_list[0]["riset"])
-    strainlist = load_genotype_samples(genotype_filename)
-    exported_traits_data_list = tuple(
-        export_trait_data(td, strainlist) for td in traits_data_list)
-    slink_data = slink(cluster_traits(exported_traits_data_list))
-    ordering_data = compute_heatmap_order(slink_data)
+    traits = [
+        retrieve_trait_info(threshold, fullname, conn)
+        for fullname in trait_fullnames()]
+    traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
+    genotype_filename = build_genotype_file(traits[0]["riset"])
+    genotype = parse_genotype_file(genotype_filename)
+    strains = load_genotype_samples(genotype_filename)
+    exported_traits_data_list = [
+        export_trait_data(td, strains) for td in traits_data_list]
+    slinked = slink(cluster_traits(exported_traits_data_list))
+    traits_order = compute_traits_order(slinked)
+    ordered_traits_names = [
+        traits[idx]["trait_fullname"] for idx in traits_order]
     strains_and_values = retrieve_strains_and_values(
-        ordering_data, strainlist, exported_traits_data_list)
-    strains_values = strains_and_values[0][1]
-    trait_values = [t[2] for t in strains_and_values]
-    traits_filename = generate_traits_filename()
-    generate_traits_file(strains_values, trait_values, traits_filename)
+        traits_order, strains, exported_traits_data_list)
+    traits_filename = "{}/traits_test_file_{}.txt".format(
+        TMPDIR, random_string(10))
+    generate_traits_file(
+        strains_and_values[0][1],
+        [t[2] for t in strains_and_values],
+        traits_filename)
+
+    main_output, permutations_output = run_reaper(
+        genotype_filename, traits_filename, separate_nperm_output=True)
+
+    qtlresults = parse_reaper_main_results(main_output)
+    permudata = parse_reaper_permutation_results(permutations_output)
+    organised = organise_reaper_main_results(qtlresults)
+
+    traits_ids = [# sort numerically, but retain the ids as strings
+        str(i) for i in sorted({int(row["ID"]) for row in qtlresults})]
+    chromosome_names = sorted(
+        {row["Chr"] for row in qtlresults}, key = chromosome_sorter_key_fn)
+    loci_names = sorted({row["Locus"] for row in qtlresults})
+    ordered_traits_names = {
+        res_id: trait for res_id, trait in
+        zip(traits_ids,
+            [traits[idx]["trait_fullname"] for idx in traits_order])}
+
+    return generate_clustered_heatmap(
+        process_traits_data_for_heatmap(
+            organised, traits_ids, chromosome_names),
+        "single_heatmap_{}".format(random_string(10)),
+        y_axis=tuple(
+            ordered_traits_names[traits_ids[order]]
+                for order in traits_order),
+        y_label="Traits",
+        x_axis=[chromo for chromo in chromosome_names],
+        x_label="Chromosomes")
 
     return {
         "slink_data": slink_data,