Add code to load the contents of the homology file for a given Mb range

Currently this hard-codes the path to the source file. We have not yet decided how to store this data, so it is just read from a CSV file for now.
author: zsloan 2022-12-21 18:54:04 +0000
committer: zsloan 2022-12-21 13:04:07 -0600
commit: 331650d671d3cce44e248f90dcf7bd3776bb0384 (patch)
tree: f7fd475d1e625b1e746f2a32e5ca5f0fe6ada1ac /wqflask
parent: 7e4ac66bf55f222f87174178e0348e9c40a05b56 (diff)
download: genenetwork2-331650d671d3cce44e248f90dcf7bd3776bb0384.tar.gz
1 files changed, 23 insertions, 2 deletions
diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py
index 7a69126c..633cc9bf 100644
--- a/wqflask/wqflask/interval_analyst/GeneUtil.py
+++ b/wqflask/wqflask/interval_analyst/GeneUtil.py
@@ -2,8 +2,29 @@ import string
 
 from wqflask.database import database_connection
 
-# Just return a list of dictionaries
-# each dictionary contains sub-dictionary
+# Hard-coded path to the mm10 -> hg38 homology file read by load_homology().
+# NOTE(review): this is a developer-local absolute path -- presumably a
+# temporary placeholder until the data's storage location is decided.
+mm10_to_hg38_file = "/home/zas1024/gn2-zach/mm10_hg38_homology/mm10ToHg38_chain_only_sorted.csv"
+
+def load_homology(chr_name, start_mb, end_mb):
+    """Load mm10 -> hg38 homology records that fall inside a given Mb range.
+
+    Reads the file named by ``mm10_to_hg38_file`` line by line, splits each
+    line on whitespace, and keeps the records whose mm10 chromosome equals
+    ``chr_name`` and whose mm10 interval lies entirely within
+    ``[start_mb, end_mb]``.
+
+    Args:
+        chr_name: mm10 chromosome to match. It is compared as a string
+            against the third field with its first three characters dropped
+            (presumably a "chr" prefix -- confirm against the data file).
+        start_mb: Lower bound of the range; a record's start must be >= this.
+        end_mb: Upper bound of the range; a record's end must be <= this.
+
+    Returns:
+        A list of dicts with the keys "mm10_chr", "mm10_start", "mm10_end",
+        "hg38_chr", "hg38_strand", "hg38_start" and "hg38_end". The start and
+        end values are the file's values divided by 1,000,000.
+    """
+    homology_list = []
+    with open(mm10_to_hg38_file) as h_file:
+        for line in h_file:
+            line_items = line.split()
+            # Blank or truncated lines (e.g. an empty line at the end of the
+            # file) cannot be indexed up to field 11, so skip them rather
+            # than raise IndexError.
+            if len(line_items) < 12:
+                continue
+
+            this_dict = {
+                "mm10_chr": line_items[2][3:],
+                "mm10_start": float(line_items[5])/1000000,
+                "mm10_end": float(line_items[6])/1000000,
+                "hg38_chr": line_items[7][3:],
+                "hg38_strand": line_items[9],
+                "hg38_start": float(line_items[10])/1000000,
+                "hg38_end": float(line_items[11])/1000000
+            }
+
+            # Keep only records on the requested chromosome that are fully
+            # contained in the requested window (partial overlaps are dropped).
+            if (this_dict["mm10_chr"] == str(chr_name)
+                    and this_dict["mm10_start"] >= start_mb
+                    and this_dict["mm10_end"] <= end_mb):
+                homology_list.append(this_dict)
+
+    return homology_list
+
 def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'):
     assembly_map = {
         "mouse": "mm10",
author	zsloan	2022-12-21 18:54:04 +0000
committer	zsloan	2022-12-21 13:04:07 -0600
commit	331650d671d3cce44e248f90dcf7bd3776bb0384 (patch)
tree	f7fd475d1e625b1e746f2a32e5ca5f0fe6ada1ac /wqflask
parent	7e4ac66bf55f222f87174178e0348e9c40a05b56 (diff)
download	genenetwork2-331650d671d3cce44e248f90dcf7bd3776bb0384.tar.gz