diff options
author | zsloan | 2022-12-21 18:54:04 +0000 |
---|---|---|
committer | zsloan | 2022-12-21 13:04:07 -0600 |
commit | 331650d671d3cce44e248f90dcf7bd3776bb0384 (patch) | |
tree | f7fd475d1e625b1e746f2a32e5ca5f0fe6ada1ac | |
parent | 7e4ac66bf55f222f87174178e0348e9c40a05b56 (diff) | |
download | genenetwork2-331650d671d3cce44e248f90dcf7bd3776bb0384.tar.gz |
Add code loading contents of homology file for given mb range
Currently this hard-codes the source file. Not sure how we want to store
this data yet so just using a CSV file for now.
-rw-r--r-- | wqflask/wqflask/interval_analyst/GeneUtil.py | 25 |
1 files changed, 23 insertions, 2 deletions
diff --git a/wqflask/wqflask/interval_analyst/GeneUtil.py b/wqflask/wqflask/interval_analyst/GeneUtil.py
index 7a69126c..633cc9bf 100644
--- a/wqflask/wqflask/interval_analyst/GeneUtil.py
+++ b/wqflask/wqflask/interval_analyst/GeneUtil.py
@@ -2,8 +2,29 @@ import string
 
 from wqflask.database import database_connection
 
-# Just return a list of dictionaries
-# each dictionary contains sub-dictionary
+mm10_to_hg38_file = "/home/zas1024/gn2-zach/mm10_hg38_homology/mm10ToHg38_chain_only_sorted.csv"
+
+def load_homology(chr_name, start_mb, end_mb):
+    homology_list = []
+    with open(mm10_to_hg38_file) as h_file:
+        current_chr = 0
+        for line in h_file:
+            line_items = line.split()
+            this_dict = {
+                "mm10_chr": line_items[2][3:],
+                "mm10_start": float(line_items[5])/1000000,
+                "mm10_end": float(line_items[6])/1000000,
+                "hg38_chr": line_items[7][3:],
+                "hg38_strand": line_items[9],
+                "hg38_start": float(line_items[10])/1000000,
+                "hg38_end": float(line_items[11])/1000000
+            }
+
+            if str(this_dict["mm10_chr"]) == str(chr_name) and this_dict["mm10_start"]>= start_mb and this_dict["mm10_end"] <= end_mb:
+                homology_list.append(this_dict)
+
+    return homology_list
+
 def loadGenes(chrName, diffCol, startMb, endMb, species='mouse'):
     assembly_map = {
         "mouse": "mm10",