# Origin: commit eb51746ce69fb43450b6896e4a71e5052df51115 ("find top genes"),
# Hao Chen, Fri, 17 May 2019 19:10:08 -0500; adds topGene_step3_generate_html.py.
"""Generate the HTML page for the top genes.

Reads a gene symbol/alias table and a per-gene count table, then writes

* ``topGene_symb_alias.txt`` -- one ``symbol<TAB>aliases`` line per top gene;
* ``./templates/topAddictionGene.html`` -- a template extending
  ``layout.html`` that lists the same genes.

NOTE(review): the copy of this script under review had its HTML tags stripped
from the string literals.  The markup below (``<li>``, ``<h4>``, ``<hr>``,
``<ol>``) is reconstructed from the visible layout -- confirm it against the
repository before relying on it.
"""
import re  # retained from the original script; no longer needed by the parsing below

GENE_NAMES_PATH = "./ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted.txt"
GENE_COUNTS_PATH = "./topGeneAbstractCount.tab"
ALIAS_TABLE_PATH = "topGene_symb_alias.txt"
PAGE_PATH = "./templates/topAddictionGene.html"

# Number of rows of the count table that make it onto the page.
TOP_GENE_LIMIT = 500

PAGE_HEAD = '''
{% extends "layout.html" %}
{% block content %}

<h4> Top addiction related genes </h4>

<hr>
These genes are ranked by the number of PubMed abstracts that contain the name of the gene and one or more addiction related keyword.
<hr>

<ol>'''

PAGE_TAIL = '''
</ol>
{% endblock %}
'''


def load_gene_names(lines):
    """Map each primary gene symbol to its full ``symbol|alias|...`` string.

    ``lines`` is an iterable of ``genes<TAB>count`` records, where ``genes`` is
    a ``|``-separated list whose first item is used as the key.  Blank lines
    are skipped; a record without exactly two tab-separated fields raises
    ``ValueError``.  A later record for the same symbol replaces the earlier.
    """
    gene_names = {}
    for line in lines:
        record = line.strip()
        if not record:
            continue
        genes, _count = record.split("\t")
        gene_names[genes.split("|")[0]] = genes.strip()
    return gene_names


def collect_top_genes(count_lines, gene_names, limit=TOP_GENE_LIMIT):
    """Return ``(symbol, aliases)`` pairs for the first ``limit`` count records.

    Each record holds two whitespace-separated fields, a count and a symbol;
    leading whitespace is ignored.  Blank lines are skipped and do not count
    towards ``limit``.  A symbol absent from ``gene_names`` is listed with
    itself as its alias string rather than aborting the run.

    Raises ``ValueError`` for a non-blank record that does not have exactly
    two fields.
    """
    entries = []
    for line_no, line in enumerate(count_lines, start=1):
        if len(entries) >= limit:
            break
        fields = line.split()  # split() already discards surrounding whitespace
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "line %d: expected '<count> <symbol>', got %r" % (line_no, line)
            )
        symb = fields[1]
        entries.append((symb, gene_names.get(symb, symb)))
    return entries


def format_alias_table(entries):
    """Render the entries as ``symbol<TAB>aliases`` lines."""
    return "".join(symb + "\t" + aliases + "\n" for symb, aliases in entries)


def format_list_items(entries):
    """Render one HTML list item per entry, in order."""
    # NOTE(review): item markup reconstructed; the original tags were not visible.
    return "".join("<li>" + symb + "</li>\n" for symb, _aliases in entries)


def render_page(list_items):
    """Wrap the list items in the page template.

    Plain concatenation is used because the template contains literal
    ``{% ... %}`` markers that ``str.format`` would misread.
    """
    return PAGE_HEAD + list_items + PAGE_TAIL


def main():
    """Read both input tables and write the alias table and the HTML page."""
    with open(GENE_NAMES_PATH, "r") as names_file:
        gene_names = load_gene_names(names_file)

    with open(GENE_COUNTS_PATH, "r") as counts_file:
        entries = collect_top_genes(counts_file, gene_names)

    with open(ALIAS_TABLE_PATH, "w") as table_file:
        table_file.write(format_alias_table(entries))

    with open(PAGE_PATH, "w") as page_file:
        page_file.write(render_page(format_list_items(entries)))


if __name__ == "__main__":
    main()