From d40664196495a76ce344d94bdf283b512004ce67 Mon Sep 17 00:00:00 2001
From: Hao Chen
Date: Sun, 19 May 2019 12:17:50 -0500
Subject: search for top addiction genes

---
 topGene_step3_generate_html.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'topGene_step3_generate_html.py')

diff --git a/topGene_step3_generate_html.py b/topGene_step3_generate_html.py
index dfcd6fe..6755adb 100755
--- a/topGene_step3_generate_html.py
+++ b/topGene_step3_generate_html.py
@@ -1,14 +1,21 @@
 import re
+import sys
 
 ## generate the html page for the top genes
 
 ## put gene names and alias in a dictionary
+#ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted_absCnt_absCnt_sorted.txt
+if (len(sys.argv) != 2):
+    print ("please provide the name of a sorted gene abstract count file")
+    sys.exit()
+
 geneNames={}
-with open ("./ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted.txt","r") as f:
+with open (sys.argv[1],"r") as f:
     for line in f:
         (genes, count)=line.strip().split("\t")
         gene=genes.split("|")
-        geneNames[gene[0]]=genes.strip()
+        names=re.sub(r'^.*?\|', "", genes)
+        geneNames[gene[0]]=names.strip().replace("|", "; ")
 
 out=str()
 html=str()
@@ -20,8 +27,8 @@ with open("./topGeneAbstractCount.tab" ,"r") as gc:
         print (line)
         pmid_cnt, symb=line.strip().split()
         out+= symb+"\t"+geneNames[symb]+"\n"
-        html+="
  • "+symb+"
    \n"
-        if cnt==500:
+        html+="
  • "+symb+" ("+geneNames[symb]+")
    \n"
+        if cnt==200:
             break
 
 with open("topGene_symb_alias.txt", "w+") as tg:
-- 
cgit v1.2.3