From eb51746ce69fb43450b6896e4a71e5052df51115 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Fri, 17 May 2019 19:10:08 -0500 Subject: find top genes --- topGene_step3_generate_html.py | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 topGene_step3_generate_html.py (limited to 'topGene_step3_generate_html.py') diff --git a/topGene_step3_generate_html.py b/topGene_step3_generate_html.py new file mode 100755 index 0000000..dfcd6fe --- /dev/null +++ b/topGene_step3_generate_html.py @@ -0,0 +1,50 @@ +import re + +## generate the html page for the top genes + +## put gene names and alias in a dictionary +geneNames={} +with open ("./ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted.txt","r") as f: + for line in f: + (genes, count)=line.strip().split("\t") + gene=genes.split("|") + geneNames[gene[0]]=genes.strip() + +out=str() +html=str() +with open("./topGeneAbstractCount.tab" ,"r") as gc: + cnt=0 + for line in gc: + cnt+=1 + line=re.sub(r'^\s+','',line) + print (line) + pmid_cnt, symb=line.strip().split() + out+= symb+"\t"+geneNames[symb]+"\n" + html+="