import re ## generate the html page for the top genes ## put gene names and alias in a dictionary geneNames={} with open ("./ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted.txt","r") as f: for line in f: (genes, count)=line.strip().split("\t") gene=genes.split("|") geneNames[gene[0]]=genes.strip() out=str() html=str() with open("./topGeneAbstractCount.tab" ,"r") as gc: cnt=0 for line in gc: cnt+=1 line=re.sub(r'^\s+','',line) print (line) pmid_cnt, symb=line.strip().split() out+= symb+"\t"+geneNames[symb]+"\n" html+="