import re import sys ## generate the html page for the top genes ## put gene names and alias in a dictionary #ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted_absCnt_absCnt_sorted.txt if (len(sys.argv) != 2): print ("please provide the name of a sorted gene abstract count file") sys.exit() geneNames={} with open (sys.argv[1],"r") as f: for line in f: (genes, count)=line.strip().split("\t") gene=genes.split("|") names=re.sub(r'^.*?\|', "", genes) geneNames[gene[0]]=names.strip().replace("|", "; ") out=str() html=str() with open("./topGeneAbstractCount.tab" ,"r") as gc: cnt=0 for line in gc: cnt+=1 line=re.sub(r'^\s+','',line) print (line) pmid_cnt, symb=line.strip().split() out+= symb+"\t"+geneNames[symb]+"\n" html+="