"""Generate the HTML page and the symbol/alias table for the top genes.

Usage:
    python <this script> <sorted gene abstract count file>

The command-line argument is a tab-separated file whose first column is
``SYMBOL|alias1|alias2|...`` and whose second column is an abstract count
(e.g. ncbi_gene_symb_syno_name_txid9606_absCnt_sorted_absCnt_sorted_absCnt_absCnt_sorted.txt).
The ranked genes are read from ``./topGeneAbstractCount.tab``, where every
line is ``<abstract count> <symbol>`` separated by whitespace.

Outputs:
    topGene_symb_alias.txt              one ``symbol<TAB>aliases`` line per gene
    ./templates/topAddictionGene.html   template extending ``layout.html``
"""
import html
import re
import sys

TOP_GENE_COUNT_FILE = "./topGeneAbstractCount.tab"
ALIAS_TABLE_FILE = "topGene_symb_alias.txt"
TEMPLATE_FILE = "./templates/topAddictionGene.html"
TOP_GENE_LIMIT = 200

_LEADING_WS = re.compile(r"^\s+")

# NOTE(review): the HTML tags of this template were reconstructed (heading,
# paragraph, unordered list). Confirm them against the markup the site
# expects before deploying.
PAGE_HEAD = '''
{% extends "layout.html" %}
{% block content %}

<h4>Top addiction related genes</h4>

<p>These genes are ranked by the number of PubMed abstracts that contain the name of the gene and one or more addiction related keyword.</p>

<ul>
'''
PAGE_TAIL = '''</ul>
{% endblock %}
'''


def parse_gene_names(lines):
    """Map each gene symbol to a "; "-separated string of its aliases.

    ``lines`` is an iterable of ``SYMBOL|alias|...<TAB>count`` records.
    A record without any ``|`` maps the symbol to itself.  Blank lines are
    skipped.

    Raises:
        ValueError: if a non-blank line does not have exactly two
            tab-separated fields.
    """
    gene_names = {}
    for lineno, line in enumerate(lines, 1):
        record = line.strip()
        if not record:
            continue
        fields = record.split("\t")
        if len(fields) != 2:
            raise ValueError(
                "gene name file line {}: expected 2 tab-separated fields, "
                "got {}".format(lineno, len(fields))
            )
        genes = fields[0]
        symbol, sep, aliases = genes.partition("|")
        if not sep:
            # No alias list: the symbol doubles as its own display name.
            aliases = genes
        gene_names[symbol] = aliases.strip().replace("|", "; ")
    return gene_names


def parse_top_genes(lines, gene_names, limit=TOP_GENE_LIMIT):
    """Return ``(symbol, aliases)`` pairs for the first ``limit`` ranked genes.

    ``lines`` is an iterable of ``<count> <symbol>`` records, already sorted
    by rank.  Blank lines are skipped and do not count towards ``limit``.
    Each accepted line is echoed to stdout.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields.
        KeyError: if a symbol has no entry in ``gene_names``.
    """
    entries = []
    for lineno, raw in enumerate(lines, 1):
        if len(entries) >= limit:
            break
        line = _LEADING_WS.sub("", raw)
        if not line.strip():
            continue
        print(line)
        fields = line.split()
        if len(fields) != 2:
            raise ValueError(
                "top gene file line {}: expected '<count> <symbol>', "
                "got {!r}".format(lineno, line.strip())
            )
        symbol = fields[1]
        try:
            aliases = gene_names[symbol]
        except KeyError:
            raise KeyError(
                "top gene file line {}: symbol {!r} is missing from the "
                "gene name file".format(lineno, symbol)
            ) from None
        entries.append((symbol, aliases))
    return entries


def render_alias_table(entries):
    """Return the ``symbol<TAB>aliases`` text, one line per entry."""
    return "".join(
        "{}\t{}\n".format(symbol, aliases) for symbol, aliases in entries
    )


def render_list_items(entries):
    """Return one HTML list item per entry, with the text HTML-escaped."""
    return "".join(
        "<li>{} ({})</li>\n".format(
            html.escape(symbol, quote=False),
            html.escape(aliases, quote=False),
        )
        for symbol, aliases in entries
    )


def render_page(entries):
    """Return the full template text for the top-gene page."""
    return PAGE_HEAD + render_list_items(entries) + PAGE_TAIL


def main(argv):
    """Run the generator; return the process exit status."""
    if len(argv) != 2:
        print("please provide the name of a sorted gene abstract count file")
        return 1

    with open(argv[1], "r") as names_f:
        gene_names = parse_gene_names(names_f)

    with open(TOP_GENE_COUNT_FILE, "r") as counts_f:
        entries = parse_top_genes(counts_f, gene_names)

    with open(ALIAS_TABLE_FILE, "w") as table_f:
        table_f.write(render_alias_table(entries))

    with open(TEMPLATE_FILE, "w") as page_f:
        page_f.write(render_page(entries))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))