#!/bin/env python3
"""Flask web front end that searches gene/keyword relations and renders them.

Routes in this module take a list of query terms, run the keyword searches
provided by the ``ratspub`` module, store the results in per-session files in
the system temp directory, and render them through the ``cytoscape.html`` and
``sentences.html`` templates.

NOTE(review): the HTML fragments built here are passed to templates as
ready-made markup, so the templates presumably render them with ``|safe``.
User-supplied text placed into those fragments is therefore HTML-escaped
here. User-supplied text is also interpolated into the JavaScript element
strings handed to ``cytoscape.html``; that context is not escaped by this
module and should be reviewed together with the template.
"""
from flask import Flask, render_template, request, session, Response, abort
import html
import re
import shlex
import tempfile
import random
import string
from ratspub import *
import time
import os

app = Flask(__name__)
# NOTE(review): hard-coded secret key; load it from configuration or the
# environment for any deployment that is reachable by untrusted users.
app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4'

# Largest number of query terms accepted by /progress.  The original check
# rejected queries with 160 or more terms, so 159 keeps that limit unchanged.
MAX_QUERY_TERMS = 159

# Archived sentences for the top addiction genes (read by /showTopGene and,
# through the edge IDs it generates, by /sentences).
TOP_GENE_SENT_FILE = "gene_addiction_sentences.tab"

# Basename shape of the per-session sentence files written by /search
# ("_snt") and /gene_gene ("_ggSent"); see _new_session_path().
_SESSION_SENT_FILE_RE = re.compile(r"tmp[A-Za-z]{6}_(?:snt|ggSent)")


def _new_session_path():
    """Return a fresh path prefix in the temp directory for one search."""
    suffix = ''.join(random.choice(string.ascii_letters) for _ in range(6))
    return tempfile.gettempdir() + "/tmp" + suffix


def _is_allowed_sentence_file(path):
    """Return True if *path* is a sentence file produced by this application.

    The path arrives in a query parameter, so it must never be opened
    unchecked: only the archived top-gene file and per-session files inside
    the temp directory are accepted.
    """
    if path == TOP_GENE_SENT_FILE:
        return True
    directory, name = os.path.split(path)
    return (directory == tempfile.gettempdir()
            and _SESSION_SENT_FILE_RE.fullmatch(name) is not None)


@app.route("/")
def root():
    """Render the search form."""
    return render_template('index.html')


@app.route("/about")
def about():
    """Render the static "about" page."""
    return render_template('about.html')


@app.route('/progress')
def progress():
    """Validate the query, store it in the session and show the progress page.

    Terms in the ``query`` parameter may be separated by commas, semicolons
    or whitespace.  An empty query or one with more than ``MAX_QUERY_TERMS``
    terms re-renders the index page with an explanatory message.
    """
    raw_query = request.args.get('query', '')
    genes = raw_query.replace(",", " ").replace(";", " ").split()
    if len(genes) > MAX_QUERY_TERMS:
        message = ("Up to " + str(MAX_QUERY_TERMS)
                   + " terms can be searched at a time")
        return render_template('index.html', message=message)
    if not genes:
        message = "Please enter a search term "
        return render_template('index.html', message=message)
    # put the query in session cookie
    session['query'] = genes
    # generate a unique path prefix to track the results of this search
    session['path'] = _new_session_path()
    return render_template('progress.html')


@app.route("/search")
def search():
    """Run the keyword searches for the stored query, streaming progress.

    The response is a ``text/event-stream`` whose ``data:`` messages carry a
    percentage.  When the last term has been processed, the collected
    sentences, the graph elements and the terms without any edge are written
    to the session's ``_snt``, ``_cy`` and ``_0link`` files.
    """
    genes = session.get('query')
    base_path = session.get('path')
    if not genes or not base_path:
        abort(400)
    # four search steps are reported per term
    percent = round(100 / (len(genes) * 4), 1)
    snt_file = base_path + "_snt"
    cy_file = base_path + "_cy"
    zero_link_file = base_path + "_0link"

    def generate(genes, tf_name):
        sentences = []
        edges = []
        nodes = default_nodes
        nodes_to_hide = []
        done = 0
        # addiction terms must present with at least one drug
        addiction = undic(addiction_d) + ") AND (" + undic(drug_d)
        function = undic(function_d)
        drug = undic(drug_d)
        for search_cnt, gene in enumerate(genes, start=1):
            gene = gene.replace("-", " ")
            # report progress immediately
            done += percent
            yield "data:" + str(done) + "\n\n"
            sent0 = gene_category(gene, addiction_d, addiction, "addiction")
            e0 = generate_edges(sent0, tf_name)
            sent1 = gene_category(gene, function_d, function, "function")
            done += percent
            yield "data:" + str(done) + "\n\n"
            e1 = generate_edges(sent1, tf_name)
            sent2 = gene_category(gene, drug_d, drug, "drug")
            done += percent
            yield "data:" + str(done) + "\n\n"
            e2 = generate_edges(sent2, tf_name)
            # brain has its own query terms that does not include the many
            # short acronyms
            sent3 = gene_category(gene, brain_d, brain_query_term, "brain")
            done += percent
            e3 = generate_edges(sent3, tf_name)
            gene_edges = e0 + e1 + e2 + e3
            if len(gene_edges) > 1:
                edges.append(gene_edges)
                nodes += ("{ data: { id: '" + gene
                          + "', nodecolor:'#E74C3C', fontweight:700, "
                          + "url:'/gene_gene?gene=" + gene + "'} },\n")
            else:
                nodes_to_hide.append(gene + " ")
            sentences.append(sent0 + sent1 + sent2 + sent3)
            # save data before the last yield, so the files exist by the
            # time the client sees 100%
            if search_cnt == len(genes):
                done = 100
                with open(tf_name, "w") as sntdata:
                    sntdata.write("".join(sentences))
                with open(cy_file, "w") as cysdata:
                    cysdata.write(nodes + "".join(edges))
                with open(zero_link_file, "w") as zero_link_node:
                    zero_link_node.write("".join(nodes_to_hide))
            yield "data:" + str(done) + "\n\n"

    return Response(generate(genes, snt_file), mimetype='text/event-stream')


@app.route('/cytoscape')
def cytoscape():
    """Render the gene-vs-keyword graph saved by /search for this session."""
    message2 = (
        "This graph is interactive: <ul>"
        "<li>Click on a line to see the sentences in a new window</li>"
        "<li>Click on a gene to search its relations with top 150 addiction"
        " genes</li>"
        "<li>Click on a keyword to see the terms included in the search in a"
        " new window</li>"
        "</ul>"
    )
    with open(session['path'] + "_cy", "r") as f:
        elements = f.read()
    with open(session['path'] + "_0link", "r") as z:
        zero_link = z.read()
    if zero_link:
        # the term list originates from user input, so escape it
        message2 += ("<p>No result was found for these genes: "
                     + html.escape(zero_link) + "</p>")
    return render_template('cytoscape.html', elements=elements,
                           message="Gene vs Keywords", message2=message2)


@app.route("/sentences")
def sentences():
    """List the stored sentences for one graph edge.

    The ``edgeID`` parameter has the form ``file|gene|category``.  Each
    non-empty line of the file is expected to hold five tab-separated fields
    (gene, unused, category, PMID, sentence text); lines matching the gene
    and category are listed.
    """
    edge = request.args.get('edgeID')
    if not edge:
        abort(400)
    try:
        (tf_name, gene0, cat0) = edge.split("|")
    except ValueError:
        abort(400)
    # tf_name is client-controlled: never open anything but our own files
    if not _is_allowed_sentence_file(tf_name):
        abort(404)
    try:
        with open(tf_name, "r") as df:
            all_sents = df.read()
    except FileNotFoundError:
        abort(404)
    items = []
    for sent in all_sents.split("\n"):
        if not sent.strip():
            continue
        (gene, nouse, cat, pmid, text) = sent.split("\t")
        if gene == gene0 and cat == cat0:
            # text may carry highlight markup added by /gene_gene, so it is
            # deliberately not escaped here
            items.append("<li> " + text + " PMID:" + pmid + "</li>\n")
    out = ("<h3>" + html.escape(gene0) + " and " + html.escape(cat0)
           + "</h3>\n" + "".join(items))
    return render_template('sentences.html', sentences="<ol>" + out + "</ol>")


## show the cytoscape graph for one gene from the top gene list
@app.route("/showTopGene")
def showTopGene():
    """Render the archived graph for one gene of the top addiction gene list.

    Counts, per category, the sentences in ``TOP_GENE_SENT_FILE`` whose gene
    symbol equals the ``topGene`` parameter and draws one edge per category.
    """
    topGene = request.args.get('topGene')
    if not topGene:
        abort(400)
    topGeneSentFile = TOP_GENE_SENT_FILE
    catCnt = {}
    with open(topGeneSentFile, "r") as sents:
        for line in sents:
            (symb, cat0, cat1, pmid, sent) = line.split("\t")
            if symb == topGene:
                catCnt[cat1] = catCnt.get(cat1, 0) + 1
    nodes = ("{ data: { id: '" + topGene + "', nodecolor: '" + "#2471A3"
             + "', fontweight:700, url:'/progress?query=" + topGene
             + "' } },\n")
    edges = str()
    for key in catCnt:
        if key in drug_d:
            nc = nodecolor["drug"]
        else:
            nc = nodecolor["addiction"]
        nodes += ("{ data: { id: '" + key + "', nodecolor: '" + nc
                  + "', nodetype: 'top150', url:'/shownode?node=" + key
                  + "' } },\n")
        edgeID = topGeneSentFile + "|" + topGene + "|" + key
        edges += ("{ data: { id: '" + edgeID + "', source: '" + topGene
                  + "', target: '" + key + "', sentCnt: " + str(catCnt[key])
                  + ", url:'/sentences?edgeID=" + edgeID + "' } },\n")
    message2 = (
        "<ul><li><b>" + html.escape(topGene)
        + "</b> is one of the top addiction genes.</li>"
        "<li>An archived search is shown. Click on the blue circle to update"
        " the results and include keywords for brain region and gene"
        " function. <strong>The update may take a long time to finish."
        "</strong> </li></ul>"
    )
    return render_template("cytoscape.html", elements=nodes + edges,
                           message="Top addiction genes", message2=message2)


@app.route("/shownode")
def shownode():
    """List the search terms behind one keyword node.

    The ``node`` parameter is looked up in the merged keyword dictionaries;
    its value is a ``|``-separated term list shown one term per item.
    """
    node = request.args.get('node')
    allnodes = {**brain_d, **drug_d, **function_d, **addiction_d}
    if node is None:
        abort(400)
    if node not in allnodes:
        abort(404)
    out = ("<h3>" + node.upper() + "</h3>\n<ul><li>"
           + allnodes[node].replace("|", "</li><li>") + "</li></ul>")
    return render_template('sentences.html', sentences=out)


@app.route("/gene_gene")
def gene_gene():
    """Relate the queried gene to the top addiction genes via PubMed abstracts.

    Runs the external ``esearch``/``efetch``/``fetch-pubmed``/``xtract``
    pipeline for the ``gene`` parameter, keeps the sentences that mention
    both the query and a top gene, stores them in the session's ``_ggSent``
    file and renders the resulting graph.
    """
    query = request.args.get("gene")
    if not query:
        abort(400)
    if 'path' not in session:
        # reached without a prior search in this session
        session['path'] = _new_session_path()
    tmp_ggPMID = session['path'] + "_ggPMID"
    # The query is untrusted: quote it so it reaches esearch as one literal
    # argument instead of being interpreted by the shell.
    os.system("esearch -db pubmed -query " + shlex.quote(query)
              + " | efetch -format uid |sort >" + shlex.quote(tmp_ggPMID))
    abstracts = os.popen(
        "comm -1 -2 top_150_addiction_genes_uniq.pmid "
        + shlex.quote(tmp_ggPMID)
        + " |fetch-pubmed -path " + pubmed_path
        + " | xtract -pattern PubmedArticle -element"
        + " MedlineCitation/PMID,ArticleTitle,AbstractText"
        + "|sed \"s/-/ /g\"").read()
    try:
        os.remove(tmp_ggPMID)
    except FileNotFoundError:
        pass
    topGenes = dict()
    with open("./top_150_genes_symb_alias.txt", "r") as top_f:
        for line in top_f:
            (symb, alias) = line.strip().split("\t")
            topGenes[symb] = alias
    out = []
    hitGenes = dict()
    # NOTE(review): the raw query is used as a regular expression below, as
    # in the original code; confirm how findWholeWord treats metacharacters
    # before escaping it here.
    mentions_query = findWholeWord(query)
    for row in abstracts.split("\n"):
        tiab = row.split("\t")
        pmid = tiab.pop(0)
        tiab = " ".join(tiab)
        ## keep the sentence only if it contains the gene
        for sent in sent_tokenize(tiab):
            if not mentions_query(sent):
                continue
            sent = re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent,
                          flags=re.I)
            for symb in topGenes:
                if findWholeWord(topGenes[symb])(sent):
                    # drop the highlight left by a previously matched top
                    # gene so only the current one is marked
                    sent = sent.replace("<b>", "").replace("</b>", "")
                    sent = re.sub(r'\b(%s)\b' % topGenes[symb], r'<b>\1</b>',
                                  sent, flags=re.I)
                    out.append(query + "\t" + "gene\t" + symb + "\t" + pmid
                               + "\t" + sent + "\n")
                    hitGenes[symb] = hitGenes.get(symb, 0) + 1
    gg_file = session['path'] + "_ggSent"  # gene_gene
    with open(gg_file, "w+") as gg:
        gg.write("".join(out))
    gg_colors = {'query': "#E74C3C", 'top150': "#ccd1d1"}
    nodes = ("{ data: { id: '" + query + "', nodecolor: '"
             + gg_colors['query']
             + "', nodetype: 'ggquery', fontweight:700} },\n")
    edges = str()
    for key in hitGenes:
        nodes += ("{ data: { id: '" + key + "', nodecolor: '"
                  + gg_colors['top150']
                  + "', nodetype: 'top150', url:'/showTopGene?topGene=" + key
                  + "' } },\n")
        edgeID = gg_file + "|" + query + "|" + key
        edges += ("{ data: { id: '" + edgeID + "', source: '" + query
                  + "', target: '" + key + "', sentCnt: "
                  + str(hitGenes[key]) + ", url:'/sentences?edgeID=" + edgeID
                  + "' } },\n")
    message2 = (
        "<ul><li>Top addiction genes are shown in grey.</li>"
        "<li>Click on a grey gene will show an archived search.</li>"
        "<li>Click on the lines will show gene-gene relations in a new"
        " window</li></ul>"
    )
    return render_template("cytoscape.html", elements=nodes + edges,
                           message="Gene vs top addiction genes",
                           message2=message2)


## generate a page that lists all the top 150 addiction genes with links to
## cytoscape graph.
@app.route("/allTopGenes")
def top150genes():
    """Render the page listing the top addiction genes."""
    return render_template("topAddictionGene.html")


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger; use it for
    # local development only.
    app.run(debug=True)