#!/bin/env python3 from flask import Flask, render_template, request, session, Response import tempfile import random import string from ratspub import * import time import os app=Flask(__name__) app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4' @app.route("/") def root(): return render_template('index.html') @app.route("/about") def about(): return render_template('about.html') @app.route('/progress') def progress(): # only 1-6 terms are allowed genes=request.args.get('query') genes=genes.replace(",", " ") genes=genes.replace(";", " ") genes=genes.split() if len(genes)>=100: message="Up to 100 terms can be searched at a time" return render_template('index.html', message=message) elif len(genes)==0: message="Please enter a search term " return render_template('index.html', message=message) # put the query in session cookie session['query']=genes # generate a unique session ID to track the results tf_path=tempfile.gettempdir() session['path']=tf_path+"/tmp" + ''.join(random.choice(string.ascii_letters) for x in range(6)) return render_template('progress.html', url_in="search", url_out="cytoscape") @app.route("/search") def search(): genes=session['query'] percent=round(100/(len(genes)*4),1) snt_file=session['path']+"_snt" cysdata=open(session['path']+"_cy","w+") sntdata=open(snt_file,"w+") zeroLinkNode=open(session['path']+"_0link","w+") def generate(genes, tf_name): sentences=str() edges=str() nodes=default_nodes progress=0 searchCnt=0 nodesToHide=str() for gene in genes: gene=gene.replace("-"," ") # report progress immediately progress+=percent yield "data:"+str(progress)+"\n\n" #addiction terms must present with at least one drug addiction=undic(addiction_d) +") AND ("+undic(drug_d) sent0=gene_category(gene, addiction_d, addiction, "addiction") e0=generate_edges(sent0, tf_name) # function=undic(function_d) sent1=gene_category(gene, function_d, function, "function") progress+=percent yield "data:"+str(progress)+"\n\n" e1=generate_edges(sent1, tf_name) # 
drug=undic(drug_d) sent2=gene_category(gene, drug_d, drug, "drug") progress+=percent yield "data:"+str(progress)+"\n\n" e2=generate_edges(sent2, tf_name) # brain has its own query terms that does not include the many short acronyms sent3=gene_category(gene, brain_d, brain_query_term, "brain") progress+=percent e3=generate_edges(sent3, tf_name) # gwas e4=searchArchived('gwas', gene) geneEdges=e0+e1+e2+e3+e4 ## there is a bug here. zero link notes are not excluded anymore if len(geneEdges) >1: edges+=geneEdges nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/startGeneGene?forTopGene="+gene+"'} },\n" else: nodesToHide+=gene + " " sentences+=sent0+sent1+sent2+sent3 #save data before the last yield searchCnt+=1 if (searchCnt==len(genes)): progress=100 sntdata.write(sentences) sntdata.close() cysdata.write(nodes+edges) cysdata.close() zeroLinkNode.write(nodesToHide) zeroLinkNode.close() yield "data:"+str(progress)+"\n\n" return Response(generate(genes, snt_file), mimetype='text/event-stream') @app.route('/cytoscape') def cytoscape(): message2=" This Genes vs Keywords graph is interactive:
" with open(session['path']+"_cy","r") as f: elements=f.read() with open(session['path']+"_0link","r") as z: zeroLink=z.read() if (len(zeroLink)>0): message2+="No result was found for these genes: " + zeroLink + "" return render_template('cytoscape.html', elements=elements, message2=message2) @app.route("/sentences") def sentences(): edge=request.args.get('edgeID') (tf_name, gene0, cat0)=edge.split("|") out="
") ## show the cytoscape graph for one gene from the top gene list @app.route("/showTopGene") def showTopGene(): query=request.args.get('topGene') nodesEdges=searchArchived('topGene',query) message2="
"+node.upper()+"
") @app.route("/startGeneGene") def startGeneGene(): session['forTopGene']=request.args.get('forTopGene') return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene") @app.route("/searchGeneGene") def gene_gene(): tmp_ggPMID=session['path']+"_ggPMID" gg_file=session['path']+"_ggSent" #gene_gene result_file=session['path']+"_ggResult" def generate(query): progress=1 yield "data:"+str(progress)+"\n\n" os.system("esearch -db pubmed -query \"" + query + "\" | efetch -format uid |sort >" + tmp_ggPMID) abstracts=os.popen("comm -1 -2 topGene_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() os.system("rm "+tmp_ggPMID) progress=10 yield "data:"+str(progress)+"\n\n" topGenes=dict() out=str() hitGenes=dict() with open("topGene_symb_alias.txt", "r") as top_f: for line in top_f: (symb, alias)=line.strip().split("\t") topGenes[symb]=alias.replace("; ","|") allAbstracts= abstracts.split("\n") abstractCnt=len(allAbstracts) rowCnt=0 for row in allAbstracts: rowCnt+=1 if rowCnt/10==int(rowCnt/10): progress=10+round(rowCnt/abstractCnt,2)*80 yield "data:"+str(progress)+"\n\n" tiab=row.split("\t") pmid = tiab.pop(0) tiab= " ".join(tiab) sentences = sent_tokenize(tiab) ## keep the sentence only if it contains the gene for sent in sentences: if findWholeWord(query)(sent): sent=re.sub(r'\b(%s)\b' % query, r'\1', sent, flags=re.I) for symb in topGenes: allNames=symb+"|"+topGenes[symb] if findWholeWord(allNames)(sent) : sent=sent.replace("","").replace("","") sent=re.sub(r'\b(%s)\b' % allNames, r'\1', sent, flags=re.I) out+=query+"\t"+"gene\t" + symb+"\t"+pmid+"\t"+sent+"\n" if symb in hitGenes.keys(): hitGenes[symb]+=1 else: hitGenes[symb]=1 progress=95 yield "data:"+str(progress)+"\n\n" with open(gg_file, "w+") as gg: gg.write(out) gg.close() results="
")
## generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@app.route("/allTopGenes")
def top150genes():
    """Render the topAddictionGene.html template for the /allTopGenes route."""
    page = render_template("topAddictionGene.html")
    return page
if __name__ == "__main__":
    # Start the built-in Flask server only when this file is run as a script.
    # NOTE(review): debug=True turns on the interactive debugger and the
    # reloader; confirm this entry point is never used for a public deployment.
    app.run(debug=True)