From e87aa54962f20d93e3cea18f425bb3f820768e63 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Tue, 21 May 2019 16:25:17 -0500 Subject: progress bar for gene_topGene search --- server.py | 132 ++++++++++++++++++++++++++++++------------------ templates/progress.html | 12 ++--- 2 files changed, 89 insertions(+), 55 deletions(-) diff --git a/server.py b/server.py index a54dab2..24476c6 100755 --- a/server.py +++ b/server.py @@ -36,7 +36,7 @@ def progress(): # generate a unique session ID to track the results tf_path=tempfile.gettempdir() session['path']=tf_path+"/tmp" + ''.join(random.choice(string.ascii_letters) for x in range(6)) - return render_template('progress.html') + return render_template('progress.html', url_in="search", url_out="cytoscape") @app.route("/search") def search(): @@ -81,9 +81,10 @@ def search(): # gwas e4=searchArchived('gwas', gene) geneEdges=e0+e1+e2+e3+e4 + ## there is a bug here. zero link nodes are not excluded anymore if len(geneEdges) >1: edges+=geneEdges - nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n" + nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/startGeneGene?forTopGene="+gene+"'} },\n" else: nodesToHide+=gene + " " sentences+=sent0+sent1+sent2+sent3 @@ -140,56 +141,89 @@ def shownode(): out="
"+node.upper()+"
") -@app.route("/gene_gene") +@app.route("/startGeneGene") +def startGeneGene(): + session['forTopGene']=request.args.get('forTopGene') + return render_template('progress.html', url_in="searchGeneGene", url_out="showGeneTopGene") + +@app.route("/searchGeneGene") def gene_gene(): - query=request.args.get("gene") tmp_ggPMID=session['path']+"_ggPMID" - os.system("esearch -db pubmed -query \"" + query + "\" | efetch -format uid |sort >" + tmp_ggPMID) - abstracts=os.popen("comm -1 -2 topGene_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() - os.system("rm "+tmp_ggPMID) - topGenes=dict() - out=str() - hitGenes=dict() - with open("./topGene_symb_alias.txt", "r") as top_f: - for line in top_f: - (symb, alias)=line.strip().split("\t") - topGenes[symb]=alias.replace("; ","|") - for row in abstracts.split("\n"): - tiab=row.split("\t") - pmid = tiab.pop(0) - tiab= " ".join(tiab) - sentences = sent_tokenize(tiab) - ## keep the sentence only if it contains the gene - for sent in sentences: - if findWholeWord(query)(sent): - sent=re.sub(r'\b(%s)\b' % query, r'\1', sent, flags=re.I) - for symb in topGenes: - allNames=symb+"|"+topGenes[symb] - if findWholeWord(allNames)(sent) : - sent=sent.replace("","").replace("","") - sent=re.sub(r'\b(%s)\b' % allNames, r'\1', sent, flags=re.I) - out+=query+"\t"+"gene\t" + symb+"\t"+pmid+"\t"+sent+"\n" - if symb in hitGenes.keys(): - hitGenes[symb]+=1 - else: - hitGenes[symb]=1 gg_file=session['path']+"_ggSent" #gene_gene - with open(gg_file, "w+") as gg: - gg.write(out) - gg.close() - results="
")
+ result_file=session['path']+"_ggResult"
+ def generate(query):
+ progress=1
+ yield "data:"+str(progress)+"\n\n"
+ os.system("esearch -db pubmed -query \"" + query + "\" | efetch -format uid |sort >" + tmp_ggPMID)
+ abstracts=os.popen("comm -1 -2 topGene_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
+ os.system("rm "+tmp_ggPMID)
+ progress=10
+ yield "data:"+str(progress)+"\n\n"
+ topGenes=dict()
+ out=str()
+ hitGenes=dict()
+ with open("topGene_symb_alias.txt", "r") as top_f:
+ for line in top_f:
+ (symb, alias)=line.strip().split("\t")
+ topGenes[symb]=alias.replace("; ","|")
+ allAbstracts= abstracts.split("\n")
+ abstractCnt=len(allAbstracts)
+ rowCnt=0
+ for row in allAbstracts:
+ rowCnt+=1
+ if rowCnt/10==int(rowCnt/10):
+ progress=10+round(rowCnt/abstractCnt,2)*80
+ yield "data:"+str(progress)+"\n\n"
+ tiab=row.split("\t")
+ pmid = tiab.pop(0)
+ tiab= " ".join(tiab)
+ sentences = sent_tokenize(tiab)
+ ## keep the sentence only if it contains the gene
+ for sent in sentences:
+ if findWholeWord(query)(sent):
+ sent=re.sub(r'\b(%s)\b' % query, r'\1', sent, flags=re.I)
+ for symb in topGenes:
+ allNames=symb+"|"+topGenes[symb]
+ if findWholeWord(allNames)(sent) :
+ sent=sent.replace("","").replace("","")
+ sent=re.sub(r'\b(%s)\b' % allNames, r'\1', sent, flags=re.I)
+ out+=query+"\t"+"gene\t" + symb+"\t"+pmid+"\t"+sent+"\n"
+ if symb in hitGenes.keys():
+ hitGenes[symb]+=1
+ else:
+ hitGenes[symb]=1
+ progress=95
+ yield "data:"+str(progress)+"\n\n"
+ with open(gg_file, "w+") as gg:
+ gg.write(out)
+ gg.close()
+ results="
")
+
## generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@app.route("/allTopGenes")
diff --git a/templates/progress.html b/templates/progress.html
index e881a90..ff9a65f 100644
--- a/templates/progress.html
+++ b/templates/progress.html
@@ -6,12 +6,12 @@
- We hope this won't take too long ...
+ I am handling all your work while you are watching this progress bar ...