From c997194555fec42b1e879f5d702710d1941b6345 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Mon, 13 May 2019 06:06:13 -0500 Subject: agaist top 150 addiction genes --- server.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) (limited to 'server.py') diff --git a/server.py b/server.py index 3c124d3..286f071 100755 --- a/server.py +++ b/server.py @@ -5,6 +5,7 @@ import random import string from ratspub import * import time +import os app=Flask(__name__) app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4' @@ -46,7 +47,7 @@ def search(): nodes=default_nodes progress=0 for gene in genes: - nodes+="{ data: { id: '" + gene + "', nodecolor:'#FADBD8', fontweight:700, url:'https://www.ncbi.nlm.nih.gov/gene/?term="+gene+"'} },\n" + nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n" progress+=percent yield "data:"+str(progress)+"\n\n" sent0=gene_addiction(gene) @@ -97,5 +98,52 @@ def shownode(): out="

"+node.upper()+"


  • "+ allnodes[node].replace("|", "
  • ") return render_template('sentences.html', sentences=out+"

    @app.route("/gene_gene")
    def gene_gene():
        """Render a graph linking the queried gene to co-mentioned top-150 genes.

        Reads the ``gene`` request argument, then:

        1. Runs ``esearch | efetch | sort`` and stores the sorted result in a
           per-session temp file (``session['path'] + "_ggPMID"``).
        2. Uses ``comm -1 -2`` to keep only the lines also present in
           ``top_150_addiction_genes_uniq.pmid`` and pipes them through
           ``fetch-pubmed | xtract | sed`` to get tab-separated rows whose
           first field is used as the PMID and whose remaining fields are
           joined into the text to tokenize.
        3. Keeps every sentence that mentions the query and at least one alias
           from ``top_150_genes_symb_alias.txt``, writes those sentences to
           ``session['path'] + "_ggSent"`` and counts hits per gene symbol.
        4. Builds the node/edge element text and renders ``cytoscape.html``.

        Returns:
            The rendered ``cytoscape.html`` template, or a plain-text 400
            response when the ``gene`` argument is missing or empty.

        Raises:
            KeyError: if ``session['path']`` has not been set.
            ValueError: if a non-blank line of the alias file does not split
                into exactly two tab-separated fields.
        """
        import shlex  # local import: only this view needs shell quoting

        query = request.args.get("gene")
        if not query:
            # Without this guard a missing parameter fails on str + None below.
            return "Missing required query parameter: gene", 400

        tmp_ggPMID = session['path'] + "_ggPMID"
        quoted_tmp = shlex.quote(tmp_ggPMID)

        # The gene name comes straight from the URL, so it must be quoted
        # before it is spliced into a shell command line.
        os.system(
            "esearch -db pubmed -query " + shlex.quote(query)
            + " | efetch -format uid |sort >" + quoted_tmp
        )
        try:
            fetch_cmd = (
                "comm -1 -2 top_150_addiction_genes_uniq.pmid " + quoted_tmp
                + " |fetch-pubmed -path /run/media/hao/PubMed/Archive/"
                + " | xtract -pattern PubmedArticle"
                + " -element MedlineCitation/PMID,ArticleTitle,AbstractText"
                + "|sed \"s/-/ /g\""
            )
            # Context manager closes the pipe once the output has been read.
            with os.popen(fetch_cmd) as pipe:
                abstracts = pipe.read()
        finally:
            # Best-effort cleanup, also when the pipeline raised.
            try:
                os.remove(tmp_ggPMID)
            except OSError:
                pass

        # symbol -> alias text, one "symbol<TAB>alias" pair per line.
        topGenes = {}
        with open("./top_150_genes_symb_alias.txt", "r") as top_f:
            for line in top_f:
                line = line.strip()
                if not line:
                    continue  # tolerate blank or trailing empty lines
                (symb, alias) = line.split("\t")
                topGenes[symb] = alias

        # Escaped so that characters such as "(" or "+" in the user's query
        # are matched literally instead of being parsed as regex syntax.
        query_re = re.compile(r'\b(%s)\b' % re.escape(query), flags=re.I)

        # Build each matcher at most once instead of once per sentence.
        # Assumes findWholeWord(term) returns a reusable predicate -- TODO confirm.
        # NOTE(review): findWholeWord is defined elsewhere and still receives
        # the raw query; if it builds a regex from its argument, the query
        # needs escaping there as well.
        matchers = {}

        def whole_word(term):
            if term not in matchers:
                matchers[term] = findWholeWord(term)
            return matchers[term]

        out_rows = []
        hitGenes = {}
        for row in abstracts.split("\n"):
            if not row.strip():
                continue  # the pipeline output ends with an empty row
            tiab = row.split("\t")
            pmid = tiab.pop(0)
            text = " ".join(tiab)
            ## keep the sentence only if it contains the gene
            for sent in sent_tokenize(text):
                if not whole_word(query)(sent):
                    continue
                # NOTE(review): the replacement strings here and below look
                # like they lost highlight markup when this patch was
                # extracted; as written the substitutions leave the sentence
                # unchanged. Confirm against the repository before relying
                # on them.
                sent = query_re.sub(r'\1', sent)
                for symb, alias in topGenes.items():
                    if whole_word(alias)(sent):
                        sent = sent.replace("", "").replace("", "")
                        # Aliases come from the local alias file and are used
                        # as patterns unchanged, exactly as before.
                        sent = re.sub(r'\b(%s)\b' % alias, r'\1', sent, flags=re.I)
                        out_rows.append(
                            query + "\t" + "gene\t" + symb + "\t" + pmid + "\t" + sent + "\n"
                        )
                        hitGenes[symb] = hitGenes.get(symb, 0) + 1

        gg_file = session['path'] + "_ggSent"  # gene_gene
        with open(gg_file, "w") as gg:
            gg.write("".join(out_rows))

        # NOTE(review): query is embedded verbatim in the element text and
        # URLs below. It is request data, so it should be escaped if
        # cytoscape.html emits `elements` unescaped -- cannot tell from here.
        nodecolor = {'query': "#E74C3C", 'top150': "#ccd1d1"}
        nodes = [
            "{ data: { id: '" + query + "', nodecolor: '" + nodecolor['query']
            + "', nodetype: 'ggquery', fontweight:700, url:'/shownode?nodetype=ggquery&node="
            + query + "' } },\n"
        ]
        edges = []
        for key, sent_count in hitGenes.items():
            nodes.append(
                "{ data: { id: '" + key + "', nodecolor: '" + nodecolor['top150']
                + "', nodetype: 'top150', url:'/shownode?nodetype=top150&node="
                + key + "' } },\n"
            )
            edgeID = gg_file + "|" + query + "|" + key
            edges.append(
                "{ data: { id: '" + edgeID + "', source: '" + query
                + "', target: '" + key + "', sentCnt: " + str(sent_count)
                + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
            )
        return render_template("cytoscape.html", elements="".join(nodes) + "".join(edges))


    if __name__ == '__main__':
        app.run(debug=True)