From 8ec2340063fef684268f11ea5aa631a9457459be Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sun, 19 May 2019 12:16:32 -0500 Subject: gene v top gene search change from graph to table --- ratspub_keywords.py | 2 +- server.py | 32 +++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/ratspub_keywords.py b/ratspub_keywords.py index baeb5a0..0c7d0dd 100644 --- a/ratspub_keywords.py +++ b/ratspub_keywords.py @@ -21,7 +21,7 @@ brain_d ={"cortex":"cortex|prefrontal|pfc|mPFC|vmpfc|corticostriatal|cortico lim "amygdala":"amygdala|cea|bla|amy|cna", "VTA":"ventral tegmental|vta|pvta|mesolimbic|limbic|midbrain|mesoaccumbens|mesoaccumbal", "habenula":"habenula|lhb|mhb", - "hypothalamus":"hypothalamus|hypothalamic|PVN|paraventricular nucleus" + "hypothalamus":"hypothalamus|hypothalamic|PVN|paraventricular nucleus|LHA" } # brain region has too many short acronyms to just use the undic function, so search PubMed using the following brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula" diff --git a/server.py b/server.py index 0a97413..d947e1f 100755 --- a/server.py +++ b/server.py @@ -100,7 +100,7 @@ def search(): @app.route('/cytoscape') def cytoscape(): - message2="This graph is interactive:
  • Click on a line to see the sentences in a new window
  • Click on a gene to search its relations with top 150 addiction genes
  • Click on a keyword to see the terms included in the search in a new window

    " + message2="This graph is interactive:

  • Click on a line to see the sentences in a new window
  • Click on a gene to search its relations with top 200 addiction genes
  • Click on a keyword to see the terms included in the search in a new window

    " with open(session['path']+"_cy","r") as f: elements=f.read() with open(session['path']+"_0link","r") as z: @@ -127,7 +127,7 @@ def sentences(): @app.route("/showTopGene") def showTopGene(): topGene=request.args.get('topGene') - topGeneSentFile="gene_addiction_sentences.tab" + topGeneSentFile="topGene_addiction_sentences.tab" with open(topGeneSentFile, "r") as sents: catCnt={} for sent in sents: @@ -162,15 +162,15 @@ def gene_gene(): query=request.args.get("gene") tmp_ggPMID=session['path']+"_ggPMID" os.system("esearch -db pubmed -query \"" + query + "\" | efetch -format uid |sort >" + tmp_ggPMID) - abstracts=os.popen("comm -1 -2 top_150_addiction_genes_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() + abstracts=os.popen("comm -1 -2 topGene_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() os.system("rm "+tmp_ggPMID) topGenes=dict() out=str() hitGenes=dict() - with open("./top_150_genes_symb_alias.txt", "r") as top_f: + with open("./topGene_symb_alias.txt", "r") as top_f: for line in top_f: (symb, alias)=line.strip().split("\t") - topGenes[symb]=alias + topGenes[symb]=alias.replace("; ","|") for row in abstracts.split("\n"): tiab=row.split("\t") pmid = tiab.pop(0) @@ -181,9 +181,10 @@ def gene_gene(): if findWholeWord(query)(sent): sent=re.sub(r'\b(%s)\b' % query, r'\1', sent, flags=re.I) for symb in topGenes: - if findWholeWord(topGenes[symb])(sent) : + allNames=symb+"|"+topGenes[symb] + if findWholeWord(allNames)(sent) : sent=sent.replace("","").replace("","") - sent=re.sub(r'\b(%s)\b' % topGenes[symb], r'\1', sent, flags=re.I) + sent=re.sub(r'\b(%s)\b' % allNames, r'\1', sent, flags=re.I) out+=query+"\t"+"gene\t" + symb+"\t"+pmid+"\t"+sent+"\n" if symb in hitGenes.keys(): hitGenes[symb]+=1 @@ -193,15 +194,16 @@ def gene_gene(): with open(gg_file, "w+") as gg: gg.write(out) gg.close() - nodecolor={'query':"#E74C3C", 'top150': "#ccd1d1"} - nodes= "{ data: { id: '" + query + "', nodecolor: '" + nodecolor['query'] + "', nodetype: 'ggquery', fontweight:700} },\n" - edges=str() + results="

    Gene vs top addiction genes

    Click on the number of sentences will show those sentences. Click on the top addiction gene will show an archived search for that gene.
    " + topGeneHits={} for key in hitGenes.keys(): - #nodes += "{ data: { id: '" + key + "', nodecolor: '" + nodecolor['top150'] + "', nodetype: 'top150', fontcolor:'#F2D7D5', url:'/shownode?nodetype=top150&node="+key+"' } },\n" - nodes += "{ data: { id: '" + key + "', nodecolor: '" + nodecolor['top150'] + "', nodetype: 'top150', url:'/showTopGene?topGene="+key+"' } },\n" - edgeID=gg_file+"|"+query+"|"+key - edges+="{ data: { id: '" + edgeID+ "', source: '" + query + "', target: '" + key + "', sentCnt: " + str(hitGenes[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" - return render_template("cytoscape.html", elements=nodes+edges, message="Gene vs top addiction genes", message2="
  • Top addiction genes are shown in grey.
  • Click on a grey gene will show an archived search.
  • Click on the lines will show gene-gene relations in a new window") + url=gg_file+"|"+query+"|"+key + topGeneHits["
  • "+query+" and "+key+" : " + str(hitGenes[key]) + " sentences. \n"]=hitGenes[key] + #yyps = [(k, d[k]) for k in sorted(d, key=d.get, reverse=True)] + topSorted = [(k, topGeneHits[k]) for k in sorted(topGeneHits, key=topGeneHits.get, reverse=True)] + for k,v in topSorted: + results+=k + return render_template("sentences.html", sentences=results) ## generate a page that lists all the top 150 addiction genes with links to cytoscape graph. @app.route("/allTopGenes") -- cgit v1.2.3