From 57dc1ef7a63f8c05e6d4369dbcd8eb0e51f40a64 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sun, 19 May 2019 17:40:47 -0500 Subject: UI changes --- ratspub.py | 88 ---------------------------------------- server.py | 12 +++--- static/ratspub.png | Bin 0 -> 258462 bytes static/style.css | 4 +- templates/about.html | 10 ++--- templates/index.html | 32 ++++++++++++--- templates/layout.html | 2 +- templates/topAddictionGene.html | 2 +- 8 files changed, 41 insertions(+), 109 deletions(-) delete mode 100755 ratspub.py create mode 100644 static/ratspub.png diff --git a/ratspub.py b/ratspub.py deleted file mode 100755 index 0cc5d8a..0000000 --- a/ratspub.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/env python3 -from nltk.tokenize import sent_tokenize -import os -import re -from ratspub_keywords import * - -global function_d, brain_d, drug_d, addiction_d, brain_query_term, pubmed_path - - -## turn dictionary (synonyms) to regular expression -def undic(dic): - return "|".join(dic.values()) - -def findWholeWord(w): - return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search - -def getSentences(query, gene): - abstracts = os.popen("esearch -db pubmed -query " + query + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() - out=str() - for row in abstracts.split("\n"): - tiab=row.split("\t") - pmid = tiab.pop(0) - tiab= " ".join(tiab) - sentences = sent_tokenize(tiab) - ## keep the sentence only if it contains the gene - for sent in sentences: - if findWholeWord(gene)(sent): - sent=re.sub(r'\b(%s)\b' % gene, r'\1', sent, flags=re.I) - out+=pmid+"\t"+sent+"\n" - return(out) - -def gene_category(gene, cat_d, query, cat): - #e.g. 
BDNF, addiction_d, undic(addiction_d) "addiction" - q="\"(" + query.replace("|", " OR ") + ") AND " + gene + "\"" - sents=getSentences(q, gene) - out=str() - for sent in sents.split("\n"): - for key in cat_d: - if findWholeWord(cat_d[key])(sent) : - sent=sent.replace("","").replace("","") # remove other highlights - sent=re.sub(r'\b(%s)\b' % cat_d[key], r'\1', sent, flags=re.I) # highlight keyword - out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n" - return(out) - -def generate_nodes(nodes_d, nodetype): - # include all search terms even if there are no edges, just to show negative result - json0 =str() - for node in nodes_d: - json0 += "{ data: { id: '" + node + "', nodecolor: '" + nodecolor[nodetype] + "', nodetype: '"+nodetype + "', url:'/shownode?nodetype=" + nodetype + "&node="+node+"' } },\n" - return(json0) - -def generate_edges(data, filename): - json0=str() - edgeCnts={} - for line in data.split("\n"): - if len(line.strip())!=0: - (source, cat, target, pmid, sent) = line.split("\t") - edgeID=filename+"|"+source+"|"+target - if edgeID in edgeCnts: - edgeCnts[edgeID]+=1 - else: - edgeCnts[edgeID]=1 - for edgeID in edgeCnts: - (filename, source,target)=edgeID.split("|") - json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" - return(json0) - -# brain region has too many short acronyms to just use the undic function, so search PubMed using the following -brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula" -function=undic(function_d) -addiction=undic(addiction_d) -drug=undic(drug_d) - -nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7"} -#https://htmlcolorcodes.com/ -n0=generate_nodes(function_d, 'function') -n1=generate_nodes(addiction_d, 'addiction') -n2=generate_nodes(drug_d, 'drug') -n3=generate_nodes(brain_d, 'brain') 
-default_nodes=n0+n1+n2+n3 - - -host= os.popen('hostname').read().strip() -if host=="x1": - pubmed_path="/run/media/hao/PubMed/Archive/" -elif host=="hchen3": - pubmed_path="/media/hao/2d554499-6c5b-462d-85f3-5c49b25f4ac8/PubMed/Archive" - diff --git a/server.py b/server.py index d947e1f..3dc7f9c 100755 --- a/server.py +++ b/server.py @@ -25,8 +25,8 @@ def progress(): genes=genes.replace(",", " ") genes=genes.replace(";", " ") genes=genes.split() - if len(genes)>=160: - message="Up to five terms can be searched at a time" + if len(genes)>=100: + message="Up to 100 terms can be searched at a time" return render_template('index.html', message=message) elif len(genes)==0: message="Please enter a search term " @@ -100,14 +100,14 @@ def search(): @app.route('/cytoscape') def cytoscape(): - message2="This graph is interactive:
  • Click on a line to see the sentences in a new window
  • Click on a gene to search its relations with top 200 addiction genes
  • Click on a keyword to see the terms included in the search in a new window

    " + message2="

    Gene vs Keywords

    This graph is interactive:
  • Click on a line to see the sentences in a new window
  • Click on a gene to search its relations with top 200 addiction genes
  • Click on a keyword to see the terms included in the search in a new window

    " with open(session['path']+"_cy","r") as f: elements=f.read() with open(session['path']+"_0link","r") as z: zeroLink=z.read() if (len(zeroLink)>0): message2+="No result was found for these genes: " + zeroLink + "" - return render_template('cytoscape.html', elements=elements, message="Gene vs Keywords", message2=message2) + return render_template('cytoscape.html', elements=elements, message2=message2) @app.route("/sentences") def sentences(): @@ -194,11 +194,11 @@ def gene_gene(): with open(gg_file, "w+") as gg: gg.write(out) gg.close() - results="

    Gene vs top addiction genes

    Click on the number of sentences will show those sentences. Click on the top addiction gene will show an archived search for that gene.
    " + results="

    Gene vs top addiction genes

    Click on the number of sentences will show those sentences. Click on the top addiction genes will show an archived search for that gene.
    " topGeneHits={} for key in hitGenes.keys(): url=gg_file+"|"+query+"|"+key - topGeneHits["
  • "+query+" and "+key+" : " + str(hitGenes[key]) + " sentences. \n"]=hitGenes[key] + topGeneHits["
  • "+query+" and "+key+" : " + str(hitGenes[key]) + " sentences. \n"]=hitGenes[key] #yyps = [(k, d[k]) for k in sorted(d, key=d.get, reverse=True)] topSorted = [(k, topGeneHits[k]) for k in sorted(topGeneHits, key=topGeneHits.get, reverse=True)] for k,v in topSorted: diff --git a/static/ratspub.png b/static/ratspub.png new file mode 100644 index 0000000..a031bc0 Binary files /dev/null and b/static/ratspub.png differ diff --git a/static/style.css b/static/style.css index f57b27c..aa1bbd2 100644 --- a/static/style.css +++ b/static/style.css @@ -52,4 +52,6 @@ a:active { width: 20%; } - +#searchform{ + background:#F8F9F9; +} diff --git a/templates/about.html b/templates/about.html index 4ea4112..ea5e4c7 100644 --- a/templates/about.html +++ b/templates/about.html @@ -2,15 +2,13 @@ {% extends "layout.html" %} {% block content %} -

    What does RatsPub do?

    +

    About RatsPub ...

    -

    RatsPub searches PubMed to find abstracts containing genes of interest and a list of curated addiction-related keywords. The abstracts corresponding to these returned PMIDs are then retrieve from a local archive of the PubMed. No limit on the date of publication is set. Each abstract is then broken down into sentences, which are then filtered by genes and keywords. +

    RatsPub searches PubMed to find abstracts containing genes of interest and a list of curated addiction-related keywords. The abstracts corresponding to these returned PMIDs are then retrieved from a local archive of PubMed. No limit on the date of publication is set. Each abstract is then broken down into sentences, which are then filtered by gene names and keywords. -

    These gene-keyword relationships are presented as an interactive cytoscape graph. +

    Clicking the links will bring up the corresponding sentences in a new browser window. Clicking the keywords will bring up the corresponding search terms. Clicking the genes will start a new search to find sentences containing the target gene and the top 200 addiction genes. -

    Clicking the links will bring up the corresponding sentences in a new browser window. Clicking the keyword nodes will bring up all the terms included. Clicking on the gene nodes will start a new search to find sentences containing that gene and the top 200 addiction genes. - -

    The top 200 addiction genes were obtained by searching All human genes against addiction related keywords. The archived results for these top addiction genes can be accessed via the Addiction Genes link. +

    The top 200 addiction genes were obtained by searching 29,761 human genes against addiction-related keywords. To ensure comprehensive coverage, gene aliases obtained from the NCBI Gene database were included in the search. The results were extensively curated to remove those aliases that matched words that were not gene names or that matched the wrong genes. Some incorrect results remained because the same name also produced correct results. The archived results for these top addiction genes can be accessed via the Addiction Genes link.


    Source code diff --git a/templates/index.html b/templates/index.html index b94853e..f5b6aea 100644 --- a/templates/index.html +++ b/templates/index.html @@ -4,16 +4,36 @@

    Relationship with Addiction Through Searches of PubMed

    - - This app searches PubMed to find sentences that contain the query terms (e.g., gene symbols) and keywords related to drug addiction. -
    + + + +
    +

    + RatsPub searches PubMed to find sentences that contain the query terms (i.e., gene symbols) and drug addiction-related keywords. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question "What do we know about these genes and addiction?" + +

    In addition, clicking gene names in the graph will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question "Are there genes that can link my gene of interest to addiction?" +

    +
    +
    + + Up to 100 gene symbols can be searched at a time. Gene symbols can be separated by either a space or a semicolon. Gene aliases will not be automatically included because of the large number of false matches associated with gene synonyms retrieved from databases. +
    +

    + Example: Rgma Nrxn3; Chrna3 + +

    - - -
    + +
    +
    + +
    +
    + {% endblock %} diff --git a/templates/layout.html b/templates/layout.html index 08977c3..6a85473 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -22,7 +22,7 @@