From 57dc1ef7a63f8c05e6d4369dbcd8eb0e51f40a64 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Sun, 19 May 2019 17:40:47 -0500 Subject: UI changes --- ratspub.py | 88 ---------------------------------------- server.py | 12 +++--- static/ratspub.png | Bin 0 -> 258462 bytes static/style.css | 4 +- templates/about.html | 10 ++--- templates/index.html | 32 ++++++++++++--- templates/layout.html | 2 +- templates/topAddictionGene.html | 2 +- 8 files changed, 41 insertions(+), 109 deletions(-) delete mode 100755 ratspub.py create mode 100644 static/ratspub.png diff --git a/ratspub.py b/ratspub.py deleted file mode 100755 index 0cc5d8a..0000000 --- a/ratspub.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/env python3 -from nltk.tokenize import sent_tokenize -import os -import re -from ratspub_keywords import * - -global function_d, brain_d, drug_d, addiction_d, brain_query_term, pubmed_path - - -## turn dictionary (synonyms) to regular expression -def undic(dic): - return "|".join(dic.values()) - -def findWholeWord(w): - return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search - -def getSentences(query, gene): - abstracts = os.popen("esearch -db pubmed -query " + query + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() - out=str() - for row in abstracts.split("\n"): - tiab=row.split("\t") - pmid = tiab.pop(0) - tiab= " ".join(tiab) - sentences = sent_tokenize(tiab) - ## keep the sentence only if it contains the gene - for sent in sentences: - if findWholeWord(gene)(sent): - sent=re.sub(r'\b(%s)\b' % gene, r'\1', sent, flags=re.I) - out+=pmid+"\t"+sent+"\n" - return(out) - -def gene_category(gene, cat_d, query, cat): - #e.g. 
BDNF, addiction_d, undic(addiction_d) "addiction" - q="\"(" + query.replace("|", " OR ") + ") AND " + gene + "\"" - sents=getSentences(q, gene) - out=str() - for sent in sents.split("\n"): - for key in cat_d: - if findWholeWord(cat_d[key])(sent) : - sent=sent.replace("","").replace("","") # remove other highlights - sent=re.sub(r'\b(%s)\b' % cat_d[key], r'\1', sent, flags=re.I) # highlight keyword - out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n" - return(out) - -def generate_nodes(nodes_d, nodetype): - # include all search terms even if there are no edges, just to show negative result - json0 =str() - for node in nodes_d: - json0 += "{ data: { id: '" + node + "', nodecolor: '" + nodecolor[nodetype] + "', nodetype: '"+nodetype + "', url:'/shownode?nodetype=" + nodetype + "&node="+node+"' } },\n" - return(json0) - -def generate_edges(data, filename): - json0=str() - edgeCnts={} - for line in data.split("\n"): - if len(line.strip())!=0: - (source, cat, target, pmid, sent) = line.split("\t") - edgeID=filename+"|"+source+"|"+target - if edgeID in edgeCnts: - edgeCnts[edgeID]+=1 - else: - edgeCnts[edgeID]=1 - for edgeID in edgeCnts: - (filename, source,target)=edgeID.split("|") - json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n" - return(json0) - -# brain region has too many short acronyms to just use the undic function, so search PubMed using the following -brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula" -function=undic(function_d) -addiction=undic(addiction_d) -drug=undic(drug_d) - -nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7"} -#https://htmlcolorcodes.com/ -n0=generate_nodes(function_d, 'function') -n1=generate_nodes(addiction_d, 'addiction') -n2=generate_nodes(drug_d, 'drug') -n3=generate_nodes(brain_d, 'brain') 
-default_nodes=n0+n1+n2+n3 - - -host= os.popen('hostname').read().strip() -if host=="x1": - pubmed_path="/run/media/hao/PubMed/Archive/" -elif host=="hchen3": - pubmed_path="/media/hao/2d554499-6c5b-462d-85f3-5c49b25f4ac8/PubMed/Archive" - diff --git a/server.py b/server.py index d947e1f..3dc7f9c 100755 --- a/server.py +++ b/server.py @@ -25,8 +25,8 @@ def progress(): genes=genes.replace(",", " ") genes=genes.replace(";", " ") genes=genes.split() - if len(genes)>=160: - message="Up to five terms can be searched at a time" + if len(genes)>=100: + message="Up to 100 terms can be searched at a time" return render_template('index.html', message=message) elif len(genes)==0: message="Please enter a search term " @@ -100,14 +100,14 @@ def search(): @app.route('/cytoscape') def cytoscape(): - message2="This graph is interactive:
" + message2="
" with open(session['path']+"_cy","r") as f: elements=f.read() with open(session['path']+"_0link","r") as z: zeroLink=z.read() if (len(zeroLink)>0): message2+="No result was found for these genes: " + zeroLink + "" - return render_template('cytoscape.html', elements=elements, message="Gene vs Keywords", message2=message2) + return render_template('cytoscape.html', elements=elements, message2=message2) @app.route("/sentences") def sentences(): @@ -194,11 +194,11 @@ def gene_gene(): with open(gg_file, "w+") as gg: gg.write(out) gg.close() - results="
RatsPub searches PubMed to find abstracts containing genes of interest and a list of curated addiction-related keywords. The abstracts corresponding to these returned PMIDs are then retrieve from a local archive of the PubMed. No limit on the date of publication is set. Each abstract is then broken down into sentences, which are then filtered by genes and keywords. +
RatsPub searches PubMed to find abstracts containing genes of interest and a list of curated addiction-related keywords. The abstracts corresponding to these returned PMIDs are then retrieved from a local archive of PubMed. No limit on the date of publication is set. Each abstract is then broken down into sentences, which are then filtered by gene names and keywords. -
These gene-keyword relationships are presented as an interactive cytoscape graph. +
Clicking the links will bring up the corresponding sentences in a new browser window. Clicking the keywords will bring up the corresponding search terms. Clicking the genes will start a new search to find sentences containing the target gene and the top 200 addiction genes. -
Clicking the links will bring up the corresponding sentences in a new browser window. Clicking the keyword nodes will bring up all the terms included. Clicking on the gene nodes will start a new search to find sentences containing that gene and the top 200 addiction genes. - -
The top 200 addiction genes were obtained by searching All human genes against addiction related keywords. The archived results for these top addiction genes can be accessed via the Addiction Genes link. +
The top 200 addiction genes were obtained by searching 29,761 human genes against addiction-related keywords. To ensure comprehensive coverage, gene aliases obtained from the NCBI Gene database were included in the search. The results were extensively curated to remove aliases that matched words that were not gene names or that matched the wrong genes. Some incorrect results remained because the same name also produced correct results. The archived results for these top addiction genes can be accessed via the Addiction Genes link.
+ + RatsPub searches PubMed to find sentences that contain the query terms (i.e., gene symbols) and drug addiction-related keywords. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question "What do we know about these genes and addiction?" + + In addition, clicking gene names in the graph will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question "Are there genes that can link my gene of interest to addiction?" + |
+
+
+ |
+ + Up to 100 gene symbols can be searched at a time. Gene symbols can be separated by either a space or a semicolon. Gene aliases will not be automatically included because of the large number of false matches associated with gene synonyms retrieved from databases. + |
+ Example: Rgma Nrxn3; Chrna3
+
+