aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHao Chen2019-05-20 05:47:50 -0500
committerHao Chen2019-05-20 05:47:50 -0500
commitd87ecb28847fc6c3d8b93673f1f0371b77ee22ee (patch)
tree8d1404e617fcffc7c4264cf6db3459e790829c46
parentd883f2e9e6151d0bc5c1e25924253221312b4959 (diff)
downloadgenecup-d87ecb28847fc6c3d8b93673f1f0371b77ee22ee.tar.gz
add EBI GWAS
-rw-r--r--process_gwas.py48
-rwxr-xr-xratspub.py32
-rwxr-xr-xserver.py33
-rw-r--r--templates/index.html4
-rw-r--r--templates/progress.html6
5 files changed, 92 insertions, 31 deletions
diff --git a/process_gwas.py b/process_gwas.py
new file mode 100644
index 0000000..eba59c0
--- /dev/null
+++ b/process_gwas.py
@@ -0,0 +1,48 @@
"""Convert EBI GWAS catalog rows into the tab-separated archive format.

Reads ``./addiction_gwas.tsv``, whose rows hold seven tab-separated fields
(PMID, disease/trait, two gene columns, SNP, P value, mapped trait), and
prints one line per unique gene symbol to stdout:

    gene <TAB> GWAS <TAB> <keyword>_GWAS <TAB> pmid <TAB> HTML summary
"""
import re

GWAS_FILE = "./addiction_gwas.tsv"

# Ordered table: the first pattern that matches the combined trait text wins,
# so more specific substances are listed before the generic addiction terms.
# NOTE(review): "congestive" is kept from the original nicotine pattern; it is
# not obviously smoking-related -- confirm it is intentional.
TRAIT_KEYWORDS = (
    (re.compile(r"cocaine", re.I), "cocaine"),
    (re.compile(r"smoking|congestive|nicotine", re.I), "nicotine"),
    (re.compile(r"opioid|morphin|heroin|methadone", re.I), "opioid"),
    (re.compile(r"amphetam", re.I), "amphetamine"),
    # Accept the correct spelling as well as the original "canabis" typo,
    # which could never match a correctly spelled trait.
    (re.compile(r"cann?abis", re.I), "cannabis"),
    (re.compile(r"food", re.I), "food"),
    (re.compile(r"alcohol", re.I), "alcohol"),
    (re.compile(r"addiction|abuse", re.I), "addiction"),
)

# Keyword used when none of the patterns above match.
DEFAULT_KEYWORD = "psychiatric"


def classify_trait(trait):
    """Return the keyword for a trait description (case-insensitive match)."""
    for pattern, keyword in TRAIT_KEYWORDS:
        if pattern.search(trait):
            return keyword
    return DEFAULT_KEYWORD


def split_genes(genes):
    """Return the unique gene symbols found in ``genes``, in first-seen order.

    Symbols may be separated by ";", "," or " - ". Spaces inside a symbol are
    removed. The placeholder "NR" and empty symbols are dropped.
    """
    symbols = []
    seen = set()
    for gene in genes.replace(" - ", ";").replace(",", ";").split(";"):
        gene = gene.replace(" ", "")
        if not gene or gene == "NR" or gene in seen:
            continue
        seen.add(gene)
        symbols.append(gene)
    return symbols


def gwas_rows(line):
    """Return the output rows (without newline) for one input line.

    A line that does not hold exactly seven tab-separated fields is skipped
    (an empty list is returned) instead of being processed with stale values
    from the previous line.
    """
    fields = line.strip().split("\t")
    if len(fields) != 7:
        return []
    pmid, trait0, gene0, gene1, snp, pval, trait1 = fields
    # Classification looks at both the reported and the mapped trait.
    keyword = classify_trait(trait0 + "; " + trait1)
    text = ("SNP:<b>" + snp + "</b>, P value: <b>" + pval
            + "</b>, Disease/trait:<b> " + trait0
            + "</b>, Mapped trait:<b> " + trait1 + "</b>")
    return ["\t".join((gene, "GWAS", keyword + "_GWAS", pmid, text))
            for gene in split_genes(gene0 + ";" + gene1)]


def main(path=GWAS_FILE):
    """Print the converted rows for every line of the file at ``path``."""
    with open(path, "r") as f:
        for line in f:
            for row in gwas_rows(line):
                print(row)


if __name__ == "__main__":
    main()
diff --git a/ratspub.py b/ratspub.py
index 0cc5d8a..e9a911b 100755
--- a/ratspub.py
+++ b/ratspub.py
@@ -65,13 +65,43 @@ def generate_edges(data, filename):
json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
return(json0)
def count_categories(lines, query):
    """Count archived sentences per category for one gene symbol.

    ``lines`` is an iterable of tab-separated records whose first field is the
    gene symbol and whose third field is the category. Symbols are compared
    case-insensitively. Records with fewer than five fields (for example blank
    lines) are ignored, and extra tabs inside the last field are tolerated.
    Returns a dict mapping category to the number of matching records, in
    first-seen order.
    """
    wanted = query.upper()
    counts = {}
    for line in lines:
        fields = line.split("\t", 4)
        if len(fields) < 5:
            continue
        symb, cat1 = fields[0], fields[2]
        if symb.upper() == wanted:
            counts[cat1] = counts.get(cat1, 0) + 1
    return counts


def searchArchived(sets, query):
    """Build cytoscape node/edge entries for ``query`` from an archived file.

    sets  -- 'topGene' reads topGene_addiction_sentences.tab and starts the
             result with a blue node for the query gene; 'gwas' reads
             gwas_addiction.tab and emits only category nodes and edges, so
             the result is an empty string when the gene has no records and
             no second node with the gene's id is produced.
    query -- gene symbol, matched case-insensitively against the first column.

    Returns the concatenated node and edge entries as one string.
    Raises ValueError for any other value of ``sets``.
    """
    # NOTE(review): query is interpolated into the generated script text
    # without escaping; confirm callers only pass trusted or sanitized symbols.
    if sets == 'topGene':
        dataFile = "topGene_addiction_sentences.tab"
        nodes = "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n"
    elif sets == 'gwas':
        dataFile = "gwas_addiction.tab"
        nodes = str()
    else:
        raise ValueError("unknown archived set: " + repr(sets))
    with open(dataFile, "r") as sents:
        catCnt = count_categories(sents, query)
    edges = str()
    for key, cnt in catCnt.items():
        if sets == 'gwas':
            nc = nodecolor["gwas"]
        elif key in drug_d:
            nc = nodecolor["drug"]
        else:
            nc = nodecolor["addiction"]
        nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n"
        # The edge id encodes the data file so the sentence lookup knows
        # which archive to read.
        edgeID = dataFile+"|"+query+"|"+key
        edges += "{ data: { id: '" + edgeID+ "', source: '" + query + "', target: '" + key + "', sentCnt: " + str(cnt) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
    return(nodes+edges)
+
# brain region has too many short acronyms to just use the undic function, so search PubMed using the following
brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula"
function=undic(function_d)
addiction=undic(addiction_d)
drug=undic(drug_d)
-nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7"}
+nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7", 'gwas':"#AEB6BF"}
#https://htmlcolorcodes.com/
n0=generate_nodes(function_d, 'function')
n1=generate_nodes(addiction_d, 'addiction')
diff --git a/server.py b/server.py
index 62a959e..7bdd3f2 100755
--- a/server.py
+++ b/server.py
@@ -78,7 +78,9 @@ def search():
sent3=gene_category(gene, brain_d, brain_query_term, "brain")
progress+=percent
e3=generate_edges(sent3, tf_name)
- geneEdges=e0+e1+e2+e3
+ # gwas
+ e4=searchArchived('gwas', gene)
+ geneEdges=e0+e1+e2+e3+e4
if len(geneEdges) >1:
edges+=geneEdges
nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n"
@@ -119,36 +121,17 @@ def sentences():
for sent in all_sents.split("\n"):
if len(sent.strip())!=0:
(gene,nouse,cat, pmid, text)=sent.split("\t")
- if (gene == gene0 and cat == cat0) :
+ if (gene.upper() == gene0.upper() and cat.upper() == cat0.upper()) :
out+= "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
return render_template('sentences.html', sentences="<ol>"+out+"</ol><p>")
## show the cytoscape graph for one gene from the top gene list
@app.route("/showTopGene")
def showTopGene():
- topGene=request.args.get('topGene')
- topGeneSentFile="topGene_addiction_sentences.tab"
- with open(topGeneSentFile, "r") as sents:
- catCnt={}
- for sent in sents:
- (symb, cat0, cat1, pmid, sent)=sent.split("\t")
- if (symb == topGene) :
- if cat1 in catCnt.keys():
- catCnt[cat1]+=1
- else:
- catCnt[cat1]=1
- nodes= "{ data: { id: '" + topGene + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+topGene+"' } },\n"
- edges=str()
- for key in catCnt.keys():
- if ( key in drug_d.keys()):
- nc=nodecolor["drug"]
- else:
- nc=nodecolor["addiction"]
- nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', nodetype: 'top150', url:'/shownode?node="+key+"' } },\n"
- edgeID=topGeneSentFile+"|"+topGene+"|"+key
- edges+="{ data: { id: '" + edgeID+ "', source: '" + topGene + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
- message2="<li><strong>"+topGene + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> "
- return render_template("cytoscape.html", elements=nodes+edges, message="Top addiction genes", message2=message2)
+ query=request.args.get('topGene')
+ nodesEdges=searchArchived('topGene',query)
+ message2="<li><strong>"+query + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> "
+ return render_template("cytoscape.html", elements=nodesEdges, message="Top addiction genes", message2=message2)
@app.route("/shownode")
def shownode():
diff --git a/templates/index.html b/templates/index.html
index f5b6aea..bbdebfa 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -8,9 +8,9 @@
<table>
<tr><td >
<p>
- RatsPub searches PubMed to find <i>sentences</i> that contain the query terms (i.e., gene symbols) and <a href="https://github.com/chen42/RatsPub/blob/master/ratspub_keywords.py">drug addiction-related keywords</a>. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question <b>"What do we know about these genes and addiction?" </b>
+ RatsPub searches PubMed to find <i>sentences</i> that contain the query terms (i.e., gene symbols) and <a href="https://github.com/chen42/RatsPub/blob/master/ratspub_keywords.py">drug addiction-related keywords</a>. These gene-keyword relationships are presented as an interactive graph that can efficiently answer the question <b>"What do we know about these genes and addiction?"</b> To better answer this question, data from the <a href="https://www.ebi.ac.uk/gwas/">EBI GWAS catalog</a> are also included in the search.
-<p> In addition, clicking gene names in the graph will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question <b>"Are there genes that can link my gene of interest to addiction?" </b>
+<p> The graph has many interactive elements. For example, clicking gene names will launch a new search for sentences containing the target gene and 200 addiction-related genes. These results help to answer the question <b>"Are there genes that can link my gene of interest to addiction?" </b>
</td>
<td >
<div class='img'><img src="/static/ratspub.png" class="img-fluid">
diff --git a/templates/progress.html b/templates/progress.html
index 081f1d9..e881a90 100644
--- a/templates/progress.html
+++ b/templates/progress.html
@@ -6,9 +6,9 @@
<script>
-var r = Math.floor(Math.random() * Math.floor(125));
-var g = Math.floor(Math.random() * Math.floor(125));
-var b = Math.floor(Math.random() * Math.floor(125));
+var r = Math.floor(Math.random() * Math.floor(100));
+var g = Math.floor(Math.random() * Math.floor(100));
+var b = Math.floor(Math.random() * Math.floor(100));
document.body.style.backgroundColor = 'rgba('+r+','+g+','+b+',0.1)'
var source = new EventSource("/search");