aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHao Chen2019-05-13 13:41:44 -0500
committerHao Chen2019-05-13 13:41:44 -0500
commit1a9b0800d3e95d9cd2a1ec7597dd793cf6b6c62a (patch)
treeebfdbfe3c8778b393accbcf29d3e276347edff58
parent4931c70884c7aa7bb3d4fba927597a2f89b4c9b0 (diff)
downloadgenecup-1a9b0800d3e95d9cd2a1ec7597dd793cf6b6c62a.tar.gz
standardized the search function
-rwxr-xr-xratspub.py49
-rwxr-xr-xserver.py26
2 files changed, 28 insertions, 47 deletions
diff --git a/ratspub.py b/ratspub.py
index 79abc87..c82c9b8 100755
--- a/ratspub.py
+++ b/ratspub.py
@@ -3,7 +3,7 @@ from nltk.tokenize import sent_tokenize
import os
import re
-global function_d, brain_d, drug_d, addiction_d
+global function_d, brain_d, drug_d, addiction_d, brain_query_term
## turn dictionary (synonyms) to regular expression
def undic(dic):
@@ -27,46 +27,17 @@ def getSentences(query, gene):
out+=pmid+"\t"+sent+"\n"
return(out)
-def gene_addiction(gene):
- # search gene name & drug name in the context of addiction terms (i.e., exclude etoh affects cancer, or methods to extract cocaine)
- q="\"(" + addiction.replace("|", " OR ") + ") AND (" + drug.replace("|", " OR ", ) + ") AND " + gene + "\""
+def gene_category(gene, cat_d, query, cat):
+ # e.g. gene="BDNF", cat_d=addiction_d, query=undic(addiction_d), cat="addiction"
+ q="\"(" + query.replace("|", " OR ") + ") AND " + gene + "\""
sents=getSentences(q, gene)
out=str()
for sent in sents.split("\n"):
- for drug0 in drug_d:
- if findWholeWord(drug_d[drug0])(sent) :
- sent=sent.replace("<b>","").replace("</b>","")
- sent=re.sub(r'\b(%s)\b' % drug_d[drug0], r'<b>\1</b>', sent, flags=re.I)
- out+=gene+"\t"+"drug\t" + drug0+"\t"+sent+"\n"
- for add0 in addiction_d:
- if findWholeWord(addiction_d[add0])(sent) :
- sent=sent.replace("<b>","").replace("</b>","")
- sent=re.sub(r'\b(%s)\b' % addiction_d[add0], r'<b>\1</b>', sent, flags=re.I)
- out+=gene+"\t"+"addiction\t"+add0+"\t"+sent+"\n"
- return(out)
-
-def gene_anatomical(gene):
- q="\"(" + brain.replace("|", " OR ") + ") AND " + gene + "\""
- sents=getSentences(q,gene)
- out=str()
- for sent in sents.split("\n"):
- for brain0 in brain_d:
- if findWholeWord(brain_d[brain0])(sent) :
- sent=sent.replace("<b>","").replace("</b>","")
- sent=re.sub(r'\b(%s)\b' % brain_d[brain0], r'<b>\1</b>', sent, flags=re.I)
- out+=gene+"\t"+"brain\t"+brain0+"\t"+sent+"\n"
- return(out)
-
-def gene_functional(gene):
- q="\"(" + function.replace("|", " OR ") + ") AND " + gene + "\""
- sents=getSentences(q,gene)
- out=str()
- for sent in sents.split("\n"):
- for bio0 in function_d:
- if findWholeWord(function_d[bio0])(sent) :
- sent=sent.replace("<b>","").replace("</b>","")
- sent=re.sub(r'\b(%s)\b' % function_d[bio0], r'<b>\1</b>', sent, flags=re.I)
- out+=gene+"\t"+"function\t"+bio0+"\t"+sent+"\n"
+ for key in cat_d:
+ if findWholeWord(cat_d[key])(sent) :
+ sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
+ sent=re.sub(r'\b(%s)\b' % cat_d[key], r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+ out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
return(out)
def generate_nodes(nodes_d, nodetype):
@@ -117,7 +88,7 @@ brain_d ={"cortex":"cortex|prefrontal|pfc|mPFC|vmpfc|corticostriatal|cortico lim
"vta":"ventral tegmental|vta|pvta|mesolimbic|limbic|midbrain|mesoaccumbens"
}
# brain region has too many short acronyms to just use the undic function, so search PubMed using the following
-brain="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic"
+brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic"
function_d={"signalling":"signalling|signaling|phosphorylation|glycosylation",
"transcription":"transcription|methylation|hypomethylation|hypermethylation|histone|ribosome",
"neuroplasticity":"neuroplasticity|plasticity|long term potentiation|LTP|long term depression|LTD|synaptic|epsp|epsc|neurite|neurogenesis|boutons|mIPSC|IPSC|IPSP",
diff --git a/server.py b/server.py
index 2cff990..b71e6bb 100755
--- a/server.py
+++ b/server.py
@@ -18,8 +18,6 @@ def root():
def about():
return render_template('about.html')
-
-
@app.route('/progress')
def progress():
# only 1-6 terms are allowed
@@ -43,7 +41,7 @@ def progress():
@app.route("/search")
def search():
genes=session['query']
- percent=round(100/(len(genes)*3),1)
+ percent=round(100/(len(genes)*4),1)
snt_file=session['path']+"_snt"
cysdata=open(session['path']+"_cy","w+")
sntdata=open(snt_file,"w+")
@@ -53,20 +51,32 @@ def search():
nodes=default_nodes
progress=0
for gene in genes:
+ gene=gene.replace("-"," ")
nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/gene_gene?gene="+gene+"'} },\n"
+ # report progress immediately
progress+=percent
yield "data:"+str(progress)+"\n\n"
- sent0=gene_addiction(gene)
+ addiction=undic(addiction_d)
+ sent0=gene_category(gene, addiction_d, addiction, "addiction")
e0=generate_edges(sent0, tf_name)
- sent1=gene_functional(gene)
+ #
+ function=undic(function_d)
+ sent1=gene_category(gene, function_d, function, "function")
progress+=percent
yield "data:"+str(progress)+"\n\n"
e1=generate_edges(sent1, tf_name)
- sent2=gene_anatomical(gene)
+ #
+ drug=undic(drug_d)
+ sent2=gene_category(gene, drug_d, drug, "drug")
progress+=percent
+ yield "data:"+str(progress)+"\n\n"
e2=generate_edges(sent2, tf_name)
- edges+=e0+e1+e2
- sentences+=sent0+sent1+sent2
+ # brain has its own query terms that do not include the many short acronyms
+ sent3=gene_category(gene, brain_d, brain_query_term, "brain")
+ progress+=percent
+ e3=generate_edges(sent3, tf_name)
+ edges+=e0+e1+e2+e3
+ sentences+=sent0+sent1+sent2+sent3
#save data before the last yield
if (progress>99):
progress=100