about summary refs log tree commit diff
path: root/ratspub.py
diff options
context:
space:
mode:
author Hao Chen 2019-05-13 13:41:44 -0500
committer Hao Chen 2019-05-13 13:41:44 -0500
commit 1a9b0800d3e95d9cd2a1ec7597dd793cf6b6c62a (patch)
tree ebfdbfe3c8778b393accbcf29d3e276347edff58 /ratspub.py
parent 4931c70884c7aa7bb3d4fba927597a2f89b4c9b0 (diff)
download genecup-1a9b0800d3e95d9cd2a1ec7597dd793cf6b6c62a.tar.gz
standardized the search function
Diffstat (limited to 'ratspub.py')
-rwxr-xr-x ratspub.py 49
1 files changed, 10 insertions, 39 deletions
diff --git a/ratspub.py b/ratspub.py
index 79abc87..c82c9b8 100755
--- a/ratspub.py
+++ b/ratspub.py
@@ -3,7 +3,7 @@ from nltk.tokenize import sent_tokenize
 import os
 import re
 
-global function_d, brain_d, drug_d, addiction_d
+global function_d, brain_d, drug_d, addiction_d, brain_query_term
 
 ## turn dictionary (synonyms) to regular expression
 def undic(dic):
@@ -27,46 +27,17 @@ def getSentences(query, gene):
                 out+=pmid+"\t"+sent+"\n"
     return(out)
 
-def gene_addiction(gene):
-    # search gene name & drug name  in the context of addiction terms (i.e., exclude etoh affects cancer, or methods to extract cocaine) 
-    q="\"(" + addiction.replace("|", " OR ")  + ") AND (" + drug.replace("|", " OR ", ) + ") AND " + gene + "\""
+def gene_category(gene, cat_d, query, cat):
+    #e.g. BDNF, addiction_d, undic(addiction_d) "addiction"
+    q="\"(" + query.replace("|", " OR ")  + ") AND " + gene + "\""
     sents=getSentences(q, gene)
     out=str()
     for sent in sents.split("\n"):
-        for drug0 in drug_d:
-            if findWholeWord(drug_d[drug0])(sent) :
-                sent=sent.replace("<b>","").replace("</b>","")
-                sent=re.sub(r'\b(%s)\b' % drug_d[drug0], r'<b>\1</b>', sent, flags=re.I)
-                out+=gene+"\t"+"drug\t" + drug0+"\t"+sent+"\n"
-        for add0 in addiction_d:
-            if findWholeWord(addiction_d[add0])(sent) :
-                sent=sent.replace("<b>","").replace("</b>","")
-                sent=re.sub(r'\b(%s)\b' % addiction_d[add0], r'<b>\1</b>', sent, flags=re.I)
-                out+=gene+"\t"+"addiction\t"+add0+"\t"+sent+"\n"
-    return(out)
-
-def gene_anatomical(gene):
-    q="\"(" + brain.replace("|", " OR ")  + ") AND " + gene + "\""
-    sents=getSentences(q,gene)
-    out=str()
-    for sent in sents.split("\n"):
-        for brain0 in brain_d:
-            if findWholeWord(brain_d[brain0])(sent) :
-                sent=sent.replace("<b>","").replace("</b>","")
-                sent=re.sub(r'\b(%s)\b' % brain_d[brain0], r'<b>\1</b>', sent, flags=re.I)
-                out+=gene+"\t"+"brain\t"+brain0+"\t"+sent+"\n"
-    return(out)
-
-def gene_functional(gene):
-    q="\"(" + function.replace("|", " OR ")  + ") AND " + gene + "\""
-    sents=getSentences(q,gene)
-    out=str()
-    for sent in sents.split("\n"):
-        for bio0 in function_d:
-            if findWholeWord(function_d[bio0])(sent) :
-                sent=sent.replace("<b>","").replace("</b>","")
-                sent=re.sub(r'\b(%s)\b' % function_d[bio0], r'<b>\1</b>', sent, flags=re.I)
-                out+=gene+"\t"+"function\t"+bio0+"\t"+sent+"\n"
+        for key in cat_d:
+            if findWholeWord(cat_d[key])(sent) :
+                sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
+                sent=re.sub(r'\b(%s)\b' % cat_d[key], r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+                out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
     return(out)
 
 def generate_nodes(nodes_d, nodetype):
@@ -117,7 +88,7 @@ brain_d ={"cortex":"cortex|prefrontal|pfc|mPFC|vmpfc|corticostriatal|cortico lim
           "vta":"ventral tegmental|vta|pvta|mesolimbic|limbic|midbrain|mesoaccumbens"
           }
 # brain region has too many short acronyms to just use the undic function, so search PubMed using the following 
-brain="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic"
+brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic"
 function_d={"signalling":"signalling|signaling|phosphorylation|glycosylation",
             "transcription":"transcription|methylation|hypomethylation|hypermethylation|histone|ribosome",
             "neuroplasticity":"neuroplasticity|plasticity|long term potentiation|LTP|long term depression|LTD|synaptic|epsp|epsc|neurite|neurogenesis|boutons|mIPSC|IPSC|IPSP",