author     gunturkunhakan  2021-06-05 14:17:13 -0500
committer  gunturkunhakan  2021-06-05 14:17:13 -0500
commit     0f25bb35ac3467418aa17fcccf37e704e3eb8934 (patch)
tree       c7581fc1ec2fc58a7562a7b682a90bf44d4c29b0
parent     6235fb58955e2bc193d6ce40a4d74b33ab47dfce (diff)
download   genecup-0f25bb35ac3467418aa17fcccf37e704e3eb8934.tar.gz

changes in custom ontology and search
-rw-r--r--  Readme.md                    2
-rw-r--r--  addiction.onto               7
-rw-r--r--  addiction_gwas_ontology.md  13
-rw-r--r--  addiction_keywords.py       10
-rwxr-xr-x  more_functions.py           30
-rwxr-xr-x  server.py                  142
6 files changed, 125 insertions, 79 deletions
diff --git a/Readme.md b/Readme.md
index 81afd20..d0ef0b4 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,6 +1,6 @@
 # GeneCup: Mining gene relationships from PubMed using custom ontology
 
-URL: [http://genecup.org](http://genecup.org)
+URL: [https://genecup.org](https://genecup.org)
 
 GeneCup automatically extracts information from PubMed and NHGRI-EBI GWAS catalog on the relationship of any gene with a custom list of keywords hierarchically organized into an ontology. The users create an ontology by identifying categories of concepts and a list of keywords for each concept. 
 
diff --git a/addiction.onto b/addiction.onto
index ccfbf4a..c996e23 100644
--- a/addiction.onto
+++ b/addiction.onto
@@ -1,7 +1,8 @@
-{'addiction': {'addiction': {'addiction|addictive|compulsive|drug abuse|escalation|punishment'}, 'aversion': {'aversion|aversive|conditioned taste aversion|CTA'}, 'dependence': {'dependence'}, 'intoxication': {'binge|intoxication'}, 'relapse': {'craving|drug seeking|reinstatement|relapse|seeking'}, 'reward': {'conditioned place preference|CPP|drug reinforced|hedonic|ICSS|incentive|instrumental response|intracranial self stimulation|operant|reinforcement|reinforcing|reward|self administered|self administration'}, 'sensitization': {'behavioral sensitization|locomotor sensitization|drug sensitization|incentive sensitization'}, 'withdrawal': {'withdrawal'}},
- 'brain': {'accumbens': {'acbc|acbs|accumbal|accumbens|core|Nacc|NacSh|shell'}, 'amygdala': {'amy|amygdala|bla|cea|cna'}, 'cortex': {'cerebral|cingulate|cortex|cortico limbic|corticolimbic|corticostriatal|infralimbic|insular|mPFC|orbitofrontal|pfc|prefrontal|prelimbic|prl|vmpfc'}, 'habenula': {'habenula|lhb|mhb'}, 'hippocampus': {'ca1|ca3|dentate gyrus|dhpc|hip|hipp|hippocampal|hippocampus|subiculum|vhipp|vhpc'}, 'hypothalamus': {'hypothalamic|hypothalamus|LHA|paraventricular nucleus|PVN'}, 'striatum': {'basal ganglia|caudate|globus pallidus|GPI|putamen|STR|striatal|striatum'}, 'VTA': {'limbic|mesoaccumbal|mesoaccumbens|mesolimbic|midbrain|pvta|ventral tegmental|vta'}},
+{'brain': {'accumbens': {'acbc|acbs|accumbal|accumbens|core|Nacc|NacSh|shell'}, 'amygdala': {'amy|amygdala|bla|cea|cna'}, 'cortex': {'cerebral|cingulate|cortex|cortico limbic|corticolimbic|corticostriatal|infralimbic|insular|mPFC|orbitofrontal|pfc|prefrontal|prelimbic|prl|vmpfc'}, 'habenula': {'habenula|lhb|mhb'}, 'hippocampus': {'ca1|ca3|dentate gyrus|dhpc|hip|hipp|hippocampal|hippocampus|subiculum|vhipp|vhpc'}, 'hypothalamus': {'hypothalamic|hypothalamus|LHA|paraventricular nucleus|PVN'}, 'striatum': {'basal ganglia|caudate|globus pallidus|GPI|putamen|STR|striatal|striatum'}, 'VTA': {'limbic|mesoaccumbal|mesoaccumbens|mesolimbic|midbrain|pvta|ventral tegmental|vta'}},
+ 'addiction': {'addiction': {'addiction|addictive|compulsive|drug abuse|escalation|punishment'}, 'aversion': {'aversion|aversive|conditioned taste aversion|CTA'}, 'dependence': {'dependence'}, 'intoxication': {'binge|intoxication'}, 'relapse': {'craving|drug seeking|reinstatement|relapse|seeking'}, 'reward': {'conditioned place preference|CPP|drug reinforced|hedonic|ICSS|incentive|instrumental response|intracranial self stimulation|operant|reinforcement|reinforcing|reward|self administered|self administration'}, 'sensitization': {'behavioral sensitization|locomotor sensitization|drug sensitization|incentive sensitization'}, 'withdrawal': {'withdrawal'}},
  'drug': {'alcohol': {'acamprosate|alcohol|alcoholics|alcoholism|antabuse|campral|disulfiram|ethanol|naltrexone|revia|vivitrol'}, 'amphetamine': {'AMPH|amphetamine|METH|methamphetamine'}, 'benzodiazepine': {'adinazolam|alprazolam|benzodiazepine|benzos|brotizolam|chlordiazepoxide|climazolam|clobazam|clonazepam|clorazepate|diazepam|estazolam|flunitrazepam|flurazepam|halazepam|librium|loprazolam|lorazepam|lormetazepam|midazolam|nimetazepam|nitrazepam|normison|oxazepam|prazepam|temazepam|triazolam|valium|xanax'}, 'cannabinoid': {'acylethanolamines|cannabichromene|cannabidiol|cannabigerol|cannabinoids|cannabinol|cannabis|cannabivarin|cesamet|drobinal|dronabinol|endocannabinoids|epidiolex|JWH 018|JWH 122|JWH 250|marijuana|marinol|nabilone|Oleoylethanolamide|palmitoylethanolamide|phytocannabinoid|rimonabant|SR141716|SR144528|syndros|tetrahydrocannabinol|tetrahydrocannabivarin|thc|thc 9'}, 'cocaine': {'cocaine'}, 'nicotine': {'nicotine|smokers|smoking|tobacco'}, 'opioid': {'buprenorphine|codeine|fentanyl|heroin|hycodan|hydrocodone|hydromorphone|kadian|kratom|methadone|morphine|naloxone|opioids|oxycodone|oxycontin|percocet|suboxone|tramadol|ultram|vicodin'}, 'psychedelics': {'ayahuasca|ecstasy|ibogaine|ketamine|LSD|lysergic acid diethylamide|MDMA|mescaline|methylenedioxymethamphetamine|N methoxybenzyl|NBOMes|peyote|psilocybin|psychedelic|psychedelics'}},
  'function': {'neuroplasticity': {'boutons|epsc|epsp|IPSC|IPSP|long term depression|long term potentiation|LTD|LTP|mIPSC|neurite|neurogenesis|neuroplasticity|plasticity|synaptic'}, 'neurotransmission': {'5 ht|acetylcholine|cholinergic|DAergic|dopamine|dopaminergic|GABA|GABAergic|glutamate|glutamatergic|muscarinic|neuropeptides|neurotransmission|nicotinic|serotonergic|serotonin'}, 'signalling': {'glycosylation|phosphorylation|signaling|signalling|kinase|binding|signal transduction|second messengers|cGMP|cAMP'}, 'transcription': {'histone|hypermethylation|hypomethylation|methylation|ribosome|transcription'}},
  'psychiatric': {'anxiety': {'anxiety|anxious'}, 'autism': {'autism|autistic'}, 'bipolar': {'bipolar disorder'}, 'compulsive': {'compulsive|obsessive'}, 'depression': {'depression|depressive|major depressive disorder|MDD'}, 'impulsivity': {'5 CSRTT|5 choice task|delay discounting|delay exposure|delay intolerance|delayed reward|delay task|five choice serial reaction time task|impulsive|impulsivity|premature responding'}, 'schizophrenia': {'schizophrenia'}},
  'cell': {'neuron': {'adrenergic neurons|cholinergic neurons|dopaminergic neurons|gabaergic neurons|glutamatergic neurons|GnRH neurons|interneurons|monoaminergic neurons|medium spiny neurons|motor neurons|neuronal cells|nitrergic neurons|noradrenergic neurons|projection neurons|pyramidal neurons|sensory neurons|serotonergic neurons|somatostatin neurons|neurons|excitatory neurons|inhibitory neurons|corticospinal neurons|dopamine neurons|D1 neurons|D2 neurons|afferent neurons|efferent neurons|serotonin neurons|cortical neurons|hippocampal neurons|DA neurons|CNS neurons|cortex neurons|mesencephalic neurons|orexin neurons|catecholaminergic neurons|striatal neurons|bipolar neurons|ganglion cells|RGC|horizontal cells|amacrine cells'}, 'astrocyte': {'astrocytic|astrocytes|astroglia|astroglial'}, 'microglia': {'microglia|microglial'}, 'endothelium': {'endothelium|endothelial cells'}, 'oligodendrocyte': {'oligodendrocytes'}},
- 'stress': {'PTSD': {'PTSD|post traumatic stress|post traumatic stress symptoms|post traumatic stress disorder'}, 'stress': {'distress|psychological trauma|stress'}}}
\ No newline at end of file
+ 'stress': {'PTSD': {'PTSD|post traumatic stress|post traumatic stress symptoms|post traumatic stress disorder'}, 'stress': {'distress|psychological trauma|stress'}},
+ 'GWAS': {'psychiatric': {'psychiatric|schizophrenia|autism|depression|anxiety|bipolar|mental'},'nicotine': {'nicotine|smoking|chronic obstructive|tobacco'}, 'addiction': {'addiction|cocaine|opioid|morphine|amphetamine|methadone|heroin|drug dependence'}, 'alcohol': {'alcohol'}}}
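
Note on the .onto format: the file is a Python dict literal, and server.py loads it with ast.literal_eval (see the signin hunk below), so the new top-level 'GWAS' category becomes available wherever the ontology is iterated. A minimal loading sketch, assuming the file sits in the working directory; the print loop is for illustration only:

import ast

with open("addiction.onto", "r") as f:
    dict_onto = ast.literal_eval(f.read())

print(list(dict_onto.keys()))  # ..., 'stress', 'GWAS'
for concept, keyword_set in dict_onto['GWAS'].items():
    # each concept maps to a set holding one pipe-delimited keyword string
    keywords = next(iter(keyword_set)).split('|')
    print(concept, keywords)
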
diff --git a/addiction_gwas_ontology.md b/addiction_gwas_ontology.md
new file mode 100644
index 0000000..b02b7e6
--- /dev/null
+++ b/addiction_gwas_ontology.md
@@ -0,0 +1,13 @@
+
+
+GWAS
+	psychiatric
+		schizophrenia; autism; depression; anxiety; bipolar; mental; 
+	nicotine
+		smoking; chronic obstructive; tobacco
+	addiction
+		cocaine; opioid; morphine; amphetamine; methadone; heroin; drug dependence; 
+	alcohol
+
+
+		
diff --git a/addiction_keywords.py b/addiction_keywords.py
index 323fd16..0e813fc 100644
--- a/addiction_keywords.py
+++ b/addiction_keywords.py
@@ -47,3 +47,13 @@ cell_d={'neuron':'adrenergic neurons*|cholinergic neurons*|dopaminergic neurons*
 stress_d={'PTSD':'PTSD|post traumatic stress|post traumatic stress symptoms*|post traumatic stress disorder',
 'stress':'distress|psychological trauma|stress'}
 
+GWAS_d={'psychiatric':'psychiatric|schizophrenia|autism|depression|anxiety|bipolar|mental',
+'nicotine':'nicotine|smoking|chronic obstructive|tobacco',
+'addiction':'addiction|cocaine|opioid|morphine|amphetamine|methadone|heroin|drug dependence',
+'alcohol':'alcohol'}
+
+
+
+
+
+
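
Note on GWAS_d: it follows the same shape as the other *_d dictionaries in addiction_keywords.py, mapping a concept name to a pipe-delimited keyword string. A small sketch of turning one entry into a case-insensitive whole-word pattern; the compile step is illustrative and not part of the project's code:

import re

GWAS_d = {'psychiatric': 'psychiatric|schizophrenia|autism|depression|anxiety|bipolar|mental',
          'nicotine': 'nicotine|smoking|chronic obstructive|tobacco'}

# one whole-word, case-insensitive alternation per concept (sketch only)
patterns = {cat: re.compile(r'\b(?:' + kw + r')\b', flags=re.IGNORECASE)
            for cat, kw in GWAS_d.items()}

print(bool(patterns['psychiatric'].search("Bipolar disorder risk loci")))  # True
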
diff --git a/more_functions.py b/more_functions.py
index 234330b..cb070d9 100755
--- a/more_functions.py
+++ b/more_functions.py
@@ -29,18 +29,25 @@ def getabstracts(gene,query):
         query2 = query+"s*"
     query3 = query2.replace("s|", "s* OR ")
     query4 = query3.replace("|", "s* OR ")
-    query="\"(" + query4 + ") AND " + gene + "\""
+    
+    #query4=query
+    #query="\"(" + query4 + ") AND ((" + gene + "[tiab]) or (" + gene + "[meSH]))\""
+    query="\"(" + query4 + ") AND (" + gene + " [tiab])\""
+    #query = "neurons* AND (penk [tiab])"
     abstracts = os.popen("esearch -db pubmed -query " +  query \
         + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path \
         + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
+    #print(abstracts)
     return(abstracts)
 
 sentences_ls=[]
 def getSentences(gene, sentences_ls):
     out=str()
     # Keep the sentence only if it contains the gene 
+    #print(sentences_ls)
     for sent in sentences_ls:
-        if gene.lower() in sent.lower():
+        #if gene.lower() in sent.lower():
+        if re.search(r'\b'+gene.lower()+r'\b',sent.lower()):
             pmid = sent.split(' ')[0]
             sent = sent.split(' ',1)[1]
             sent=re.sub(r'\b(%s)\b' % gene, r'<strong>\1</strong>', sent, flags=re.I)
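
Two behavioral changes in this hunk: getabstracts() now restricts the gene term to title/abstract with [tiab], and getSentences() keeps a sentence only on a whole-word match instead of a substring test. A sketch of both with hypothetical inputs; the esearch/efetch pipeline itself is unchanged:

import re

gene = "Penk"
# query4 is shown here already expanded with plural wildcards, as the code above produces it
query4 = "rewards* OR reinforcements* OR withdrawals*"
query = "\"(" + query4 + ") AND (" + gene + " [tiab])\""
# -> "(rewards* OR reinforcements* OR withdrawals*) AND (Penk [tiab])"
print(query)

sentences = ["12345 Penk expression increased after morphine.",
             "67890 Penk1 promoter methylation was unchanged."]  # substring hit, but not a whole word

kept = [s for s in sentences if re.search(r'\b' + gene.lower() + r'\b', s.lower())]
print(kept)  # only the first sentence survives the word-boundary test
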
@@ -50,18 +57,27 @@ def getSentences(gene, sentences_ls):
 def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn):
     # e.g. BDNF, addiction_d, undic(addiction_d) "addiction"
     sents=getSentences(gene, abstracts)
+    #print(abstracts)
     out=str()
     if (addiction_flag==1):
         for sent in sents.split("\n"):
             for key in cat_d:
-                if findWholeWord(cat_d[key])(sent) :
+                if key =='s':
+                    key_ad = key+"*"
+                else:
+                    key_ad = key+"s*"
+                if findWholeWord(key_ad)(sent) :
                     sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
-                    sent=re.sub(r'\b(%s)\b' % cat_d[key], r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+                    sent=re.sub(r'\b(%s)\b' % key_ad, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
                     out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
     else:
-        for sent in sents.split("\n"):
-            for key_1 in dictn[cat_d].keys():
-                for key_2 in dictn[cat_d][key_1]:
+        for key_1 in dictn[cat_d].keys():
+            for key_2 in dictn[cat_d][key_1]:
+                if key_2[-1] =='s':
+                    key_2 = key_2+"*"
+                else:
+                    key_2 = key_2+"s*"
+                for sent in sents.split("\n"):
                     if findWholeWord(key_2)(sent) :
                         sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
                         sent=re.sub(r'\b(%s)\b' % key_2, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
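
Note on the plural handling added to gene_category(): each keyword now gets an optional plural suffix before whole-word matching and highlighting ('s*' as a regex, or just '*' when the keyword already ends in s). A sketch of the idea, with findWholeWord written out here as a hypothetical stand-in for the helper defined elsewhere in more_functions.py:

import re

def findWholeWord(word):
    # hypothetical stand-in: case-insensitive whole-word search for `word`
    return re.compile(r'\b({})\b'.format(word), flags=re.IGNORECASE).search

def with_plural(keyword):
    # 'reward' -> 'rewards*' and 'boutons' -> 'boutons*', so both singular and plural forms match
    return keyword + "*" if keyword.endswith("s") else keyword + "s*"

print(bool(findWholeWord(with_plural("reward"))("Sucrose rewards were delivered")))  # True
print(bool(findWholeWord(with_plural("reward"))("The rewarding effect")))            # False
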
diff --git a/server.py b/server.py
index b821799..519eedf 100755
--- a/server.py
+++ b/server.py
@@ -140,10 +140,13 @@ def login():
             session['name'] = found_user.name
             session['id'] = found_user.id
             flash("Login Succesful!")
+            ontoarchive()
+            onto_len_dir = session['onto_len_dir']
+            onto_list = session['onto_list']
         else:
             flash("Invalid username or password!", "inval")
             return render_template('signup.html')
- 
+    print(onto_list)
     return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
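
Note on the login change: ontoarchive() is now called at login so the rendered index page can list the user's archived ontologies from session['onto_len_dir'] and session['onto_list']. The sketch below only illustrates the shape of data the template appears to expect; the directory scan and the helper name are assumptions, not the project's implementation:

import os

def list_user_ontologies(user_dir):
    # Hypothetical helper mirroring what ontoarchive() is assumed to provide:
    # the names of the user's saved .onto files and how many there are.
    names = sorted(f[:-len(".onto")] for f in os.listdir(user_dir) if f.endswith(".onto"))
    return {"onto_list": names, "onto_len_dir": len(names)}
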
 
 
@@ -169,13 +172,15 @@ def signup():
         session['name'] = name
         password = bcrypt.hashpw(password.encode('utf8'), bcrypt.gensalt())
         user = users(name=name, email=email, password = password)       
-
         if found_user:
             session['email'] = found_user.email
             session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest()
             session['id'] = found_user.id
             found_user.name = name
             db.session.commit()
+            ontoarchive()
+            onto_len_dir = session['onto_len_dir']
+            onto_list = session['onto_list']
         else:
             db.session.add(user)
             db.session.commit()
@@ -204,13 +209,16 @@ def signin():
 
         if (found_user and (bcrypt.checkpw(password.encode('utf8'), found_user.password))):
             session['email'] = found_user.email
-            session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest()
+            session['hashed_email'] = hashlib.md5(session['email'].encode('utf-8')).hexdigest()
             session['name'] = found_user.name
             session['id'] = found_user.id
             flash("Login Succesful!")
-            onto_len_dir = 0
-            onto_list = ''
+            #onto_len_dir = 0
+            #onto_list = ''
             onto_cont=open("addiction.onto","r").read()
+            ontoarchive()
+            onto_len_dir = session['onto_len_dir']
+            onto_list = session['onto_list']
             dict_onto=ast.literal_eval(onto_cont)
             return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
         else:
@@ -741,7 +749,6 @@ def progress():
 
     for gen in genes:
         genes_session += str(gen) + "_"
-
     genes_session = genes_session[:-1]
     session['query']=genes
     return render_template('progress.html', url_in="search", url_out="cytoscape/?rnd="+rnd+"&genequery="+genes_session)
@@ -750,12 +757,12 @@ def progress():
 @app.route("/search")
 def search():
     genes=session['query']
-    percent_ratio=len(genes)
+    percent_ratio=len(genes)+1
 
     if(len(genes)==1):
         percent_ratio=2
     timeextension=session['timeextension']
-    percent=round(100/percent_ratio*8,1) # 7 categories + 1 at the beginning
+    percent=round(100/percent_ratio,1)-1 # 7 categories + 1 at the beginning
 
     if ('email' in session):
         sessionpath = session['path_user'] + timeextension
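
Note on the progress math: the denominator is now one tick per gene plus one up-front tick, rather than eight fixed ticks per gene. A worked example with a hypothetical four-gene query:

genes = ["Penk", "Oprm1", "Bdnf", "Drd2"]    # hypothetical query
percent_ratio = len(genes) + 1               # 5
if len(genes) == 1:
    percent_ratio = 2
percent = round(100 / percent_ratio, 1) - 1  # 19.0
# one increment is reported after the abstracts/GWAS setup, then one per gene,
# so the bar ends near (len(genes) + 1) * percent = 95.0
print(percent)
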
@@ -777,7 +784,6 @@ def search():
     nodecolor={}
     nodecolor['GWAS'] = "hsl(0, 0%, 70%)"
     nodes_list = []
-    nodes_list_for_gwas = []
 
     if 'namecat' in session:
         namecat_flag=1
@@ -786,8 +792,6 @@ def search():
         dict_onto=ast.literal_eval(onto_cont)
 
         for ky in dict_onto.keys():
-            for nd_g in dict_onto[ky]:
-                nodes_list_for_gwas.append(nd_g)
             nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dict_onto.keys())))+", 70%, 80%)"
             d["nj{0}".format(n_num)]=generate_nodes_json(dict_onto[ky],str(ky),nodecolor[ky])
             n_num+=1
@@ -802,8 +806,6 @@ def search():
     else:
         namecat_flag=0
         for ky in dictionary.keys():
-            for nd_g in dictionary[ky]:
-                nodes_list_for_gwas.append(nd_g)
             nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dictionary.keys())))+", 70%, 80%)"
             d["nj{0}".format(n_num)]=generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
             n_num+=1
@@ -826,9 +828,8 @@ def search():
             progress=0
             searchCnt=0
             nodesToHide=str()
-            json_edges = str()
-            progress+=percent            
-            genes_or = ' or '.join(genes)
+            json_edges = str()           
+            #genes_or = ' [tiab] or '.join(genes)
             all_d=''
 
             if namecat_flag==1:
@@ -837,7 +838,6 @@ def search():
 
                 for ky in dict_onto.keys():
                     if (ky in search_type):
-                        ls_plural = list(dict_onto[ky].values())
                         all_d_ls=undic(list(dict_onto[ky].values()))
                         all_d = all_d+'|'+all_d_ls
             else:
@@ -846,23 +846,24 @@ def search():
                         all_d_ls=undic(list(dictionary[ky].values()))
                         all_d = all_d+'|'+all_d_ls
             all_d=all_d[1:]
-            abstracts_raw = getabstracts(genes_or,all_d)
+            if ("GWAS" in search_type):
+                datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
             progress+=percent
-            sentences_ls=[]
-
-            for row in abstracts_raw.split("\n"):
-                tiab=row.split("\t")
-                pmid = tiab.pop(0)
-                tiab= " ".join(tiab)
-                sentences_tok = sent_tokenize(tiab)
-                for sent_tok in sentences_tok:
-                    sent_tok = pmid + ' ' + sent_tok
-                    sentences_ls.append(sent_tok)
+            yield "data:"+str(progress)+"\n\n"
             for gene in genes:
+                abstracts_raw = getabstracts(gene,all_d)
+                sentences_ls=[]
+
+                for row in abstracts_raw.split("\n"):
+                    tiab=row.split("\t")
+                    pmid = tiab.pop(0)
+                    tiab= " ".join(tiab)
+                    sentences_tok = sent_tokenize(tiab)
+                    for sent_tok in sentences_tok:
+                        sent_tok = pmid + ' ' + sent_tok
+                        sentences_ls.append(sent_tok)
                 gene=gene.replace("-"," ")
-                # report progress immediately
-                progress+=percent
-                yield "data:"+str(progress)+"\n\n"
+                
                 geneEdges = ""
 
                 if namecat_flag==1:
@@ -872,53 +873,58 @@ def search():
                     dict_onto = dictionary
 
                 for ky in dict_onto.keys():
-                    if (ky=='addiction') and ('addiction' in dict_onto.keys())\
-                        and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
-                        and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
-                        #addiction terms must present with at least one drug
-                        addiction_flag=1
-                        #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
-                        sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
-                        if ('addiction' in search_type):
-                            geneEdges += generate_edges(sent, tf_name)
-                            json_edges += generate_edges_json(sent, tf_name)
-                    else:
-                        addiction_flag=0
-                        if namecat_flag==1:
-                            onto_cont = open(ses_namecat+".onto","r").read()
-                            dict_onto=ast.literal_eval(onto_cont)
-                            #ky_d=undic(list(dict_onto[ky].values()))    
-                            sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
-                            
+                    if (ky in search_type):
+                        if (ky=='addiction') and ('addiction' in dict_onto.keys())\
+                            and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
+                            and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
+                            #addiction terms must present with at least one drug
+                            addiction_flag=1
+                            #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
+                            sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
+                            if ('addiction' in search_type):
+                                geneEdges += generate_edges(sent, tf_name)
+                                json_edges += generate_edges_json(sent, tf_name)
                         else:
-                            ky_d=undic(list(dict_onto[ky].values()))
-                            sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
-                        progress+=percent
-                        yield "data:"+str(progress)+"\n\n"
-                        if (ky in search_type):
+                            addiction_flag=0
+                            if namecat_flag==1:
+                                onto_cont = open(ses_namecat+".onto","r").read()
+                                dict_onto=ast.literal_eval(onto_cont)
+                                #ky_d=undic(list(dict_onto[ky].values()))    
+                                sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
+                            
+                            else:
+                                #ky_d=undic(list(dict_onto[ky].values()))
+                                sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
+                            yield "data:"+str(progress)+"\n\n"
+                            
                             geneEdges += generate_edges(sent, tf_name)
                             json_edges += generate_edges_json(sent, tf_name)                
-                    sentences+=sent
+                        sentences+=sent
                 if ("GWAS" in search_type):
                     gwas_sent=[]
-                    for nd in nodes_list_for_gwas:
-                        gwas_text=''
-                        datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
-                        datf_sub = datf[datf['DISEASE/TRAIT'].str.contains(nd,regex=False, case=False, na=False)
-                            & (datf['REPORTED GENE(S)'].str.contains(gene,regex=False, case=False, na=False)
-                            | (datf['MAPPED_GENE'].str.contains(gene,regex=False, case=False, na=False)))]
-                        
-                        if not datf_sub.empty:
-                            for index, row in datf_sub.iterrows():
-                                gwas_text = "SNP:<b>"+str(row['SNPS'])+"</b>, P value: <b>"+str(row['P-VALUE'])\
-                                    +"</b>, Disease/trait:<b> "+str(row['DISEASE/TRAIT'])+"</b>, Mapped trait:<b> "\
-                                    +str(row['MAPPED_TRAIT'])+"</b><br>"
-                                gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
+                    datf_sub1 = datf[datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
+                                    | (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE))]
+                    for nd2 in dict_onto['GWAS'].keys():
+                        for nd1 in dict_onto['GWAS'][nd2]:    
+                            for nd in nd1.split('|'):
+                                gwas_text=''
+                                datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE)]
+                                    #& (datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
+                                    #| (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)))]
+                                if not datf_sub.empty:
+                                    for index, row in datf_sub.iterrows():
+                                        gwas_text = "SNP:<b>"+str(row['SNPS'])+"</b>, P value: <b>"+str(row['P-VALUE'])\
+                                            +"</b>, Disease/trait:<b> "+str(row['DISEASE/TRAIT'])+"</b>, Mapped trait:<b> "\
+                                            +str(row['MAPPED_TRAIT'])+"</b><br>"
+                                        gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
                     cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user)
                     with open(path_user+"gwas_results.tab", "w") as gwas_edges:
                         gwas_edges.write(sn_file)
                     geneEdges += cys
                     json_edges += gwas_json  
+                # report progress immediately
+                progress+=percent
+                yield "data:"+str(progress)+"\n\n"
                                     
                 if len(geneEdges) >0:
                     edges+=geneEdges
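
Note on the new GWAS matching: the catalog is filtered once per gene on REPORTED GENE(S)/MAPPED_GENE with a whole-token pattern, and each keyword from the ontology's GWAS block then subsets that frame on DISEASE/TRAIT. A self-contained sketch with a stand-in DataFrame in place of ./utility/gwas_used.csv, keeping only the columns used here:

import re
import pandas as pd

datf = pd.DataFrame({
    'REPORTED GENE(S)': ['PENK', 'OPRM1', 'PENK1'],
    'MAPPED_GENE':      ['PENK', 'OPRM1', 'PENK1'],
    'DISEASE/TRAIT':    ['Alcohol dependence', 'Opioid dependence', 'Smoking status'],
})

def token(s):
    # whole-token match: the term must be bounded by whitespace or string edges
    return r'(?:\s|^)' + s + r'(?:\s|$)'

gene = "PENK"
datf_sub1 = datf[datf['REPORTED GENE(S)'].str.contains(token(gene), flags=re.IGNORECASE)
                 | datf['MAPPED_GENE'].str.contains(token(gene), flags=re.IGNORECASE)]

gwas_keywords = {'alcohol': 'alcohol', 'addiction': 'addiction|cocaine|opioid'}
for concept, kws in gwas_keywords.items():
    for nd in kws.split('|'):
        hits = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains(token(nd), flags=re.IGNORECASE)]
        for _, row in hits.iterrows():
            print(gene, concept, nd, row['DISEASE/TRAIT'])  # -> PENK alcohol alcohol Alcohol dependence

Note that the whole-token pattern keeps PENK from matching PENK1, which is the same effect the word-boundary filter has on sentences in more_functions.py.
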