about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: gunturkunhakan, 2021-06-05 14:17:13 -0500
committer: gunturkunhakan, 2021-06-05 14:17:13 -0500
commit: 0f25bb35ac3467418aa17fcccf37e704e3eb8934 (patch)
tree: c7581fc1ec2fc58a7562a7b682a90bf44d4c29b0
parent: 6235fb58955e2bc193d6ce40a4d74b33ab47dfce (diff)
download: genecup-0f25bb35ac3467418aa17fcccf37e704e3eb8934.tar.gz
changes in custom ontology and search
-rw-r--r-- Readme.md 2
-rw-r--r-- addiction.onto 7
-rw-r--r-- addiction_gwas_ontology.md 13
-rw-r--r-- addiction_keywords.py 10
-rwxr-xr-x more_functions.py 30
-rwxr-xr-x server.py 142
6 files changed, 125 insertions, 79 deletions
diff --git a/Readme.md b/Readme.md
index 81afd20..d0ef0b4 100644
--- a/Readme.md
+++ b/Readme.md
@@ -1,6 +1,6 @@
# GeneCup: Mining gene relationships from PubMed using custom ontology
-URL: [http://genecup.org](http://genecup.org)
+URL: [https://genecup.org](https://genecup.org)
GeneCup automatically extracts information from PubMed and NHGRI-EBI GWAS catalog on the relationship of any gene with a custom list of keywords hierarchically organized into an ontology. The users create an ontology by identifying categories of concepts and a list of keywords for each concept.
diff --git a/addiction.onto b/addiction.onto
index ccfbf4a..c996e23 100644
--- a/addiction.onto
+++ b/addiction.onto
@@ -1,7 +1,8 @@
-{'addiction': {'addiction': {'addiction|addictive|compulsive|drug abuse|escalation|punishment'}, 'aversion': {'aversion|aversive|conditioned taste aversion|CTA'}, 'dependence': {'dependence'}, 'intoxication': {'binge|intoxication'}, 'relapse': {'craving|drug seeking|reinstatement|relapse|seeking'}, 'reward': {'conditioned place preference|CPP|drug reinforced|hedonic|ICSS|incentive|instrumental response|intracranial self stimulation|operant|reinforcement|reinforcing|reward|self administered|self administration'}, 'sensitization': {'behavioral sensitization|locomotor sensitization|drug sensitization|incentive sensitization'}, 'withdrawal': {'withdrawal'}},
- 'brain': {'accumbens': {'acbc|acbs|accumbal|accumbens|core|Nacc|NacSh|shell'}, 'amygdala': {'amy|amygdala|bla|cea|cna'}, 'cortex': {'cerebral|cingulate|cortex|cortico limbic|corticolimbic|corticostriatal|infralimbic|insular|mPFC|orbitofrontal|pfc|prefrontal|prelimbic|prl|vmpfc'}, 'habenula': {'habenula|lhb|mhb'}, 'hippocampus': {'ca1|ca3|dentate gyrus|dhpc|hip|hipp|hippocampal|hippocampus|subiculum|vhipp|vhpc'}, 'hypothalamus': {'hypothalamic|hypothalamus|LHA|paraventricular nucleus|PVN'}, 'striatum': {'basal ganglia|caudate|globus pallidus|GPI|putamen|STR|striatal|striatum'}, 'VTA': {'limbic|mesoaccumbal|mesoaccumbens|mesolimbic|midbrain|pvta|ventral tegmental|vta'}},
+{'brain': {'accumbens': {'acbc|acbs|accumbal|accumbens|core|Nacc|NacSh|shell'}, 'amygdala': {'amy|amygdala|bla|cea|cna'}, 'cortex': {'cerebral|cingulate|cortex|cortico limbic|corticolimbic|corticostriatal|infralimbic|insular|mPFC|orbitofrontal|pfc|prefrontal|prelimbic|prl|vmpfc'}, 'habenula': {'habenula|lhb|mhb'}, 'hippocampus': {'ca1|ca3|dentate gyrus|dhpc|hip|hipp|hippocampal|hippocampus|subiculum|vhipp|vhpc'}, 'hypothalamus': {'hypothalamic|hypothalamus|LHA|paraventricular nucleus|PVN'}, 'striatum': {'basal ganglia|caudate|globus pallidus|GPI|putamen|STR|striatal|striatum'}, 'VTA': {'limbic|mesoaccumbal|mesoaccumbens|mesolimbic|midbrain|pvta|ventral tegmental|vta'}},
+ 'addiction': {'addiction': {'addiction|addictive|compulsive|drug abuse|escalation|punishment'}, 'aversion': {'aversion|aversive|conditioned taste aversion|CTA'}, 'dependence': {'dependence'}, 'intoxication': {'binge|intoxication'}, 'relapse': {'craving|drug seeking|reinstatement|relapse|seeking'}, 'reward': {'conditioned place preference|CPP|drug reinforced|hedonic|ICSS|incentive|instrumental response|intracranial self stimulation|operant|reinforcement|reinforcing|reward|self administered|self administration'}, 'sensitization': {'behavioral sensitization|locomotor sensitization|drug sensitization|incentive sensitization'}, 'withdrawal': {'withdrawal'}},
'drug': {'alcohol': {'acamprosate|alcohol|alcoholics|alcoholism|antabuse|campral|disulfiram|ethanol|naltrexone|revia|vivitrol'}, 'amphetamine': {'AMPH|amphetamine|METH|methamphetamine'}, 'benzodiazepine': {'adinazolam|alprazolam|benzodiazepine|benzos|brotizolam|chlordiazepoxide|climazolam|clobazam|clonazepam|clorazepate|diazepam|estazolam|flunitrazepam|flurazepam|halazepam|librium|loprazolam|lorazepam|lormetazepam|midazolam|nimetazepam|nitrazepam|normison|oxazepam|prazepam|temazepam|triazolam|valium|xanax'}, 'cannabinoid': {'acylethanolamines|cannabichromene|cannabidiol|cannabigerol|cannabinoids|cannabinol|cannabis|cannabivarin|cesamet|drobinal|dronabinol|endocannabinoids|epidiolex|JWH 018|JWH 122|JWH 250|marijuana|marinol|nabilone|Oleoylethanolamide|palmitoylethanolamide|phytocannabinoid|rimonabant|SR141716|SR144528|syndros|tetrahydrocannabinol|tetrahydrocannabivarin|thc|thc 9'}, 'cocaine': {'cocaine'}, 'nicotine': {'nicotine|smokers|smoking|tobacco'}, 'opioid': {'buprenorphine|codeine|fentanyl|heroin|hycodan|hydrocodone|hydromorphone|kadian|kratom|methadone|morphine|naloxone|opioids|oxycodone|oxycontin|percocet|suboxone|tramadol|ultram|vicodin'}, 'psychedelics': {'ayahuasca|ecstasy|ibogaine|ketamine|LSD|lysergic acid diethylamide|MDMA|mescaline|methylenedioxymethamphetamine|N methoxybenzyl|NBOMes|peyote|psilocybin|psychedelic|psychedelics'}},
'function': {'neuroplasticity': {'boutons|epsc|epsp|IPSC|IPSP|long term depression|long term potentiation|LTD|LTP|mIPSC|neurite|neurogenesis|neuroplasticity|plasticity|synaptic'}, 'neurotransmission': {'5 ht|acetylcholine|cholinergic|DAergic|dopamine|dopaminergic|GABA|GABAergic|glutamate|glutamatergic|muscarinic|neuropeptides|neurotransmission|nicotinic|serotonergic|serotonin'}, 'signalling': {'glycosylation|phosphorylation|signaling|signalling|kinase|binding|signal transduction|second messengers|cGMP|cAMP'}, 'transcription': {'histone|hypermethylation|hypomethylation|methylation|ribosome|transcription'}},
'psychiatric': {'anxiety': {'anxiety|anxious'}, 'autism': {'autism|autistic'}, 'bipolar': {'bipolar disorder'}, 'compulsive': {'compulsive|obsessive'}, 'depression': {'depression|depressive|major depressive disorder|MDD'}, 'impulsivity': {'5 CSRTT|5 choice task|delay discounting|delay exposure|delay intolerance|delayed reward|delay task|five choice serial reaction time task|impulsive|impulsivity|premature responding'}, 'schizophrenia': {'schizophrenia'}},
'cell': {'neuron': {'adrenergic neurons|cholinergic neurons|dopaminergic neurons|gabaergic neurons|glutamatergic neurons|GnRH neurons|interneurons|monoaminergic neurons|medium spiny neurons|motor neurons|neuronal cells|nitrergic neurons|noradrenergic neurons|projection neurons|pyramidal neurons|sensory neurons|serotonergic neurons|somatostatin neurons|neurons|excitatory neurons|inhibitory neurons|corticospinal neurons|dopamine neurons|D1 neurons|D2 neurons|afferent neurons|efferent neurons|serotonin neurons|cortical neurons|hippocampal neurons|DA neurons|CNS neurons|cortex neurons|mesencephalic neurons|orexin neurons|catecholaminergic neurons|striatal neurons|bipolar neurons|ganglion cells|RGC|horizontal cells|amacrine cells'}, 'astrocyte': {'astrocytic|astrocytes|astroglia|astroglial'}, 'microglia': {'microglia|microglial'}, 'endothelium': {'endothelium|endothelial cells'}, 'oligodendrocyte': {'oligodendrocytes'}},
- 'stress': {'PTSD': {'PTSD|post traumatic stress|post traumatic stress symptoms|post traumatic stress disorder'}, 'stress': {'distress|psychological trauma|stress'}}} \ No newline at end of file
+ 'stress': {'PTSD': {'PTSD|post traumatic stress|post traumatic stress symptoms|post traumatic stress disorder'}, 'stress': {'distress|psychological trauma|stress'}},
+ 'GWAS': {'psychiatric': {'psychiatric|schizophrenia|autism|depression|anxiety|bipolar|mental'},'nicotine': {'nicotine|smoking|chronic obstructive|tobacco'}, 'addiction': {'addiction|cocaine|opioid|morphine|amphetamine|methadone|heroin|drug dependence'}, 'alcohol': {'alcohol'}}}
diff --git a/addiction_gwas_ontology.md b/addiction_gwas_ontology.md
new file mode 100644
index 0000000..b02b7e6
--- /dev/null
+++ b/addiction_gwas_ontology.md
@@ -0,0 +1,13 @@
+
+
+GWAS
+ psychiatric
+ schizophrenia; autism; depression; anxiety; bipolar; mental;
+ nicotine
+ smoking; chronic obstructive; tobacco
+ addiction
+ cocaine; opioid; morphine; amphetamine; methadone; heroin; drug dependence;
+ alcohol
+
+
+
diff --git a/addiction_keywords.py b/addiction_keywords.py
index 323fd16..0e813fc 100644
--- a/addiction_keywords.py
+++ b/addiction_keywords.py
@@ -47,3 +47,13 @@ cell_d={'neuron':'adrenergic neurons*|cholinergic neurons*|dopaminergic neurons*
stress_d={'PTSD':'PTSD|post traumatic stress|post traumatic stress symptoms*|post traumatic stress disorder',
'stress':'distress|psychological trauma|stress'}
+GWAS_d={'psychiatric':'psychiatric|schizophrenia|autism|depression|anxiety|bipolar|mental',
+'nicotine':'nicotine|smoking|chronic obstructive|tobacco',
+'addiction':'addiction|cocaine|opioid|morphine|amphetamine|methadone|heroin|drug dependence',
+'alcohol':'alcohol'}
+
+
+
+
+
+
diff --git a/more_functions.py b/more_functions.py
index 234330b..cb070d9 100755
--- a/more_functions.py
+++ b/more_functions.py
@@ -29,18 +29,25 @@ def getabstracts(gene,query):
query2 = query+"s*"
query3 = query2.replace("s|", "s* OR ")
query4 = query3.replace("|", "s* OR ")
- query="\"(" + query4 + ") AND " + gene + "\""
+
+ #query4=query
+ #query="\"(" + query4 + ") AND ((" + gene + "[tiab]) or (" + gene + "[meSH]))\""
+ query="\"(" + query4 + ") AND (" + gene + " [tiab])\""
+ #query = "neurons* AND (penk [tiab])"
abstracts = os.popen("esearch -db pubmed -query " + query \
+ " | efetch -format uid |fetch-pubmed -path "+ pubmed_path \
+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
+ #print(abstracts)
return(abstracts)
sentences_ls=[]
def getSentences(gene, sentences_ls):
out=str()
# Keep the sentence only if it contains the gene
+ #print(sentences_ls)
for sent in sentences_ls:
- if gene.lower() in sent.lower():
+ #if gene.lower() in sent.lower():
+ if re.search(r'\b'+gene.lower()+r'\b',sent.lower()):
pmid = sent.split(' ')[0]
sent = sent.split(' ',1)[1]
sent=re.sub(r'\b(%s)\b' % gene, r'<strong>\1</strong>', sent, flags=re.I)
@@ -50,18 +57,27 @@ def getSentences(gene, sentences_ls):
def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn):
# e.g. BDNF, addiction_d, undic(addiction_d) "addiction"
sents=getSentences(gene, abstracts)
+ #print(abstracts)
out=str()
if (addiction_flag==1):
for sent in sents.split("\n"):
for key in cat_d:
- if findWholeWord(cat_d[key])(sent) :
+ if key =='s':
+ key_ad = key+"*"
+ else:
+ key_ad = key+"s*"
+ if findWholeWord(key_ad)(sent) :
sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
- sent=re.sub(r'\b(%s)\b' % cat_d[key], r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+ sent=re.sub(r'\b(%s)\b' % key_ad, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
else:
- for sent in sents.split("\n"):
- for key_1 in dictn[cat_d].keys():
- for key_2 in dictn[cat_d][key_1]:
+ for key_1 in dictn[cat_d].keys():
+ for key_2 in dictn[cat_d][key_1]:
+ if key_2[-1] =='s':
+ key_2 = key_2+"*"
+ else:
+ key_2 = key_2+"s*"
+ for sent in sents.split("\n"):
if findWholeWord(key_2)(sent) :
sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
sent=re.sub(r'\b(%s)\b' % key_2, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
diff --git a/server.py b/server.py
index b821799..519eedf 100755
--- a/server.py
+++ b/server.py
@@ -140,10 +140,13 @@ def login():
session['name'] = found_user.name
session['id'] = found_user.id
flash("Login Succesful!")
+ ontoarchive()
+ onto_len_dir = session['onto_len_dir']
+ onto_list = session['onto_list']
else:
flash("Invalid username or password!", "inval")
return render_template('signup.html')
-
+ print(onto_list)
return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
@@ -169,13 +172,15 @@ def signup():
session['name'] = name
password = bcrypt.hashpw(password.encode('utf8'), bcrypt.gensalt())
user = users(name=name, email=email, password = password)
-
if found_user:
session['email'] = found_user.email
session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest()
session['id'] = found_user.id
found_user.name = name
db.session.commit()
+ ontoarchive()
+ onto_len_dir = session['onto_len_dir']
+ onto_list = session['onto_list']
else:
db.session.add(user)
db.session.commit()
@@ -204,13 +209,16 @@ def signin():
if (found_user and (bcrypt.checkpw(password.encode('utf8'), found_user.password))):
session['email'] = found_user.email
- session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest()
+ session['hashed_email'] = hashlib.md5(session['email'].encode('utf-8')).hexdigest()
session['name'] = found_user.name
session['id'] = found_user.id
flash("Login Succesful!")
- onto_len_dir = 0
- onto_list = ''
+ #onto_len_dir = 0
+ #onto_list = ''
onto_cont=open("addiction.onto","r").read()
+ ontoarchive()
+ onto_len_dir = session['onto_len_dir']
+ onto_list = session['onto_list']
dict_onto=ast.literal_eval(onto_cont)
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
else:
@@ -741,7 +749,6 @@ def progress():
for gen in genes:
genes_session += str(gen) + "_"
-
genes_session = genes_session[:-1]
session['query']=genes
return render_template('progress.html', url_in="search", url_out="cytoscape/?rnd="+rnd+"&genequery="+genes_session)
@@ -750,12 +757,12 @@ def progress():
@app.route("/search")
def search():
genes=session['query']
- percent_ratio=len(genes)
+ percent_ratio=len(genes)+1
if(len(genes)==1):
percent_ratio=2
timeextension=session['timeextension']
- percent=round(100/percent_ratio*8,1) # 7 categories + 1 at the beginning
+ percent=round(100/percent_ratio,1)-1 # 7 categories + 1 at the beginning
if ('email' in session):
sessionpath = session['path_user'] + timeextension
@@ -777,7 +784,6 @@ def search():
nodecolor={}
nodecolor['GWAS'] = "hsl(0, 0%, 70%)"
nodes_list = []
- nodes_list_for_gwas = []
if 'namecat' in session:
namecat_flag=1
@@ -786,8 +792,6 @@ def search():
dict_onto=ast.literal_eval(onto_cont)
for ky in dict_onto.keys():
- for nd_g in dict_onto[ky]:
- nodes_list_for_gwas.append(nd_g)
nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dict_onto.keys())))+", 70%, 80%)"
d["nj{0}".format(n_num)]=generate_nodes_json(dict_onto[ky],str(ky),nodecolor[ky])
n_num+=1
@@ -802,8 +806,6 @@ def search():
else:
namecat_flag=0
for ky in dictionary.keys():
- for nd_g in dictionary[ky]:
- nodes_list_for_gwas.append(nd_g)
nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dictionary.keys())))+", 70%, 80%)"
d["nj{0}".format(n_num)]=generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
n_num+=1
@@ -826,9 +828,8 @@ def search():
progress=0
searchCnt=0
nodesToHide=str()
- json_edges = str()
- progress+=percent
- genes_or = ' or '.join(genes)
+ json_edges = str()
+ #genes_or = ' [tiab] or '.join(genes)
all_d=''
if namecat_flag==1:
@@ -837,7 +838,6 @@ def search():
for ky in dict_onto.keys():
if (ky in search_type):
- ls_plural = list(dict_onto[ky].values())
all_d_ls=undic(list(dict_onto[ky].values()))
all_d = all_d+'|'+all_d_ls
else:
@@ -846,23 +846,24 @@ def search():
all_d_ls=undic(list(dictionary[ky].values()))
all_d = all_d+'|'+all_d_ls
all_d=all_d[1:]
- abstracts_raw = getabstracts(genes_or,all_d)
+ if ("GWAS" in search_type):
+ datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
progress+=percent
- sentences_ls=[]
-
- for row in abstracts_raw.split("\n"):
- tiab=row.split("\t")
- pmid = tiab.pop(0)
- tiab= " ".join(tiab)
- sentences_tok = sent_tokenize(tiab)
- for sent_tok in sentences_tok:
- sent_tok = pmid + ' ' + sent_tok
- sentences_ls.append(sent_tok)
+ yield "data:"+str(progress)+"\n\n"
for gene in genes:
+ abstracts_raw = getabstracts(gene,all_d)
+ sentences_ls=[]
+
+ for row in abstracts_raw.split("\n"):
+ tiab=row.split("\t")
+ pmid = tiab.pop(0)
+ tiab= " ".join(tiab)
+ sentences_tok = sent_tokenize(tiab)
+ for sent_tok in sentences_tok:
+ sent_tok = pmid + ' ' + sent_tok
+ sentences_ls.append(sent_tok)
gene=gene.replace("-"," ")
- # report progress immediately
- progress+=percent
- yield "data:"+str(progress)+"\n\n"
+
geneEdges = ""
if namecat_flag==1:
@@ -872,53 +873,58 @@ def search():
dict_onto = dictionary
for ky in dict_onto.keys():
- if (ky=='addiction') and ('addiction' in dict_onto.keys())\
- and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
- and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
- #addiction terms must present with at least one drug
- addiction_flag=1
- #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
- sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
- if ('addiction' in search_type):
- geneEdges += generate_edges(sent, tf_name)
- json_edges += generate_edges_json(sent, tf_name)
- else:
- addiction_flag=0
- if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
- #ky_d=undic(list(dict_onto[ky].values()))
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
-
+ if (ky in search_type):
+ if (ky=='addiction') and ('addiction' in dict_onto.keys())\
+ and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
+ and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
+ #addiction terms must present with at least one drug
+ addiction_flag=1
+ #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
+ sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
+ if ('addiction' in search_type):
+ geneEdges += generate_edges(sent, tf_name)
+ json_edges += generate_edges_json(sent, tf_name)
else:
- ky_d=undic(list(dict_onto[ky].values()))
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
- progress+=percent
- yield "data:"+str(progress)+"\n\n"
- if (ky in search_type):
+ addiction_flag=0
+ if namecat_flag==1:
+ onto_cont = open(ses_namecat+".onto","r").read()
+ dict_onto=ast.literal_eval(onto_cont)
+ #ky_d=undic(list(dict_onto[ky].values()))
+ sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
+
+ else:
+ #ky_d=undic(list(dict_onto[ky].values()))
+ sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
+ yield "data:"+str(progress)+"\n\n"
+
geneEdges += generate_edges(sent, tf_name)
json_edges += generate_edges_json(sent, tf_name)
- sentences+=sent
+ sentences+=sent
if ("GWAS" in search_type):
gwas_sent=[]
- for nd in nodes_list_for_gwas:
- gwas_text=''
- datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
- datf_sub = datf[datf['DISEASE/TRAIT'].str.contains(nd,regex=False, case=False, na=False)
- & (datf['REPORTED GENE(S)'].str.contains(gene,regex=False, case=False, na=False)
- | (datf['MAPPED_GENE'].str.contains(gene,regex=False, case=False, na=False)))]
-
- if not datf_sub.empty:
- for index, row in datf_sub.iterrows():
- gwas_text = "SNP:<b>"+str(row['SNPS'])+"</b>, P value: <b>"+str(row['P-VALUE'])\
- +"</b>, Disease/trait:<b> "+str(row['DISEASE/TRAIT'])+"</b>, Mapped trait:<b> "\
- +str(row['MAPPED_TRAIT'])+"</b><br>"
- gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
+ datf_sub1 = datf[datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
+ | (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE))]
+ for nd2 in dict_onto['GWAS'].keys():
+ for nd1 in dict_onto['GWAS'][nd2]:
+ for nd in nd1.split('|'):
+ gwas_text=''
+ datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE)]
+ #& (datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
+ #| (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)))]
+ if not datf_sub.empty:
+ for index, row in datf_sub.iterrows():
+ gwas_text = "SNP:<b>"+str(row['SNPS'])+"</b>, P value: <b>"+str(row['P-VALUE'])\
+ +"</b>, Disease/trait:<b> "+str(row['DISEASE/TRAIT'])+"</b>, Mapped trait:<b> "\
+ +str(row['MAPPED_TRAIT'])+"</b><br>"
+ gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user)
with open(path_user+"gwas_results.tab", "w") as gwas_edges:
gwas_edges.write(sn_file)
geneEdges += cys
json_edges += gwas_json
+ # report progress immediately
+ progress+=percent
+ yield "data:"+str(progress)+"\n\n"
if len(geneEdges) >0:
edges+=geneEdges