From b9f3baf7b8e71b371e27049fb891bae61d33457d Mon Sep 17 00:00:00 2001 From: gunturkunhakan Date: Tue, 8 Jun 2021 14:58:12 -0500 Subject: fixed progress bar and corrected gwas sentences --- server.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index 519eedf..142aaa9 100755 --- a/server.py +++ b/server.py @@ -146,7 +146,6 @@ def login(): else: flash("Invalid username or password!", "inval") return render_template('signup.html') - print(onto_list) return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto) @@ -762,7 +761,7 @@ def search(): if(len(genes)==1): percent_ratio=2 timeextension=session['timeextension'] - percent=round(100/percent_ratio,1)-1 # 7 categories + 1 at the beginning + percent=100/percent_ratio-0.00000001 # 7 categories + 1 at the beginning if ('email' in session): sessionpath = session['path_user'] + timeextension @@ -918,7 +917,7 @@ def search(): +str(row['MAPPED_TRAIT'])+"
" gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text) cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user) - with open(path_user+"gwas_results.tab", "w") as gwas_edges: + with open(path_user+"gwas_results.tab", "a") as gwas_edges: gwas_edges.write(sn_file) geneEdges += cys json_edges += gwas_json -- cgit v1.2.3 From e4c91a690078535deb3f55cee3e20dcb229dd267 Mon Sep 17 00:00:00 2001 From: gunturkunhakan Date: Tue, 8 Jun 2021 14:59:07 -0500 Subject: update the file --- requirements.md | 3 +-- requirements.txt | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.md b/requirements.md index 7b90aec..8824016 100644 --- a/requirements.md +++ b/requirements.md @@ -47,5 +47,4 @@ * tensorflow-estimator==2.4.0 -* python==3.8.5 - +* python==3.8.5 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c2ba0ba --- /dev/null +++ b/requirements.txt @@ -0,0 +1,33 @@ +pandas==1.2.1 +bcrypt==3.1.7 +cffi==1.13.2 +pycparser==2.19 +Flask-SQLAlchemy==2.4.4 +Flask==1.1.2 +Click==7.0 +itsdangerous==1.1.0 +Jinja2==2.11.3 +MarkupSafe==1.0 +Werkzeug==1.0.0 +SQLAlchemy==1.3.23 +Keras==2.4.3 +h5py==2.10.0 +numpy==1.19.5 +six==1.15.0 +Keras-Preprocessing==1.1.2 +PyYAML==5.3.1 +scipy==1.6.0 +nltk==3.5 +regex==2020.11.13 +tensorflow==2.4.1 +absl-py==0.11.0 +astunparse==1.6.3 +gast==0.3.3 +grpcio==1.32.0 +protobuf==3.14.0 +tensorboard==2.4.1 +Markdown==3.3.3 +Werkzeug==1.0.1 +wheel==0.36.2 +tensorflow-estimator==2.4.0 +python==3.8.5 -- cgit v1.2.3 From eec87541da723543cfafe6f5b7cff1ec5774ba2e Mon Sep 17 00:00:00 2001 From: gunturkunhakan Date: Wed, 30 Jun 2021 23:23:58 -0500 Subject: fixed a bug in search --- more_functions.py | 32 ++++++++++++++++---------------- server.py | 4 +++- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/more_functions.py b/more_functions.py index cb070d9..21d6cbc 100755 --- a/more_functions.py +++ b/more_functions.py @@ -23,21 +23,10 @@ def findWholeWord(w): return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search def getabstracts(gene,query): - if query[-1] =='s': - query2 = query+"*" - else: - query2 = query+"s*" - query3 = query2.replace("s|", "s* OR ") - query4 = query3.replace("|", "s* OR ") - - #query4=query - #query="\"(" + query4 + ") AND ((" + gene + "[tiab]) or (" + gene + "[meSH]))\"" - query="\"(" + query4 + ") AND (" + gene + " [tiab])\"" - #query = "neurons* AND (penk [tiab])" + query="\"(" + query + ") AND (" + gene + " [tiab])\"" abstracts = os.popen("esearch -db pubmed -query " + query \ + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path \ + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read() - #print(abstracts) return(abstracts) sentences_ls=[] @@ -57,6 +46,7 @@ def getSentences(gene, sentences_ls): def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn): # e.g. BDNF, addiction_d, undic(addiction_d) "addiction" sents=getSentences(gene, abstracts) + #print(sents) #print(abstracts) out=str() if (addiction_flag==1): @@ -66,7 +56,12 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn): key_ad = key+"*" else: key_ad = key+"s*" - if findWholeWord(key_ad)(sent) : + key_ad = key_ad.replace("s|", "s*|") + key_ad = key_ad.replace("|", "s*|") + key_ad = key_ad.replace("s*s*", "s*") + #if findWholeWord(key_ad)(sent) : + re_find = re.compile(r'\b{}\b'.format(key_ad), re.IGNORECASE) + if re_find.findall(sent): sent=sent.replace("","").replace("","") # remove other highlights sent=re.sub(r'\b(%s)\b' % key_ad, r'\1', sent, flags=re.I) # highlight keyword out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n" @@ -77,11 +72,17 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn): key_2 = key_2+"*" else: key_2 = key_2+"s*" + key_2 = key_2.replace("s|", "s*|") + key_2 = key_2.replace("|", "s*|") + key_2 = key_2.replace("s*s*", "s*") for sent in sents.split("\n"): - if findWholeWord(key_2)(sent) : + re_find = re.compile(r'\b{}\b'.format(key_2), re.IGNORECASE) + #if findWholeWord(key_2)(sent) : + #if re.compile(r'\b(%s)\b' %key_2,sent, re.IGNORECASE): + if re_find.findall(sent): sent=sent.replace("","").replace("","") # remove other highlights sent=re.sub(r'\b(%s)\b' % key_2, r'\1', sent, flags=re.I) # highlight keyword - out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n" + out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n" return(out) def generate_nodes(nodes_d, nodetype,nodecolor): @@ -134,7 +135,6 @@ def generate_edges_json(data, filename): elif (edgeID not in edgeCnts) and (pmid+target not in pmid_list): edgeCnts[edgeID]=1 pmid_list.append(pmid+target) - for edgeID in edgeCnts: (filename, source,target)=edgeID.split("|") edges_json0+="{ \"id\": \"" + edgeID + "\", \"source\": \"" + source + "\", \"target\": \"" + target + "\", \"sentCnt\": \"" + str(edgeCnts[edgeID]) + "\", \"url\":\"/sentences?edgeID=" + edgeID + "\" },\n" diff --git a/server.py b/server.py index 142aaa9..1d5f2ce 100755 --- a/server.py +++ b/server.py @@ -851,6 +851,7 @@ def search(): yield "data:"+str(progress)+"\n\n" for gene in genes: abstracts_raw = getabstracts(gene,all_d) + #print(abstracts_raw) sentences_ls=[] for row in abstracts_raw.split("\n"): @@ -890,10 +891,11 @@ def search(): dict_onto=ast.literal_eval(onto_cont) #ky_d=undic(list(dict_onto[ky].values())) sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto) - else: #ky_d=undic(list(dict_onto[ky].values())) + #print(sentences_ls) sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto) + #print(sent) yield "data:"+str(progress)+"\n\n" geneEdges += generate_edges(sent, tf_name) -- cgit v1.2.3 From c08a7a39a6703422e94c6217fd8b19403f6010ab Mon Sep 17 00:00:00 2001 From: gunturkunhakan Date: Thu, 1 Jul 2021 22:03:04 -0500 Subject: fixed a bug --- more_functions.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/more_functions.py b/more_functions.py index 21d6cbc..7c21c2a 100755 --- a/more_functions.py +++ b/more_functions.py @@ -59,12 +59,13 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn): key_ad = key_ad.replace("s|", "s*|") key_ad = key_ad.replace("|", "s*|") key_ad = key_ad.replace("s*s*", "s*") - #if findWholeWord(key_ad)(sent) : - re_find = re.compile(r'\b{}\b'.format(key_ad), re.IGNORECASE) - if re_find.findall(sent): - sent=sent.replace("","").replace("","") # remove other highlights - sent=re.sub(r'\b(%s)\b' % key_ad, r'\1', sent, flags=re.I) # highlight keyword - out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n" + key_ad_ls = key_ad.split('|') + for key_ad in key_ad_ls: + re_find = re.compile(r'\b{}\b'.format(key_ad), re.IGNORECASE) + if re_find.findall(sent): + sent=sent.replace("","").replace("","") # remove other highlights + sent=re.sub(r'\b(%s)\b' % key_ad, r'\1', sent, flags=re.I) # highlight keyword + out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n" else: for key_1 in dictn[cat_d].keys(): for key_2 in dictn[cat_d][key_1]: @@ -75,14 +76,14 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn): key_2 = key_2.replace("s|", "s*|") key_2 = key_2.replace("|", "s*|") key_2 = key_2.replace("s*s*", "s*") + key_2_ls = key_2.split('|') for sent in sents.split("\n"): - re_find = re.compile(r'\b{}\b'.format(key_2), re.IGNORECASE) - #if findWholeWord(key_2)(sent) : - #if re.compile(r'\b(%s)\b' %key_2,sent, re.IGNORECASE): - if re_find.findall(sent): - sent=sent.replace("","").replace("","") # remove other highlights - sent=re.sub(r'\b(%s)\b' % key_2, r'\1', sent, flags=re.I) # highlight keyword - out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n" + for key_2 in key_2_ls: + re_find = re.compile(r'\b{}\b'.format(key_2), re.IGNORECASE) + if re_find.findall(sent): + sent=sent.replace("","").replace("","") # remove other highlights + sent=re.sub(r'\b(%s)\b' % key_2, r'\1', sent, flags=re.I) # highlight keyword + out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n" return(out) def generate_nodes(nodes_d, nodetype,nodecolor): -- cgit v1.2.3 From 07646071d7bac3547c686fe189487e06e9947d9e Mon Sep 17 00:00:00 2001 From: gunturkunhakan Date: Thu, 1 Jul 2021 22:19:19 -0500 Subject: fixed a bug --- server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 1d5f2ce..33f3bb0 100755 --- a/server.py +++ b/server.py @@ -569,7 +569,11 @@ def ontoarchive(): if ('email' in session): if os.path.exists(datadir+"/user/"+str(session['hashed_email'])+"/ontology") == False: flash("Ontology history doesn't exist!") - return render_template('index.html',onto_len_dir=session['onto_len_dir'], onto_list=session['onto_list']) + onto_len_dir = 0 + onto_list = '' + onto_cont=open("addiction.onto","r").read() + dict_onto=ast.literal_eval(onto_cont) + return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto) else: session['user_folder'] = datadir+"/user/"+str(session['hashed_email']) else: -- cgit v1.2.3