diff options
author | chen42 | 2021-08-18 08:50:52 -0500 |
---|---|---|
committer | chen42 | 2021-08-18 08:50:52 -0500 |
commit | 647f8ee0d6c60e578f6d4eb17e942f4ff76c5166 (patch) | |
tree | 67f53d0533bc4a0a7dd66d21c1c3f89bf8c28226 /more_functions.py | |
parent | ad86682a01d03587cc164cbbf58026c3c55f6d0d (diff) | |
parent | 07646071d7bac3547c686fe189487e06e9947d9e (diff) | |
download | genecup-647f8ee0d6c60e578f6d4eb17e942f4ff76c5166.tar.gz |
Merge branch 'master' of https://github.com/hakangunturkun/GeneCup
Diffstat (limited to 'more_functions.py')
-rwxr-xr-x | more_functions.py | 43 |
1 files changed, 22 insertions, 21 deletions
```diff
diff --git a/more_functions.py b/more_functions.py
index cb070d9..7c21c2a 100755
--- a/more_functions.py
+++ b/more_functions.py
@@ -23,21 +23,10 @@ def findWholeWord(w):
     return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search
 
 def getabstracts(gene,query):
-    if query[-1] =='s':
-        query2 = query+"*"
-    else:
-        query2 = query+"s*"
-    query3 = query2.replace("s|", "s* OR ")
-    query4 = query3.replace("|", "s* OR ")
-
-    #query4=query
-    #query="\"(" + query4 + ") AND ((" + gene + "[tiab]) or (" + gene + "[meSH]))\""
-    query="\"(" + query4 + ") AND (" + gene + " [tiab])\""
-    #query = "neurons* AND (penk [tiab])"
+    query="\"(" + query + ") AND (" + gene + " [tiab])\""
     abstracts = os.popen("esearch -db pubmed -query " + query \
         + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path \
         + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
-    #print(abstracts)
     return(abstracts)
 
 sentences_ls=[]
@@ -57,6 +46,7 @@ def getSentences(gene, sentences_ls):
 def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn):
     # e.g. BDNF, addiction_d, undic(addiction_d) "addiction"
     sents=getSentences(gene, abstracts)
+    #print(sents)
     #print(abstracts)
     out=str()
     if (addiction_flag==1):
@@ -66,10 +56,16 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn):
                     key_ad = key+"*"
                 else:
                     key_ad = key+"s*"
-                if findWholeWord(key_ad)(sent) :
-                    sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
-                    sent=re.sub(r'\b(%s)\b' % key_ad, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
-                    out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
+                key_ad = key_ad.replace("s|", "s*|")
+                key_ad = key_ad.replace("|", "s*|")
+                key_ad = key_ad.replace("s*s*", "s*")
+                key_ad_ls = key_ad.split('|')
+                for key_ad in key_ad_ls:
+                    re_find = re.compile(r'\b{}\b'.format(key_ad), re.IGNORECASE)
+                    if re_find.findall(sent):
+                        sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
+                        sent=re.sub(r'\b(%s)\b' % key_ad, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+                        out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
     else:
         for key_1 in dictn[cat_d].keys():
             for key_2 in dictn[cat_d][key_1]:
@@ -77,11 +73,17 @@ def gene_category(gene, cat_d, cat, abstracts,addiction_flag,dictn):
                     key_2 = key_2+"*"
                 else:
                     key_2 = key_2+"s*"
+                key_2 = key_2.replace("s|", "s*|")
+                key_2 = key_2.replace("|", "s*|")
+                key_2 = key_2.replace("s*s*", "s*")
+                key_2_ls = key_2.split('|')
                 for sent in sents.split("\n"):
-                    if findWholeWord(key_2)(sent) :
-                        sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
-                        sent=re.sub(r'\b(%s)\b' % key_2, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
-                        out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n"
+                    for key_2 in key_2_ls:
+                        re_find = re.compile(r'\b{}\b'.format(key_2), re.IGNORECASE)
+                        if re_find.findall(sent):
+                            sent=sent.replace("<b>","").replace("</b>","") # remove other highlights
+                            sent=re.sub(r'\b(%s)\b' % key_2, r'<b>\1</b>', sent, flags=re.I) # highlight keyword
+                            out+=gene+"\t"+ cat + "\t"+key_1+"\t"+sent+"\n"
     return(out)
 
 def generate_nodes(nodes_d, nodetype,nodecolor):
@@ -134,7 +136,6 @@ def generate_edges_json(data, filename):
         elif (edgeID not in edgeCnts) and (pmid+target not in pmid_list):
             edgeCnts[edgeID]=1
             pmid_list.append(pmid+target)
-
     for edgeID in edgeCnts:
         (filename, source,target)=edgeID.split("|")
         edges_json0+="{ \"id\": \"" + edgeID + "\", \"source\": \"" + source + "\", \"target\": \"" + target + "\", \"sentCnt\": \"" + str(edgeCnts[edgeID]) + "\", \"url\":\"/sentences?edgeID=" + edgeID + "\" },\n"
```