diff options
| author | Pjotr Prins | 2026-03-27 11:24:12 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2026-03-27 11:24:12 +0100 |
| commit | afa3fd534a558fb2ea11f8c40df968635d4291c7 (patch) | |
| tree | 80cd544dc92336030237e40a9836637cb6a34657 | |
| parent | ca94eb79835184c561dad8f4cddce4ffb4077850 (diff) | |
| download | genecup-afa3fd534a558fb2ea11f8c40df968635d4291c7.tar.gz | |
Spacing and show errors on punkt
| -rwxr-xr-x | server.py | 178 |
1 file changed, 89 insertions, 89 deletions
diff --git a/server.py b/server.py index f457e17..e39944d 100755 --- a/server.py +++ b/server.py @@ -25,7 +25,7 @@ load_dotenv() import os GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") -nltk.download('punkt', quiet=True) +nltk.download('punkt') # import pickle # Removed from collections import Counter from datetime import datetime @@ -47,8 +47,8 @@ from google import genai import re import ast from more_functions import * -from nltk.tokenize import sent_tokenize -from more_functions import getabstracts, undic, gene_category +from nltk.tokenize import sent_tokenize +from more_functions import getabstracts, undic, gene_category GENECUP_PROMPT_TEMPLATE = "" try: @@ -149,7 +149,7 @@ def classify_stress_with_gemini(sentence_text): ) # We need to parse the classification from the response classification = response.text.strip().lower() - + # The model might return "Cellular Level Stress" or "Organismal Stress" if "cellular" in classification: return "neg" # 'neg' for Cellular Level Stress @@ -158,7 +158,7 @@ def classify_stress_with_gemini(sentence_text): else: print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'") return "unknown" - + except Exception as e: print(f"Error calling Gemini API for stress classification: {e}") return "error_api_call" @@ -183,7 +183,7 @@ Classification:""" contents=prompt ) classification = response.text.strip().lower() - + if classification == "systemic": return "pos" # 'pos' for systemic stress elif classification == "cellular": @@ -191,7 +191,7 @@ Classification:""" else: print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'") return "unknown" - + except Exception as e: print(f"Error calling Gemini API for stress classification: {e}") return "error_api_call" @@ -210,8 +210,8 @@ class users(db.Model): # def clean_doc(doc, vocab): # doc = doc.lower() # tokens = doc.split() -# re_punc = re.compile('[%s]' % 
re.escape(string.punctuation)) -# tokens = [re_punc.sub('' , w) for w in tokens] +# re_punc = re.compile('[%s]' % re.escape(string.punctuation)) +# tokens = [re_punc.sub('' , w) for w in tokens] # tokens = [word for word in tokens if len(word) > 1] # stop_words = set(stopwords.words('english')) # tokens = [w for w in tokens if not w in stop_words] @@ -257,7 +257,7 @@ def root(): ontoarchive() onto_len_dir = session['onto_len_dir'] onto_list = session['onto_list'] - else: + else: onto_len_dir = 0 onto_list = '' @@ -309,13 +309,13 @@ def signup(): if (found_user and (bcrypt.checkpw(password.encode('utf8'), found_user.password)==False)): flash("Already registered, but wrong password!", "inval") - return render_template('signup.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) + return render_template('signup.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) session['email'] = email session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest() session['name'] = name password = bcrypt.hashpw(password.encode('utf8'), bcrypt.gensalt()) - user = users(name=name, email=email, password = password) + user = users(name=name, email=email, password = password) if found_user: session['email'] = found_user.email session['hashed_email'] = hashlib.md5(session['email'] .encode('utf-8')).hexdigest() @@ -367,10 +367,10 @@ def signin(): return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) else: flash("Invalid username or password!", "inval") - return render_template('signup.html',version=version()) + return render_template('signup.html',version=version()) return render_template('signin.html',version=version()) -# change password +# change password @app.route("/<nm_passwd>", methods=["POST", "GET"]) def profile(nm_passwd): try: @@ -407,7 
+407,7 @@ def profile(nm_passwd): return render_template("/passwd_change.html", name=user_name,version=version()) else: return "This url does not exist" - else: + else: return "This url does not exist" except (AttributeError): return "This url does not exist" @@ -424,7 +424,7 @@ def logout(): global user1 if session['name'] != '': user1 = session['name'] - else: + else: user1 = session['email'] flash(f"You have been logged out, {user1}", "inval") # Used f-string for clarity session.pop('email', None) @@ -524,7 +524,7 @@ def ontology(): keycat = keycat.replace('| ', '|') namecat=re.sub('[^,a-zA-Z0-9 \n]', '', namecat) - # Generate a unique session ID depending on timestamp to track the results + # Generate a unique session ID depending on timestamp to track the results timestamp = datetime.utcnow().replace(microsecond=0) timestamp = timestamp.replace(tzinfo=pytz.utc) timestamp = timestamp.astimezone(pytz.timezone("America/Chicago")) @@ -557,8 +557,8 @@ def ontology(): with open("addiction.onto","r") as f1: with open(session['namecat']+".onto", "w") as f2: for line in f1: - f2.write(line) - else: + f2.write(line) + else: f= open(session['namecat']+".onto","w") dict_onto={} else: @@ -573,8 +573,8 @@ def ontology(): flag_kw=0 if (',' in maincat) or (',' in subcat): flash("Only one word can be added to the category and subcategory at a time.","inval") - elif maincat in dict_onto.keys(): # Layer 2, main category - if subcat in dict_onto[maincat].keys(): # Layer 3, keywords shown in results + elif maincat in dict_onto.keys(): # Layer 2, main category + if subcat in dict_onto[maincat].keys(): # Layer 3, keywords shown in results keycat_ls = keycat.split('|') for kw in str.split(next(iter(dict_onto[maincat][subcat])), '|'): # Layer 4, synonyms for keycat_word in keycat_ls: @@ -610,7 +610,7 @@ def ontology(): flash("You must login to change the addiction ontology.") else: flash("You must login to create a new ontology.") - + if request.form['submit'] == 'remove': if ('email' 
in session): session['namecat']=namecat @@ -633,8 +633,8 @@ def ontology(): with open("addiction.onto","r") as f1: with open(session['namecat']+".onto", "w") as f2: for line in f1: - f2.write(line) - else: + f2.write(line) + else: f= open(session['namecat']+".onto","w") dict_onto={} @@ -646,10 +646,10 @@ def ontology(): dict_onto={} else: dict_onto=ast.literal_eval(onto_cont) - + flag_kw=0 - if maincat in dict_onto.keys(): # Layer 2, main category - if subcat in dict_onto[maincat].keys(): # Layer 3, keywords shown in results + if maincat in dict_onto.keys(): # Layer 2, main category + if subcat in dict_onto[maincat].keys(): # Layer 3, keywords shown in results for kw in str.split(next(iter(dict_onto[maincat][subcat])), '|'): keycat_ls = keycat.split('|') for keycat_word in keycat_ls: # Layer 4, synonyms @@ -664,11 +664,11 @@ def ontology(): dict_onto[maincat]=re.sub(r'\''+subcat+'\': \'{}\', ', '', str(dict_onto[maincat])) dict_onto[maincat]=re.sub(r'\''+subcat+'\': \'{}\'', '', str(dict_onto[maincat])) if '{}' in dict_onto[maincat]: - dict_onto=re.sub(r', \''+maincat+'\': \'{}\'', '', str(dict_onto)) + dict_onto=re.sub(r', \''+maincat+'\': \'{}\'', '', str(dict_onto)) dict_onto=str(dict_onto).replace('\"{','{') dict_onto=str(dict_onto).replace('}\"','}') dict_onto=str(dict_onto).replace('\'{','{') - dict_onto=str(dict_onto).replace('}\'','}') + dict_onto=str(dict_onto).replace('}\'','}') with open(session['namecat']+'.onto', 'w') as file3: file3.write(str(dict_onto)) if flag_kw==0: @@ -676,12 +676,12 @@ def ontology(): else: flash("\""+subcat+"\" is not a subcategory.","inval") else: - flash("\""+subcat+"\" is not a category.","inval") + flash("\""+subcat+"\" is not a category.","inval") else: if namecat=='addiction': flash("You must login to change the addiction ontology.") else: - flash("You must login to create a new ontology.") + flash("You must login to create a new ontology.") if 'namecat' in session: file2 = open(session['namecat']+".onto","r") @@ -718,7 
+718,7 @@ def ontoarchive(): onto_list = '' onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) - return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) + return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) else: session['user_folder'] = datadir+"/user/"+str(session['hashed_email']) else: @@ -732,10 +732,10 @@ def ontoarchive(): session_id=session['id'] def sorted_alphanumeric(data): convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] return sorted(data, key=alphanum_key) - dirlist = sorted_alphanumeric(os.listdir(session['user_folder']+"/ontology/")) + dirlist = sorted_alphanumeric(os.listdir(session['user_folder']+"/ontology/")) onto_folder_list = [] onto_directory_list = [] onto_list=[] @@ -793,26 +793,26 @@ def progress(): if ('email' in session): onto_len_dir = session['onto_len_dir'] onto_list = session['onto_list'] - else: + else: onto_len_dir = 0 onto_list = '' onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) message="<span class='text-danger'>Up to 200 terms can be searched at a time</span>" return render_template('index.html' ,onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto, message=message,version=version()) - + if len(genes)==0: if ('email' in session): onto_len_dir = session['onto_len_dir'] onto_list = session['onto_list'] - else: + else: onto_len_dir = 0 onto_list = '' onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) message="<span class='text-danger'>Please enter a search term </span>" return render_template('index.html',onto_len_dir=onto_len_dir, 
onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto, message=message,version=version()) - + tf_path=tempfile.gettempdir() genes_for_folder_name ="" if len(genes) == 1: @@ -828,7 +828,7 @@ def progress(): genes_for_folder_name =str(genes[0])+"_"+str(genes[1])+"_"+str(genes[2]) marker="_m" - # Generate a unique session ID depending on timestamp to track the results + # Generate a unique session ID depending on timestamp to track the results timestamp = datetime.utcnow().replace(microsecond=0) timestamp = timestamp.replace(tzinfo=pytz.utc) timestamp = timestamp.astimezone(pytz.timezone("America/Chicago")) @@ -885,7 +885,7 @@ def progress(): session['rnd'] = timeextension+"_0_"+genes_for_folder_name+marker+"_0_"+onto_name_archive rnd = session['rnd'] else: - rnd = "tmp" + ''.join(random.choice(string.ascii_letters) for x in range(6)) + rnd = "tmp" + ''.join(random.choice(string.ascii_letters) for x in range(6)) session['path']=tf_path+ "/" + rnd os.makedirs(session['path']) search_type = request.args.getlist('type') @@ -926,7 +926,7 @@ def search(): search_type = session['search_type'] temp_nodes = "" json_nodes = "{\"data\":[" - + n_num=0 d={} nodecolor={} @@ -965,7 +965,7 @@ def search(): nodes_list.append(nd) json_nodes += generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky]) d["nj{0}".format(n_num)]='' - + json_nodes = json_nodes[:-2] # Handles case if json_nodes was only "{\"data\":[" if json_nodes == "{\"data\"": # if it was empty before -2 json_nodes = "{\"data\":[]}" @@ -981,7 +981,7 @@ def search(): progress=0 searchCnt=0 nodesToHide=str() - json_edges = str() + json_edges = str() #genes_or = ' [tiab] or '.join(genes) all_d='' @@ -1003,7 +1003,7 @@ def search(): datf = pd.read_csv('./utility/gwas_used.csv',sep='\t') progress+=percent yield "data:"+str(progress)+"\n\n" - + for gene in genes: abstracts_raw = getabstracts(gene,all_d) # all_d might be empty if no search_type matches #print(abstracts_raw) @@ -1019,7 +1019,7 @@ def search(): sent_tok 
= pmid + ' ' + sent_tok sentences_ls.append(sent_tok) gene=gene.replace("-"," ") - + geneEdges = "" # Use the already determined current_dict_onto @@ -1048,9 +1048,9 @@ def search(): addiction_flag=0 sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto_loop) yield "data:"+str(progress)+"\n\n" - + geneEdges += generate_edges(sent, tf_name) - json_edges += generate_edges_json(sent, tf_name) + json_edges += generate_edges_json(sent, tf_name) sentences+=sent if ("GWAS" in search_type and 'GWAS' in dict_onto_loop): # Added check for GWAS in dict_onto_loop gwas_sent=[] @@ -1067,7 +1067,7 @@ def search(): # This implies dict_onto_loop['GWAS'][nd2] contains combined keywords. # Let's assume the structure is { 'subcategory' : {'keyword_group1', 'keyword_group2'} } # where keyword_group is "termA|termB" - + # Iterating over the values of the sub-dictionary if it's a dict, or elements if it's a list/set sub_keywords_container = dict_onto_loop['GWAS'][nd2] # This needs to be robust to the actual structure of dict_onto_loop['GWAS'][nd2] @@ -1084,7 +1084,7 @@ def search(): actual_keywords_to_iterate.extend(sub_keywords_container.split('|')) - for nd in actual_keywords_to_iterate: + for nd in actual_keywords_to_iterate: gwas_text='' # Added na=False to contains calls datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE, na=False)] @@ -1096,11 +1096,11 @@ def search(): with open(path_user+"gwas_results.tab", "a") as gwas_edges: gwas_edges.write(sn_file) geneEdges += cys - json_edges += gwas_json + json_edges += gwas_json # report progress immediately progress+=percent yield "data:"+str(progress)+"\n\n" - + if len(geneEdges) >0: rnd = '' if 'email' in session: @@ -1121,7 +1121,7 @@ def search(): progress=100 sntdata.write(sentences) sntdata.close() - cysdata.write(nodes+edges) + cysdata.write(nodes+edges) cysdata.close() zeroLinkNode.write(nodesToHide) zeroLinkNode.close() @@ -1176,9 +1176,9 @@ def 
tableview(): content = edgesjsonfile.read().strip() if content and content != "{\"data\":[]}": # Reset file pointer and load json - edgesjsonfile.seek(0) + edgesjsonfile.seek(0) jedges = json.load(edgesjsonfile) - nodata_temp = 0 + nodata_temp = 0 else: jedges = {"data": []} # Ensure jedges is a dict except FileNotFoundError: @@ -1202,7 +1202,7 @@ def tableview(): onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto,version=version()) - + jedges ='' nodata_temp = 1 # Default to no data try: @@ -1276,7 +1276,7 @@ def tableview0(): print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json") jedges = {"data": []} nodata_temp = 1 - + else: # genes_session_tmp=tf_path+"/"+rnd_url # Not used further gene_url_tmp = tf_path+"/"+rnd_url @@ -1351,9 +1351,9 @@ def userarchive(): session_id=session['id'] def sorted_alphanumeric(data): convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] return sorted(data, key=alphanum_key) - dirlist = sorted_alphanumeric(os.listdir(session['user_folder'])) + dirlist = sorted_alphanumeric(os.listdir(session['user_folder'])) folder_list = [] directory_list = [] gene_list=[] @@ -1447,13 +1447,13 @@ def date(): if p['source'] not in current_gene_list: gene_list_all.append(p['source']) current_gene_list.append(p['source']) - + display_gene_list = current_gene_list added = "" if len(current_gene_list)>3: display_gene_list = current_gene_list[0:3] added = ",..." 
- + gene_name_str = str(display_gene_list)[1:-1] # Remove brackets gene_name_str=gene_name_str.replace("'","") gene_name = gene_name_str + added @@ -1469,7 +1469,7 @@ def date(): except IndexError: gene_name = "N/A" num_gene = 0 - + genes_session_str = '' # Renamed to avoid conflict for gen_item in gene_list_all: # Use gene_list_all derived from edges if possible genes_session_str += str(gen_item).strip() + "_" # Ensure clean gene names @@ -1483,7 +1483,7 @@ def date(): onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list_session, ontol = 'addiction', dict_onto = dict_onto,version=version()) - + message3="<ul><li> <font color=\"#E74C3C\">Click on the abstract count to read sentences linking the keyword and the gene</font> <li> Click on a keyword to see the terms included in the search. <li>View the results in <a href='\\cytoscape/?rnd={}&genequery={}'\ ><b> a graph.</b></a> </ul> Links will be preserved when the table is copy-n-pasted into a spreadsheet.".format(select_date,genes_session_str) return render_template('tableview.html',nodata_temp=nodata_temp, num_gene=num_gene,genes_session_tmp = genes_session_tmp, rnd_url=select_date ,jedges=jedges, jnodes=jnodes,gene_name=gene_name, genes_url=genes_session_str, message3=message3,no_footer=True,version=version()) @@ -1495,7 +1495,7 @@ def cytoscape(): # genes_session_tmp=tf_path + "/" + genes_url # This variable is not used # rnd_url_tmp=tf_path +"/" + rnd_url # This is for non-logged in users path later message2="<ul><li><font color=\"#E74C3C\">Click on a line to read the sentences </font> <li>Click on a keyword to see the terms included in the search<li>Hover a pointer over a node to hide other links <li>Move the nodes around to adjust visibility <li> Reload the page to restore the default layout<li>View the results in <a href='\\tableview/?rnd={}&genequery={}'\ ><b>a table. 
</b></a></ul>".format(rnd_url,genes_url) - + elements = "" # Default empty elements zeroLink = "" # Default empty zeroLink @@ -1532,7 +1532,7 @@ def cytoscape(): onto_cont=open("addiction.onto","r").read() dict_onto=ast.literal_eval(onto_cont) return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list_session, ontol = 'addiction', dict_onto = dict_onto,version=version()) - + try: with open(rnd_url_path+"_0link","r") as z: zeroLink=z.read() @@ -1541,7 +1541,7 @@ def cytoscape(): if (len(zeroLink.strip())>0): # Check if zeroLink has content after stripping whitespace message2+="<span style=\"color:darkred;\">No result was found for these genes: " + zeroLink + "</span>" - + return render_template('cytoscape.html', elements=elements, message2=message2,version=version()) @@ -1549,7 +1549,7 @@ def cytoscape(): def sentences(): # Removed predict_sent and CNN model loading # def predict_sent(sent_for_pred): ... - + pmid_list=[] pmid_string='' edge=request.args.get('edgeID') @@ -1561,7 +1561,7 @@ def sentences(): num_abstract = 0 stress_cellular = "<br><br><br>"+"</ol><b>Sentence(s) describing cellular stress (classified using Gemini API):</b><hr><ol>" stress_systemic = "<b></ol>Sentence(s) describing systemic stress (classified using Gemini API):</b><hr><ol>" - + matching_sents = get_sentences_from_file(tf_name, gene0, cat0) if not matching_sents: # It's possible the file was found but no sentences matched the criteria. @@ -1573,10 +1573,10 @@ def sentences(): for sent_obj in matching_sents: text = sent_obj['text'] pmid = sent_obj['pmid'] - + formatted_line = f"<li> {text} <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term={pmid}\" target=_new>PMID:{pmid}<br></a>" all_stress_sentences.append({'raw_text': text, 'html_line': formatted_line}) - + out3 += formatted_line if(pmid+cat0 not in pmid_list): pmid_string = pmid_string + ' ' + pmid @@ -1612,7 +1612,7 @@ Here are the sentences to classify: # The model might wrap the JSON in ```json ... 
```, so we need to clean it. cleaned_response_text = response.text.strip().replace("```json", "").replace("```", "").strip() classifications = json.loads(cleaned_response_text) - + # Step 4: Distribute the sentences into buckets based on the parsed classifications for i, s_obj in enumerate(all_stress_sentences): # Get the classification for sentence 'i'. Use .get() for safety. @@ -1662,7 +1662,7 @@ def showTopGene(): if isinstance(archived_data, tuple) and len(archived_data) > 0: nodesEdges = archived_data[0] else: # Fallback if searchArchived doesn't return expected tuple - nodesEdges = "" + nodesEdges = "" print(f"Warning: searchArchived did not return expected data for {query}") message2="<li><strong>"+query + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> " @@ -1738,7 +1738,7 @@ def synonyms(): allnodes = globals()['genes'] else: print("Warning: 'genes' dictionary for synonyms not found.") - + synonym_list = list(allnodes[node].split("|")) session['synonym_list'] = synonym_list session['main_gene'] = node.upper() @@ -1763,7 +1763,7 @@ def synonyms(): gwas_file_path = path + "gwas_results.tab" sents_by_main_cat = {} - + try: with open(snt_file_path, "r") as f: for line in f: @@ -1826,16 +1826,16 @@ def synonyms(): if isinstance(search_terms_obj, set) and search_terms_obj: search_terms_str = next(iter(search_terms_obj)) elif isinstance(search_terms_obj, str): search_terms_str = search_terms_obj else: continue - + keyword_regex_str = r'\b(' + '|'.join(re.escape(term) for term in search_terms_str.split('|')) + r')\b' keyword_regex = re.compile(keyword_regex_str, re.IGNORECASE) - + sents_for_this_keyword = [s for s in sentences_ls if gene_regex.search(s['text']) and keyword_regex.search(s['text'])] - + if sents_for_this_keyword: pubmed_formatted_sentences += f"\n## 
Keyword: {keyword_node} (Category: {category_key})\n" for sent_obj in sents_for_this_keyword: pubmed_formatted_sentences += f"- {sent_obj['text']} (PMID: {sent_obj['pmid']})\n" - + gwas_formatted_sentences = "" if 'GWAS' in current_ontology: try: @@ -1919,7 +1919,7 @@ def gene_gene(): tmp_ggPMID=session['path']+"_ggPMID" gg_file=session['path']+"_ggSent" # Gene_gene result_file=session['path']+"_ggResult" - + # pubmed_path needs to be defined, assuming it's a global or config # For this example, let's assume it's a pre-configured path. # If not, this os.system call will fail or use current dir. @@ -1937,14 +1937,14 @@ def gene_gene(): # Ensure query is safe for shell command safe_query = query.replace("\"", "\\\"") # Basic escaping os.system(f"esearch -db pubmed -query \"{safe_query}\" | efetch -format uid |sort > \"{tmp_ggPMID}\"") - + # 'topGene_uniq.pmid' file needs to exist # For robustness, check if it exists top_gene_pmid_file = "topGene_uniq.pmid" if not os.path.exists(top_gene_pmid_file): print(f"Warning: {top_gene_pmid_file} not found. 
Gene-gene search might be affected.") # Create an empty file to prevent comm command error, or handle differently - open(top_gene_pmid_file, 'a').close() + open(top_gene_pmid_file, 'a').close() abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\"" try: @@ -1963,7 +1963,7 @@ def gene_gene(): topGenes=dict() out_str=str() # Renamed from out hitGenes=dict() - + # 'topGene_symb_alias.txt' file needs to exist top_gene_alias_file = "topGene_symb_alias.txt" if os.path.exists(top_gene_alias_file): @@ -1986,14 +1986,14 @@ def gene_gene(): if abstractCnt > 0 and rowCnt % 10 == 0 : # Check abstractCnt > 0 progress=10+round(rowCnt/abstractCnt,2)*80 yield "data:"+str(progress)+"\n\n" - + tiab_parts=row.split("\t", 1) # Split only on first tab if len(tiab_parts) < 2: continue # Skip malformed lines pmid = tiab_parts[0] tiab_text_gg = tiab_parts[1] # Renamed - + sentences_gg = sent_tokenize(tiab_text_gg) # Renamed - ## keep the sentence only if it contains the gene + ## keep the sentence only if it contains the gene for sent_item in sentences_gg: # Renamed if findWholeWord(query)(sent_item): sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I) @@ -2012,26 +2012,26 @@ def gene_gene(): with open(gg_file, "w+") as gg: gg.write(out_str) # gg.close() # Not needed with 'with open' - + results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. 
Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>" # Renamed topGeneHits={} for key_gene in hitGenes.keys(): # Renamed url_gg=gg_file+"|"+query+"|"+key_gene # Renamed sentword="sentence" if hitGenes[key_gene]==1 else "sentences" topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene] - + topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True) # Correct way to sort dict by value - + for k_html,v_count in topSorted: # Renamed results_html+=k_html - + with open(result_file, "w+") as saveResult: # Ensure it's opened in write mode saveResult.write(results_html) # saveResult.close() # Not needed progress=100 yield "data:"+str(progress)+"\n\n" - + # Start the run query_gene_gene=session.get('forTopGene', '') # Get from session, default to empty if not query_gene_gene: |
