switch to gemini-flash

author: chen42 2026-03-24 09:21:50 -0500
committer: chen42 2026-03-24 09:21:50 -0500
commit: 5e68858ef98f61f80ba5992296c36db6c8dc67c9 (patch)
tree: 9c6184fe6fffdfa0bef10dbf4dcd17ff324dbeb9 /server.py
parent: 427a6ab4f4a1b45608addf3df23088251d4480a8 (diff)
download: genecup-5e68858ef98f61f80ba5992296c36db6c8dc67c9.tar.gz
1 files changed, 903 insertions, 365 deletions
diff --git a/server.py b/server.py
index 9d34bf9..19d7486 100755
--- a/server.py
+++ b/server.py
@@ -13,32 +13,44 @@ from os import listdir
 
 import bcrypt
 import nltk
-import numpy as np
+# import numpy as np # Removed
 import pandas as pd
 import pytz
 from flask import (Flask, Response, flash, jsonify, redirect, render_template,
                    request, session, url_for)
 from flask_sqlalchemy import SQLAlchemy
-from numpy import array
+# from numpy import array # Removed
 
-nltk.download('punkt')
-import pickle
+from dotenv import load_dotenv
+load_dotenv()
+import os
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
+nltk.download('punkt', quiet=True)
+# import pickle # Removed
 from collections import Counter
 from datetime import datetime
 
-import tensorflow
-import tensorflow.keras
-from nltk.corpus import stopwords
-from nltk.stem.porter import PorterStemmer
-from tensorflow.keras import backend as K
-from tensorflow.keras import metrics, optimizers
-from tensorflow.keras.layers import *
-from tensorflow.keras.layers import Dense, Embedding, Flatten
-from tensorflow.keras.models import Model, Sequential
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.preprocessing.text import Tokenizer
+# Gemini API related imports
+import google.generativeai as genai
 
+import re
+import ast
 from more_functions import *
+from nltk.tokenize import sent_tokenize 
+from more_functions import getabstracts, undic, gene_category 
+
+GENECUP_PROMPT_TEMPLATE = ""
+try:
+    with open("genecup_synthesis_prompt.txt", "r") as f:
+        GENECUP_PROMPT_TEMPLATE = f.read()
+except FileNotFoundError:
+    print("Warning: genecup_synthesis_prompt.txt not found.  LLM prompts will be incomplete.")
+except Exception as e:
+    print(f"Error loading genecup_synthesis_prompt.txt: {e}. LLM prompts will be affected.")
+
+
+
 
 app=Flask(__name__)
 #datadir="/export/ratspub/"
@@ -48,8 +60,125 @@ datadir="./"
 app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4'
 app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'+datadir+'userspub.sqlite'
 db = SQLAlchemy(app)
+
+
+def get_sentences_from_file(file_path, gene_name, category_name=None):
+    """Reads a sentence file and returns sentences matching a gene and category."""
+    matching_sentences = []
+    try:
+        with open(file_path, "r") as f:
+            for line in f:
+                if not line.strip():
+                    continue
+                try:
+                    (gene, nouse, cat, pmid, text) = line.split("\t")
+                    cat_match = (category_name is None) or (cat.strip().upper() == category_name.strip().upper())
+                    if (gene.strip().upper() == gene_name.strip().upper() and cat_match):
+                        matching_sentences.append({'pmid': pmid, 'text': text, 'category': cat})
+                except ValueError:
+                    continue
+    except FileNotFoundError:
+        print(f"Sentence file not found: {file_path}")
+    except Exception as e:
+        print(f"Error reading sentence file {file_path}: {e}")
+    return matching_sentences
+
+
 nltk.data.path.append("./nlp/")
 
+# Initialize database within application context
+with app.app_context():
+    db.create_all()
+
+# Configure Gemini API Key
+# IMPORTANT: Set the GEMINI_API_KEY environment variable
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+    print("Warning: GEMINI_API_KEY environment variable not set. Stress classification via Gemini will not work.")
+else:
+    try:
+        genai.configure(api_key=GEMINI_API_KEY)
+    except Exception as e:
+        print(f"Error configuring Gemini API: {e}")
+        GEMINI_API_KEY = None # Ensure it's None if configuration fails
+'''
+STRESS_PROMPT_TEMPLATE = ""
+try:
+    with open("stress_prompt.txt", "r") as f_prompt:
+        STRESS_PROMPT_TEMPLATE = f_prompt.read()
+except FileNotFoundError:
+    print("FATAL ERROR: stress_prompt.txt not found. Stress classification will fail.")
+except Exception as e:
+    print(f"FATAL ERROR: Could not read stress_prompt.txt: {e}")
+
+# few shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+    if not GEMINI_API_KEY:
+        print("Gemini API key not configured. Skipping classification.")
+        return "error_no_api_key"
+
+    # --- THIS IS THE MODIFIED PART ---
+    # Check if the prompt template was loaded successfully
+    if not STRESS_PROMPT_TEMPLATE:
+        print("Stress prompt template is not available. Skipping classification.")
+        return "error_no_prompt_template"
+
+    try:
+        model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+        
+        # Append the new sentence and the final instruction to the prompt template
+        # This is safer than .format() when the template contains its own curly braces.
+        prompt = STRESS_PROMPT_TEMPLATE + f'\nSentence: {sentence_text}\nClassification:'
+        print(prompt)
+        response = model_gemini.generate_content(prompt)
+        # We need to parse the classification from the response
+        classification = response.text.strip().lower()
+        
+        # The model might return "Cellular Level Stress" or "Organismal Stress"
+        if "cellular" in classification:
+            return "neg"  # 'neg' for Cellular Level Stress
+        elif "organismal" in classification:
+            return "pos"  # 'pos' for Organismal Stress
+        else:
+            print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+            return "unknown"
+            
+    except Exception as e:
+        print(f"Error calling Gemini API for stress classification: {e}")
+        return "error_api_call"
+
+
+# zero-shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+    if not GEMINI_API_KEY:
+        print("Gemini API key not configured. Skipping classification.")
+        return "error_no_api_key"
+
+    try:
+        model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+        prompt = f"""Classify the following sentence based on whether it describes 'systemic stress' or 'cellular stress'.
+Please return ONLY the word 'systemic' if it describes systemic stress, or ONLY the word 'cellular' if it describes cellular stress. Do not add any other explanation or punctuation.
+
+Sentence: "{sentence_text}"
+
+Classification:"""
+
+        response = model_gemini.generate_content(prompt)
+        classification = response.text.strip().lower()
+        
+        if classification == "systemic":
+            return "pos"  # 'pos' for systemic stress
+        elif classification == "cellular":
+            return "neg"  # 'neg' for cellular stress
+        else:
+            print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+            return "unknown"
+            
+    except Exception as e:
+        print(f"Error calling Gemini API for stress classification: {e}")
+        return "error_api_call"
+'''
+
 # Sqlite database
 class users(db.Model):
     __tablename__='user'
@@ -59,46 +188,47 @@ class users(db.Model):
     password = db.Column(db.String(128), nullable=False)
     date_created = db.Column(db.DateTime, default=datetime.utcnow)
 
-# Preprocessing of words for CNN
-def clean_doc(doc, vocab):
-    doc = doc.lower()
-    tokens = doc.split()
-    re_punc = re.compile('[%s]' % re.escape(string.punctuation))    
-    tokens = [re_punc.sub('' , w) for w in tokens]    
-    tokens = [word for word in tokens if len(word) > 1]
-    stop_words = set(stopwords.words('english'))
-    tokens = [w for w in tokens if not w in stop_words]
-    porter = PorterStemmer()
-    stemmed = [porter.stem(word) for word in tokens]
-    return tokens
-
-# Load tokenizer
-with open('./nlp/tokenizer.pickle', 'rb') as handle:
-    tokenizer = pickle.load(handle)
-
-# Load vocabulary
-with open('./nlp/vocabulary.txt', 'r') as vocab:
-    vocab = vocab.read()
-
-def tf_auc_score(y_true, y_pred):
-    return tensorflow.metrics.auc(y_true, y_pred)[1]
-
-K.clear_session()
-
-# Create the CNN model
-def create_model(vocab_size, max_length):
-    model = Sequential()
-    model.add(Embedding(vocab_size, 32, input_length=max_length))
-    model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
-    model.add(MaxPooling1D(pool_size=2))
-    model.add(Flatten())
-    model.add(Dense(10, activation='relu'))
-    model.add(Dense(1, activation='sigmoid'))
-    opt = tensorflow.keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999)
-    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
-    return model
+# Preprocessing of words for CNN (REMOVED)
+# def clean_doc(doc, vocab):
+#     doc = doc.lower()
+#     tokens = doc.split()
+#     re_punc = re.compile('[%s]' % re.escape(string.punctuation))    
+#     tokens = [re_punc.sub('' , w) for w in tokens]    
+#     tokens = [word for word in tokens if len(word) > 1]
+#     stop_words = set(stopwords.words('english'))
+#     tokens = [w for w in tokens if not w in stop_words]
+#     porter = PorterStemmer()
+#     stemmed = [porter.stem(word) for word in tokens]
+#     return tokens
+
+# Load tokenizer (REMOVED)
+# with open('./nlp/tokenizer.pickle', 'rb') as handle:
+#     tokenizer = pickle.load(handle)
+
+# Load vocabulary (REMOVED)
+# with open('./nlp/vocabulary.txt', 'r') as vocab_file_handle: # Renamed variable to avoid conflict
+#     vocab_text = vocab_file_handle.read() # Renamed variable
+
+# def tf_auc_score(y_true, y_pred): (REMOVED)
+#     return tensorflow.metrics.AUC()(y_true, y_pred)
+
+# K.clear_session() (REMOVED)
+
+# Create the CNN model (REMOVED)
+# def create_model(vocab_size, max_length):
+#     model = Sequential()
+#     model.add(Embedding(vocab_size, 32, input_length=max_length))
+#     model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
+#     model.add(MaxPooling1D(pool_size=2))
+#     model.add(Flatten())
+#     model.add(Dense(10, activation='relu'))
+#     model.add(Dense(1, activation='sigmoid'))
+#     opt = tensorflow.keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+#     model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
+#     return model
 
 # Use addiction ontology by default
+import ast # Duplicate of the top-of-file `import ast`; repeated here next to the ontology parsing that uses it.
 onto_cont=open("addiction.onto","r").read()
 dictionary=ast.literal_eval(onto_cont)
 
@@ -278,7 +408,7 @@ def logout():
             user1 = session['name']
         else: 
             user1 = session['email']
-    flash("You have been logged out, {user1}", "inval")
+    flash(f"You have been logged out, {user1}", "inval") # f-string so {user1} is interpolated (previously the placeholder was shown literally)
     session.pop('email', None)
     session.clear()
     return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
@@ -699,10 +829,10 @@ def progress():
         except:
             namecat = 'addiction'
             session['namecat'] = namecat
-        if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction':
+        if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction': # Redundant 'addiction' check
             session['namecat']='addiction'
             onto_cont=open("addiction.onto","r").read()
-            dictionary=ast.literal_eval(onto_cont)
+            # dictionary=ast.literal_eval(onto_cont) # dictionary is global, no need to re-assign from local onto_cont
             search_type = request.args.getlist('type')
             if (search_type == []):
                 search_type = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric', 'cell']
@@ -805,7 +935,7 @@ def search():
         d["nj{0}".format(n_num)]=''
     else:
         namecat_flag=0
-        for ky in dictionary.keys():
+        for ky in dictionary.keys(): # Using global 'dictionary'
             nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dictionary.keys())))+", 70%, 80%)"
             d["nj{0}".format(n_num)]=generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
             n_num+=1
@@ -818,10 +948,15 @@ def search():
                 json_nodes += generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
         d["nj{0}".format(n_num)]=''
     
-    json_nodes = json_nodes[:-2]
-    json_nodes =json_nodes+"]}"
-    def generate(genes, tf_name):
+    json_nodes = json_nodes[:-2] # Drop the last two characters; if json_nodes was only "{\"data\":[" this leaves "{\"data\"" (checked below)
+    if json_nodes == "{\"data\"": # if it was empty before -2
+        json_nodes = "{\"data\":[]}"
+    else:
+        json_nodes =json_nodes+"]}"
+
+    def generate(genes, tf_name): # tf_name is snt_file
         with app.test_request_context():
+            from nltk.tokenize import sent_tokenize # Local import; sent_tokenize is also imported at module level.
             sentences=str()
             edges=str()
             nodes = temp_nodes
@@ -832,34 +967,36 @@ def search():
             #genes_or = ' [tiab] or '.join(genes)
             all_d=''
 
+            current_dict_onto = {} # To hold the relevant ontology for this search pass
             if namecat_flag==1:
-                onto_cont = open(ses_namecat+".onto","r").read()
-                dict_onto=ast.literal_eval(onto_cont)
-
-                for ky in dict_onto.keys():
-                    if (ky in search_type):
-                        all_d_ls=undic(list(dict_onto[ky].values()))
-                        all_d = all_d+'|'+all_d_ls
+                onto_cont_local = open(ses_namecat+".onto","r").read() # ses_namecat from outer scope
+                current_dict_onto=ast.literal_eval(onto_cont_local)
             else:
-                for ky in dictionary.keys():
-                    if (ky in search_type):
-                        all_d_ls=undic(list(dictionary[ky].values()))
-                        all_d = all_d+'|'+all_d_ls
-            all_d=all_d[1:]
+                current_dict_onto = dictionary # Use global dictionary
+
+            for ky in current_dict_onto.keys():
+                if (ky in search_type):
+                    all_d_ls=undic(list(current_dict_onto[ky].values()))
+                    all_d = all_d+'|'+all_d_ls
+            if all_d: # Check if all_d is not empty
+                all_d=all_d[1:]
+
             if ("GWAS" in search_type):
                 datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
             progress+=percent
             yield "data:"+str(progress)+"\n\n"
+            
             for gene in genes:
-                abstracts_raw = getabstracts(gene,all_d)
+                abstracts_raw = getabstracts(gene,all_d) # all_d might be empty if no search_type matches
                 #print(abstracts_raw)
                 sentences_ls=[]
 
                 for row in abstracts_raw.split("\n"):
+                    if not row.strip(): continue # Skip empty lines
                     tiab=row.split("\t")
                     pmid = tiab.pop(0)
-                    tiab= " ".join(tiab)
-                    sentences_tok = sent_tokenize(tiab)
+                    tiab_text = " ".join(tiab) # Renamed to avoid conflict
+                    sentences_tok = sent_tokenize(tiab_text)
                     for sent_tok in sentences_tok:
                         sent_tok = pmid + ' ' + sent_tok
                         sentences_ls.append(sent_tok)
@@ -867,60 +1004,76 @@ def search():
                 
                 geneEdges = ""
 
-                if namecat_flag==1:
-                    onto_cont = open(ses_namecat+".onto","r").read()
-                    dict_onto=ast.literal_eval(onto_cont)
-                else:
-                    dict_onto = dictionary
+                # Use the already determined current_dict_onto
+                # if namecat_flag==1:
+                #     onto_cont = open(ses_namecat+".onto","r").read()
+                #     dict_onto_loop=ast.literal_eval(onto_cont)
+                # else:
+                #     dict_onto_loop = dictionary
+                dict_onto_loop = current_dict_onto
 
-                for ky in dict_onto.keys():
+                for ky in dict_onto_loop.keys():
                     if (ky in search_type):
-                        if (ky=='addiction') and ('addiction' in dict_onto.keys())\
-                            and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
-                            and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
-                            #addiction terms must present with at least one drug
+                        # The special handling for 'addiction' with 'drug' needs careful check of dict_onto_loop structure
+                        if (ky=='addiction') and ('addiction' in dict_onto_loop.keys())\
+                            and ('drug' in dict_onto_loop.keys()) and ('addiction' in dict_onto_loop['addiction'].keys())\
+                            and ('aversion' in dict_onto_loop['addiction'].keys()) and ('intoxication' in dict_onto_loop['addiction'].keys()):
                             addiction_flag=1
-                            #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
-                            sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
-                            if ('addiction' in search_type):
+                            # addiction_d is not defined here, assume it's a global or from more_functions
+                            # This part might need `addiction_d` from `more_functions.py` to be correctly defined.
+                            # For now, assuming addiction_d is available in the scope.
+                            sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto_loop)
+                            if ('addiction' in search_type): # This check is redundant with outer if
                                 geneEdges += generate_edges(sent, tf_name)
                                 json_edges += generate_edges_json(sent, tf_name)
                         else:
                             addiction_flag=0
-                            if namecat_flag==1:
-                                onto_cont = open(ses_namecat+".onto","r").read()
-                                dict_onto=ast.literal_eval(onto_cont)
-                                #ky_d=undic(list(dict_onto[ky].values()))    
-                                sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
-                            else:
-                                #ky_d=undic(list(dict_onto[ky].values()))
-                                #print(sentences_ls)
-                                sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
-                                #print(sent)
+                            sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto_loop)
                             yield "data:"+str(progress)+"\n\n"
                             
                             geneEdges += generate_edges(sent, tf_name)
                             json_edges += generate_edges_json(sent, tf_name)                
                         sentences+=sent
-                if ("GWAS" in search_type):
+                if ("GWAS" in search_type and 'GWAS' in dict_onto_loop): # Added check for GWAS in dict_onto_loop
                     gwas_sent=[]
-                    print (datf)
-                    datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
-                                    | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE))]
-                    print (datf_sub1)
-                    for nd2 in dict_onto['GWAS'].keys():
-                        for nd1 in dict_onto['GWAS'][nd2]:    
-                            for nd in nd1.split('|'):
-                                gwas_text=''
-                                datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE)]
-                                    #& (datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
-                                    #| (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)))]
-                                if not datf_sub.empty:
-                                    for index, row in datf_sub.iterrows():
-                                        gwas_text = "SNP:<b>"+str(row['SNPS'])+"</b>, P value: <b>"+str(row['P-VALUE'])\
-                                            +"</b>, Disease/trait:<b> "+str(row['DISEASE/TRAIT'])+"</b>, Mapped trait:<b> "\
-                                            +str(row['MAPPED_TRAIT'])+"</b><br>"
-                                        gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
+                    # print (datf) # datf is loaded earlier
+                    datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False)
+                                    | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False))]
+                    # print (datf_sub1)
+                    for nd2 in dict_onto_loop['GWAS'].keys():
+                        # Ensure dict_onto_loop['GWAS'][nd2] is iterable and contains strings
+                        # Example: if dict_onto_loop['GWAS'][nd2] is {'keyword1|keyword2'}
+                        # next(iter(dict_onto_loop['GWAS'][nd2])) might be what was intended
+                        # Assuming dict_onto_loop['GWAS'][nd2] is a set/list of keyword strings like {'kw1|kw2', 'kw3'}
+                        # The original code was: for nd1 in dict_onto_loop['GWAS'][nd2]: for nd in nd1.split('|'):
+                        # This implies dict_onto_loop['GWAS'][nd2] contains combined keywords.
+                        # Let's assume the structure is { 'subcategory' : {'keyword_group1', 'keyword_group2'} }
+                        # where keyword_group is "termA|termB"
+                        
+                        # Iterating over the values of the sub-dictionary if it's a dict, or elements if it's a list/set
+                        sub_keywords_container = dict_onto_loop['GWAS'][nd2]
+                        # This needs to be robust to the actual structure of dict_onto_loop['GWAS'][nd2]
+                        # Assuming it's a set of strings, where each string can be pipe-separated.
+                        # e.g., sub_keywords_container = {'phenotype1|phenotype_alias', 'phenotype2'}
+                        actual_keywords_to_iterate = []
+                        if isinstance(sub_keywords_container, dict): # e.g. {'phenotype_group': 'pheno1|pheno2'}
+                             for key_group_str in sub_keywords_container.values(): # Or .keys() if that's the intent
+                                actual_keywords_to_iterate.extend(key_group_str.split('|'))
+                        elif isinstance(sub_keywords_container, (list, set)):
+                            for key_group_str in sub_keywords_container:
+                                actual_keywords_to_iterate.extend(key_group_str.split('|'))
+                        elif isinstance(sub_keywords_container, str): # e.g. 'pheno1|pheno2'
+                            actual_keywords_to_iterate.extend(sub_keywords_container.split('|'))
+
+
+                        for nd in actual_keywords_to_iterate:  
+                            gwas_text=''
+                            # Added na=False to contains calls
+                            datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE, na=False)]
+                            if not datf_sub.empty:
+                                for index, row in datf_sub.iterrows():
+                                    gwas_text = f"SNP:{row['SNPS']}, P value: {row['P-VALUE']}, Disease/trait: {row['DISEASE/TRAIT']}, Mapped trait: {row['MAPPED_TRAIT']}"
+                                    gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd2+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text) # Changed nd to nd2 for target node
                     cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user)
                     with open(path_user+"gwas_results.tab", "a") as gwas_edges:
                         gwas_edges.write(sn_file)
@@ -931,8 +1084,17 @@ def search():
                 yield "data:"+str(progress)+"\n\n"
                                     
                 if len(geneEdges) >0:
+                    rnd = ''
+                    if 'email' in session:
+                        if 'rnd' in session:
+                            rnd = session['rnd']
+                        elif 'path_user' in session:
+                            rnd = session['path_user'].split('/')[-2]
+                    elif 'path' in session:
+                        rnd = session['path'].split('/')[-1]
+
                     edges+=geneEdges
-                    nodes+="{ data: { id: '" + gene +  "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"'} },\n"
+                    nodes+="{ data: { id: '" + gene +  "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"&rnd="+rnd+"'} },\n"
                 else:
                     nodesToHide+=gene +  " "
 
@@ -947,14 +1109,20 @@ def search():
                     zeroLinkNode.close()
                 yield "data:"+str(progress)+"\n\n"
 
-           # Edges in json format
-            json_edges="{\"data\":["+json_edges
-            json_edges = json_edges[:-2]
-            json_edges =json_edges+"]}"
+            # Edges in json format
+            json_edges_content = json_edges.strip()
+            if json_edges_content.endswith(','):
+                json_edges_content = json_edges_content[:-1]
+
+            if not json_edges_content:
+                json_edges = "{\"data\":[]}"
+            else:
+                json_edges = "{\"data\":[" + json_edges_content + "]}"
 
             # Write edges to txt file in json format also in user folder
             with open(path_user+"edges.json", "w") as temp_file_edges:
-                temp_file_edges.write(json_edges) 
+                temp_file_edges.write(json_edges)
+
     with open(path_user+"nodes.json", "w") as temp_file_nodes:
         temp_file_nodes.write(json_nodes)
     return Response(generate(genes, snt_file), mimetype='text/event-stream')
@@ -983,15 +1151,26 @@ def tableview():
             return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
 
         jedges =''
-        file_edges = open(datadir+gene_url_tmp +'/edges.json', 'r')
-        for line in file_edges.readlines():
-            if ':' not in line:
-                nodata_temp = 1
-            else: 
-                nodata_temp = 0
-                with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+        nodata_temp = 1 # Default to no data
+        try:
+            with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+                # Check if file is empty or just contains empty structure
+                content = edgesjsonfile.read().strip()
+                if content and content != "{\"data\":[]}":
+                    # Reset file pointer and load json
+                    edgesjsonfile.seek(0) 
                     jedges = json.load(edgesjsonfile)
-                break
+                    nodata_temp = 0 
+                else:
+                    jedges = {"data": []} # Ensure jedges is a dict
+        except FileNotFoundError:
+            jedges = {"data": []} # Ensure jedges is a dict if file not found
+        except json.JSONDecodeError:
+            print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+            jedges = {"data": []} # Ensure jedges is a dict
+            nodata_temp = 1
+
+
     else:
         genes_session_tmp=tf_path+"/"+rnd_url
         gene_url_tmp = genes_session_tmp
@@ -1005,16 +1184,25 @@ def tableview():
             onto_cont=open("addiction.onto","r").read()
             dict_onto=ast.literal_eval(onto_cont)
             return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
+        
         jedges =''
-        file_edges = open(gene_url_tmp +'/edges.json', 'r')
-        for line in file_edges.readlines():
-            if ':' not in line:
-                nodata_temp = 1
-            else: 
-                nodata_temp = 0
-                with open(gene_url_tmp +"/edges.json") as edgesjsonfile:
+        nodata_temp = 1 # Default to no data
+        try:
+            with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+                content = edgesjsonfile.read().strip()
+                if content and content != "{\"data\":[]}":
+                    edgesjsonfile.seek(0)
                     jedges = json.load(edgesjsonfile)
-                break
+                    nodata_temp = 0
+                else:
+                    jedges = {"data": []}
+        except FileNotFoundError:
+             jedges = {"data": []}
+        except json.JSONDecodeError:
+            print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+            jedges = {"data": []}
+            nodata_temp = 1
+
     genename=genes_url.split("_")
     if len(genename)>3:
         genename = genename[0:3]
@@ -1040,7 +1228,7 @@ def tableview0():
 
     if ('email' in session):
         filename = rnd_url.split("_0_")[0]
-        genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename
+        # genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename # Not used further
         gene_url_tmp = "/user/"+str(session['hashed_email'])+"/"+rnd_url
         try:
             with open(datadir+gene_url_tmp+"/nodes.json") as jsonfile:
@@ -1054,18 +1242,26 @@ def tableview0():
             return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
 
         jedges =''
-        file_edges = open(datadir+gene_url_tmp+'/edges.json', 'r')
-        for line in file_edges.readlines():
-            if ':' not in line:
-                nodata_temp = 1
-            else: 
-                nodata_temp = 0
-                with open(datadir+gene_url_tmp+"/edges.json") as edgesjsonfile:
+        nodata_temp = 1 # Default to no data
+        try:
+            with open(datadir+gene_url_tmp +'/edges.json') as edgesjsonfile:
+                content = edgesjsonfile.read().strip()
+                if content and content != "{\"data\":[]}":
+                    edgesjsonfile.seek(0)
                     jedges = json.load(edgesjsonfile)
-                break
+                    nodata_temp = 0
+                else:
+                    jedges = {"data": []}
+        except FileNotFoundError:
+             jedges = {"data": []}
+        except json.JSONDecodeError:
+            print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+            jedges = {"data": []}
+            nodata_temp = 1
+            
     else:
-        genes_session_tmp=tf_path+"/"+rnd_url
-        gene_url_tmp = genes_session_tmp
+        # genes_session_tmp=tf_path+"/"+rnd_url # Not used further
+        gene_url_tmp = tf_path+"/"+rnd_url
         try:
             with open(gene_url_tmp+"/nodes.json") as jsonfile:
                 jnodes = json.load(jsonfile)
@@ -1078,15 +1274,23 @@ def tableview0():
             return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
 
         jedges =''
-        file_edges = open(gene_url_tmp+'/edges.json', 'r')
-        for line in file_edges.readlines():
-            if ':' not in line:
-                nodata_temp = 1
-            else: 
-                nodata_temp = 0
-                with open(gene_url_tmp+"/edges.json") as edgesjsonfile:
+        nodata_temp = 1 # Default to no data
+        try:
+            with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+                content = edgesjsonfile.read().strip()
+                if content and content != "{\"data\":[]}":
+                    edgesjsonfile.seek(0)
                     jedges = json.load(edgesjsonfile)
-                break
+                    nodata_temp = 0
+                else:
+                    jedges = {"data": []}
+        except FileNotFoundError:
+            jedges = {"data": []}
+        except json.JSONDecodeError:
+            print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+            jedges = {"data": []}
+            nodata_temp = 1
+
     genes_url=request.args.get('genequery')
     genename=genes_url.split("_")
     if len(genename)>3:
@@ -1118,7 +1322,7 @@ def userarchive():
         else:
             session['user_folder'] = datadir+"/user/"+str(session['hashed_email'])
     else:
-        onto_name_archive=''
+        # onto_name_archive='' # This variable is not used here
         flash("You logged out!")
         onto_len_dir = 0
         onto_list = ''
@@ -1135,26 +1339,34 @@ def userarchive():
     folder_list = []
     directory_list = []
     gene_list=[]
-    onto_list=[]
+    onto_list_archive =[] # Renamed to avoid conflict with outer scope 'onto_list'
 
     for filename in dirlist:
-        if ('_0_'  in filename):
-            folder_list.append(filename)
-            gene_name = filename.split('_0_')[1]
-            onto_name = filename.split('_0_')[2]
-            if gene_name[-2:] == '_m':
-                gene_name = gene_name[:-2]
-                gene_name = gene_name + ", ..."
-            gene_name = gene_name.replace('_', ', ')
-            gene_list.append(gene_name)
-            onto_list.append(onto_name)
-            onto_name=""
-            gene_name=""
-            filename=filename[0:4]+"-"+filename[5:7]+"-"+filename[8:13]+":"+filename[14:16]+":"+filename[17:19]
-            directory_list.append(filename)
+        if ('_0_'  in filename): # Ensure it's a search result folder, not e.g. "ontology"
+            if os.path.isdir(os.path.join(session['user_folder'], filename)): # Check if it's a directory
+                folder_list.append(filename)
+                try:
+                    gene_name = filename.split('_0_')[1]
+                    onto_name = filename.split('_0_')[2]
+                    if gene_name.endswith('_m'): # Check using endswith for robustness
+                        gene_name = gene_name[:-2]
+                        gene_name = gene_name + ", ..."
+                    gene_name = gene_name.replace('_', ', ')
+                    gene_list.append(gene_name)
+                    onto_list_archive.append(onto_name) # Use renamed list
+                    # onto_name="" # Not necessary, re-assigned in loop
+                    # gene_name="" # Not necessary, re-assigned in loop
+                    # Format filename for display
+                    display_filename=filename.split('_0_')[0] # Get only the timestamp part for display formatting
+                    display_filename=display_filename[0:4]+"-"+display_filename[5:7]+"-"+display_filename[8:10]+" "+display_filename[11:13]+":"+display_filename[14:16]+":"+display_filename[17:19]
+                    directory_list.append(display_filename)
+                except IndexError:
+                    print(f"Skipping folder with unexpected name format: {filename}")
+                    continue
+
     len_dir = len(directory_list)
     message3="<ul><li> Click on the Date/Time to view archived results. <li>The Date/Time are based on US Central time zone.</ul> "
-    return render_template('userarchive.html', len_dir=len_dir, gene_list = gene_list, onto_list = onto_list, folder_list=folder_list, directory_list=directory_list, session_id=session_id, message3=message3)
+    return render_template('userarchive.html', len_dir=len_dir, gene_list = gene_list, onto_list = onto_list_archive, folder_list=folder_list, directory_list=directory_list, session_id=session_id, message3=message3)
 
 
 # Remove the search directory
@@ -1177,189 +1389,301 @@ def remove():
 def date():
     select_date = request.args.get('selected_date')
     # Open the cache folder for the user
-    tf_path=datadir+"/user"
+    tf_path=datadir+"/user" # Root of the per-user folders; each user's data lives under tf_path/<hashed_email>
+    nodata_temp = 1 # Default to no data
+    jedges = {"data": []} # Default empty jedges
+    jnodes = {"data": []} # Default empty jnodes
+    gene_list_all = []
+    gene_name = "N/A"
+    num_gene = 0
+
     if ('email' in session):
-        time_extension = str(select_date)
-        time_extension = time_extension.split('_0_')[0]
-        gene_name1 = str(select_date).split('_0_')[1]
-        time_extension = time_extension.replace(':', '_')
-        time_extension = time_extension.replace('-', '_')
-        session['user_folder'] = tf_path+"/"+str(session['hashed_email'])
-        genes_session_tmp = tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/"+time_extension
-        with open(tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/nodes.json", "r") as jsonfile:
-            jnodes = json.load(jsonfile)
-        jedges =''
-        file_edges = open(tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/edges.json", "r")
-        for line in file_edges.readlines():
-            if ':' not in line:
-                nodata_temp = 1
-            else:
-                nodata_temp = 0
-                with open(tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/edges.json", "r") as edgesjsonfile:
+        time_extension = str(select_date).split('_0_')[0]
+        # gene_name1 = str(select_date).split('_0_')[1] # Not used directly for fetching, gene list derived from edges
+        # time_extension = time_extension.replace(':', '_') # This was for folder creation, not reading
+        # time_extension = time_extension.replace('-', '_')
+        session['user_folder'] = tf_path+"/"+str(session['hashed_email']) # This seems redundant here
+        genes_session_tmp = tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/"+time_extension # This path is for the _snt, _cy files etc.
+
+        try:
+            with open(tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/nodes.json", "r") as jsonfile:
+                jnodes = json.load(jsonfile)
+        except (FileNotFoundError, json.JSONDecodeError) as e:
+            print(f"Error loading nodes.json: {e}")
+            # Keep default jnodes
+
+        try:
+            with open(tf_path+"/"+str(session['hashed_email'])+"/"+select_date+"/edges.json", "r") as edgesjsonfile:
+                content = edgesjsonfile.read().strip()
+                if content and content != "{\"data\":[]}":
+                    edgesjsonfile.seek(0)
                     jedges = json.load(edgesjsonfile)
-                break
-        gene_list_all=[]
-        gene_list=[]
-        if nodata_temp == 0:
+                    nodata_temp = 0
+        except (FileNotFoundError, json.JSONDecodeError) as e:
+            print(f"Error loading edges.json: {e}")
+            # Keep default jedges and nodata_temp = 1
+
+        if nodata_temp == 0 and jedges.get("data"):
+            current_gene_list = []
             for p in jedges['data']:
-                if p['source'] not in gene_list:
+                if p['source'] not in current_gene_list:
                     gene_list_all.append(p['source'])
-                    gene_list.append(p['source'])
-            if len(gene_list)>3:
-                gene_list = gene_list[0:3]
+                    current_gene_list.append(p['source'])
+            
+            display_gene_list = current_gene_list
+            added = ""
+            if len(current_gene_list)>3:
+                display_gene_list = current_gene_list[0:3]
                 added = ",..."
-            else:
-                added = ""
-            gene_name = str(gene_list)[1:]
-            gene_name=gene_name[:-1]
-            gene_name=gene_name.replace("'","")
-            gene_name = gene_name+added
-            num_gene = gene_name.count(',')+1
-        else:
-            gene_name1 = gene_name1.replace("_", ", ")
-            gene_name = gene_name1
-            num_gene = gene_name1.count(',')+1
-            for i in range(0,num_gene):
-                gene_list.append(gene_name1.split(',')[i])
-        genes_session = ''
-        for gen in gene_list_all:
-            genes_session += str(gen) + "_"
-        genes_session = genes_session[:-1]
+            
+            gene_name_str = str(display_gene_list)[1:-1] # Remove brackets
+            gene_name_str=gene_name_str.replace("'","")
+            gene_name = gene_name_str + added
+            num_gene = len(current_gene_list) # Count of unique source genes
+        else: # No data or error, try to get gene name from folder
+            try:
+                gene_name_from_folder = str(select_date).split('_0_')[1]
+                if gene_name_from_folder.endswith("_m"):
+                    gene_name_from_folder = gene_name_from_folder[:-2] + ", ..."
+                gene_name = gene_name_from_folder.replace("_", ", ")
+                num_gene = gene_name.count(',') + 1
+                gene_list_all = gene_name.split(', ') # Approximate: a truncated ("_m") folder name also yields a "..." entry
+            except IndexError:
+                gene_name = "N/A"
+                num_gene = 0
+        
+        genes_session_str = '' # Renamed to avoid conflict
+        for gen_item in gene_list_all: # Use gene_list_all derived from edges if possible
+            genes_session_str += str(gen_item).strip() + "_" # Ensure clean gene names
+        if genes_session_str:
+            genes_session_str = genes_session_str[:-1]
+
     else:
         flash("You logged out!")
         onto_len_dir = 0
-        onto_list = ''
+        onto_list_session = '' # Renamed to avoid conflict
         onto_cont=open("addiction.onto","r").read()
         dict_onto=ast.literal_eval(onto_cont)
-        return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
-    message3="<ul><li> <font color=\"#E74C3C\">Click on the abstract count to read sentences linking the keyword and the gene</font> <li> Click on a keyword to see the terms included in the search. <li>View the results in <a href='\\cytoscape/?rnd={}&genequery={}'\ ><b> a graph.</b></a> </ul> Links will be preserved when the table is copy-n-pasted into a spreadsheet.".format(select_date,genes_session)
-    return render_template('tableview.html',nodata_temp=nodata_temp, num_gene=num_gene,genes_session_tmp = genes_session_tmp, rnd_url=select_date ,jedges=jedges, jnodes=jnodes,gene_name=gene_name, genes_url=genes_session, message3=message3)
+        return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list_session, ontol = 'addiction', dict_onto = dict_onto)
+    
+    message3="<ul><li> <font color=\"#E74C3C\">Click on the abstract count to read sentences linking the keyword and the gene</font> <li> Click on a keyword to see the terms included in the search. <li>View the results in <a href='\\cytoscape/?rnd={}&genequery={}'\ ><b> a graph.</b></a> </ul> Links will be preserved when the table is copy-n-pasted into a spreadsheet.".format(select_date,genes_session_str)
+    return render_template('tableview.html',nodata_temp=nodata_temp, num_gene=num_gene,genes_session_tmp = genes_session_tmp, rnd_url=select_date ,jedges=jedges, jnodes=jnodes,gene_name=gene_name, genes_url=genes_session_str, message3=message3)
 
 @app.route('/cytoscape/')
 def cytoscape():
     genes_url=request.args.get('genequery')
     rnd_url=request.args.get('rnd')
     tf_path=tempfile.gettempdir()
-    genes_session_tmp=tf_path + "/" + genes_url
-    rnd_url_tmp=tf_path +"/" + rnd_url
+    # genes_session_tmp=tf_path + "/" + genes_url # This variable is not used
+    # rnd_url_tmp=tf_path +"/" + rnd_url # The path for users who are not logged in is built in the else branch below
     message2="<ul><li><font color=\"#E74C3C\">Click on a line to read the sentences </font> <li>Click on a keyword to see the terms included in the search<li>Hover a pointer over a node to hide other links <li>Move the nodes around to adjust visibility <li> Reload the page to restore the default layout<li>View the results in <a href='\\tableview/?rnd={}&genequery={}'\ ><b>a table. </b></a></ul>".format(rnd_url,genes_url)
     
+    elements = "" # Default empty elements
+    zeroLink = "" # Default empty zeroLink
+
     if ('email' in session):
-        filename = rnd_url.split("_0_")[0]
-        rnd_url_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename
+        filename_part = rnd_url.split("_0_")[0] # Part of the folder name before the first "_0_"
+        rnd_url_path = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename_part # Common prefix of this search's "_cy" and "_0link" files
         try:
-            with open(rnd_url_tmp+"_cy","r") as f:
+            with open(rnd_url_path+"_cy","r") as f:
                 elements=f.read()
         except FileNotFoundError:
-            flash("You logged out!")
+            flash("You logged out or the search data is missing!") # More specific message
             onto_len_dir = 0
-            onto_list = ''
+            onto_list_session = '' # Renamed
             onto_cont=open("addiction.onto","r").read()
             dict_onto=ast.literal_eval(onto_cont)
-            return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
+            return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list_session, ontol = 'addiction', dict_onto = dict_onto)
 
-        with open(rnd_url_tmp+"_0link","r") as z:
-            zeroLink=z.read()
-            if (len(zeroLink)>0):
-                message2+="<span style=\"color:darkred;\">No result was found for these genes: " + zeroLink + "</span>"
-    else:
-        rnd_url_tmp=tf_path +"/" + rnd_url
         try:
-            rnd_url_tmp.replace("\"", "")
-            with open(rnd_url_tmp+"_cy","r") as f:
+            with open(rnd_url_path+"_0link","r") as z:
+                zeroLink=z.read()
+        except FileNotFoundError:
+            zeroLink = "" # File might not exist if no zero link genes
+
+    else: # Not logged in, use temp path
+        rnd_url_path=tf_path +"/" + rnd_url
+        try:
+            # rnd_url_path.replace("\"", "") # This doesn't modify in place and is likely not needed
+            with open(rnd_url_path+"_cy","r") as f:
                 elements=f.read()
         except FileNotFoundError:
-            flash("You logged out!")
+            flash("You logged out or the search data is missing!")
             onto_len_dir = 0
-            onto_list = ''
+            onto_list_session = '' # Renamed
             onto_cont=open("addiction.onto","r").read()
             dict_onto=ast.literal_eval(onto_cont)
-            return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
+            return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list_session, ontol = 'addiction', dict_onto = dict_onto)
+        
+        try:
+            with open(rnd_url_path+"_0link","r") as z:
+                zeroLink=z.read()
+        except FileNotFoundError:
+            zeroLink = ""
 
-        with open(rnd_url_tmp+"_0link","r") as z:
-            zeroLink=z.read()
-            if (len(zeroLink)>0):
-                message2+="<span style=\"color:darkred;\">No result was found for these genes: " + zeroLink + "</span>"
+    if (len(zeroLink.strip())>0): # Check if zeroLink has content after stripping whitespace
+        message2+="<span style=\"color:darkred;\">No result was found for these genes: " + zeroLink + "</span>"
+            
     return render_template('cytoscape.html', elements=elements, message2=message2)
 
 
 @app.route("/sentences")
 def sentences():
-    def predict_sent(sent_for_pred):
-        max_length = 64
-        tokens = clean_doc(sent_for_pred, vocab)
-        tokens = [w for w in tokens if w in vocab]
-        # convert to line
-        line = ' '.join(tokens)
-        line = [line]
-        tokenized_sent = tokenizer.texts_to_sequences(line)
-        tokenized_sent = pad_sequences(tokenized_sent, maxlen=max_length, padding='post') 
-        predict_sent = model.predict(tokenized_sent, verbose=0)
-        percent_sent = predict_sent[0,0]
-        if round(percent_sent) == 0:
-            return 'neg'
-        else:
-            return 'pos'
+    # Removed predict_sent and CNN model loading
+    # def predict_sent(sent_for_pred): ...
+    
     pmid_list=[]
     pmid_string=''
     edge=request.args.get('edgeID')
     (tf_name, gene0, cat0)=edge.split("|")
 
-    if(cat0=='stress'):
-        model = create_model(23154, 64)
-        model.load_weights("./nlp/weights.ckpt")
     out3=""
     out_pos = ""
     out_neg = ""
     num_abstract = 0
-    stress_cellular = "<br><br><br>"+"</ol><b>Sentence(s) describing celluar stress (classified using a deep learning model):</b><hr><ol>"
-    stress_systemic = "<b></ol>Sentence(s) describing systemic stress (classified using a deep learning model):</b><hr><ol>"
-    with open(tf_name, "r") as df:
-        all_sents=df.read()
-
-    for sent in all_sents.split("\n"):
-        if len(sent.strip())!=0:
-            (gene,nouse,cat, pmid, text)=sent.split("\t")
-            if (gene.upper() == gene0.upper() and cat.upper() == cat0.upper()) :
-                out3+= "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"
-                num_abstract += 1
-                if(pmid+cat0 not in pmid_list):
-                    pmid_string = pmid_string + ' ' + pmid
-                    pmid_list.append(pmid+cat0)
-                if(cat0=='stress'):
-                    out4 = predict_sent(text)
-                    if(out4 == 'pos'):
-                        out_pred_pos = "<li> "+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"                    
-                        out_pos += out_pred_pos
-                    else:
-                        out_pred_neg = "<li>"+ text + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid +"\" target=_new>PMID:"+pmid+"<br></a>"                    
-                        out_neg += out_pred_neg
+    stress_cellular = "<br><br><br>"+"</ol><b>Sentence(s) describing cellular stress (classified using Gemini API):</b><hr><ol>"
+    stress_systemic = "<b></ol>Sentence(s) describing systemic stress (classified using Gemini API):</b><hr><ol>"
+    
+    matching_sents = get_sentences_from_file(tf_name, gene0, cat0)
+    if not matching_sents:
+        # It's possible the file was found but no sentences matched the criteria.
+        return render_template('sentences.html', sentences=f"<p>No sentences found for {gene0} and {cat0}.</p>")
+
+    all_stress_sentences = []
+    num_abstract = len(matching_sents)
+
+    for sent_obj in matching_sents:
+        text = sent_obj['text']
+        pmid = sent_obj['pmid']
+        
+        formatted_line = f"<li> {text} <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term={pmid}\" target=_new>PMID:{pmid}<br></a>"
+        all_stress_sentences.append({'raw_text': text, 'html_line': formatted_line})
+        
+        out3 += formatted_line
+        if(pmid+cat0 not in pmid_list):
+            pmid_string = pmid_string + ' ' + pmid
+            pmid_list.append(pmid+cat0)
+
+    # Step 2: If the category is 'stress' and we have sentences, perform batch classification
+    if cat0 == 'stress' and all_stress_sentences:
+        if not GEMINI_API_KEY:
+            print("Gemini API key not configured. Skipping batch classification.")
+        else:
+            try:
+                # Create the batched prompt
+                sentences_to_classify_str = ""
+                for i, s_obj in enumerate(all_stress_sentences):
+                    # Use a unique, parsable identifier for each sentence
+                    sentences_to_classify_str += f'Sentence {i}: "{s_obj["raw_text"]}"\n'
+
+                batched_prompt = f"""For each sentence below, classify it as describing "Cellular Stress" or "Organismal Stress".
+Return your response as a valid JSON object where keys are the sentence numbers (e.g., "0", "1", "2") and values are the classification ("Cellular Stress" or "Organismal Stress").
+
+Example format: {{"0": "Cellular Stress", "1": "Organismal Stress"}}
+
+Here are the sentences to classify:
+{sentences_to_classify_str}
+"""
+                # Call the API
+                model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+                response = model_gemini.generate_content(batched_prompt)
+
+                # Step 3: Parse the JSON response
+                # The model might wrap the JSON in ```json ... ```, so we need to clean it.
+                cleaned_response_text = response.text.strip().replace("```json", "").replace("```", "").strip()
+                classifications = json.loads(cleaned_response_text)
+                
+                # Step 4: Distribute the sentences into buckets based on the parsed classifications
+                for i, s_obj in enumerate(all_stress_sentences):
+                    # Look up the label for sentence i; a missing key falls back to "unknown", which matches neither branch below.
+                    classification = classifications.get(str(i), "unknown").lower()
+                    if "cellular" in classification:
+                        out_neg += s_obj['html_line']
+                    elif "organismal" in classification:
+                        out_pos += s_obj['html_line']
+
+            except Exception as e:
+                print(f"Error during batch Gemini classification: {e}")
     out1="<h3>"+gene0 + " and " + cat0  + "</h3>\n"
     if len(pmid_list)>1:
-        out2 = str(num_abstract) + ' sentences in ' + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid_string +"\" target=_new>"+ str(len(pmid_list)) + ' studies' +"<br></a>" + "<br><br>"
-    else:
-        out2 = str(num_abstract) + ' sentence(s) in '+ " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid_string +"\" target=_new>"+ str(len(pmid_list)) + ' study' +"<br></a>" "<br><br>"
-    if(out_neg == "" and out_pos == ""):
-        out= out1+ out2 +out3
-    elif(out_pos != "" and out_neg!=""):
-        out = out1 + out2 + stress_systemic+out_pos + stress_cellular + out_neg
-    elif(out_pos != "" and out_neg ==""):
-        out= out1+ out2 + stress_systemic + out_pos
-    elif(out_neg != "" and out_pos == ""):
-        out = out1 +out2+stress_cellular+out_neg
-    K.clear_session()
-    return render_template('sentences.html', sentences="<ol>"+out+"</ol><p>")
+        out2 = str(num_abstract) + ' sentences in ' + " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid_string.strip() +"\" target=_new>"+ str(len(pmid_list)) + ' studies' +"<br></a>" + "<br><br>"
+    elif len(pmid_list) == 1: # Handle single study case
+        out2 = str(num_abstract) + ' sentence(s) in '+ " <a href=\"https://www.ncbi.nlm.nih.gov/pubmed/?term=" + pmid_string.strip() +"\" target=_new>"+ str(len(pmid_list)) + ' study' +"<br></a>" "<br><br>"
+    else: # Defensive fallback: pmid_list is only empty when no sentence matched, and that case already returned above
+        out2 = str(num_abstract) + ' sentence(s) found.<br><br>'
+
+
+    if(cat0 == 'stress'): # Only show stress classification if category is stress
+        if(out_neg == "" and out_pos == ""):
+            # If no classification results, show all sentences if any, or a message
+            if out3:
+                 out= out1+ out2 + "<b>All related sentences (Gemini classification not available or no specific stress types found):</b><hr><ol>" + out3
+            else:
+                 out = out1 + out2 + "No sentences found for this combination, or Gemini classification yielded no results."
+        elif(out_pos != "" and out_neg!=""):
+            out = out1 + out2 + stress_systemic+out_pos + stress_cellular + out_neg
+        elif(out_pos != "" and out_neg ==""):
+            out= out1+ out2 + stress_systemic + out_pos
+        elif(out_neg != "" and out_pos == ""):
+            out = out1 +out2+stress_cellular+out_neg
+    else: # Not stress category, just show all found sentences
+        out= out1+ out2 + "<ol>" + out3
+
+    # K.clear_session() # Removed
+    return render_template('sentences.html', sentences=out+"</ol><p>")
 
 
 # Show the cytoscape graph for one gene from the top gene list
 @app.route("/showTopGene")
 def showTopGene():
     query=request.args.get('topGene')
-    nodesEdges=searchArchived('topGene',query, 'cys','','')[0]
+    # Assuming searchArchived returns a tuple, and the first element is nodesEdges
+    archived_data = searchArchived('topGene',query, 'cys','','')
+    if isinstance(archived_data, tuple) and len(archived_data) > 0:
+        nodesEdges = archived_data[0]
+    else: # Fallback if searchArchived doesn't return expected tuple
+        nodesEdges = "" 
+        print(f"Warning: searchArchived did not return expected data for {query}")
+
     message2="<li><strong>"+query + "</strong> is one of the top addiction genes. <li> An archived search is shown. Click on the blue circle to update the results and include keywords for brain region and gene function. <strong> The update may take a long time to finish.</strong> "
     return render_template("cytoscape.html", elements=nodesEdges, message="Top addiction genes", message2=message2)
 
+'''
+@app.route("/shownode")
+def shownode():
+    node=request.args.get('node')
+    out = "" # Default value
+    current_dict_onto = {}
+
+    if 'namecat' in session:
+        try:
+            with open(session['namecat']+".onto","r") as file2:
+                onto_cont_local=file2.read()
+                current_dict_onto=ast.literal_eval(onto_cont_local)
+        except FileNotFoundError:
+            print(f"Ontology file not found: {session['namecat']}.onto. Falling back to default.")
+            current_dict_onto = dictionary # Fallback to default if custom not found
+        except Exception as e:
+            print(f"Error loading custom ontology {session['namecat']}.onto: {e}. Falling back to default.")
+            current_dict_onto = dictionary
+    else:
+        current_dict_onto = dictionary # Default global dictionary
+
+    for ky in current_dict_onto.keys():
+        if node in current_dict_onto[ky].keys():
+            # Ensure current_dict_onto[ky][node] is a dict and has at least one item
+            node_details = current_dict_onto[ky][node]
+            if isinstance(node_details, dict) and node_details:
+                 out="<p>"+node.upper()+"<hr><li>"+ next(iter(node_details)).replace("|", "<li>")
+                 break # Found the node, no need to check other keys
+            elif isinstance(node_details, str): # If it's just a string of keywords
+                 out="<p>"+node.upper()+"<hr><li>"+ node_details.replace("|", "<li>")
+                 break
+    if not out: # If node not found or details are empty
+        out = f"<p>Details for node '{node.upper()}' not found in the current ontology.</p>"
 
+    return render_template('sentences.html', sentences=out+"<p>")
+'''
 @app.route("/shownode")
 def shownode():
     node=request.args.get('node')
@@ -1377,30 +1701,173 @@ def shownode():
     return render_template('sentences.html', sentences=out+"<p>")
 
 
+
 @app.route("/synonyms")
 def synonyms():
-    node=request.args.get('node')
-    node=node.upper()
-    allnodes={**genes}
+    """Render genenames.html with the gene's synonym links and a prompt built from GENECUP_PROMPT_TEMPLATE plus sentences from a prior search (``rnd``) or a fresh PubMed/GWAS lookup."""
+    node = request.args.get('node')
+    rnd = request.args.get('rnd')
+    if not node:
+        return "Error: Gene node is required.", 400
+    if rnd and ('/' in rnd or '\\' in rnd or '..' in rnd): return "Error: Invalid search id.", 400  # rnd is used as a directory name below; refuse path traversal
+    node = node.upper()
     try:
-        synonym_list = list(allnodes[node].split("|")) 
+        # --- Part 1: Handle Synonyms Links ---
+        allnodes = {}
+        if 'genes' in globals() and isinstance(globals()['genes'], dict):
+            allnodes = globals()['genes']
+        else:
+            print("Warning: 'genes' dictionary for synonyms not found.")
+        
+        synonym_list = list(allnodes[node].split("|"))  # unknown gene -> KeyError -> "case 2" page (handler at the bottom)
         session['synonym_list'] = synonym_list
         session['main_gene'] = node.upper()
-        out="<hr><li>"+ allnodes[node].replace("|", "<li>")
-        synonym_list_str = ';'.join([str(syn) for syn in synonym_list]) 
-        synonym_list_str +=';' + node
+        synonym_list_str = ';'.join([str(syn) for syn in synonym_list])
+        synonym_list_str += ';' + node
         case = 1
-        return render_template('genenames.html', case = case, gene = node.upper(), synonym_list = synonym_list, synonym_list_str=synonym_list_str)
-    except:
-        try:
-            synonym_list = session['synonym_list']
-            synonym_list_str = ';'.join([str(syn) for syn in synonym_list]) 
-            synonym_list_str +=';' + node
-            case = 1
-            return render_template('genenames.html', case=case, gene = session['main_gene'] , synonym_list = synonym_list, synonym_list_str=synonym_list_str)
-        except:
-            case = 2
-            return render_template('genenames.html', gene = node, case = case)
+
+        formatted_sentences = ""
+
+        if rnd and rnd.strip():
+            # --- Logic to use existing search results ---
+            print(f"Synonyms: rnd '{rnd}' provided. Reading from search results.")
+            path = ''
+            if 'email' in session and 'hashed_email' in session:
+                path = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd+"/"
+            else:
+                tf_path = tempfile.gettempdir()
+                path = tf_path + "/" + rnd + "/"
+
+            timestamp = rnd.split("_0_")[0]
+            snt_file_path = path + timestamp + "_snt"
+            gwas_file_path = path + "gwas_results.tab"
+
+            sents_by_main_cat = {}
+            
+            try:
+                with open(snt_file_path, "r") as f:
+                    for line in f:
+                        if not line.strip(): continue
+                        try:
+                            (l_gene, l_main_cat, l_sub_cat, l_pmid, l_text) = line.strip().split("\t")
+                            if l_gene.upper() == node:
+                                if l_main_cat not in sents_by_main_cat: sents_by_main_cat[l_main_cat] = {}
+                                if l_sub_cat not in sents_by_main_cat[l_main_cat]: sents_by_main_cat[l_main_cat][l_sub_cat] = []
+                                sents_by_main_cat[l_main_cat][l_sub_cat].append({'pmid': l_pmid, 'text': l_text})
+                        except ValueError: continue
+            except FileNotFoundError: print(f"Sentence file not found: {snt_file_path}")
+
+            try:
+                with open(gwas_file_path, "r") as f:
+                    for line in f:
+                        if not line.strip(): continue
+                        try:
+                            (l_gene, l_main_cat, l_sub_cat, l_pmid, l_text) = line.strip().split("\t")
+                            if l_gene.upper() == node:
+                                if 'GWAS' not in sents_by_main_cat: sents_by_main_cat['GWAS'] = {}
+                                sub_cat_clean = l_sub_cat.replace('_GWAS', '')
+                                if sub_cat_clean not in sents_by_main_cat['GWAS']: sents_by_main_cat['GWAS'][sub_cat_clean] = []
+                                sents_by_main_cat['GWAS'][sub_cat_clean].append({'pmid': l_pmid, 'text': l_text})
+                        except ValueError: continue
+            except FileNotFoundError: print(f"GWAS sentence file not found: {gwas_file_path}")
+
+            for main_cat, sub_cats in sorted(sents_by_main_cat.items()):
+                for sub_cat, sentences in sorted(sub_cats.items()):
+                    formatted_sentences += f"\n## Keyword: {sub_cat} (Category: {main_cat})\n"
+                    for sent_obj in sentences:
+                        clean_text = re.sub('<[^<]+?>', '', sent_obj['text'])
+                        formatted_sentences += f"- {clean_text} (PMID: {sent_obj['pmid']})\n"
+        else:
+            # --- Fallback Logic: Perform a fresh search ---
+            print(f"Synonyms: rnd not provided. Performing fresh search for {node}.")
+            current_ontology = {}
+            if 'namecat' in session and session['namecat'] != 'addiction' and not session['namecat'].startswith(tempfile.gettempdir()):
+                try:
+                    with open(session['namecat'] + ".onto", "r") as f_onto: current_ontology = ast.literal_eval(f_onto.read())
+                except (FileNotFoundError, SyntaxError, TypeError): current_ontology = dictionary
+            else: current_ontology = dictionary
+
+            abstracts_raw = getabstracts(node, "")
+            sentences_ls = []
+            if abstracts_raw:
+                for row in abstracts_raw.split("\n"):
+                    if not row.strip(): continue
+                    parts = row.split("\t", 1)
+                    if len(parts) < 2: continue
+                    pmid, tiab_text = parts
+                    for sent_tok in sent_tokenize(tiab_text): sentences_ls.append({'pmid': pmid, 'text': sent_tok})
+
+            pubmed_formatted_sentences = ""
+            if sentences_ls:
+                gene_regex = re.compile(r'\b(' + re.escape(node) + r')\b', re.IGNORECASE)
+                for category_key, keyword_nodes in sorted(current_ontology.items()):
+                    if not isinstance(keyword_nodes, dict): continue
+                    for keyword_node, search_terms_obj in sorted(keyword_nodes.items()):
+                        if isinstance(search_terms_obj, set) and search_terms_obj: search_terms_str = next(iter(search_terms_obj))
+                        elif isinstance(search_terms_obj, str): search_terms_str = search_terms_obj
+                        else: continue
+                        search_terms = [re.escape(term) for term in search_terms_str.split('|') if term]  # drop empty alternatives, as the GWAS branch below does
+                        if not search_terms: continue  # an empty alternation would match every sentence
+                        keyword_regex = re.compile(r'\b(' + '|'.join(search_terms) + r')\b', re.IGNORECASE)
+                        
+                        sents_for_this_keyword = [s for s in sentences_ls if gene_regex.search(s['text']) and keyword_regex.search(s['text'])]
+                        
+                        if sents_for_this_keyword:
+                            pubmed_formatted_sentences += f"\n## Keyword: {keyword_node} (Category: {category_key})\n"
+                            for sent_obj in sents_for_this_keyword: pubmed_formatted_sentences += f"- {sent_obj['text']} (PMID: {sent_obj['pmid']})\n"
+            
+            gwas_formatted_sentences = ""
+            if 'GWAS' in current_ontology:
+                try:
+                    datf = pd.read_csv('./utility/gwas_used.csv', sep='\t')
+                    gene_pattern = r'(?:\s|^)' + re.escape(node) + r'(?:\s|$)'
+                    datf_sub1 = datf[datf["MAPPED_GENE"].str.contains(gene_pattern, flags=re.IGNORECASE, na=False) | datf["REPORTED GENE(S)"].str.contains(gene_pattern, flags=re.IGNORECASE, na=False)]
+                    if not datf_sub1.empty:
+                        gwas_sents_for_node = []
+                        gwas_ontology_part = current_ontology.get('GWAS', {})
+                        if isinstance(gwas_ontology_part, dict):
+                            for keyword_node, search_terms_obj in sorted(gwas_ontology_part.items()):
+                                if isinstance(search_terms_obj, set) and search_terms_obj: search_terms_str = next(iter(search_terms_obj))
+                                elif isinstance(search_terms_obj, str): search_terms_str = search_terms_obj
+                                else: continue
+                                for term in search_terms_str.split('|'):
+                                    if not term: continue
+                                    term_pattern = r'(?:\s|^)' + re.escape(term) + r'(?:\s|$)'
+                                    datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains(term_pattern, flags=re.IGNORECASE, na=False)]
+                                    if not datf_sub.empty:
+                                        for _, row in datf_sub.iterrows():
+                                            gwas_text = f"SNP:{row['SNPS']}, P value: {row['P-VALUE']}, Disease/trait: {row['DISEASE/TRAIT']}, Mapped trait: {row['MAPPED_TRAIT']}"
+                                            gwas_sents_for_node.append({'pmid': row['PUBMEDID'], 'text': gwas_text, 'category': keyword_node})
+                        if gwas_sents_for_node:
+                            gwas_by_keyword = {}
+                            for s in gwas_sents_for_node:
+                                kw = s['category']
+                                if kw not in gwas_by_keyword: gwas_by_keyword[kw] = []
+                                gwas_by_keyword[kw].append(s)
+                            for keyword, sentences in sorted(gwas_by_keyword.items()):
+                                gwas_formatted_sentences += f"\n\n## Keyword: {keyword} (Category: GWAS)\n"
+                                unique_sentences = {f"{s['pmid']}_{s['text']}": s for s in sentences}
+                                for sent_obj in unique_sentences.values(): gwas_formatted_sentences += f"- {sent_obj['text']} (PMID: {sent_obj['pmid']})\n"
+                except FileNotFoundError: print("Warning: ./utility/gwas_used.csv not found.")
+                except Exception as e: print(f"Error processing GWAS data in /synonyms fallback: {e}")
+
+            formatted_sentences = pubmed_formatted_sentences + gwas_formatted_sentences
+
+        # --- Part 4: Assemble final prompt ---
+        if not formatted_sentences.strip():
+            formatted_sentences = "No relevant sentences were found in the literature for this gene."
+
+        prompt_string = GENECUP_PROMPT_TEMPLATE.replace("{{gene}}", node)
+        prompt_string += formatted_sentences
+
+        return render_template('genenames.html', case=case, gene=node.upper(), synonym_list=synonym_list, synonym_list_str=synonym_list_str, prompt=prompt_string)
+
+    except KeyError:
+        case = 2
+        return render_template('genenames.html', gene=node, case=case)
+    except Exception as e:
+        print(f"An unexpected error occurred in /synonyms for node {node}: {e}")
+        return f"An error occurred while processing your request for {node}.", 500
 
 
 @app.route("/startGeneGene")
@@ -1411,86 +1878,157 @@ def startGeneGene():
 
 @app.route("/searchGeneGene")
 def gene_gene():
+    """Stream progress of the gene vs. top-addiction-gene sentence search as server-sent events; writes session['path'] + '_ggSent' and '_ggResult'."""
+    if 'path' not in session:
+        # session['path'] is expected to be set by an earlier step such as /progress.
+        # If this endpoint is reached without it, anonymous users get a fresh temp
+        # location and logged-in users fall back to session['path_user'].
+        # NOTE(review): a missing path may point at a page-flow problem; confirm.
+        if 'email' not in session : # Only create temp path if not logged in and path is missing
+             tf_path_gg=tempfile.gettempdir()
+             rnd_gg = "tmp_gg" + ''.join(random.choice(string.ascii_letters) for x in range(6))
+             session['path'] = tf_path_gg + "/" + rnd_gg
+             os.makedirs(session['path'], exist_ok=True)
+        else: # Logged in user should have path_user from /progress
+            if 'path_user' in session:
+                session['path'] = session['path_user'] # Unify to use session['path']
+            else: # Critical error if logged in and no path_user
+                 return "Error: User session path not found.", 500
+
+
     tmp_ggPMID=session['path']+"_ggPMID"
     gg_file=session['path']+"_ggSent" # Gene_gene
     result_file=session['path']+"_ggResult"
+    import shlex  # stdlib; local import, only this handler builds shell commands from user input
+    # Prefer the module-level pubmed_path (the previous version of this handler
+    # read that name from module scope); the relative directory is only a
+    # fallback for installations where it is not defined.
+    # NOTE(review): confirm where pubmed_path is configured.
+    pubmed_path = globals().get("pubmed_path", "./pubmed_data/")
+    os.makedirs(pubmed_path, exist_ok=True) # Ensure it exists if it's a local relative path
+
+    def findWholeWord(w): # w is inserted as a regex fragment, so 'A|B' matches either name as a whole word
+        return re.compile(r'(?<!\w)({})(?!\w)'.format(w), flags=re.IGNORECASE).search
 
     def generate(query):
+        from nltk.tokenize import sent_tokenize # Local import
         progress=1
         yield "data:"+str(progress)+"\n\n"
-        os.system("esearch -db pubmed -query \"" +  query + "\" | efetch -format uid |sort >" + tmp_ggPMID)
-        abstracts=os.popen("comm -1 -2 topGene_uniq.pmid " + tmp_ggPMID + " |fetch-pubmed -path "+pubmed_path+ " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
-        os.system("rm "+tmp_ggPMID)
+        # shlex.quote neutralises $(), backticks and quotes; escaping only '"' does not
+        safe_query = shlex.quote(query)
+        os.system(f"esearch -db pubmed -query {safe_query} | efetch -format uid |sort > {shlex.quote(tmp_ggPMID)}")
+        
+        # 'topGene_uniq.pmid' file needs to exist
+        # For robustness, check if it exists
+        top_gene_pmid_file = "topGene_uniq.pmid"
+        if not os.path.exists(top_gene_pmid_file):
+            print(f"Warning: {top_gene_pmid_file} not found. Gene-gene search might be affected.")
+            # Create an empty file to prevent comm command error, or handle differently
+            open(top_gene_pmid_file, 'a').close() 
+
+        abstracts_cmd = f"comm -1 -2 \"{top_gene_pmid_file}\" \"{tmp_ggPMID}\" | fetch-pubmed -path \"{pubmed_path}\" | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText | sed \"s/-/ /g\""
+        try:
+            abstracts_process = os.popen(abstracts_cmd)
+            abstracts = abstracts_process.read()
+            abstracts_process.close()
+        except Exception as e_abs:
+            print(f"Error getting abstracts for gene-gene search: {e_abs}")
+            abstracts = ""
+
+        if os.path.exists(tmp_ggPMID): # Clean up temp file without going through the shell
+            os.remove(tmp_ggPMID)
+
         progress=10
         yield "data:"+str(progress)+"\n\n"
         topGenes=dict()
-        out=str()
+        out_str=str() # tab-separated hit rows, written to gg_file at the end
         hitGenes=dict()
-        with open("topGene_symb_alias.txt", "r") as top_f:
-            for line in top_f:
-                (symb, alias)=line.strip().split("\t")
-                topGenes[symb]=alias.replace("; ","|")
+        
+        # 'topGene_symb_alias.txt' file needs to exist
+        top_gene_alias_file = "topGene_symb_alias.txt"
+        if os.path.exists(top_gene_alias_file):
+            with open(top_gene_alias_file, "r") as top_f:
+                for line in top_f:
+                    parts = line.strip().split("\t")
+                    if len(parts) == 2:
+                        symb, alias = parts
+                        topGenes[symb]=alias.replace("; ","|")
+        else:
+            print(f"Warning: {top_gene_alias_file} not found. Top gene list will be empty.")
+
         allAbstracts= abstracts.split("\n")
-        abstractCnt=len(allAbstracts)
+        abstractCnt=len(allAbstracts) if abstracts else 0 # Handle empty abstracts
         rowCnt=0
 
         for row in allAbstracts:
+            if not row.strip(): continue
             rowCnt+=1
-            if rowCnt/10==int(rowCnt/10):
+            if abstractCnt > 0 and rowCnt % 10 == 0 : # Check abstractCnt > 0
                 progress=10+round(rowCnt/abstractCnt,2)*80
                 yield "data:"+str(progress)+"\n\n"
-            tiab=row.split("\t")
-            pmid = tiab.pop(0)
-            tiab= " ".join(tiab)
-            sentences = sent_tokenize(tiab)
+            
+            tiab_parts=row.split("\t", 1) # Split only on first tab
+            if len(tiab_parts) < 2: continue # Skip malformed lines
+            pmid = tiab_parts[0]
+            tiab_text_gg = tiab_parts[1] # title and abstract text following the PMID
+            
+            sentences_gg = sent_tokenize(tiab_text_gg)
             ## keep the sentence only if it contains the gene 
-            for sent in sentences:
-                if findWholeWord(query)(sent):
-                    sent=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent, flags=re.I)
-                    for symb in topGenes:
-                        allNames=symb+"|"+topGenes[symb]
-                        if findWholeWord(allNames)(sent) :
-                            sent=sent.replace("<b>","").replace("</b>","")
-                            sent=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent, flags=re.I)
-                            out+=query+"\t"+"gene\t" + symb+"\t"+pmid+"\t"+sent+"\n"
-                            if symb in hitGenes.keys():
-                                hitGenes[symb]+=1
+            for sent_item in sentences_gg:
+                if findWholeWord(query)(sent_item):
+                    sent_item=re.sub(r'\b(%s)\b' % query, r'<strong>\1</strong>', sent_item, flags=re.I)
+                    for symb_item in topGenes:
+                        allNames=symb_item+"|"+topGenes[symb_item]
+                        if findWholeWord(allNames)(sent_item) :
+                            sent_item=sent_item.replace("<b>","").replace("</b>","") # Clean previous bolds
+                            sent_item=re.sub(r'\b(%s)\b' % allNames, r'<b>\1</b>', sent_item, flags=re.I) # Bold current match
+                            out_str+=query+"\t"+"gene\t" + symb_item+"\t"+pmid+"\t"+sent_item+"\n"
+                            if symb_item in hitGenes: # Check if key exists
+                                hitGenes[symb_item]+=1
                             else:
-                                hitGenes[symb]=1
+                                hitGenes[symb_item]=1
         progress=95
         yield "data:"+str(progress)+"\n\n"
         with open(gg_file, "w+") as gg:
-            gg.write(out)
-            gg.close()
-        results="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>"
+            gg.write(out_str)
+            # gg.close() # Not needed with 'with open'
+        
+        results_html="<h4>"+query+" vs top addiction genes</h4> Click on the number of sentences will show those sentences. Click on the <span style=\"background-color:#FcF3cf\">top addiction genes</span> will show an archived search for that gene.<hr>" # summary page saved to result_file
         topGeneHits={}
-        for key in hitGenes.keys():
-            url=gg_file+"|"+query+"|"+key
-            if hitGenes[key]==1:
-                sentword="sentence"
-            else:
-                sentword="sentences"
-            topGeneHits[ "<li> <a href=/sentences?edgeID=" + url+ " target=_new>" + "Show " + str(hitGenes[key]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key+" target=_gene><span style=\"background-color:#FcF3cf\">"+key+"</span></a>" ]=hitGenes[key]
-        topSorted = [(k, topGeneHits[k]) for k in sorted(topGeneHits, key=topGeneHits.get, reverse=True)]
+        for key_gene in hitGenes.keys():
+            url_gg=gg_file+"|"+query+"|"+key_gene # edgeID consumed by /sentences
+            sentword="sentence" if hitGenes[key_gene]==1 else "sentences"
+            topGeneHits[ "<li> <a href=/sentences?edgeID=" + url_gg+ " target=_new>" + "Show " + str(hitGenes[key_gene]) + " " + sentword +" </a> about "+query+" and <a href=/showTopGene?topGene="+key_gene+" target=_gene><span style=\"background-color:#FcF3cf\">"+key_gene+"</span></a>" ]=hitGenes[key_gene]
         
-        for k,v in topSorted:
-            results+=k
-        saveResult=open(result_file, "w+")
-        saveResult.write(results)
-        saveResult.close()
+        topSorted = sorted(topGeneHits.items(), key=lambda item: item[1], reverse=True) # most sentences first
+        
+        for k_html,v_count in topSorted:
+            results_html+=k_html
+        
+        with open(result_file, "w+") as saveResult: # Ensure it's opened in write mode
+            saveResult.write(results_html)
+            # saveResult.close() # Not needed
+
         progress=100
         yield "data:"+str(progress)+"\n\n"
     
     # Start the run
-    query=session['forTopGene']
-    return Response(generate(query), mimetype='text/event-stream')
+    query_gene_gene=session.get('forTopGene', '') # Get from session, default to empty
+    if not query_gene_gene:
+        return Response("Error: No gene query found for gene-gene search.", mimetype='text/event-stream')
+    return Response(generate(query_gene_gene), mimetype='text/event-stream')
 
 
 @app.route('/showGeneTopGene')
 def showGeneTopGene ():
-    with open(session['path']+"_ggResult", "r") as result_f:
-        results=result_f.read()
-    return render_template('sentences.html', sentences=results+"<p><br>")
+    """Render the saved gene-vs-top-gene summary, or a placeholder when the session has no result file."""
+    results_content = "<p>No results found.</p>" # Default content
+    result_file_path = session['path'] + "_ggResult" if session.get('path') else "" # empty when no search path is in the session, so a stray ./_ggResult is never read
+    if result_file_path and os.path.exists(result_file_path):
+        with open(result_file_path, "r") as result_f:
+            results_content=result_f.read()
+    else: print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.")
+    return render_template('sentences.html', sentences=results_content+"<p><br>")
 
 
 # Generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@@ -1500,5 +2038,5 @@ def top150genes():
 
 
 if __name__ == '__main__':
-    db.create_all()
-    app.run(debug=True, port=4200)
+    # Development server only (use Gunicorn or Waitress in production). The Werkzeug debugger allows arbitrary code execution, so it stays off on 0.0.0.0 unless FLASK_DEBUG=1 is set explicitly.
+    app.run(debug=os.getenv("FLASK_DEBUG") == "1", host='0.0.0.0', port=4200)
author	chen42	2026-03-24 09:21:50 -0500
committer	chen42	2026-03-24 09:21:50 -0500
commit	5e68858ef98f61f80ba5992296c36db6c8dc67c9 (patch)
tree	9c6184fe6fffdfa0bef10dbf4dcd17ff324dbeb9 /server.py
parent	427a6ab4f4a1b45608addf3df23088251d4480a8 (diff)
download	genecup-5e68858ef98f61f80ba5992296c36db6c8dc67c9.tar.gz