From 5e68858ef98f61f80ba5992296c36db6c8dc67c9 Mon Sep 17 00:00:00 2001
From: chen42
Date: Tue, 24 Mar 2026 09:21:50 -0500
Subject: switch to gemini-flash
---
server.py | 1268 +++++++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 903 insertions(+), 365 deletions(-)
(limited to 'server.py')
diff --git a/server.py b/server.py
index 9d34bf9..19d7486 100755
--- a/server.py
+++ b/server.py
@@ -13,32 +13,44 @@ from os import listdir
import bcrypt
import nltk
-import numpy as np
+# import numpy as np # Removed
import pandas as pd
import pytz
from flask import (Flask, Response, flash, jsonify, redirect, render_template,
request, session, url_for)
from flask_sqlalchemy import SQLAlchemy
-from numpy import array
+# from numpy import array # Removed
-nltk.download('punkt')
-import pickle
+from dotenv import load_dotenv
+load_dotenv()
+import os
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
+nltk.download('punkt', quiet=True)
+# import pickle # Removed
from collections import Counter
from datetime import datetime
-import tensorflow
-import tensorflow.keras
-from nltk.corpus import stopwords
-from nltk.stem.porter import PorterStemmer
-from tensorflow.keras import backend as K
-from tensorflow.keras import metrics, optimizers
-from tensorflow.keras.layers import *
-from tensorflow.keras.layers import Dense, Embedding, Flatten
-from tensorflow.keras.models import Model, Sequential
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.preprocessing.text import Tokenizer
+# Gemini API related imports
+import google.generativeai as genai
+import re
+import ast
from more_functions import *
+from nltk.tokenize import sent_tokenize
+from more_functions import getabstracts, undic, gene_category
+
+GENECUP_PROMPT_TEMPLATE = ""
+try:
+ with open("genecup_synthesis_prompt.txt", "r") as f:
+ GENECUP_PROMPT_TEMPLATE = f.read()
+except FileNotFoundError:
+ print("Warning: genecup_synthesis_prompt.txt not found. LLM prompts will be incomplete.")
+except Exception as e:
+ print(f"Error loading genecup_synthesis_prompt.txt: {e}. LLM prompts will be affected.")
+
+
+
app=Flask(__name__)
#datadir="/export/ratspub/"
@@ -48,8 +60,125 @@ datadir="./"
app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4'
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'+datadir+'userspub.sqlite'
db = SQLAlchemy(app)
+
+
+def get_sentences_from_file(file_path, gene_name, category_name=None):
+    """Return the sentences in a tab-separated sentence file that match a gene.
+
+    Each non-blank line is expected to hold exactly five tab-separated
+    fields: gene, an unused field, category, PMID and sentence text. Lines
+    with any other number of fields are skipped.
+
+    Args:
+        file_path: Path of the sentence file to read.
+        gene_name: Gene symbol to match (case-insensitive, surrounding
+            whitespace ignored).
+        category_name: Optional category matched the same way; ``None``
+            accepts every category.
+
+    Returns:
+        A list of ``{'pmid', 'text', 'category'}`` dicts in file order. The
+        values are the raw fields, so ``text`` keeps any trailing newline
+        from its line. A missing or unreadable file is reported on stdout
+        and whatever was collected so far (possibly nothing) is returned.
+    """
+    matching_sentences = []
+    try:
+        with open(file_path, "r") as f:
+            # Normalise the search keys once instead of once per line.
+            wanted_gene = gene_name.strip().upper()
+            wanted_cat = None if category_name is None else category_name.strip().upper()
+            for line in f:
+                if not line.strip():
+                    continue
+                fields = line.split("\t")
+                if len(fields) != 5:
+                    continue  # malformed line: skip it rather than abort the scan
+                gene, _unused, cat, pmid, text = fields
+                if gene.strip().upper() != wanted_gene:
+                    continue
+                if wanted_cat is not None and cat.strip().upper() != wanted_cat:
+                    continue
+                matching_sentences.append({'pmid': pmid, 'text': text, 'category': cat})
+    except FileNotFoundError:
+        print(f"Sentence file not found: {file_path}")
+    except Exception as e:
+        # Deliberately best-effort: log any other failure and return partial results.
+        print(f"Error reading sentence file {file_path}: {e}")
+    return matching_sentences
+
+
nltk.data.path.append("./nlp/")
+# Initialize database within application context
+with app.app_context():
+ db.create_all()
+
+# Configure Gemini API Key
+# IMPORTANT: Set the GEMINI_API_KEY environment variable
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+ print("Warning: GEMINI_API_KEY environment variable not set. Stress classification via Gemini will not work.")
+else:
+ try:
+ genai.configure(api_key=GEMINI_API_KEY)
+ except Exception as e:
+ print(f"Error configuring Gemini API: {e}")
+ GEMINI_API_KEY = None # Ensure it's None if configuration fails
+'''
+STRESS_PROMPT_TEMPLATE = ""
+try:
+ with open("stress_prompt.txt", "r") as f_prompt:
+ STRESS_PROMPT_TEMPLATE = f_prompt.read()
+except FileNotFoundError:
+ print("FATAL ERROR: stress_prompt.txt not found. Stress classification will fail.")
+except Exception as e:
+ print(f"FATAL ERROR: Could not read stress_prompt.txt: {e}")
+
+# few shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+ if not GEMINI_API_KEY:
+ print("Gemini API key not configured. Skipping classification.")
+ return "error_no_api_key"
+
+ # --- THIS IS THE MODIFIED PART ---
+ # Check if the prompt template was loaded successfully
+ if not STRESS_PROMPT_TEMPLATE:
+ print("Stress prompt template is not available. Skipping classification.")
+ return "error_no_prompt_template"
+
+ try:
+ model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+
+ # Append the new sentence and the final instruction to the prompt template
+ # This is safer than .format() when the template contains its own curly braces.
+ prompt = STRESS_PROMPT_TEMPLATE + f'\nSentence: {sentence_text}\nClassification:'
+ print(prompt)
+ response = model_gemini.generate_content(prompt)
+ # We need to parse the classification from the response
+ classification = response.text.strip().lower()
+
+ # The model might return "Cellular Level Stress" or "Organismal Stress"
+ if "cellular" in classification:
+ return "neg" # 'neg' for Cellular Level Stress
+ elif "organismal" in classification:
+ return "pos" # 'pos' for Organismal Stress
+ else:
+ print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+ return "unknown"
+
+ except Exception as e:
+ print(f"Error calling Gemini API for stress classification: {e}")
+ return "error_api_call"
+
+
+# zero-shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+ if not GEMINI_API_KEY:
+ print("Gemini API key not configured. Skipping classification.")
+ return "error_no_api_key"
+
+ try:
+ model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+ prompt = f"""Classify the following sentence based on whether it describes 'systemic stress' or 'cellular stress'.
+Please return ONLY the word 'systemic' if it describes systemic stress, or ONLY the word 'cellular' if it describes cellular stress. Do not add any other explanation or punctuation.
+
+Sentence: "{sentence_text}"
+
+Classification:"""
+
+ response = model_gemini.generate_content(prompt)
+ classification = response.text.strip().lower()
+
+ if classification == "systemic":
+ return "pos" # 'pos' for systemic stress
+ elif classification == "cellular":
+ return "neg" # 'neg' for cellular stress
+ else:
+ print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+ return "unknown"
+
+ except Exception as e:
+ print(f"Error calling Gemini API for stress classification: {e}")
+ return "error_api_call"
+'''
+
# Sqlite database
class users(db.Model):
__tablename__='user'
@@ -59,46 +188,47 @@ class users(db.Model):
password = db.Column(db.String(128), nullable=False)
date_created = db.Column(db.DateTime, default=datetime.utcnow)
-# Preprocessing of words for CNN
-def clean_doc(doc, vocab):
- doc = doc.lower()
- tokens = doc.split()
- re_punc = re.compile('[%s]' % re.escape(string.punctuation))
- tokens = [re_punc.sub('' , w) for w in tokens]
- tokens = [word for word in tokens if len(word) > 1]
- stop_words = set(stopwords.words('english'))
- tokens = [w for w in tokens if not w in stop_words]
- porter = PorterStemmer()
- stemmed = [porter.stem(word) for word in tokens]
- return tokens
-
-# Load tokenizer
-with open('./nlp/tokenizer.pickle', 'rb') as handle:
- tokenizer = pickle.load(handle)
-
-# Load vocabulary
-with open('./nlp/vocabulary.txt', 'r') as vocab:
- vocab = vocab.read()
-
-def tf_auc_score(y_true, y_pred):
- return tensorflow.metrics.auc(y_true, y_pred)[1]
-
-K.clear_session()
-
-# Create the CNN model
-def create_model(vocab_size, max_length):
- model = Sequential()
- model.add(Embedding(vocab_size, 32, input_length=max_length))
- model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
- model.add(MaxPooling1D(pool_size=2))
- model.add(Flatten())
- model.add(Dense(10, activation='relu'))
- model.add(Dense(1, activation='sigmoid'))
- opt = tensorflow.keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999)
- model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
- return model
+# Preprocessing of words for CNN (REMOVED)
+# def clean_doc(doc, vocab):
+# doc = doc.lower()
+# tokens = doc.split()
+# re_punc = re.compile('[%s]' % re.escape(string.punctuation))
+# tokens = [re_punc.sub('' , w) for w in tokens]
+# tokens = [word for word in tokens if len(word) > 1]
+# stop_words = set(stopwords.words('english'))
+# tokens = [w for w in tokens if not w in stop_words]
+# porter = PorterStemmer()
+# stemmed = [porter.stem(word) for word in tokens]
+# return tokens
+
+# Load tokenizer (REMOVED)
+# with open('./nlp/tokenizer.pickle', 'rb') as handle:
+# tokenizer = pickle.load(handle)
+
+# Load vocabulary (REMOVED)
+# with open('./nlp/vocabulary.txt', 'r') as vocab_file_handle: # Renamed variable to avoid conflict
+# vocab_text = vocab_file_handle.read() # Renamed variable
+
+# def tf_auc_score(y_true, y_pred): (REMOVED)
+# return tensorflow.metrics.AUC()(y_true, y_pred)
+
+# K.clear_session() (REMOVED)
+
+# Create the CNN model (REMOVED)
+# def create_model(vocab_size, max_length):
+# model = Sequential()
+# model.add(Embedding(vocab_size, 32, input_length=max_length))
+# model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
+# model.add(MaxPooling1D(pool_size=2))
+# model.add(Flatten())
+# model.add(Dense(10, activation='relu'))
+# model.add(Dense(1, activation='sigmoid'))
+# opt = tensorflow.keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+# model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
+# return model
# Use addiction ontology by default
+import ast # NOTE: redundant -- ast is already imported earlier in this file alongside the Gemini-related imports.
onto_cont=open("addiction.onto","r").read()
dictionary=ast.literal_eval(onto_cont)
@@ -278,7 +408,7 @@ def logout():
user1 = session['name']
else:
user1 = session['email']
- flash("You have been logged out, {user1}", "inval")
+ flash(f"You have been logged out, {user1}", "inval") # Used f-string for clarity
session.pop('email', None)
session.clear()
return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
@@ -699,10 +829,10 @@ def progress():
except:
namecat = 'addiction'
session['namecat'] = namecat
- if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction':
+ if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction': # Redundant 'addiction' check
session['namecat']='addiction'
onto_cont=open("addiction.onto","r").read()
- dictionary=ast.literal_eval(onto_cont)
+ # dictionary=ast.literal_eval(onto_cont) # dictionary is global, no need to re-assign from local onto_cont
search_type = request.args.getlist('type')
if (search_type == []):
search_type = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric', 'cell']
@@ -805,7 +935,7 @@ def search():
d["nj{0}".format(n_num)]=''
else:
namecat_flag=0
- for ky in dictionary.keys():
+ for ky in dictionary.keys(): # Using global 'dictionary'
nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dictionary.keys())))+", 70%, 80%)"
d["nj{0}".format(n_num)]=generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
n_num+=1
@@ -818,10 +948,15 @@ def search():
json_nodes += generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
d["nj{0}".format(n_num)]=''
- json_nodes = json_nodes[:-2]
- json_nodes =json_nodes+"]}"
- def generate(genes, tf_name):
+ json_nodes = json_nodes[:-2] # Handles case if json_nodes was only "{\"data\":["
+ if json_nodes == "{\"data\"": # if it was empty before -2
+ json_nodes = "{\"data\":[]}"
+ else:
+ json_nodes =json_nodes+"]}"
+
+ def generate(genes, tf_name): # tf_name is snt_file
with app.test_request_context():
+ from nltk.tokenize import sent_tokenize # Moved import here, as it's only used in this function scope.
sentences=str()
edges=str()
nodes = temp_nodes
@@ -832,34 +967,36 @@ def search():
#genes_or = ' [tiab] or '.join(genes)
all_d=''
+ current_dict_onto = {} # To hold the relevant ontology for this search pass
if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
-
- for ky in dict_onto.keys():
- if (ky in search_type):
- all_d_ls=undic(list(dict_onto[ky].values()))
- all_d = all_d+'|'+all_d_ls
+ onto_cont_local = open(ses_namecat+".onto","r").read() # ses_namecat from outer scope
+ current_dict_onto=ast.literal_eval(onto_cont_local)
else:
- for ky in dictionary.keys():
- if (ky in search_type):
- all_d_ls=undic(list(dictionary[ky].values()))
- all_d = all_d+'|'+all_d_ls
- all_d=all_d[1:]
+ current_dict_onto = dictionary # Use global dictionary
+
+ for ky in current_dict_onto.keys():
+ if (ky in search_type):
+ all_d_ls=undic(list(current_dict_onto[ky].values()))
+ all_d = all_d+'|'+all_d_ls
+ if all_d: # Check if all_d is not empty
+ all_d=all_d[1:]
+
if ("GWAS" in search_type):
datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
progress+=percent
yield "data:"+str(progress)+"\n\n"
+
for gene in genes:
- abstracts_raw = getabstracts(gene,all_d)
+ abstracts_raw = getabstracts(gene,all_d) # all_d might be empty if no search_type matches
#print(abstracts_raw)
sentences_ls=[]
for row in abstracts_raw.split("\n"):
+ if not row.strip(): continue # Skip empty lines
tiab=row.split("\t")
pmid = tiab.pop(0)
- tiab= " ".join(tiab)
- sentences_tok = sent_tokenize(tiab)
+ tiab_text = " ".join(tiab) # Renamed to avoid conflict
+ sentences_tok = sent_tokenize(tiab_text)
for sent_tok in sentences_tok:
sent_tok = pmid + ' ' + sent_tok
sentences_ls.append(sent_tok)
@@ -867,60 +1004,76 @@ def search():
geneEdges = ""
- if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
- else:
- dict_onto = dictionary
+ # Use the already determined current_dict_onto
+ # if namecat_flag==1:
+ # onto_cont = open(ses_namecat+".onto","r").read()
+ # dict_onto_loop=ast.literal_eval(onto_cont)
+ # else:
+ # dict_onto_loop = dictionary
+ dict_onto_loop = current_dict_onto
- for ky in dict_onto.keys():
+ for ky in dict_onto_loop.keys():
if (ky in search_type):
- if (ky=='addiction') and ('addiction' in dict_onto.keys())\
- and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
- and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
- #addiction terms must present with at least one drug
+ # The special handling for 'addiction' with 'drug' needs careful check of dict_onto_loop structure
+ if (ky=='addiction') and ('addiction' in dict_onto_loop.keys())\
+ and ('drug' in dict_onto_loop.keys()) and ('addiction' in dict_onto_loop['addiction'].keys())\
+ and ('aversion' in dict_onto_loop['addiction'].keys()) and ('intoxication' in dict_onto_loop['addiction'].keys()):
addiction_flag=1
- #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
- sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
- if ('addiction' in search_type):
+ # addiction_d is not defined here, assume it's a global or from more_functions
+ # This part might need `addiction_d` from `more_functions.py` to be correctly defined.
+ # For now, assuming addiction_d is available in the scope.
+ sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto_loop)
+ if ('addiction' in search_type): # This check is redundant with outer if
geneEdges += generate_edges(sent, tf_name)
json_edges += generate_edges_json(sent, tf_name)
else:
addiction_flag=0
- if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
- #ky_d=undic(list(dict_onto[ky].values()))
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
- else:
- #ky_d=undic(list(dict_onto[ky].values()))
- #print(sentences_ls)
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
- #print(sent)
+ sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto_loop)
yield "data:"+str(progress)+"\n\n"
geneEdges += generate_edges(sent, tf_name)
json_edges += generate_edges_json(sent, tf_name)
sentences+=sent
- if ("GWAS" in search_type):
+ if ("GWAS" in search_type and 'GWAS' in dict_onto_loop): # Added check for GWAS in dict_onto_loop
gwas_sent=[]
- print (datf)
- datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
- | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE))]
- print (datf_sub1)
- for nd2 in dict_onto['GWAS'].keys():
- for nd1 in dict_onto['GWAS'][nd2]:
- for nd in nd1.split('|'):
- gwas_text=''
- datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE)]
- #& (datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
- #| (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)))]
- if not datf_sub.empty:
- for index, row in datf_sub.iterrows():
- gwas_text = "SNP:"+str(row['SNPS'])+", P value: "+str(row['P-VALUE'])\
- +", Disease/trait: "+str(row['DISEASE/TRAIT'])+", Mapped trait: "\
- +str(row['MAPPED_TRAIT'])+"
"
- gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
+ # print (datf) # datf is loaded earlier
+ datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False)
+ | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False))]
+ # print (datf_sub1)
+ for nd2 in dict_onto_loop['GWAS'].keys():
+ # Ensure dict_onto_loop['GWAS'][nd2] is iterable and contains strings
+ # Example: if dict_onto_loop['GWAS'][nd2] is {'keyword1|keyword2'}
+ # next(iter(dict_onto_loop['GWAS'][nd2])) might be what was intended
+ # Assuming dict_onto_loop['GWAS'][nd2] is a set/list of keyword strings like {'kw1|kw2', 'kw3'}
+ # The original code was: for nd1 in dict_onto_loop['GWAS'][nd2]: for nd in nd1.split('|'):
+ # This implies dict_onto_loop['GWAS'][nd2] contains combined keywords.
+ # Let's assume the structure is { 'subcategory' : {'keyword_group1', 'keyword_group2'} }
+ # where keyword_group is "termA|termB"
+
+ # Iterating over the values of the sub-dictionary if it's a dict, or elements if it's a list/set
+ sub_keywords_container = dict_onto_loop['GWAS'][nd2]
+ # This needs to be robust to the actual structure of dict_onto_loop['GWAS'][nd2]
+ # Assuming it's a set of strings, where each string can be pipe-separated.
+ # e.g., sub_keywords_container = {'phenotype1|phenotype_alias', 'phenotype2'}
+ actual_keywords_to_iterate = []
+ if isinstance(sub_keywords_container, dict): # e.g. {'phenotype_group': 'pheno1|pheno2'}
+ for key_group_str in sub_keywords_container.values(): # Or .keys() if that's the intent
+ actual_keywords_to_iterate.extend(key_group_str.split('|'))
+ elif isinstance(sub_keywords_container, (list, set)):
+ for key_group_str in sub_keywords_container:
+ actual_keywords_to_iterate.extend(key_group_str.split('|'))
+ elif isinstance(sub_keywords_container, str): # e.g. 'pheno1|pheno2'
+ actual_keywords_to_iterate.extend(sub_keywords_container.split('|'))
+
+
+ for nd in actual_keywords_to_iterate:
+ gwas_text=''
+ # Added na=False to contains calls
+ datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE, na=False)]
+ if not datf_sub.empty:
+ for index, row in datf_sub.iterrows():
+ gwas_text = f"SNP:{row['SNPS']}, P value: {row['P-VALUE']}, Disease/trait: {row['DISEASE/TRAIT']}, Mapped trait: {row['MAPPED_TRAIT']}"
+ gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd2+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text) # Changed nd to nd2 for target node
cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user)
with open(path_user+"gwas_results.tab", "a") as gwas_edges:
gwas_edges.write(sn_file)
@@ -931,8 +1084,17 @@ def search():
yield "data:"+str(progress)+"\n\n"
if len(geneEdges) >0:
+ rnd = ''
+ if 'email' in session:
+ if 'rnd' in session:
+ rnd = session['rnd']
+ elif 'path_user' in session:
+ rnd = session['path_user'].split('/')[-2]
+ elif 'path' in session:
+ rnd = session['path'].split('/')[-1]
+
edges+=geneEdges
- nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"'} },\n"
+ nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"&rnd="+rnd+"'} },\n"
else:
nodesToHide+=gene + " "
@@ -947,14 +1109,20 @@ def search():
zeroLinkNode.close()
yield "data:"+str(progress)+"\n\n"
- # Edges in json format
- json_edges="{\"data\":["+json_edges
- json_edges = json_edges[:-2]
- json_edges =json_edges+"]}"
+ # Edges in json format
+ json_edges_content = json_edges.strip()
+ if json_edges_content.endswith(','):
+ json_edges_content = json_edges_content[:-1]
+
+ if not json_edges_content:
+ json_edges = "{\"data\":[]}"
+ else:
+ json_edges = "{\"data\":[" + json_edges_content + "]}"
# Write edges to txt file in json format also in user folder
with open(path_user+"edges.json", "w") as temp_file_edges:
- temp_file_edges.write(json_edges)
+ temp_file_edges.write(json_edges)
+
with open(path_user+"nodes.json", "w") as temp_file_nodes:
temp_file_nodes.write(json_nodes)
return Response(generate(genes, snt_file), mimetype='text/event-stream')
@@ -983,15 +1151,26 @@ def tableview():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(datadir+gene_url_tmp +'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+ # Check if file is empty or just contains empty structure
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ # Reset file pointer and load json
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []} # Ensure jedges is a dict
+ except FileNotFoundError:
+ jedges = {"data": []} # Ensure jedges is a dict if file not found
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+ jedges = {"data": []} # Ensure jedges is a dict
+ nodata_temp = 1
+
+
else:
genes_session_tmp=tf_path+"/"+rnd_url
gene_url_tmp = genes_session_tmp
@@ -1005,16 +1184,25 @@ def tableview():
onto_cont=open("addiction.onto","r").read()
dict_onto=ast.literal_eval(onto_cont)
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
+
jedges =''
- file_edges = open(gene_url_tmp +'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(gene_url_tmp +"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
genename=genes_url.split("_")
if len(genename)>3:
genename = genename[0:3]
@@ -1040,7 +1228,7 @@ def tableview0():
if ('email' in session):
filename = rnd_url.split("_0_")[0]
- genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename
+ # genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename # Not used further
gene_url_tmp = "/user/"+str(session['hashed_email'])+"/"+rnd_url
try:
with open(datadir+gene_url_tmp+"/nodes.json") as jsonfile:
@@ -1054,18 +1242,26 @@ def tableview0():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(datadir+gene_url_tmp+'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(datadir+gene_url_tmp+"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(datadir+gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
else:
- genes_session_tmp=tf_path+"/"+rnd_url
- gene_url_tmp = genes_session_tmp
+ # genes_session_tmp=tf_path+"/"+rnd_url # Not used further
+ gene_url_tmp = tf_path+"/"+rnd_url
try:
with open(gene_url_tmp+"/nodes.json") as jsonfile:
jnodes = json.load(jsonfile)
@@ -1078,15 +1274,23 @@ def tableview0():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(gene_url_tmp+'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(gene_url_tmp+"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
genes_url=request.args.get('genequery')
genename=genes_url.split("_")
if len(genename)>3:
@@ -1118,7 +1322,7 @@ def userarchive():
else:
session['user_folder'] = datadir+"/user/"+str(session['hashed_email'])
else:
- onto_name_archive=''
+ # onto_name_archive='' # This variable is not used here
flash("You logged out!")
onto_len_dir = 0
onto_list = ''
@@ -1135,26 +1339,34 @@ def userarchive():
folder_list = []
directory_list = []
gene_list=[]
- onto_list=[]
+ onto_list_archive =[] # Renamed to avoid conflict with outer scope 'onto_list'
for filename in dirlist:
- if ('_0_' in filename):
- folder_list.append(filename)
- gene_name = filename.split('_0_')[1]
- onto_name = filename.split('_0_')[2]
- if gene_name[-2:] == '_m':
- gene_name = gene_name[:-2]
- gene_name = gene_name + ", ..."
- gene_name = gene_name.replace('_', ', ')
- gene_list.append(gene_name)
- onto_list.append(onto_name)
- onto_name=""
- gene_name=""
- filename=filename[0:4]+"-"+filename[5:7]+"-"+filename[8:13]+":"+filename[14:16]+":"+filename[17:19]
- directory_list.append(filename)
+ if ('_0_' in filename): # Ensure it's a search result folder, not e.g. "ontology"
+ if os.path.isdir(os.path.join(session['user_folder'], filename)): # Check if it's a directory
+ folder_list.append(filename)
+ try:
+ gene_name = filename.split('_0_')[1]
+ onto_name = filename.split('_0_')[2]
+ if gene_name.endswith('_m'): # Check using endswith for robustness
+ gene_name = gene_name[:-2]
+ gene_name = gene_name + ", ..."
+ gene_name = gene_name.replace('_', ', ')
+ gene_list.append(gene_name)
+ onto_list_archive.append(onto_name) # Use renamed list
+ # onto_name="" # Not necessary, re-assigned in loop
+ # gene_name="" # Not necessary, re-assigned in loop
+ # Format filename for display
+ display_filename=filename.split('_0_')[0] # Get only the timestamp part for display formatting
+ display_filename=display_filename[0:4]+"-"+display_filename[5:7]+"-"+display_filename[8:10]+" "+display_filename[11:13]+":"+display_filename[14:16]+":"+display_filename[17:19]
+ directory_list.append(display_filename)
+ except IndexError:
+ print(f"Skipping folder with unexpected name format: {filename}")
+ continue
+
len_dir = len(directory_list)
message3="
No sentences found for {gene0} and {cat0}.
") + + all_stress_sentences = [] + num_abstract = len(matching_sents) + + for sent_obj in matching_sents: + text = sent_obj['text'] + pmid = sent_obj['pmid'] + + formatted_line = f"")
+ out2 = str(num_abstract) + ' sentences in ' + " "+ str(len(pmid_list)) + ' studies' +"
" + "
"
+ elif len(pmid_list) == 1: # Handle single study case
+ out2 = str(num_abstract) + ' sentence(s) in '+ " "+ str(len(pmid_list)) + ' study' +"
" "
"
+ else: # No PMIDs found, num_abstract might still be > 0 if PMIDs were not parsable in file but text matched
+ out2 = str(num_abstract) + ' sentence(s) found.
'
+
+
+ if(cat0 == 'stress'): # Only show stress classification if category is stress
+ if(out_neg == "" and out_pos == ""):
+ # If no classification results, show all sentences if any, or a message
+ if out3:
+ out= out1+ out2 + "All related sentences (Gemini classification not available or no specific stress types found):
") # Show the cytoscape graph for one gene from the top gene list @app.route("/showTopGene") def showTopGene(): query=request.args.get('topGene') - nodesEdges=searchArchived('topGene',query, 'cys','','')[0] + # Assuming searchArchived returns a tuple, and the first element is nodesEdges + archived_data = searchArchived('topGene',query, 'cys','','') + if isinstance(archived_data, tuple) and len(archived_data) > 0: + nodesEdges = archived_data[0] + else: # Fallback if searchArchived doesn't return expected tuple + nodesEdges = "" + print(f"Warning: searchArchived did not return expected data for {query}") + message2="
"+node.upper()+"
"+node.upper()+"
Details for node '{node.upper()}' not found in the current ontology.
" + return render_template('sentences.html', sentences=out+"") +''' @app.route("/shownode") def shownode(): node=request.args.get('node') @@ -1377,30 +1701,173 @@ def shownode(): return render_template('sentences.html', sentences=out+"
") + @app.route("/synonyms") def synonyms(): - node=request.args.get('node') - node=node.upper() - allnodes={**genes} + node = request.args.get('node') + rnd = request.args.get('rnd') + + if not node: + return "Error: Gene node is required.", 400 + node = node.upper() + try: - synonym_list = list(allnodes[node].split("|")) + # --- Part 1: Handle Synonyms Links --- + allnodes = {} + if 'genes' in globals() and isinstance(globals()['genes'], dict): + allnodes = globals()['genes'] + else: + print("Warning: 'genes' dictionary for synonyms not found.") + + synonym_list = list(allnodes[node].split("|")) session['synonym_list'] = synonym_list session['main_gene'] = node.upper() - out="
")
+ results_content = "
No results found.
" # Default content + result_file_path = session.get('path', '') + "_ggResult" # Get path from session + if result_file_path and os.path.exists(result_file_path): + with open(result_file_path, "r") as result_f: + results_content=result_f.read() + else: + print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.") + return render_template('sentences.html', sentences=results_content+"
")
# Generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@@ -1500,5 +2038,5 @@ def top150genes():
if __name__ == '__main__':
- db.create_all()
- app.run(debug=True, port=4200)
+ # For production, consider using a more robust web server like Gunicorn or Waitress
+ app.run(debug=True, host='0.0.0.0', port=4200) # Changed to 0.0.0.0 for accessibility if needed
--
cgit 1.4.1
From bc84bbc6bec82f4cddbba4e84e855965bea267a2 Mon Sep 17 00:00:00 2001
From: chen42
Date: Wed, 25 Mar 2026 16:48:59 -0500
Subject: update requirements
---
genecup_synthesis_prompt.txt | 12 ++--
ratspub.py | 167 -------------------------------------------
requirements.txt | 50 ++++++-------
server.py | 19 +++--
templates/genenames.html | 73 +++++++++++++++++--
5 files changed, 113 insertions(+), 208 deletions(-)
delete mode 100755 ratspub.py
(limited to 'server.py')
diff --git a/genecup_synthesis_prompt.txt b/genecup_synthesis_prompt.txt
index 75af3af..c8ee861 100644
--- a/genecup_synthesis_prompt.txt
+++ b/genecup_synthesis_prompt.txt
@@ -9,18 +9,18 @@ Organism:
Phenotype:
Candidate Gene: {{gene}}
-Goal: To critically evaluate [Candidate Gene] as a plausible causal gene for the [Phenotype] by analyzing the literature excerpts provided at the end of this prompt with appropriate scientific caution.
+Goal: To critically evaluate {{gene}} as a plausible causal gene for the [Phenotype] by analyzing the literature excerpts provided at the end of this prompt with appropriate scientific caution.
2. Required Analysis:
Please perform the following four-step analysis based on the Source Information provided at the end. Your evaluation must be rigorous and avoid overstating claims. Acknowledge the limitations of interpreting isolated sentences and prioritize a nuanced perspective.
A. Term Disambiguation
-For each sentence provided in the "Source Information" section, confirm if the term "[Candidate Gene]" unambiguously refers to the intended gene. If the term is used ambiguously or refers to another scientific concept, state this and exclude the sentence from further analysis. Proceed only with the confirmed sentences.
+For each sentence provided in the "Source Information" section, confirm if the term "{{gene}}" unambiguously refers to the intended gene. If the term is used ambiguously or refers to another scientific concept, state this and exclude the sentence from further analysis. Proceed only with the confirmed sentences.
B. Synthesis of Function and Experimental Context
-From the sentences confirmed in Step A, synthesize the known biological functions of [Candidate Gene]. Do not create a single, flattened narrative. Instead, structure your summary to reflect the nuance of the findings:
+From the sentences confirmed in Step A, synthesize the known biological functions of {{gene}}. Do not create a single, flattened narrative. Instead, structure your summary to reflect the nuance of the findings:
Characterize Each Function: For each reported function, describe what the gene does.
Note the Experimental System: Specify the context for each finding. Was it observed in vivo (e.g., in a mouse model), in vitro (e.g., in a specific cell line like HEK293), or is it a finding from a computational prediction? (Cite PMID/ID).
Distinguish Strength of Claims: Differentiate between established, speculative, or indirect roles. For example, note if the source text uses cautious language like "may regulate," "is associated with," or "is thought to be involved in." (Cite PMID/ID).
@@ -28,7 +28,7 @@ Acknowledge Inconsistencies: If any sentences suggest conflicting or different r
C. Critical Evaluation of Causal Gene Plausibility (with In-text Citations)
-Construct a detailed scientific evaluation of [Candidate Gene]'s plausibility for [Phenotype]. Your argument must be built cautiously, explicitly weighing the evidence for and against the gene's candidacy. Every claim you make must be immediately followed by its source (PMID/ID).
+Construct a detailed scientific evaluation of {{gene}}'s plausibility for [Phenotype]. Your argument must be built cautiously, explicitly weighing the evidence for and against the gene's candidacy. Every claim you make must be immediately followed by its source (PMID/ID).
Start with an Initial Caveat that acknowledges the inherent limitations of this analysis, such as the small number of excerpts and the lack of full experimental details.
@@ -60,8 +60,8 @@ Evaluate the nature of these prior associations. Are they from robust genetic st
D. Balanced Concluding Assessment
Conclude with a brief, balanced summary that encapsulates the strength of the evidence. This conclusion must reflect the cautious and critical nature of your analysis.
-Summarize Supporting Evidence: Briefly state the strongest, most direct lines of evidence that support [Candidate Gene] as a plausible candidate, citing the key PMIDs.
+Summarize Supporting Evidence: Briefly state the strongest, most direct lines of evidence that support {{gene}} as a plausible candidate, citing the key PMIDs.
Summarize Limitations and Gaps: Crucially, summarize the most significant weaknesses in the argument. This includes any identified knowledge gaps, lack of specificity, reliance on non-ideal experimental models, or speculative functional links.
-Final Judgment on Plausibility: Provide a final, nuanced statement on whether [Candidate Gene] is a weak, plausible, or strong candidate based only on the provided information. Avoid definitive conclusions and frame the outcome in terms of what further research would be needed to solidify the connection.
+Final Judgment on Plausibility: Provide a final, nuanced statement on whether {{gene}} is a weak, plausible, or strong candidate based only on the provided information. Avoid definitive conclusions and frame the outcome in terms of what further research would be needed to solidify the connection.
3. Source Information:
diff --git a/ratspub.py b/ratspub.py
deleted file mode 100755
index 5621b5e..0000000
--- a/ratspub.py
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/bin/env python3
-from nltk.tokenize import sent_tokenize
-import os
-import re
-from ratspub_keywords import *
-from gene_synonyms import *
-
-global function_d, brain_d, drug_d, addiction_d, brain_query_term, pubmed_path, genes
-
-## turn dictionary (synonyms) to regular expression
-def undic(dic):
- return "|".join(dic.values())
-
-def findWholeWord(w):
- return re.compile(r'\b({0})\b'.format(w), flags=re.IGNORECASE).search
-
-def getSentences(query, gene):
- abstracts = os.popen("esearch -db pubmed -query " + query + " | efetch -format uid |fetch-pubmed -path "+ pubmed_path + " | xtract -pattern PubmedArticle -element MedlineCitation/PMID,ArticleTitle,AbstractText|sed \"s/-/ /g\"").read()
- out=str()
- for row in abstracts.split("\n"):
- tiab=row.split("\t")
- pmid = tiab.pop(0)
- tiab= " ".join(tiab)
- sentences = sent_tokenize(tiab)
- ## keep the sentence only if it contains the gene
- for sent in sentences:
- if findWholeWord(gene)(sent):
- sent=re.sub(r'\b(%s)\b' % gene, r'\1', sent, flags=re.I)
- out+=pmid+"\t"+sent+"\n"
- return(out)
-
-def gene_category(gene, cat_d, query, cat):
- #e.g. BDNF, addiction_d, undic(addiction_d) "addiction"
- q="\"(" + query.replace("|", " OR ") + ") AND " + gene + "\""
- sents=getSentences(q, gene)
- out=str()
- for sent in sents.split("\n"):
- for key in cat_d:
- if findWholeWord(cat_d[key])(sent) :
- sent=sent.replace("","").replace("","") # remove other highlights
- sent=re.sub(r'\b(%s)\b' % cat_d[key], r'\1', sent, flags=re.I) # highlight keyword
- out+=gene+"\t"+ cat + "\t"+key+"\t"+sent+"\n"
- return(out)
-
-def generate_nodes(nodes_d, nodetype):
- # include all search terms even if there are no edges, just to show negative result
- json0 =str()
- for node in nodes_d:
- json0 += "{ data: { id: '" + node + "', nodecolor: '" + nodecolor[nodetype] + "', nodetype: '"+nodetype + "', url:'/shownode?nodetype=" + nodetype + "&node="+node+"' } },\n"
- return(json0)
-
-def generate_nodes_json(nodes_d, nodetype):
- # include all search terms even if there are no edges, just to show negative result
- nodes_json0 =str()
- for node in nodes_d:
- nodes_json0 += "{ \"id\": \"" + node + "\", \"nodecolor\": \"" + nodecolor[nodetype] + "\", \"nodetype\": \"" + nodetype + "\", \"url\":\"/shownode?nodetype=" + nodetype + "&node="+node+"\" },\n"
- return(nodes_json0)
-
-def generate_edges(data, filename):
- pmid_list=[]
- json0=str()
- edgeCnts={}
- for line in data.split("\n"):
- if len(line.strip())!=0:
- (source, cat, target, pmid, sent) = line.split("\t")
- edgeID=filename+"|"+source+"|"+target
- if (edgeID in edgeCnts) and (pmid+target not in pmid_list):
- edgeCnts[edgeID]+=1
- pmid_list.append(pmid+target)
- elif (edgeID not in edgeCnts) and (pmid+target not in pmid_list):
- edgeCnts[edgeID]=1
- pmid_list.append(pmid+target)
- for edgeID in edgeCnts:
- (filename, source,target)=edgeID.split("|")
- json0+="{ data: { id: '" + edgeID + "', source: '" + source + "', target: '" + target + "', sentCnt: " + str(edgeCnts[edgeID]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
- return(json0)
-
-def generate_edges_json(data, filename):
- pmid_list=[]
- edges_json0=str()
- edgeCnts={}
- for line in data.split("\n"):
- if len(line.strip())!=0:
- (source, cat, target, pmid, sent) = line.split("\t")
- edgeID=filename+"|"+source+"|"+target
- if (edgeID in edgeCnts) and (pmid+target not in pmid_list):
- edgeCnts[edgeID]+=1
- pmid_list.append(pmid+target)
- elif (edgeID not in edgeCnts) and (pmid+target not in pmid_list):
- edgeCnts[edgeID]=1
- pmid_list.append(pmid+target)
- for edgeID in edgeCnts:
- (filename, source,target)=edgeID.split("|")
- edges_json0+="{ \"id\": \"" + edgeID + "\", \"source\": \"" + source + "\", \"target\": \"" + target + "\", \"sentCnt\": \"" + str(edgeCnts[edgeID]) + "\", \"url\":\"/sentences?edgeID=" + edgeID + "\" },\n"
- return(edges_json0)
-
-def searchArchived(sets, query, filetype):
- if sets=='topGene':
- dataFile="topGene_addiction_sentences.tab"
- nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n"
-
- elif sets=='GWAS':
- dataFile="gwas_addiction.tab"
- nodes=str()
- with open(dataFile, "r") as sents:
- pmid_list=[]
- cat1_list=[]
- catCnt={}
- for sent in sents:
- (symb, cat0, cat1, pmid, sent)=sent.split("\t")
- if (symb.upper() == query.upper()) :
- if (cat1 in catCnt.keys()) and (pmid+cat1 not in pmid_list):
- pmid_list.append(pmid+cat1)
- catCnt[cat1]+=1
- elif (cat1 not in catCnt.keys()):
- catCnt[cat1]=1
- pmid_list.append(pmid+cat1)
-
- nodes= "{ data: { id: '" + query + "', nodecolor: '" + "#2471A3" + "', fontweight:700, url:'/progress?query="+query+"' } },\n"
- edges=str()
- gwas_json=str()
- for key in catCnt.keys():
- if sets=='GWAS':
- nc=nodecolor["GWAS"]
- nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', url:'https://www.ebi.ac.uk/gwas/search?query="+key.replace("_GWAS","")+"' } },\n"
- elif key in drug_d.keys():
- nc=nodecolor["drug"]
- nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', url:'/shownode?node="+key+"' } },\n"
- else:
- nc=nodecolor["addiction"]
- nodes += "{ data: { id: '" + key + "', nodecolor: '" + nc + "', url:'/shownode?node="+key+"' } },\n"
- edgeID=dataFile+"|"+query+"|"+key
- edges+="{ data: { id: '" + edgeID+ "', source: '" + query + "', target: '" + key + "', sentCnt: " + str(catCnt[key]) + ", url:'/sentences?edgeID=" + edgeID + "' } },\n"
- gwas_json+="{ \"id\": \"" + edgeID + "\", \"source\": \"" + query + "\", \"target\": \"" + key + "\", \"sentCnt\": \"" + str(catCnt[key]) + "\", \"url\":\"/sentences?edgeID=" + edgeID + "\" },\n"
- if(filetype == 'cys'):
- return(nodes+edges)
- else:
- return(gwas_json)
-# brain region has too many short acronyms to just use the undic function, so search PubMed using the following
-brain_query_term="cortex|accumbens|striatum|amygadala|hippocampus|tegmental|mesolimbic|infralimbic|prelimbic|habenula"
-function=undic(function_d)
-addiction=undic(addiction_d)
-drug=undic(drug_d)
-
-gene_s=undic(genes)
-
-nodecolor={'function':"#A9CCE3", 'addiction': "#D7BDE2", 'drug': "#F9E79F", 'brain':"#A3E4D7", 'GWAS':"#AEB6BF", 'stress':"#EDBB99", 'psychiatric':"#F5B7B1"}
-#https://htmlcolorcodes.com/ third column down
-
-n0=generate_nodes(function_d, 'function')
-n1=generate_nodes(addiction_d, 'addiction')
-n2=generate_nodes(drug_d, 'drug')
-n3=generate_nodes(brain_d, 'brain')
-n4=generate_nodes(stress_d, 'stress')
-n5=generate_nodes(psychiatric_d, 'psychiatric')
-n6=''
-
-nj0=generate_nodes_json(function_d, 'function')
-nj1=generate_nodes_json(addiction_d, 'addiction')
-nj2=generate_nodes_json(drug_d, 'drug')
-nj3=generate_nodes_json(brain_d, 'brain')
-nj4=generate_nodes_json(stress_d, 'stress')
-nj5=generate_nodes_json(psychiatric_d, 'psychiatric')
-nj6=''
-
-pubmed_path=os.environ["EDIRECT_PUBMED_MASTER"]
-
diff --git a/requirements.txt b/requirements.txt
index c2ba0ba..5c15516 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,33 +1,29 @@
-pandas==1.2.1
-bcrypt==3.1.7
-cffi==1.13.2
-pycparser==2.19
-Flask-SQLAlchemy==2.4.4
+# Core Data Stack (Stable for 3.12)
+numpy>=1.26.0
+pandas==2.2.0
+
+# Core Application Dependencies
Flask==1.1.2
+Flask-SQLAlchemy==2.4.4
+SQLAlchemy==1.3.23
+bcrypt==3.1.7
+python-dotenv
+pytz
+
+# Natural Language Processing
+nltk==3.5
+
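+# Generative AI (NOTE: server.py still imports google.generativeai, which ships in the legacy google-generativeai package; google-genai only provides google.genai)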
+google-genai
+
+# Utilities and Sub-dependencies
Click==7.0
itsdangerous==1.1.0
Jinja2==2.11.3
-MarkupSafe==1.0
-Werkzeug==1.0.0
-SQLAlchemy==1.3.23
-Keras==2.4.3
-h5py==2.10.0
-numpy==1.19.5
-six==1.15.0
-Keras-Preprocessing==1.1.2
-PyYAML==5.3.1
-scipy==1.6.0
-nltk==3.5
-regex==2020.11.13
-tensorflow==2.4.1
-absl-py==0.11.0
-astunparse==1.6.3
-gast==0.3.3
-grpcio==1.32.0
-protobuf==3.14.0
-tensorboard==2.4.1
-Markdown==3.3.3
+MarkupSafe==2.0.1
Werkzeug==1.0.1
+Markdown==3.3.3
+cffi==1.17.0
+pycparser==2.19
+six==1.17.0
wheel==0.36.2
-tensorflow-estimator==2.4.0
-python==3.8.5
diff --git a/server.py b/server.py
index 19d7486..d9b4ef3 100755
--- a/server.py
+++ b/server.py
@@ -34,6 +34,17 @@ from datetime import datetime
# Gemini API related imports
import google.generativeai as genai
+# Removed TensorFlow and Keras related imports
+# import tensorflow
+# import tensorflow.keras
+# from nltk.corpus import stopwords # Removed
+# from nltk.stem.porter import PorterStemmer # Removed
+# from tensorflow.keras import backend as K # Removed
+# from tensorflow.keras import metrics, optimizers # Removed
+# from tensorflow.keras.layers import * # Removed (Dense, Embedding, Flatten, Conv1D, MaxPooling1D)
+# from tensorflow.keras.models import Model, Sequential # Removed
+# from tensorflow.keras.preprocessing.sequence import pad_sequences # Removed
+# from tensorflow.keras.preprocessing.text import Tokenizer # Removed
import re
import ast
from more_functions import *
@@ -124,7 +135,7 @@ def classify_stress_with_gemini(sentence_text):
return "error_no_prompt_template"
try:
- model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+ model_gemini = genai.GenerativeModel('gemini-2.5-pro')
# Append the new sentence and the final instruction to the prompt template
# This is safer than .format() when the template contains its own curly braces.
@@ -155,7 +166,7 @@ def classify_stress_with_gemini(sentence_text):
return "error_no_api_key"
try:
- model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+ model_gemini = genai.GenerativeModel('gemini-2.5-pro')
prompt = f"""Classify the following sentence based on whether it describes 'systemic stress' or 'cellular stress'.
Please return ONLY the word 'systemic' if it describes systemic stress, or ONLY the word 'cellular' if it describes cellular stress. Do not add any other explanation or punctuation.
@@ -1585,7 +1596,7 @@ Here are the sentences to classify:
{sentences_to_classify_str}
"""
# Call the API
- model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+ model_gemini = genai.GenerativeModel('gemini-2.5-pro')
response = model_gemini.generate_content(batched_prompt)
# Step 3: Parse the JSON response
@@ -2039,4 +2050,4 @@ def top150genes():
if __name__ == '__main__':
# For production, consider using a more robust web server like Gunicorn or Waitress
- app.run(debug=True, host='0.0.0.0', port=4200) # Changed to 0.0.0.0 for accessibility if needed
+ app.run(debug=True, host='0.0.0.0', port=4200) # Changed to 0.0.0.0 for accessibility if needed
\ No newline at end of file
diff --git a/templates/genenames.html b/templates/genenames.html
index fe22d0b..d1e4960 100644
--- a/templates/genenames.html
+++ b/templates/genenames.html
@@ -18,11 +18,76 @@
{%endfor%}
- {%else%}
+ {# --- Added Section for Gemini Prompt --- #}
+ {% if prompt %}
+
Prompt generation failed or no sentences found.
+ {% endif %} + {# --- END OF MODIFIED SECTION --- #} + +{%else%} No synonym for {{gene}} is found. {%endif%}