From 5e68858ef98f61f80ba5992296c36db6c8dc67c9 Mon Sep 17 00:00:00 2001
From: chen42
Date: Tue, 24 Mar 2026 09:21:50 -0500
Subject: switch to gemini-flash
---
server.py | 1268 +++++++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 903 insertions(+), 365 deletions(-)
(limited to 'server.py')
diff --git a/server.py b/server.py
index 9d34bf9..19d7486 100755
--- a/server.py
+++ b/server.py
@@ -13,32 +13,44 @@ from os import listdir
import bcrypt
import nltk
-import numpy as np
+# import numpy as np # Removed
import pandas as pd
import pytz
from flask import (Flask, Response, flash, jsonify, redirect, render_template,
request, session, url_for)
from flask_sqlalchemy import SQLAlchemy
-from numpy import array
+# from numpy import array # Removed
-nltk.download('punkt')
-import pickle
+from dotenv import load_dotenv
+load_dotenv()
+import os
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
+nltk.download('punkt', quiet=True)
+# import pickle # Removed
from collections import Counter
from datetime import datetime
-import tensorflow
-import tensorflow.keras
-from nltk.corpus import stopwords
-from nltk.stem.porter import PorterStemmer
-from tensorflow.keras import backend as K
-from tensorflow.keras import metrics, optimizers
-from tensorflow.keras.layers import *
-from tensorflow.keras.layers import Dense, Embedding, Flatten
-from tensorflow.keras.models import Model, Sequential
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-from tensorflow.keras.preprocessing.text import Tokenizer
+# Gemini API related imports
+import google.generativeai as genai
+import re
+import ast
from more_functions import *
+from nltk.tokenize import sent_tokenize
+from more_functions import getabstracts, undic, gene_category
+
+GENECUP_PROMPT_TEMPLATE = ""
+try:
+ with open("genecup_synthesis_prompt.txt", "r") as f:
+ GENECUP_PROMPT_TEMPLATE = f.read()
+except FileNotFoundError:
+ print("Warning: genecup_synthesis_prompt.txt not found. LLM prompts will be incomplete.")
+except Exception as e:
+ print(f"Error loading genecup_synthesis_prompt.txt: {e}. LLM prompts will be affected.")
+
+
+
app=Flask(__name__)
#datadir="/export/ratspub/"
@@ -48,8 +60,125 @@ datadir="./"
app.config['SECRET_KEY'] = '#DtfrL98G5t1dC*4'
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///'+datadir+'userspub.sqlite'
db = SQLAlchemy(app)
+
+
+def get_sentences_from_file(file_path, gene_name, category_name=None):
+ """Reads a sentence file and returns sentences matching a gene and category."""
+ matching_sentences = []
+ try:
+ with open(file_path, "r") as f:
+ for line in f:
+ if not line.strip():
+ continue
+ try:
+ (gene, nouse, cat, pmid, text) = line.split("\t")
+ cat_match = (category_name is None) or (cat.strip().upper() == category_name.strip().upper())
+ if (gene.strip().upper() == gene_name.strip().upper() and cat_match):
+ matching_sentences.append({'pmid': pmid, 'text': text, 'category': cat})
+ except ValueError:
+ continue
+ except FileNotFoundError:
+ print(f"Sentence file not found: {file_path}")
+ except Exception as e:
+ print(f"Error reading sentence file {file_path}: {e}")
+ return matching_sentences
+
+
nltk.data.path.append("./nlp/")
+# Initialize database within application context
+with app.app_context():
+ db.create_all()
+
+# Configure Gemini API Key
+# IMPORTANT: Set the GEMINI_API_KEY environment variable
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+if not GEMINI_API_KEY:
+ print("Warning: GEMINI_API_KEY environment variable not set. Stress classification via Gemini will not work.")
+else:
+ try:
+ genai.configure(api_key=GEMINI_API_KEY)
+ except Exception as e:
+ print(f"Error configuring Gemini API: {e}")
+ GEMINI_API_KEY = None # Ensure it's None if configuration fails
+'''
+STRESS_PROMPT_TEMPLATE = ""
+try:
+ with open("stress_prompt.txt", "r") as f_prompt:
+ STRESS_PROMPT_TEMPLATE = f_prompt.read()
+except FileNotFoundError:
+ print("FATAL ERROR: stress_prompt.txt not found. Stress classification will fail.")
+except Exception as e:
+ print(f"FATAL ERROR: Could not read stress_prompt.txt: {e}")
+
+# few shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+ if not GEMINI_API_KEY:
+ print("Gemini API key not configured. Skipping classification.")
+ return "error_no_api_key"
+
+ # --- THIS IS THE MODIFIED PART ---
+ # Check if the prompt template was loaded successfully
+ if not STRESS_PROMPT_TEMPLATE:
+ print("Stress prompt template is not available. Skipping classification.")
+ return "error_no_prompt_template"
+
+ try:
+ model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+
+ # Append the new sentence and the final instruction to the prompt template
+ # This is safer than .format() when the template contains its own curly braces.
+ prompt = STRESS_PROMPT_TEMPLATE + f'\nSentence: {sentence_text}\nClassification:'
+ print(prompt)
+ response = model_gemini.generate_content(prompt)
+ # We need to parse the classification from the response
+ classification = response.text.strip().lower()
+
+ # The model might return "Cellular Level Stress" or "Organismal Stress"
+ if "cellular" in classification:
+ return "neg" # 'neg' for Cellular Level Stress
+ elif "organismal" in classification:
+ return "pos" # 'pos' for Organismal Stress
+ else:
+ print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+ return "unknown"
+
+ except Exception as e:
+ print(f"Error calling Gemini API for stress classification: {e}")
+ return "error_api_call"
+
+
+# zero-shot Function to classify stress using Gemini API
+def classify_stress_with_gemini(sentence_text):
+ if not GEMINI_API_KEY:
+ print("Gemini API key not configured. Skipping classification.")
+ return "error_no_api_key"
+
+ try:
+ model_gemini = genai.GenerativeModel('gemini-3-flash-preview')
+ prompt = f"""Classify the following sentence based on whether it describes 'systemic stress' or 'cellular stress'.
+Please return ONLY the word 'systemic' if it describes systemic stress, or ONLY the word 'cellular' if it describes cellular stress. Do not add any other explanation or punctuation.
+
+Sentence: "{sentence_text}"
+
+Classification:"""
+
+ response = model_gemini.generate_content(prompt)
+ classification = response.text.strip().lower()
+
+ if classification == "systemic":
+ return "pos" # 'pos' for systemic stress
+ elif classification == "cellular":
+ return "neg" # 'neg' for cellular stress
+ else:
+ print(f"Warning: Gemini returned unexpected classification: '{classification}' for sentence: '{sentence_text}'")
+ return "unknown"
+
+ except Exception as e:
+ print(f"Error calling Gemini API for stress classification: {e}")
+ return "error_api_call"
+'''
+
# Sqlite database
class users(db.Model):
__tablename__='user'
@@ -59,46 +188,47 @@ class users(db.Model):
password = db.Column(db.String(128), nullable=False)
date_created = db.Column(db.DateTime, default=datetime.utcnow)
-# Preprocessing of words for CNN
-def clean_doc(doc, vocab):
- doc = doc.lower()
- tokens = doc.split()
- re_punc = re.compile('[%s]' % re.escape(string.punctuation))
- tokens = [re_punc.sub('' , w) for w in tokens]
- tokens = [word for word in tokens if len(word) > 1]
- stop_words = set(stopwords.words('english'))
- tokens = [w for w in tokens if not w in stop_words]
- porter = PorterStemmer()
- stemmed = [porter.stem(word) for word in tokens]
- return tokens
-
-# Load tokenizer
-with open('./nlp/tokenizer.pickle', 'rb') as handle:
- tokenizer = pickle.load(handle)
-
-# Load vocabulary
-with open('./nlp/vocabulary.txt', 'r') as vocab:
- vocab = vocab.read()
-
-def tf_auc_score(y_true, y_pred):
- return tensorflow.metrics.auc(y_true, y_pred)[1]
-
-K.clear_session()
-
-# Create the CNN model
-def create_model(vocab_size, max_length):
- model = Sequential()
- model.add(Embedding(vocab_size, 32, input_length=max_length))
- model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
- model.add(MaxPooling1D(pool_size=2))
- model.add(Flatten())
- model.add(Dense(10, activation='relu'))
- model.add(Dense(1, activation='sigmoid'))
- opt = tensorflow.keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999)
- model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
- return model
+# Preprocessing of words for CNN (REMOVED)
+# def clean_doc(doc, vocab):
+# doc = doc.lower()
+# tokens = doc.split()
+# re_punc = re.compile('[%s]' % re.escape(string.punctuation))
+# tokens = [re_punc.sub('' , w) for w in tokens]
+# tokens = [word for word in tokens if len(word) > 1]
+# stop_words = set(stopwords.words('english'))
+# tokens = [w for w in tokens if not w in stop_words]
+# porter = PorterStemmer()
+# stemmed = [porter.stem(word) for word in tokens]
+# return tokens
+
+# Load tokenizer (REMOVED)
+# with open('./nlp/tokenizer.pickle', 'rb') as handle:
+# tokenizer = pickle.load(handle)
+
+# Load vocabulary (REMOVED)
+# with open('./nlp/vocabulary.txt', 'r') as vocab_file_handle: # Renamed variable to avoid conflict
+# vocab_text = vocab_file_handle.read() # Renamed variable
+
+# def tf_auc_score(y_true, y_pred): (REMOVED)
+# return tensorflow.metrics.AUC()(y_true, y_pred)
+
+# K.clear_session() (REMOVED)
+
+# Create the CNN model (REMOVED)
+# def create_model(vocab_size, max_length):
+# model = Sequential()
+# model.add(Embedding(vocab_size, 32, input_length=max_length))
+# model.add(Conv1D(filters=16, kernel_size=4, activation='relu'))
+# model.add(MaxPooling1D(pool_size=2))
+# model.add(Flatten())
+# model.add(Dense(10, activation='relu'))
+# model.add(Dense(1, activation='sigmoid'))
+# opt = tensorflow.keras.optimizers.Adamax(learning_rate=0.002, beta_1=0.9, beta_2=0.999)
+# model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[tf_auc_score])
+# return model
# Use addiction ontology by default
+import ast # Moved import ast here as it's first used here.
onto_cont=open("addiction.onto","r").read()
dictionary=ast.literal_eval(onto_cont)
@@ -278,7 +408,7 @@ def logout():
user1 = session['name']
else:
user1 = session['email']
- flash("You have been logged out, {user1}", "inval")
+ flash(f"You have been logged out, {user1}", "inval") # Used f-string for clarity
session.pop('email', None)
session.clear()
return render_template('index.html',onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
@@ -699,10 +829,10 @@ def progress():
except:
namecat = 'addiction'
session['namecat'] = namecat
- if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction':
+ if namecat=='choose your ontology' or namecat=='addiction' or namecat == 'addiction': # Redundant 'addiction' check
session['namecat']='addiction'
onto_cont=open("addiction.onto","r").read()
- dictionary=ast.literal_eval(onto_cont)
+ # dictionary=ast.literal_eval(onto_cont) # dictionary is global, no need to re-assign from local onto_cont
search_type = request.args.getlist('type')
if (search_type == []):
search_type = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric', 'cell']
@@ -805,7 +935,7 @@ def search():
d["nj{0}".format(n_num)]=''
else:
namecat_flag=0
- for ky in dictionary.keys():
+ for ky in dictionary.keys(): # Using global 'dictionary'
nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dictionary.keys())))+", 70%, 80%)"
d["nj{0}".format(n_num)]=generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
n_num+=1
@@ -818,10 +948,15 @@ def search():
json_nodes += generate_nodes_json(dictionary[ky],str(ky),nodecolor[ky])
d["nj{0}".format(n_num)]=''
- json_nodes = json_nodes[:-2]
- json_nodes =json_nodes+"]}"
- def generate(genes, tf_name):
+ json_nodes = json_nodes[:-2] # Handles case if json_nodes was only "{\"data\":["
+ if json_nodes == "{\"data\"": # if it was empty before -2
+ json_nodes = "{\"data\":[]}"
+ else:
+ json_nodes =json_nodes+"]}"
+
+ def generate(genes, tf_name): # tf_name is snt_file
with app.test_request_context():
+ from nltk.tokenize import sent_tokenize # Moved import here, as it's only used in this function scope.
sentences=str()
edges=str()
nodes = temp_nodes
@@ -832,34 +967,36 @@ def search():
#genes_or = ' [tiab] or '.join(genes)
all_d=''
+ current_dict_onto = {} # To hold the relevant ontology for this search pass
if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
-
- for ky in dict_onto.keys():
- if (ky in search_type):
- all_d_ls=undic(list(dict_onto[ky].values()))
- all_d = all_d+'|'+all_d_ls
+ onto_cont_local = open(ses_namecat+".onto","r").read() # ses_namecat from outer scope
+ current_dict_onto=ast.literal_eval(onto_cont_local)
else:
- for ky in dictionary.keys():
- if (ky in search_type):
- all_d_ls=undic(list(dictionary[ky].values()))
- all_d = all_d+'|'+all_d_ls
- all_d=all_d[1:]
+ current_dict_onto = dictionary # Use global dictionary
+
+ for ky in current_dict_onto.keys():
+ if (ky in search_type):
+ all_d_ls=undic(list(current_dict_onto[ky].values()))
+ all_d = all_d+'|'+all_d_ls
+ if all_d: # Check if all_d is not empty
+ all_d=all_d[1:]
+
if ("GWAS" in search_type):
datf = pd.read_csv('./utility/gwas_used.csv',sep='\t')
progress+=percent
yield "data:"+str(progress)+"\n\n"
+
for gene in genes:
- abstracts_raw = getabstracts(gene,all_d)
+ abstracts_raw = getabstracts(gene,all_d) # all_d might be empty if no search_type matches
#print(abstracts_raw)
sentences_ls=[]
for row in abstracts_raw.split("\n"):
+ if not row.strip(): continue # Skip empty lines
tiab=row.split("\t")
pmid = tiab.pop(0)
- tiab= " ".join(tiab)
- sentences_tok = sent_tokenize(tiab)
+ tiab_text = " ".join(tiab) # Renamed to avoid conflict
+ sentences_tok = sent_tokenize(tiab_text)
for sent_tok in sentences_tok:
sent_tok = pmid + ' ' + sent_tok
sentences_ls.append(sent_tok)
@@ -867,60 +1004,76 @@ def search():
geneEdges = ""
- if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
- else:
- dict_onto = dictionary
+ # Use the already determined current_dict_onto
+ # if namecat_flag==1:
+ # onto_cont = open(ses_namecat+".onto","r").read()
+ # dict_onto_loop=ast.literal_eval(onto_cont)
+ # else:
+ # dict_onto_loop = dictionary
+ dict_onto_loop = current_dict_onto
- for ky in dict_onto.keys():
+ for ky in dict_onto_loop.keys():
if (ky in search_type):
- if (ky=='addiction') and ('addiction' in dict_onto.keys())\
- and ('drug' in dict_onto.keys()) and ('addiction' in dict_onto['addiction'].keys())\
- and ('aversion' in dict_onto['addiction'].keys()) and ('intoxication' in dict_onto['addiction'].keys()):
- #addiction terms must present with at least one drug
+ # The special handling for 'addiction' with 'drug' needs careful check of dict_onto_loop structure
+ if (ky=='addiction') and ('addiction' in dict_onto_loop.keys())\
+ and ('drug' in dict_onto_loop.keys()) and ('addiction' in dict_onto_loop['addiction'].keys())\
+ and ('aversion' in dict_onto_loop['addiction'].keys()) and ('intoxication' in dict_onto_loop['addiction'].keys()):
addiction_flag=1
- #addiction=undic0(addiction_d) +") AND ("+undic0(drug_d)
- sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto)
- if ('addiction' in search_type):
+ # addiction_d is not defined here, assume it's a global or from more_functions
+ # This part might need `addiction_d` from `more_functions.py` to be correctly defined.
+ # For now, assuming addiction_d is available in the scope.
+ sent=gene_category(gene, addiction_d, "addiction", sentences_ls,addiction_flag,dict_onto_loop)
+ if ('addiction' in search_type): # This check is redundant with outer if
geneEdges += generate_edges(sent, tf_name)
json_edges += generate_edges_json(sent, tf_name)
else:
addiction_flag=0
- if namecat_flag==1:
- onto_cont = open(ses_namecat+".onto","r").read()
- dict_onto=ast.literal_eval(onto_cont)
- #ky_d=undic(list(dict_onto[ky].values()))
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
- else:
- #ky_d=undic(list(dict_onto[ky].values()))
- #print(sentences_ls)
- sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto)
- #print(sent)
+ sent=gene_category(gene,ky,str(ky), sentences_ls, addiction_flag,dict_onto_loop)
yield "data:"+str(progress)+"\n\n"
geneEdges += generate_edges(sent, tf_name)
json_edges += generate_edges_json(sent, tf_name)
sentences+=sent
- if ("GWAS" in search_type):
+ if ("GWAS" in search_type and 'GWAS' in dict_onto_loop): # Added check for GWAS in dict_onto_loop
gwas_sent=[]
- print (datf)
- datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
- | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE))]
- print (datf_sub1)
- for nd2 in dict_onto['GWAS'].keys():
- for nd1 in dict_onto['GWAS'][nd2]:
- for nd in nd1.split('|'):
- gwas_text=''
- datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE)]
- #& (datf['REPORTED GENE(S)'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)
- #| (datf['MAPPED_GENE'].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE)))]
- if not datf_sub.empty:
- for index, row in datf_sub.iterrows():
- gwas_text = "SNP:"+str(row['SNPS'])+", P value: "+str(row['P-VALUE'])\
- +", Disease/trait: "+str(row['DISEASE/TRAIT'])+", Mapped trait: "\
-                                        +str(row['MAPPED_TRAIT'])+"<br>"
- gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text)
+ # print (datf) # datf is loaded earlier
+ datf_sub1 = datf[datf["MAPPED_GENE"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False)
+ | (datf["REPORTED GENE(S)"].str.contains('(?:\s|^)'+gene+'(?:\s|$)', flags=re.IGNORECASE, na=False))]
+ # print (datf_sub1)
+ for nd2 in dict_onto_loop['GWAS'].keys():
+ # Ensure dict_onto_loop['GWAS'][nd2] is iterable and contains strings
+ # Example: if dict_onto_loop['GWAS'][nd2] is {'keyword1|keyword2'}
+ # next(iter(dict_onto_loop['GWAS'][nd2])) might be what was intended
+ # Assuming dict_onto_loop['GWAS'][nd2] is a set/list of keyword strings like {'kw1|kw2', 'kw3'}
+ # The original code was: for nd1 in dict_onto_loop['GWAS'][nd2]: for nd in nd1.split('|'):
+ # This implies dict_onto_loop['GWAS'][nd2] contains combined keywords.
+ # Let's assume the structure is { 'subcategory' : {'keyword_group1', 'keyword_group2'} }
+ # where keyword_group is "termA|termB"
+
+ # Iterating over the values of the sub-dictionary if it's a dict, or elements if it's a list/set
+ sub_keywords_container = dict_onto_loop['GWAS'][nd2]
+ # This needs to be robust to the actual structure of dict_onto_loop['GWAS'][nd2]
+ # Assuming it's a set of strings, where each string can be pipe-separated.
+ # e.g., sub_keywords_container = {'phenotype1|phenotype_alias', 'phenotype2'}
+ actual_keywords_to_iterate = []
+ if isinstance(sub_keywords_container, dict): # e.g. {'phenotype_group': 'pheno1|pheno2'}
+ for key_group_str in sub_keywords_container.values(): # Or .keys() if that's the intent
+ actual_keywords_to_iterate.extend(key_group_str.split('|'))
+ elif isinstance(sub_keywords_container, (list, set)):
+ for key_group_str in sub_keywords_container:
+ actual_keywords_to_iterate.extend(key_group_str.split('|'))
+ elif isinstance(sub_keywords_container, str): # e.g. 'pheno1|pheno2'
+ actual_keywords_to_iterate.extend(sub_keywords_container.split('|'))
+
+
+ for nd in actual_keywords_to_iterate:
+ gwas_text=''
+ # Added na=False to contains calls
+ datf_sub = datf_sub1[datf_sub1['DISEASE/TRAIT'].str.contains('(?:\s|^)'+nd+'(?:\s|$)', flags=re.IGNORECASE, na=False)]
+ if not datf_sub.empty:
+ for index, row in datf_sub.iterrows():
+ gwas_text = f"SNP:{row['SNPS']}, P value: {row['P-VALUE']}, Disease/trait: {row['DISEASE/TRAIT']}, Mapped trait: {row['MAPPED_TRAIT']}"
+ gwas_sent.append(gene+"\t"+"GWAS"+"\t"+nd2+"_GWAS\t"+str(row['PUBMEDID'])+"\t"+gwas_text) # Changed nd to nd2 for target node
cys, gwas_json, sn_file = searchArchived('GWAS', gene , 'json',gwas_sent, path_user)
with open(path_user+"gwas_results.tab", "a") as gwas_edges:
gwas_edges.write(sn_file)
@@ -931,8 +1084,17 @@ def search():
yield "data:"+str(progress)+"\n\n"
if len(geneEdges) >0:
+ rnd = ''
+ if 'email' in session:
+ if 'rnd' in session:
+ rnd = session['rnd']
+ elif 'path_user' in session:
+ rnd = session['path_user'].split('/')[-2]
+ elif 'path' in session:
+ rnd = session['path'].split('/')[-1]
+
edges+=geneEdges
- nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"'} },\n"
+ nodes+="{ data: { id: '" + gene + "', nodecolor:'#E74C3C', fontweight:700, url:'/synonyms?node="+gene+"&rnd="+rnd+"'} },\n"
else:
nodesToHide+=gene + " "
@@ -947,14 +1109,20 @@ def search():
zeroLinkNode.close()
yield "data:"+str(progress)+"\n\n"
- # Edges in json format
- json_edges="{\"data\":["+json_edges
- json_edges = json_edges[:-2]
- json_edges =json_edges+"]}"
+ # Edges in json format
+ json_edges_content = json_edges.strip()
+ if json_edges_content.endswith(','):
+ json_edges_content = json_edges_content[:-1]
+
+ if not json_edges_content:
+ json_edges = "{\"data\":[]}"
+ else:
+ json_edges = "{\"data\":[" + json_edges_content + "]}"
# Write edges to txt file in json format also in user folder
with open(path_user+"edges.json", "w") as temp_file_edges:
- temp_file_edges.write(json_edges)
+ temp_file_edges.write(json_edges)
+
with open(path_user+"nodes.json", "w") as temp_file_nodes:
temp_file_nodes.write(json_nodes)
return Response(generate(genes, snt_file), mimetype='text/event-stream')
@@ -983,15 +1151,26 @@ def tableview():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(datadir+gene_url_tmp +'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(datadir+gene_url_tmp +"/edges.json") as edgesjsonfile:
+ # Check if file is empty or just contains empty structure
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ # Reset file pointer and load json
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []} # Ensure jedges is a dict
+ except FileNotFoundError:
+ jedges = {"data": []} # Ensure jedges is a dict if file not found
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+ jedges = {"data": []} # Ensure jedges is a dict
+ nodata_temp = 1
+
+
else:
genes_session_tmp=tf_path+"/"+rnd_url
gene_url_tmp = genes_session_tmp
@@ -1005,16 +1184,25 @@ def tableview():
onto_cont=open("addiction.onto","r").read()
dict_onto=ast.literal_eval(onto_cont)
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
+
jedges =''
- file_edges = open(gene_url_tmp +'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(gene_url_tmp +"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
genename=genes_url.split("_")
if len(genename)>3:
genename = genename[0:3]
@@ -1040,7 +1228,7 @@ def tableview0():
if ('email' in session):
filename = rnd_url.split("_0_")[0]
- genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename
+ # genes_session_tmp = datadir+"/user/"+str(session['hashed_email'])+"/"+rnd_url+"/"+filename # Not used further
gene_url_tmp = "/user/"+str(session['hashed_email'])+"/"+rnd_url
try:
with open(datadir+gene_url_tmp+"/nodes.json") as jsonfile:
@@ -1054,18 +1242,26 @@ def tableview0():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(datadir+gene_url_tmp+'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(datadir+gene_url_tmp+"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(datadir+gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {datadir+gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
else:
- genes_session_tmp=tf_path+"/"+rnd_url
- gene_url_tmp = genes_session_tmp
+ # genes_session_tmp=tf_path+"/"+rnd_url # Not used further
+ gene_url_tmp = tf_path+"/"+rnd_url
try:
with open(gene_url_tmp+"/nodes.json") as jsonfile:
jnodes = json.load(jsonfile)
@@ -1078,15 +1274,23 @@ def tableview0():
return render_template('index.html', onto_len_dir=onto_len_dir, onto_list=onto_list, ontol = 'addiction', dict_onto = dict_onto)
jedges =''
- file_edges = open(gene_url_tmp+'/edges.json', 'r')
- for line in file_edges.readlines():
- if ':' not in line:
- nodata_temp = 1
- else:
- nodata_temp = 0
- with open(gene_url_tmp+"/edges.json") as edgesjsonfile:
+ nodata_temp = 1 # Default to no data
+ try:
+ with open(gene_url_tmp +'/edges.json') as edgesjsonfile:
+ content = edgesjsonfile.read().strip()
+ if content and content != "{\"data\":[]}":
+ edgesjsonfile.seek(0)
jedges = json.load(edgesjsonfile)
- break
+ nodata_temp = 0
+ else:
+ jedges = {"data": []}
+ except FileNotFoundError:
+ jedges = {"data": []}
+ except json.JSONDecodeError:
+ print(f"Warning: Could not decode JSON from {gene_url_tmp}/edges.json")
+ jedges = {"data": []}
+ nodata_temp = 1
+
genes_url=request.args.get('genequery')
genename=genes_url.split("_")
if len(genename)>3:
@@ -1118,7 +1322,7 @@ def userarchive():
else:
session['user_folder'] = datadir+"/user/"+str(session['hashed_email'])
else:
- onto_name_archive=''
+ # onto_name_archive='' # This variable is not used here
flash("You logged out!")
onto_len_dir = 0
onto_list = ''
@@ -1135,26 +1339,34 @@ def userarchive():
folder_list = []
directory_list = []
gene_list=[]
- onto_list=[]
+ onto_list_archive =[] # Renamed to avoid conflict with outer scope 'onto_list'
for filename in dirlist:
- if ('_0_' in filename):
- folder_list.append(filename)
- gene_name = filename.split('_0_')[1]
- onto_name = filename.split('_0_')[2]
- if gene_name[-2:] == '_m':
- gene_name = gene_name[:-2]
- gene_name = gene_name + ", ..."
- gene_name = gene_name.replace('_', ', ')
- gene_list.append(gene_name)
- onto_list.append(onto_name)
- onto_name=""
- gene_name=""
- filename=filename[0:4]+"-"+filename[5:7]+"-"+filename[8:13]+":"+filename[14:16]+":"+filename[17:19]
- directory_list.append(filename)
+ if ('_0_' in filename): # Ensure it's a search result folder, not e.g. "ontology"
+ if os.path.isdir(os.path.join(session['user_folder'], filename)): # Check if it's a directory
+ folder_list.append(filename)
+ try:
+ gene_name = filename.split('_0_')[1]
+ onto_name = filename.split('_0_')[2]
+ if gene_name.endswith('_m'): # Check using endswith for robustness
+ gene_name = gene_name[:-2]
+ gene_name = gene_name + ", ..."
+ gene_name = gene_name.replace('_', ', ')
+ gene_list.append(gene_name)
+ onto_list_archive.append(onto_name) # Use renamed list
+ # onto_name="" # Not necessary, re-assigned in loop
+ # gene_name="" # Not necessary, re-assigned in loop
+ # Format filename for display
+ display_filename=filename.split('_0_')[0] # Get only the timestamp part for display formatting
+ display_filename=display_filename[0:4]+"-"+display_filename[5:7]+"-"+display_filename[8:10]+" "+display_filename[11:13]+":"+display_filename[14:16]+":"+display_filename[17:19]
+ directory_list.append(display_filename)
+ except IndexError:
+                print(f"Skipping folder with unexpected name format: {filename}")
+ continue
+
len_dir = len(directory_list)
message3="
No sentences found for {gene0} and {cat0}.
") + + all_stress_sentences = [] + num_abstract = len(matching_sents) + + for sent_obj in matching_sents: + text = sent_obj['text'] + pmid = sent_obj['pmid'] + + formatted_line = f"")
+ out2 = str(num_abstract) + ' sentences in ' + " "+ str(len(pmid_list)) + ' studies' +"
" + "
"
+ elif len(pmid_list) == 1: # Handle single study case
+ out2 = str(num_abstract) + ' sentence(s) in '+ " "+ str(len(pmid_list)) + ' study' +"
" "
"
+ else: # No PMIDs found, num_abstract might still be > 0 if PMIDs were not parsable in file but text matched
+ out2 = str(num_abstract) + ' sentence(s) found.
'
+
+
+ if(cat0 == 'stress'): # Only show stress classification if category is stress
+ if(out_neg == "" and out_pos == ""):
+ # If no classification results, show all sentences if any, or a message
+ if out3:
+ out= out1+ out2 + "All related sentences (Gemini classification not available or no specific stress types found):
") # Show the cytoscape graph for one gene from the top gene list @app.route("/showTopGene") def showTopGene(): query=request.args.get('topGene') - nodesEdges=searchArchived('topGene',query, 'cys','','')[0] + # Assuming searchArchived returns a tuple, and the first element is nodesEdges + archived_data = searchArchived('topGene',query, 'cys','','') + if isinstance(archived_data, tuple) and len(archived_data) > 0: + nodesEdges = archived_data[0] + else: # Fallback if searchArchived doesn't return expected tuple + nodesEdges = "" + print(f"Warning: searchArchived did not return expected data for {query}") + message2="
"+node.upper()+"
"+node.upper()+"
Details for node '{node.upper()}' not found in the current ontology.
" + return render_template('sentences.html', sentences=out+"") +''' @app.route("/shownode") def shownode(): node=request.args.get('node') @@ -1377,30 +1701,173 @@ def shownode(): return render_template('sentences.html', sentences=out+"
") + @app.route("/synonyms") def synonyms(): - node=request.args.get('node') - node=node.upper() - allnodes={**genes} + node = request.args.get('node') + rnd = request.args.get('rnd') + + if not node: + return "Error: Gene node is required.", 400 + node = node.upper() + try: - synonym_list = list(allnodes[node].split("|")) + # --- Part 1: Handle Synonyms Links --- + allnodes = {} + if 'genes' in globals() and isinstance(globals()['genes'], dict): + allnodes = globals()['genes'] + else: + print("Warning: 'genes' dictionary for synonyms not found.") + + synonym_list = list(allnodes[node].split("|")) session['synonym_list'] = synonym_list session['main_gene'] = node.upper() - out="
")
+ results_content = "
No results found.
" # Default content + result_file_path = session.get('path', '') + "_ggResult" # Get path from session + if result_file_path and os.path.exists(result_file_path): + with open(result_file_path, "r") as result_f: + results_content=result_f.read() + else: + print(f"Warning: Result file {result_file_path} not found for showGeneTopGene.") + return render_template('sentences.html', sentences=results_content+"
")
# Generate a page that lists all the top 150 addiction genes with links to cytoscape graph.
@@ -1500,5 +2038,5 @@ def top150genes():
if __name__ == '__main__':
- db.create_all()
- app.run(debug=True, port=4200)
+ # For production, consider using a more robust web server like Gunicorn or Waitress
+ app.run(debug=True, host='0.0.0.0', port=4200) # Changed to 0.0.0.0 for accessibility if needed
--
cgit 1.4.1