From fc83a9cf9d752b288f66564ecb5af55019485d2a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 5 Apr 2026 18:09:58 +0200 Subject: 1.9.1 release --- RELEASE_NOTES.md | 41 +++++++++++++++++++++++++++++++++++++++++ VERSION | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 RELEASE_NOTES.md diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md new file mode 100644 index 0000000..0d73f75 --- /dev/null +++ b/RELEASE_NOTES.md @@ -0,0 +1,41 @@ +# GeneCup Release Notes + +## Version 1.9.1 (2026-04-05) + +### UI/UX +- Added header/footer in GN color scheme with version info across all pages +- Added --port and --debug command line switches +- Show environment variables (EDIRECT_PUBMED_MASTER, GEMINI_API_KEY, NLTK_DATA, GENECUP_DATADIR) on startup +- Added intermediate "Calling Gemini API..." loading page that auto-refreshes when classification completes + +### Gemini API integration +- Replaced TensorFlow stress classifier with Google Gemini API (gemini-2.5-pro for few-shot, gemini-3-flash-preview for batch) +- API key read from ~/.config/gemini/credentials (with 0400 permission check) +- Batch classification: all stress sentences classified in one API call with JSON response +- In-memory cache for Gemini results (keyed by SHA-256 of sentence batch) +- Retry logic (3 attempts with 2s/4s backoff) +- Gemini prompts and responses logged to console + +### PubMed / edirect +- Packaged edirect 25.x for Guix (Go programs compiled from source, XML bounds-check patch) +- Replaced missing fetch-pubmed with xfetch -db pubmed (local archive lookup) +- Hybrid abstract fetching: tries local xfetch first, falls back to NCBI efetch for PMIDs missing from the local archive +- In-memory cache for esearch PMID results (keyed by SHA-256 of query string) +- EDIRECT_LOCAL_ARCHIVE env var configures local PubMed archive path + +### Packaging (guix.scm) +- Added edirect-25, nltk-punkt, minipubmed, python-google-genai packages +- genecup-gemini package with genecup wrapper script, JavaScript assets, NLTK data +- GENECUP_DATADIR for sqlite DB location + +### Testing +- Added Python unittest framework (tests/) +- test_hello.py: offline smoke test (runs in guix build) +- test_network_esearch.py: NCBI esearch for Penk+stress PMIDs +- test_local_xfetch.py: local xsearch+xfetch against PubMed archive +- test_network_hybrid.py: validates hybrid fetch matches NCBI; tests esearch cache + +### Cleanup +- Moved dead code to old/server.py +- Removed unused TensorFlow/Keras dependencies +- Removed stress_prompt.txt dependency (batch classifier builds its own prompt) diff --git a/VERSION b/VERSION index c064b1b..9ab8337 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.9.1-pre +1.9.1 -- cgit 1.4.1 From 5f4cef3640f84092e5692e16865002a832b7838c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:51:26 +0200 Subject: Added a test that creates an ontology --- guix.scm | 6 ++-- more_functions.py | 50 ++++++++++++++++++++++++++++-- tests/test_network_gemini_ontology.py | 58 +++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 tests/test_network_gemini_ontology.py diff --git a/guix.scm b/guix.scm index 77551cd..5c8519e 100644 --- a/guix.scm +++ b/guix.scm @@ -7,12 +7,14 @@ ;; ;; Development shell: ;; -;; guix shell -L . -C -N -F edirect-25 genecup-gemini coreutils -- genecup --port 4201 +;; guix shell -L . -C -N -F --expose=$HOME/.config/gemini --share=/export3/PubMed edirect-25 genecup-gemini coreutils -- genecup --port 4201 ;; ;; In a shell you can run ;; -;; python3 -m unittest tests.test_network_esearch +;; guix shell -C -N -F -L . --expose=$HOME/.config/gemini --share=/export3/PubMed edirect-25 genecup-gemini +;; env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_network_esearch ;; env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_local_xfetch -v +;; env EDIRECT_LOCAL_ARCHIVE=/export3/PubMed/Source python3 -m unittest tests.test_network_gemini_ontology ;; ;; Note: API key is read from ~/.config/gemini/credentials ;; diff --git a/more_functions.py b/more_functions.py index a115899..35e3646 100755 --- a/more_functions.py +++ b/more_functions.py @@ -3,6 +3,7 @@ from nltk.tokenize import sent_tokenize import hashlib import os import re +import time from addiction_keywords import * from gene_synonyms import * @@ -10,8 +11,51 @@ import ast global pubmed_path -# In-memory cache for esearch results: hash(query) -> list of PMIDs -_esearch_cache = {} +# In-memory caches +_esearch_cache = {} # hash(query) -> list of PMIDs +_gemini_query_cache = {} # hash(prompt) -> response text + +def gemini_query(prompt, model='gemini-2.5-flash'): + """Send a prompt to the Gemini API with caching and retry. + + Returns the response text, or raises on failure. + """ + from google import genai + + cache_key = hashlib.sha256(prompt.encode()).hexdigest() + if cache_key in _gemini_query_cache: + print(f" Gemini query cache hit") + return _gemini_query_cache[cache_key] + + api_key = os.environ.get("GEMINI_API_KEY", "") + if not api_key: + cred_file = os.path.expanduser("~/.config/gemini/credentials") + if os.path.isfile(cred_file): + with open(cred_file) as f: + api_key = f.read().strip() + if not api_key: + raise RuntimeError("No Gemini API key found") + + client = genai.Client(api_key=api_key) + last_error = None + for attempt in range(3): + try: + if attempt > 0: + time.sleep(2 * attempt) + print(f" Gemini retry {attempt + 1}/3") + print(f" Gemini API call ({model}): {prompt[:80]}...") + response = client.models.generate_content( + model=model, + contents=prompt + ) + result = response.text.strip() + print(f" Gemini response: {result[:200]}") + _gemini_query_cache[cache_key] = result + return result + except Exception as e: + last_error = e + print(f" Gemini attempt {attempt + 1}/3 failed: {e}") + raise RuntimeError(f"Gemini API failed after 3 attempts: {last_error}") def esearch_pmids(query): """Search PubMed for PMIDs matching query. Results are cached in memory. @@ -246,7 +290,7 @@ pubmed_path=os.environ.get("EDIRECT_LOCAL_ARCHIVE", "./minipubmed") print(f" pubmed_path={pubmed_path}") if not os.path.isdir(pubmed_path): - print(f"ERROR: EDIRECT_LOCAL_ARCHIVE directory not found: {pubmed_path} - note this is a recent env variable that replaces the others") + print(f"ERROR: EDIRECT_LOCAL_ARCHIVE directory not found: {pubmed_path} - note this is a recent env variable that replaces the others (ignore the minipub reference)") raise SystemExit(1) testdir = os.path.join(pubmed_path, "pubmed", "Archive", "00") if not os.path.isdir(testdir): diff --git a/tests/test_network_gemini_ontology.py b/tests/test_network_gemini_ontology.py new file mode 100644 index 0000000..4a9db34 --- /dev/null +++ b/tests/test_network_gemini_ontology.py @@ -0,0 +1,58 @@ +"""Test Gemini API for generating SUD ontology terms. + +Requires a Gemini API key in ~/.config/gemini/credentials and internet access. + +Run with: python3 -m unittest tests.test_network_gemini_ontology -v +""" + +import os +import sys +import time +import unittest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from more_functions import gemini_query + +PROMPT = ( + """ + Give me a list of terms on substance abuse disorder (SUD) that act + as traits and classifiers in scientific literature with a focus on + behaviour and brain attributes related to the hippocampus. Avoid + aliases and synonyms as well as gene names. Each term should be + 1-3 words (max). Give me a list of at least 20, but no more than + 80, most used terms. Return only the terms, one per line, no + numbering.""" +) + +class TestGeminiOntology(unittest.TestCase): + def test_1_sud_ontology_terms(self): + """Gemini should return 20-50 SUD ontology terms.""" + t0 = time.time() + response = gemini_query(PROMPT) + elapsed = time.time() - t0 + terms = [t.strip() for t in response.strip().split("\n") if t.strip()] + print(f" Got {len(terms)} terms ({elapsed:.2f}s)") + for t in terms: + print(f" - {t}") + self.assertGreaterEqual(len(terms), 20, + f"Expected at least 20 terms, got {len(terms)}") + self.assertLessEqual(len(terms), 80, + f"Expected at most 80 terms, got {len(terms)}") + # Each term should be short (1-3 words, allow some slack) + long_terms = [t for t in terms if len(t.split()) > 5] + self.assertEqual(len(long_terms), 0, + f"Terms too long: {long_terms}") + + def test_2_cached_ontology(self): + """Second call should use cache and be fast.""" + # Ensure cache is populated from test_1 + gemini_query(PROMPT) + t0 = time.time() + response = gemini_query(PROMPT) + elapsed = time.time() - t0 + terms = [t.strip() for t in response.strip().split("\n") if t.strip()] + print(f" Cached: {len(terms)} terms ({elapsed:.4f}s)") + self.assertLess(elapsed, 0.01, f"Cache lookup too slow: {elapsed:.4f}s") + +if __name__ == "__main__": + unittest.main() -- cgit 1.4.1 From 5c28db960087ddb282a13e4dd2c3a3dd5c4cc207 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:56:19 +0200 Subject: Add aliases --- tests/test_network_gemini_ontology.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_network_gemini_ontology.py b/tests/test_network_gemini_ontology.py index 4a9db34..2a84909 100644 --- a/tests/test_network_gemini_ontology.py +++ b/tests/test_network_gemini_ontology.py @@ -21,7 +21,7 @@ PROMPT = ( aliases and synonyms as well as gene names. Each term should be 1-3 words (max). Give me a list of at least 20, but no more than 80, most used terms. Return only the terms, one per line, no - numbering.""" + numbering. Add abbreviations and aliases as a list with each term, separated by commas""" ) class TestGeminiOntology(unittest.TestCase): @@ -40,8 +40,6 @@ class TestGeminiOntology(unittest.TestCase): f"Expected at most 80 terms, got {len(terms)}") # Each term should be short (1-3 words, allow some slack) long_terms = [t for t in terms if len(t.split()) > 5] - self.assertEqual(len(long_terms), 0, - f"Terms too long: {long_terms}") def test_2_cached_ontology(self): """Second call should use cache and be fast.""" -- cgit 1.4.1 From 5da54ef6347acdba3613e3b1b161b66013817206 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:57:53 +0200 Subject: Create template --- server.py | 27 +++++++++++++++++++++++++++ templates/create-ontology.html | 23 +++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 templates/create-ontology.html diff --git a/server.py b/server.py index c81cbc9..a20d388 100755 --- a/server.py +++ b/server.py @@ -346,6 +346,33 @@ def logout(): def about(): return render_template('about.html',version=version()) +@app.route("/create-ontology", methods=["GET", "POST"]) +def create_ontology(): + from more_functions import gemini_query + default_prompt = ( + "Give me a list of terms on substance abuse disorder (SUD) that act " + "as traits and classifiers in scientific literature with a focus on " + "behaviour and brain attributes related to the hippocampus. Avoid " + "aliases and synonyms as well as gene names. Each term should be " + "1-3 words (max). Give me a list of at least 20, but no more than " + "80, most used terms. Return only the terms, one per line, no " + "numbering. Add abbreviations and aliases as a list with each term, " + "separated by commas") + if request.method == "POST": + prompt = request.form.get("prompt", default_prompt) + try: + result = gemini_query(prompt) + terms = [t.strip() for t in result.strip().split("\n") if t.strip()] + return render_template('create-ontology.html', + prompt=prompt, result=result, + count=len(terms), version=version()) + except Exception as e: + return render_template('create-ontology.html', + prompt=prompt, result=f"Error: {e}", + count=0, version=version()) + return render_template('create-ontology.html', + prompt=default_prompt, result=None, + count=0, version=version()) # Ontology selection @app.route("/index_ontology", methods=["POST", "GET"]) diff --git a/templates/create-ontology.html b/templates/create-ontology.html new file mode 100644 index 0000000..44d8ef1 --- /dev/null +++ b/templates/create-ontology.html @@ -0,0 +1,23 @@ +{% extends "layout.html" %} +{% block content %} + +
+

Create Ontology with Gemini AI

+ +
+
+ + +
+ +
+ + {% if result %} +
+ + +
+ {% endif %} +
+ +{% endblock %} -- cgit 1.4.1 From 5a551f6434a6b26adb0f604d64f703c677ea4b67 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 10:26:33 +0200 Subject: Getting ontology results --- server.py | 40 ++++++++++++++++++++++++++++++++++++++++ templates/create-ontology.html | 27 ++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index a20d388..626179a 100755 --- a/server.py +++ b/server.py @@ -359,6 +359,38 @@ def create_ontology(): "numbering. Add abbreviations and aliases as a list with each term, " "separated by commas") if request.method == "POST": + action = request.form.get("action", "generate") + + if action == "search": + # Build a temporary .onto file from the result terms and redirect to /progress + result_text = request.form.get("result", "") + query = request.form.get("query", "") + search_types = request.form.getlist("type") + # Build onto dict: each term is its own category with aliases as pipe-separated keywords + dict_onto = {} + for line in result_text.strip().split("\n"): + line = line.strip() + if not line: + continue + parts = [p.strip() for p in line.split(",")] + category = parts[0] + keywords = "|".join(parts) + dict_onto[category] = {category: {keywords}} + # Save to a temp .onto file + onto_path = os.path.join(tempfile.gettempdir(), "gemini_ontology") + with open(onto_path + ".onto", "w") as f: + f.write(repr(dict_onto)) + session['namecat'] = onto_path + print(f" Created ontology: {onto_path}.onto with {len(dict_onto)} categories") + print(f" Gene query: '{query}', search_types: {search_types}") + # Build the redirect URL with type and query params + from urllib.parse import urlencode + params = [("query", query)] + for t in search_types: + params.append(("type", t)) + return redirect("/progress?" + urlencode(params)) + + # action == "generate" prompt = request.form.get("prompt", default_prompt) try: result = gemini_query(prompt) @@ -830,6 +862,11 @@ def progress(): if (search_type == []): search_type = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric', 'cell'] session['search_type'] = search_type + # Use default addiction ontology unless redirected from /create-ontology + if request.referrer and '/create-ontology' in request.referrer: + pass # keep session['namecat'] set by /create-ontology + elif 'namecat' in session: + del session['namecat'] genes_session = '' for gen in genes: @@ -873,8 +910,10 @@ def search(): if 'namecat' in session: namecat_flag=1 ses_namecat = session['namecat'] + print(f" /search: namecat={ses_namecat}, search_type={search_type}") onto_cont = open(session['namecat']+".onto","r").read() dict_onto=ast.literal_eval(onto_cont) + print(f" /search: onto categories={list(dict_onto.keys())[:10]}") for ky in dict_onto.keys(): nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dict_onto.keys())))+", 70%, 80%)" @@ -935,6 +974,7 @@ def search(): all_d = all_d+'|'+all_d_ls if all_d: # Check if all_d is not empty all_d=all_d[1:] + print(f" /search generate: all_d={all_d[:200] if all_d else '(empty)'}, search_type={search_type}") if ("GWAS" in search_type): datf = pd.read_csv('./utility/gwas_used.csv',sep='\t') diff --git a/templates/create-ontology.html b/templates/create-ontology.html index 44d8ef1..627bc86 100644 --- a/templates/create-ontology.html +++ b/templates/create-ontology.html @@ -5,6 +5,7 @@

Create Ontology with Gemini AI

+
@@ -15,8 +16,32 @@ {% if result %}
- +
+ + + +
+ + +
+
+ + + + {% endif %}
-- cgit 1.4.1 From ae4ac90310a55589dbbcb32621b1455367702b9a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 10:31:01 +0200 Subject: Add form feedback --- templates/create-ontology.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/create-ontology.html b/templates/create-ontology.html index 627bc86..537c246 100644 --- a/templates/create-ontology.html +++ b/templates/create-ontology.html @@ -10,7 +10,7 @@ - + {% if result %} @@ -26,7 +26,7 @@
- +