From 5f4cef3640f84092e5692e16865002a832b7838c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:51:26 +0200 Subject: Added a test that creates an ontology --- tests/test_network_gemini_ontology.py | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/test_network_gemini_ontology.py (limited to 'tests') diff --git a/tests/test_network_gemini_ontology.py b/tests/test_network_gemini_ontology.py new file mode 100644 index 0000000..4a9db34 --- /dev/null +++ b/tests/test_network_gemini_ontology.py @@ -0,0 +1,58 @@ +"""Test Gemini API for generating SUD ontology terms. + +Requires a Gemini API key in ~/.config/gemini/credentials and internet access. + +Run with: python3 -m unittest tests.test_network_gemini_ontology -v +""" + +import os +import sys +import time +import unittest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +from more_functions import gemini_query + +PROMPT = ( + """ + Give me a list of terms on substance abuse disorder (SUD) that act + as traits and classifiers in scientific literature with a focus on + behaviour and brain attributes related to the hippocampus. Avoid + aliases and synonyms as well as gene names. Each term should be + 1-3 words (max). Give me a list of at least 20, but no more than + 80, most used terms. 
Return only the terms, one per line, no + numbering.""" +) + +class TestGeminiOntology(unittest.TestCase): + def test_1_sud_ontology_terms(self): + """Gemini should return 20-80 SUD ontology terms.""" + t0 = time.time() + response = gemini_query(PROMPT) + elapsed = time.time() - t0 + terms = [t.strip() for t in response.strip().split("\n") if t.strip()] + print(f" Got {len(terms)} terms ({elapsed:.2f}s)") + for t in terms: + print(f" - {t}") + self.assertGreaterEqual(len(terms), 20, + f"Expected at least 20 terms, got {len(terms)}") + self.assertLessEqual(len(terms), 80, + f"Expected at most 80 terms, got {len(terms)}") + # Each term should be short (1-3 words, allow some slack) + long_terms = [t for t in terms if len(t.split()) > 5] + self.assertEqual(len(long_terms), 0, + f"Terms too long: {long_terms}") + + def test_2_cached_ontology(self): + """Second call should use cache and be fast.""" + # Ensure cache is populated from test_1 + gemini_query(PROMPT) + t0 = time.time() + response = gemini_query(PROMPT) + elapsed = time.time() - t0 + terms = [t.strip() for t in response.strip().split("\n") if t.strip()] + print(f" Cached: {len(terms)} terms ({elapsed:.4f}s)") + self.assertLess(elapsed, 0.01, f"Cache lookup too slow: {elapsed:.4f}s") + +if __name__ == "__main__": + unittest.main() -- cgit 1.4.1 From 5c28db960087ddb282a13e4dd2c3a3dd5c4cc207 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:56:19 +0200 Subject: Add aliases --- tests/test_network_gemini_ontology.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tests') diff --git a/tests/test_network_gemini_ontology.py b/tests/test_network_gemini_ontology.py index 4a9db34..2a84909 100644 --- a/tests/test_network_gemini_ontology.py +++ b/tests/test_network_gemini_ontology.py @@ -21,7 +21,7 @@ PROMPT = ( aliases and synonyms as well as gene names. Each term should be 1-3 words (max). Give me a list of at least 20, but no more than 80, most used terms. 
Return only the terms, one per line, no - numbering.""" + numbering. Add abbreviations and aliases as a list with each term, separated by commas""" ) class TestGeminiOntology(unittest.TestCase): @@ -40,8 +40,6 @@ class TestGeminiOntology(unittest.TestCase): f"Expected at most 80 terms, got {len(terms)}") # Each term should be short (1-3 words, allow some slack) long_terms = [t for t in terms if len(t.split()) > 5] - self.assertEqual(len(long_terms), 0, - f"Terms too long: {long_terms}") def test_2_cached_ontology(self): """Second call should use cache and be fast.""" -- cgit 1.4.1