From 5da54ef6347acdba3613e3b1b161b66013817206 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 09:57:53 +0200 Subject: Create template --- server.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'server.py') diff --git a/server.py b/server.py index c81cbc9..a20d388 100755 --- a/server.py +++ b/server.py @@ -346,6 +346,33 @@ def logout(): def about(): return render_template('about.html',version=version()) +@app.route("/create-ontology", methods=["GET", "POST"]) +def create_ontology(): + from more_functions import gemini_query + default_prompt = ( + "Give me a list of terms on substance abuse disorder (SUD) that act " + "as traits and classifiers in scientific literature with a focus on " + "behaviour and brain attributes related to the hippocampus. Avoid " + "aliases and synonyms as well as gene names. Each term should be " + "1-3 words (max). Give me a list of at least 20, but no more than " + "80, most used terms. Return only the terms, one per line, no " + "numbering. Add abbreviations and aliases as a list with each term, " + "separated by commas") + if request.method == "POST": + prompt = request.form.get("prompt", default_prompt) + try: + result = gemini_query(prompt) + terms = [t.strip() for t in result.strip().split("\n") if t.strip()] + return render_template('create-ontology.html', + prompt=prompt, result=result, + count=len(terms), version=version()) + except Exception as e: + return render_template('create-ontology.html', + prompt=prompt, result=f"Error: {e}", + count=0, version=version()) + return render_template('create-ontology.html', + prompt=default_prompt, result=None, + count=0, version=version()) # Ontology selection @app.route("/index_ontology", methods=["POST", "GET"]) -- cgit 1.4.1 From 5a551f6434a6b26adb0f604d64f703c677ea4b67 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 10:26:33 +0200 Subject: Getting ontology results --- server.py | 40 ++++++++++++++++++++++++++++++++++++++++ templates/create-ontology.html | 27 ++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) (limited to 'server.py') diff --git a/server.py b/server.py index a20d388..626179a 100755 --- a/server.py +++ b/server.py @@ -359,6 +359,38 @@ def create_ontology(): "numbering. Add abbreviations and aliases as a list with each term, " "separated by commas") if request.method == "POST": + action = request.form.get("action", "generate") + + if action == "search": + # Build a temporary .onto file from the result terms and redirect to /progress + result_text = request.form.get("result", "") + query = request.form.get("query", "") + search_types = request.form.getlist("type") + # Build onto dict: each term is its own category with aliases as pipe-separated keywords + dict_onto = {} + for line in result_text.strip().split("\n"): + line = line.strip() + if not line: + continue + parts = [p.strip() for p in line.split(",")] + category = parts[0] + keywords = "|".join(parts) + dict_onto[category] = {category: {keywords}} + # Save to a temp .onto file + onto_path = os.path.join(tempfile.gettempdir(), "gemini_ontology") + with open(onto_path + ".onto", "w") as f: + f.write(repr(dict_onto)) + session['namecat'] = onto_path + print(f" Created ontology: {onto_path}.onto with {len(dict_onto)} categories") + print(f" Gene query: '{query}', search_types: {search_types}") + # Build the redirect URL with type and query params + from urllib.parse import urlencode + params = [("query", query)] + for t in search_types: + params.append(("type", t)) + return redirect("/progress?" + urlencode(params)) + + # action == "generate" prompt = request.form.get("prompt", default_prompt) try: result = gemini_query(prompt) @@ -830,6 +862,11 @@ def progress(): if (search_type == []): search_type = ['GWAS', 'function', 'addiction', 'drug', 'brain', 'stress', 'psychiatric', 'cell'] session['search_type'] = search_type + # Use default addiction ontology unless redirected from /create-ontology + if request.referrer and '/create-ontology' in request.referrer: + pass # keep session['namecat'] set by /create-ontology + elif 'namecat' in session: + del session['namecat'] genes_session = '' for gen in genes: @@ -873,8 +910,10 @@ def search(): if 'namecat' in session: namecat_flag=1 ses_namecat = session['namecat'] + print(f" /search: namecat={ses_namecat}, search_type={search_type}") onto_cont = open(session['namecat']+".onto","r").read() dict_onto=ast.literal_eval(onto_cont) + print(f" /search: onto categories={list(dict_onto.keys())[:10]}") for ky in dict_onto.keys(): nodecolor[ky] = "hsl("+str((n_num+1)*int(360/len(dict_onto.keys())))+", 70%, 80%)" @@ -935,6 +974,7 @@ def search(): all_d = all_d+'|'+all_d_ls if all_d: # Check if all_d is not empty all_d=all_d[1:] + print(f" /search generate: all_d={all_d[:200] if all_d else '(empty)'}, search_type={search_type}") if ("GWAS" in search_type): datf = pd.read_csv('./utility/gwas_used.csv',sep='\t') diff --git a/templates/create-ontology.html b/templates/create-ontology.html index 44d8ef1..627bc86 100644 --- a/templates/create-ontology.html +++ b/templates/create-ontology.html @@ -5,6 +5,7 @@

Create Ontology with Gemini AI

+
@@ -15,8 +16,32 @@ {% if result %}
- +
+ + + +
+ + +
+
+ + + + {% endif %}
-- cgit 1.4.1 From 1add2e683ac93cb8e63a446332dd66504d4c9e61 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 11:16:54 +0200 Subject: Only 3 letter abbreviations --- server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'server.py') diff --git a/server.py b/server.py index 626179a..79dda7d 100755 --- a/server.py +++ b/server.py @@ -348,6 +348,8 @@ def about(): @app.route("/create-ontology", methods=["GET", "POST"]) def create_ontology(): + if request.method == "GET": + session.pop('namecat', None) from more_functions import gemini_query default_prompt = ( "Give me a list of terms on substance abuse disorder (SUD) that act " @@ -356,7 +358,7 @@ def create_ontology(): "aliases and synonyms as well as gene names. Each term should be " "1-3 words (max). Give me a list of at least 20, but no more than " "80, most used terms. Return only the terms, one per line, no " - "numbering. Add abbreviations and aliases as a list with each term, " + "numbering. Add abbreviations and aliases - each at least 3 letters that have no other meaning - as a list with each term, " "separated by commas") if request.method == "POST": action = request.form.get("action", "generate") -- cgit 1.4.1 From acca175362eb1d1ce2b0cd263c39537b2b8a6f2b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 6 Apr 2026 11:44:20 +0200 Subject: Combine pubmed online search to a single query --- more_functions.py | 16 ++++++++++++++++ server.py | 35 +++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 16 deletions(-) (limited to 'server.py') diff --git a/more_functions.py b/more_functions.py index 35e3646..5d48adc 100755 --- a/more_functions.py +++ b/more_functions.py @@ -113,6 +113,22 @@ def hybrid_fetch_abstracts(pmid_list): abstracts += extra return abstracts +def getabstracts_batch(genes, query): + """Fetch abstracts for multiple genes in a single PubMed query. + + Builds: (keywords) AND (gene1 [tiab] OR gene2 [tiab] OR ...) + Returns tab-separated lines: PMID, ArticleTitle, AbstractText + """ + genes_clause = " OR ".join(g + " [tiab]" for g in genes) + full_query = "\"(" + query + ") AND (" + genes_clause + ")\"" + pmid_list = esearch_pmids(full_query) + if not pmid_list: + print(f" no PMIDs found for {genes}") + return "" + print(f" PMIDs ({len(pmid_list)}): {' '.join(pmid_list[:20])}{'...' if len(pmid_list) > 20 else ''}") + abstracts = hybrid_fetch_abstracts(pmid_list) + return abstracts + def getabstracts(gene,query): """ 1. esearch -db pubmed -query ... -- searches PubMed for the gene + keyword query, returns matching record IDs diff --git a/server.py b/server.py index 79dda7d..25ccad5 100755 --- a/server.py +++ b/server.py @@ -64,7 +64,7 @@ import re import ast from more_functions import * from nltk.tokenize import sent_tokenize -from more_functions import getabstracts, undic, gene_category +from more_functions import getabstracts, getabstracts_batch, undic, gene_category GENECUP_PROMPT_TEMPLATE = "" try: @@ -983,22 +983,25 @@ def search(): progress+=percent yield "data:"+str(progress)+"\n\n" - for gene in genes: - print(f"Fetching info for gene {gene}\n") - abstracts_raw = getabstracts(gene,all_d) # all_d might be empty if no search_type matches - print(abstracts_raw) - sentences_ls=[] + # Batch fetch all abstracts in a single PubMed query + print(f"Batch fetching abstracts for {len(genes)} genes") + all_abstracts_raw = getabstracts_batch(genes, all_d) if all_d else "" + # Parse all sentences once + all_sentences = [] + for row in all_abstracts_raw.split("\n"): + if not row.strip(): continue + tiab = row.split("\t") + pmid = tiab.pop(0) + tiab_text = " ".join(tiab) + for sent_tok in sent_tokenize(tiab_text): + all_sentences.append(pmid + ' ' + sent_tok) - for row in abstracts_raw.split("\n"): - if not row.strip(): continue # Skip empty lines - tiab=row.split("\t") - pmid = tiab.pop(0) - tiab_text = " ".join(tiab) # Renamed to avoid conflict - sentences_tok = sent_tokenize(tiab_text) - for sent_tok in sentences_tok: - sent_tok = pmid + ' ' + sent_tok - sentences_ls.append(sent_tok) - gene=gene.replace("-"," ") + for gene in genes: + gene = gene.replace("-", " ") + # Filter sentences that mention this gene + gene_re = re.compile(r'\b' + re.escape(gene) + r'\b', re.IGNORECASE) + sentences_ls = [s for s in all_sentences if gene_re.search(s)] + print(f" Gene {gene}: {len(sentences_ls)} sentences") geneEdges = "" -- cgit 1.4.1