about summary refs log tree commit diff
path: root/more_functions.py
diff options
context:
space:
mode:
authorPjotr Prins2026-04-06 11:44:20 +0200
committerPjotr Prins2026-04-06 11:44:20 +0200
commitacca175362eb1d1ce2b0cd263c39537b2b8a6f2b (patch)
treeac41c9f7ce4e8dcbf6a03282dc0cda581b08d511 /more_functions.py
parent1add2e683ac93cb8e63a446332dd66504d4c9e61 (diff)
downloadgenecup-acca175362eb1d1ce2b0cd263c39537b2b8a6f2b.tar.gz
Combine PubMed online searches into a single query
Diffstat (limited to 'more_functions.py')
-rwxr-xr-xmore_functions.py16
1 files changed, 16 insertions, 0 deletions
diff --git a/more_functions.py b/more_functions.py
index 35e3646..5d48adc 100755
--- a/more_functions.py
+++ b/more_functions.py
@@ -113,6 +113,22 @@ def hybrid_fetch_abstracts(pmid_list):
         abstracts += extra
     return abstracts
 
+def getabstracts_batch(genes, query):
+    """Fetch abstracts for multiple genes in a single PubMed query.
+
+    Builds: "(query) AND (gene1 [tiab] OR gene2 [tiab] OR ...)", quote characters included
+    Returns tab-separated lines: PMID, ArticleTitle, AbstractText; "" if no PMIDs are found
+    """
+    genes_clause = " OR ".join(g + " [tiab]" for g in genes)  # one [tiab]-tagged term per gene
+    full_query = "\"(" + query + ") AND (" + genes_clause + ")\""  # wrapped in literal double quotes
+    pmid_list = esearch_pmids(full_query)
+    if not pmid_list:
+        print(f"  no PMIDs found for {genes}")
+        return ""  # empty string rather than None
+    print(f"  PMIDs ({len(pmid_list)}): {' '.join(pmid_list[:20])}{'...' if len(pmid_list) > 20 else ''}")  # log at most 20 IDs
+    abstracts = hybrid_fetch_abstracts(pmid_list)  # output format assumed from docstring -- TODO confirm
+    return abstracts
+
 def getabstracts(gene,query):
     """
       1. esearch -db pubmed -query ... -- searches PubMed for the gene + keyword query, returns matching record IDs