Abstract out the sanitizing of JSON-LD results into a function.

* gn3/db/rdf/wiki.py (__sanitize_result): New function. (get_wiki_entries_by_symbol): Delete sanitization code. (get_comment_history): Ditto. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
author: Munyoki Kilyungi 2024-09-09 22:50:33 +0300
committer: BonfaceKilz 2024-09-11 10:30:57 +0300
commit: 8088292c3fc1669f5a8b2dc2a78180cf0be380cd (patch)
tree: 8dbb245ce7ceaf3464cdbd2a2558b804a5af4b3d /gn3/db/rdf
parent: ad018eff1169c8d0db00c26f219af82df7c737c0 (diff)
download: genenetwork3-8088292c3fc1669f5a8b2dc2a78180cf0be380cd.tar.gz
1 files changed, 21 insertions, 26 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py
index 2e397a8..f7b0030 100644
--- a/gn3/db/rdf/wiki.py
+++ b/gn3/db/rdf/wiki.py
@@ -24,6 +24,23 @@ WIKI_CONTEXT = BASE_CONTEXT | {
 }
 
 
+def __sanitize_result(result: dict):
+    """Make sure `categories` and `pubmed_ids` are always arrays
+
+    """
+    categories = result.get("categories")
+    if isinstance(categories, str):
+        result["categories"] = [categories] if categories else []
+    pmids = result.get("pubmed_ids")
+    if isinstance(pmids, str):
+        result["pubmed_ids"] = [pmids] if pmids else []
+    if isinstance(pmids, int):
+        result["pubmed_ids"] = [pmids]
+    result["pubmed_ids"] = [int(pmid.split("/")[-1]) if isinstance(pmid, str) else pmid
+                            for pmid in result["pubmed_ids"]]
+    return result
+
+
 def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
     """Fetch all the Wiki entries using the symbol"""
     # This query uses a sub-query to fetch the latest comment by the
@@ -85,18 +102,8 @@ CONSTRUCT {
         query, WIKI_CONTEXT,
         sparql_uri
     )
-    data = results.get("data")
-    for result in data:
-        categories = result.get("categories") or []
-        if categories and isinstance(categories, str):
-            result["categories"] = [categories]
-        pmids = result.get("pubmed_ids")
-        if pmids and isinstance(pmids, str):
-            result["pubmed_ids"] = [pmids]
-        elif pmids:
-            result["pubmed_ids"] = [int(pmid.split("/")[-1]) for pmid in pmids]
-        else:
-            result["pubmed_ids"] = []
+    data = [__sanitize_result(result)
+            for result in results.get("data")]
     results["data"] = sorted(data, key=lambda d: d["created"])
     if not data:
         return results
@@ -154,20 +161,8 @@ CONSTRUCT {
         query, WIKI_CONTEXT,
         sparql_uri
     )
-    data = results.get("data")
-    for result in data:
-        categories = result.get("categories") or []
-        if categories and isinstance(categories, str):
-            result["categories"] = [categories]
-        pmids = result.get("pubmed_ids")
-        if pmids and isinstance(pmids, str):
-            result["pubmed_ids"] = [pmids]
-        elif pmids:
-            result["pubmed_ids"] = [int(pmid.split("/")[-1]) for pmid in pmids]
-        else:
-            result["pubmed_ids"] = []
-        result["version"] = int(result["version"])
-
+    data = [__sanitize_result(result)
+            for result in results.get("data")]
     # We manually sort the array, since the SPARQL engine does not
     # provide a guarantee that it will support an ORDER BY clause in a
     # CONSTRUCT. Using ORDER BY on a solution sequence for a CONSTRUCT
author	Munyoki Kilyungi	2024-09-09 22:50:33 +0300
committer	BonfaceKilz	2024-09-11 10:30:57 +0300
commit	8088292c3fc1669f5a8b2dc2a78180cf0be380cd (patch)
tree	8dbb245ce7ceaf3464cdbd2a2558b804a5af4b3d /gn3/db/rdf
parent	ad018eff1169c8d0db00c26f219af82df7c737c0 (diff)
download	genenetwork3-8088292c3fc1669f5a8b2dc2a78180cf0be380cd.tar.gz