From 8088292c3fc1669f5a8b2dc2a78180cf0be380cd Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 9 Sep 2024 22:50:33 +0300 Subject: Abstract out sanitizing json-ld result into a function. * gn3/db/rdf/wiki.py (__sanitize_result): New function. (get_wiki_entries_by_symbol): Delete sanitization code. (get_comment_history): Ditto. Signed-off-by: Munyoki Kilyungi --- gn3/db/rdf/wiki.py | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) (limited to 'gn3') diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py index 2e397a8..f7b0030 100644 --- a/gn3/db/rdf/wiki.py +++ b/gn3/db/rdf/wiki.py @@ -24,6 +24,23 @@ WIKI_CONTEXT = BASE_CONTEXT | { } +def __sanitize_result(result: dict): + """Make sure `categories` and `pubmed_ids` are always arrays + + """ + categories = result.get("categories") + if isinstance(categories, str): + result["categories"] = [categories] if categories else [] + pmids = result.get("pubmed_ids") + if isinstance(pmids, str): + result["pubmed_ids"] = [pmids] if pmids else [] + if isinstance(pmids, int): + result["pubmed_ids"] = [pmids] + result["pubmed_ids"] = [int(pmid.split("/")[-1]) if isinstance(pmid, str) else pmid + for pmid in result["pubmed_ids"]] + return result + + def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict: """Fetch all the Wiki entries using the symbol""" # This query uses a sub-query to fetch the latest comment by the @@ -85,18 +102,8 @@ CONSTRUCT { query, WIKI_CONTEXT, sparql_uri ) - data = results.get("data") - for result in data: - categories = result.get("categories") or [] - if categories and isinstance(categories, str): - result["categories"] = [categories] - pmids = result.get("pubmed_ids") - if pmids and isinstance(pmids, str): - result["pubmed_ids"] = [pmids] - elif pmids: - result["pubmed_ids"] = [int(pmid.split("/")[-1]) for pmid in pmids] - else: - result["pubmed_ids"] = [] + data = [__sanitize_result(result) + for result in results.get("data")] results["data"] = sorted(data, key=lambda d: d["created"]) if not data: return results @@ -154,20 +161,8 @@ CONSTRUCT { query, WIKI_CONTEXT, sparql_uri ) - data = results.get("data") - for result in data: - categories = result.get("categories") or [] - if categories and isinstance(categories, str): - result["categories"] = [categories] - pmids = result.get("pubmed_ids") - if pmids and isinstance(pmids, str): - result["pubmed_ids"] = [pmids] - elif pmids: - result["pubmed_ids"] = [int(pmid.split("/")[-1]) for pmid in pmids] - else: - result["pubmed_ids"] = [] - result["version"] = int(result["version"]) - + data = [__sanitize_result(result) + for result in results.get("data")] # We manually sort the array, since the SPARQL engine does not # provide a guarantee that it will support an ORDER BY clause in a # CONSTRUCT. Using ORDER BY on a solution sequence for a CONSTRUCT -- cgit v1.2.3