about summary refs log tree commit diff
path: root/gn3/db/rdf/wiki.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/db/rdf/wiki.py')
-rw-r--r-- gn3/db/rdf/wiki.py 158
1 file changed, 135 insertions, 23 deletions
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py
index b2b301a..dd8d204 100644
--- a/gn3/db/rdf/wiki.py
+++ b/gn3/db/rdf/wiki.py
@@ -15,6 +15,7 @@ from gn3.db.rdf import (
     RDF_PREFIXES,
     query_frame_and_compact,
     update_rdf,
+    sparql_query,
 )
 
 
@@ -41,6 +42,10 @@ def __sanitize_result(result: dict) -> dict:
     if not result:
         return {}
     categories = result.get("categories")
+    if (version := result.get("version")) and isinstance(version, str):
+        result["version"] = int(version)
+    if (wiki_id := result.get("id")) and isinstance(wiki_id, str):  # check the id itself, not version
+        result["id"] = int(wiki_id)
     if isinstance(categories, str):
         result["categories"] = [categories] if categories else []
     result["categories"] = sorted(result["categories"])
@@ -79,7 +84,7 @@ CONSTRUCT {
              gnt:belongsToCategory ?category ;
              gnt:hasVersion ?max ;
              dct:created ?created ;
-             dct:identifier ?id_ .
+             dct:identifier ?id .
 } FROM $graph WHERE {
     ?comment rdfs:label ?text_ ;
              gnt:symbol ?symbol ;
@@ -88,12 +93,12 @@ CONSTRUCT {
              dct:created ?createTime .
     FILTER ( LCASE(STR(?symbol)) = LCASE("$symbol") ) .
     {
-        SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+        SELECT (MAX(?vers) AS ?max_) ?id_ WHERE {
             ?comment dct:identifier ?id_ ;
                      dct:hasVersion ?vers .
         }
     }
-    ?comment dct:hasVersion ?max .
+    ?comment dct:hasVersion ?max_ .
     OPTIONAL { ?comment gnt:reason ?reason_ } .
     OPTIONAL {
         ?comment gnt:belongsToSpecies ?speciesId .
@@ -106,6 +111,8 @@ CONSTRUCT {
     OPTIONAL { ?comment gnt:belongsToCategory ?category_ } .
     BIND (str(?createTime) AS ?created) .
     BIND (str(?text_) AS ?text) .
+    BIND (str(?max_) AS ?max) .
+    BIND (str(?id_) AS ?id) .
     BIND (STR(COALESCE(?pmid_, "")) AS ?pmid) .
     BIND (COALESCE(?reason_, "") AS ?reason) .
     BIND (STR(COALESCE(?weburl_, "")) AS ?weburl) .
@@ -154,7 +161,7 @@ CONSTRUCT {
              rdfs:label ?text_ ;
              gnt:symbol ?symbol ;
              dct:created ?createTime ;
-             dct:hasVersion ?version ;
+             dct:hasVersion ?version_ ;
              dct:identifier $comment_id .
     OPTIONAL { ?comment gnt:reason ?reason_ } .
     OPTIONAL {
@@ -167,6 +174,7 @@ CONSTRUCT {
     OPTIONAL { ?comment foaf:mbox ?email_ . } .
     OPTIONAL { ?comment gnt:belongsToCategory ?category_ . } .
     BIND (str(?text_) AS ?text) .
+    BIND (str(?version_) AS ?version) .
     BIND (str(?createTime) AS ?created) .
     BIND (STR(COALESCE(?pmid_, "")) AS ?pmid) .
     BIND (COALESCE(?reason_, "") AS ?reason) .
@@ -186,38 +194,42 @@ CONSTRUCT {
 
 
 def update_wiki_comment(
-        insert_dict: dict,
-        sparql_user: str,
-        sparql_password: str,
-        sparql_auth_uri: str,
-        graph: str = "<http://genenetwork.org>",
+    insert_dict: dict,
+    sparql_user: str,
+    sparql_password: str,
+    sparql_auth_uri: str,
+    graph: str = "<http://genenetwork.org>",
 ) -> str:
     """Update a wiki comment by inserting a comment with the same
-identifier but an updated version id.
+    identifier but an updated version id.
     """
     name = f"gn:wiki-{insert_dict['Id']}-{insert_dict['versionId']}"
-    comment_triple = Template("""$name rdfs:label '''$comment'''@en ;
+    comment_triple = Template(
+        """$name rdfs:label '''$comment'''@en ;
 rdf:type gnc:GNWikiEntry ;
 gnt:symbol "$symbol" ;
 dct:identifier "$comment_id"^^xsd:integer ;
 dct:hasVersion "$next_version"^^xsd:integer ;
 dct:created "$created"^^xsd:datetime .
-""").substitute(
+"""
+    ).substitute(
         comment=insert_dict["comment"],
-        name=name, symbol=insert_dict['symbol'],
-        comment_id=insert_dict["Id"], next_version=insert_dict["versionId"],
-        created=insert_dict["createtime"])
+        name=name,
+        symbol=insert_dict["symbol"],
+        comment_id=insert_dict["Id"],
+        next_version=insert_dict["versionId"],
+        created=insert_dict["createtime"],
+    )
     using = ""
     if insert_dict["email"]:
         comment_triple += f"{name} foaf:mbox <{insert_dict['email']}> .\n"
     if insert_dict["initial"]:
         comment_triple += f"{name} gnt:initial \"{insert_dict['initial']}\" .\n"
-    if insert_dict["species"]:
+    if insert_dict["species"] and insert_dict["species"].lower() != "no specific species":
         comment_triple += f"{name} gnt:belongsToSpecies ?speciesId .\n"
         using = Template(
-            """ USING $graph WHERE { ?speciesId gnt:shortName "$species" . } """).substitute(
-                graph=graph, species=insert_dict["species"]
-        )
+            """ USING $graph WHERE { ?speciesId gnt:shortName "$species" . } """
+        ).substitute(graph=graph, species=insert_dict["species"])
     if insert_dict["reason"]:
         comment_triple += f"{name} gnt:reason \"{insert_dict['reason']}\" .\n"
     if insert_dict["weburl"]:
@@ -236,10 +248,110 @@ INSERT {
 GRAPH $graph {
 $comment_triple}
 } $using
-""").substitute(prefix=RDF_PREFIXES,
-                graph=graph,
-                comment_triple=comment_triple,
-                using=using),
+"""
+        ).substitute(
+            prefix=RDF_PREFIXES, graph=graph, comment_triple=comment_triple, using=using
+        ),
+        sparql_user=sparql_user,
+        sparql_password=sparql_password,
+        sparql_auth_uri=sparql_auth_uri,
+    )
+
+
+def get_rif_entries_by_symbol(
+    symbol: str, sparql_uri: str, graph: str = "<http://genenetwork.org>"
+) -> dict:
+    """Fetch NCBI RIF entries for a given symbol (case-insensitive).
+
+    A SPARQL `SELECT` is used instead of a `CONSTRUCT` to avoid
+    truncation: the Virtuoso SPARQL engine limits query results to
+    1,048,576 triples per solution, and a symbol can have more than
+    2,000 NCBI entries, which could push a `CONSTRUCT` past that.  See:
+
+    <https://community.openlinksw.com/t/sparql-query-limiting-results-to-100000-triples/2131>
+
+    Returns a dict with "@context" and "data" (one dict per result row).
+    """
+    # XXX: Consider pagination
+    # Escape characters that could terminate the quoted SPARQL literal.
+    escapes = str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"})
+    query = Template(
+        """
+$prefix
+
+SELECT ?comment ?symbol ?species ?pubmed_id ?version ?created ?gene_id ?taxonomic_id
+FROM $graph WHERE {
+    ?comment_id rdfs:label ?text_ ;
+                gnt:symbol ?symbol ;
+                rdf:type gnc:NCBIWikiEntry ;
+                gnt:hasGeneId ?gene_id_ ;
+                dct:hasVersion ?version ;
+                dct:references ?pmid_ ;
+                dct:created ?createTime ;
+                gnt:belongsToSpecies ?speciesId .
+    ?speciesId rdfs:label ?species .
+    FILTER ( LCASE(?symbol) = LCASE("$symbol") ) .
+    OPTIONAL { ?comment_id skos:notation ?taxonId_ . } .
+    BIND (STR(?text_) AS ?comment) .
+    BIND (xsd:integer(STRAFTER(STR(?taxonId_), STR(taxon:))) AS ?taxonomic_id) .
+    BIND (xsd:integer(STRAFTER(STR(?pmid_), STR(pubmed:))) AS ?pubmed_id) .
+    BIND (xsd:integer(STRAFTER(STR(?gene_id_), STR(generif:))) AS ?gene_id) .
+    BIND (STR(?createTime) AS ?created) .
+} ORDER BY ?species ?createTime
+"""
+    ).substitute(prefix=RDF_PREFIXES, graph=graph, symbol=symbol.translate(escapes))
+    results: dict[str, dict | list] = {
+        "@context": {
+            "dct": "http://purl.org/dc/terms/",
+            "gnt": "http://genenetwork.org/term/",
+            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+            "skos": "http://www.w3.org/2004/02/skos/core#",
+            "symbol": "gnt:symbol",
+            "species": "gnt:species",
+            "taxonomic_id": "skos:notation",
+            "gene_id": "gnt:hasGeneId",
+            "pubmed_id": "dct:references",
+            "created": "dct:created",
+            "comment": "rdfs:comment",
+            "version": "dct:hasVersion",
+        }
+    }
+    rows = sparql_query(query=query, endpoint=sparql_uri, format_type="json")
+    results["data"] = [
+        {
+            # All-digit values become ints; isascii() skips digits like "²" that int() rejects.
+            key: int(val)
+            if isinstance(val, str) and val.isascii() and val.isdigit()
+            else val
+            for key, val in ((k, meta.get("value")) for k, meta in row.items())
+        }
+        for row in rows
+    ]
+    return results
+
+
+def delete_wiki_entries_by_id(
+    wiki_id: int,
+    sparql_user: str,
+    sparql_password: str,
+    sparql_auth_uri: str,
+    graph: str = "<http://genenetwork.org>",
+) -> str:
+    """Delete all wiki entries associated with a given ID."""
+    query = Template(
+        """
+$prefix
+
+DELETE WHERE {
+    GRAPH $graph {
+        ?comment dct:identifier \"$wiki_id\"^^xsd:integer .
+        ?comment ?p ?o .
+    }
+}
+"""
+    ).substitute(prefix=RDF_PREFIXES, graph=graph, wiki_id=wiki_id)
+    return update_rdf(
+        query=query,
         sparql_user=sparql_user,
         sparql_password=sparql_password,
         sparql_auth_uri=sparql_auth_uri,