about summary refs log tree commit diff
path: root/gn3/db/rdf.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/db/rdf.py')
-rw-r--r--gn3/db/rdf.py126
1 files changed, 89 insertions, 37 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index eb4014a..5a95683 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -4,39 +4,12 @@ This module is a collection of functions that handle SPARQL queries.
 
 """
 import json
-
+from string import Template
 from SPARQLWrapper import SPARQLWrapper
 from pyld import jsonld  # type: ignore
-
-
-PREFIXES = {
-    "dcat": "http://www.w3.org/ns/dcat#",
-    "dct": "http://purl.org/dc/terms/",
-    "ex": "http://example.org/stuff/1.0/",
-    "fabio": "http://purl.org/spar/fabio/",
-    "foaf": "http://xmlns.com/foaf/0.1/",
-    "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
-    "genotype": "http://genenetwork.org/genotype/",
-    "gn": "http://genenetwork.org/id/",
-    "gnc": "http://genenetwork.org/category/",
-    "gnt": "http://genenetwork.org/term/",
-    "owl": "http://www.w3.org/2002/07/owl#",
-    "phenotype": "http://genenetwork.org/phenotype/",
-    "prism": "http://prismstandard.org/namespaces/basic/2.0/",
-    "publication": "http://genenetwork.org/publication/",
-    "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
-    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
-    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    "skos": "http://www.w3.org/2004/02/skos/core#",
-    "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
-    "up": "http://purl.uniprot.org/core/",
-    "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
-    "xsd": "http://www.w3.org/2001/XMLSchema#",
-}
-
-
-RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>"
-                          for key, value in PREFIXES.items()])
+from gn3.db.constants import (
+    RDF_PREFIXES, BASE_CONTEXT
+)
 
 
 def sparql_construct_query(query: str, endpoint: str) -> dict:
@@ -51,22 +24,101 @@ def sparql_construct_query(query: str, endpoint: str) -> dict:
 def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
     """Frame and then compact the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.compact(jsonld.frame(results, context), context)
 
 
 def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
     """Compact the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.compact(results, context)
 
 
 def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
     """Frame the results given a context"""
     results = sparql_construct_query(query, endpoint)
-    if not results:
-        return {}
     return jsonld.frame(results, context)
+
+
+def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
+    """Fetch all the Wiki entries using the symbol"""
+    # This query uses a sub-query to fetch the latest comment by the
+    # version id.
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+    ?uid rdfs:label ?symbolName;
+         gnt:reason ?reason ;
+         gnt:species ?species ;
+         dct:references ?pmid ;
+         foaf:homepage ?weburl ;
+         rdfs:comment ?comment ;
+         foaf:mbox ?email ;
+         gnt:initial ?usercode ;
+         gnt:belongsToCategory ?category ;
+         gnt:hasVersion ?versionId ;
+         dct:created ?created ;
+         dct:identifier ?identifier .
+} WHERE {
+    ?symbolId rdfs:label ?symbolName .
+    ?uid rdfs:comment ?comment ;
+         gnt:symbol ?symbolId ;
+         rdf:type gnc:GNWikiEntry ;
+         dct:created ?createTime .
+    FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+    {
+        SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+            ?symbolId rdfs:label ?symbolName .
+            ?uid dct:identifier ?id_ ;
+                 dct:hasVersion ?vers ;
+                 dct:identifier ?id_ ;
+                 gnt:symbol ?symbolId .
+            FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+        }
+    }
+    ?uid dct:hasVersion ?max ;
+         dct:identifier ?id_ .
+    OPTIONAL { ?uid gnt:reason ?reason } .
+    OPTIONAL {
+        ?uid gnt:belongsToSpecies ?speciesId .
+        ?speciesId gnt:shortName ?species .
+    } .
+    OPTIONAL { ?uid dct:references ?pubmedId . } .
+    OPTIONAL { ?uid foaf:homepage ?weburl . } .
+    OPTIONAL { ?uid gnt:initial ?usercode . } .
+    OPTIONAL { ?uid gnt:mbox ?email . } .
+    OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
+    BIND (str(?version) AS ?versionId) .
+    BIND (str(?id_) AS ?identifier) .
+    BIND (str(?pubmedId) AS ?pmid) .
+    BIND (str(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, symbol=symbol,)
+    context = BASE_CONTEXT | {
+        "foaf": "http://xmlns.com/foaf/0.1/",
+        "dct": "http://purl.org/dc/terms/",
+        "categories": "gnt:belongsToCategory",
+        "web_url": "foaf:homepage",
+        "version": "gnt:hasVersion",
+        "symbol": "rdfs:label",
+        "reason": "gnt:reason",
+        "species": "gnt:species",
+        "pubmed_id": "dct:references",
+        "email": "foaf:mbox",
+        "initial": "gnt:initial",
+        "comment": "rdfs:comment",
+        "created": "dct:created",
+        "id": "dct:identifier",
+        # This points to the RDF Node which is the unique identifier
+        # for this triplet.  It's constructed using the comment-id and
+        # the comment-versionId
+        "wiki_identifier": "@id",
+    }
+    results = query_frame_and_compact(
+        query, context,
+        sparql_uri
+    )
+    data = results.get("data")
+    if not data:
+        return results
+    return results