about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--gn3/api/metadata.py2
-rw-r--r--gn3/api/metadata_api/wiki.py24
-rw-r--r--gn3/db/rdf.py124
-rw-r--r--gn3/db/rdf/__init__.py (renamed from gn3/db/constants.py)43
-rw-r--r--gn3/db/rdf/wiki.py168
-rw-r--r--gn3/db/wiki.py17
6 files changed, 242 insertions, 136 deletions
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index 3f28f5d..6110880 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -15,7 +15,7 @@ from gn3.db.datasets import (retrieve_metadata,
                              get_history)
 from gn3.db.rdf import (query_frame_and_compact,
                         query_and_compact)
-from gn3.db.constants import (
+from gn3.db.rdf import (
     RDF_PREFIXES, BASE_CONTEXT,
     DATASET_CONTEXT,
     DATASET_SEARCH_CONTEXT, PUBLICATION_CONTEXT,
diff --git a/gn3/api/metadata_api/wiki.py b/gn3/api/metadata_api/wiki.py
index a4abef6..9ea0d53 100644
--- a/gn3/api/metadata_api/wiki.py
+++ b/gn3/api/metadata_api/wiki.py
@@ -5,8 +5,9 @@ from typing import Any, Dict
 from flask import Blueprint, request, jsonify, current_app, make_response
 from gn3 import db_utils
 from gn3.db import wiki
-from gn3.db.rdf import (query_frame_and_compact,
-                        get_wiki_entries_by_symbol)
+from gn3.db.rdf import query_frame_and_compact
+from gn3.db.rdf.wiki import (get_wiki_entries_by_symbol,
+                             get_comment_history)
 
 
 wiki_blueprint = Blueprint("wiki", __name__, url_prefix="wiki")
@@ -71,7 +72,6 @@ def edit_wiki(comment_id: int):
 @wiki_blueprint.route("/<string:symbol>", methods=["GET"])
 def get_wiki_entries(symbol: str):
     """Fetch wiki entries"""
-    content_type = request.headers.get("Content-Type")
     status_code = 200
     response = get_wiki_entries_by_symbol(
         symbol=symbol,
@@ -80,7 +80,7 @@ def get_wiki_entries(symbol: str):
     if not data:
         data = {}
         status_code = 404
-    if content_type == "application/ld+json":
+    if request.headers.get("Accept") == "application/ld+json":
         payload = make_response(response)
         payload.headers["Content-Type"] = "application/ld+json"
         return payload, status_code
@@ -117,3 +117,19 @@ def get_species():
         species_dict = wiki.get_species(cursor)
         return jsonify(species_dict)
     return jsonify(error="Error getting species, most likely due to DB error!"), 500
+
+
+@wiki_blueprint.route("/<int:comment_id>/history", methods=["GET"])
+def get_history(comment_id):
+    """Fetch every recorded version of a wiki comment"""
+    history = get_comment_history(
+        comment_id=comment_id,
+        sparql_uri=current_app.config["SPARQL_ENDPOINT"])
+    entries = history.get("data") or {}
+    http_status = 200 if entries else 404
+    # JSON-LD clients get the whole document, not just its entries.
+    if request.headers.get("Accept") != "application/ld+json":
+        return jsonify(entries), http_status
+    ld_response = make_response(history)
+    ld_response.headers["Content-Type"] = "application/ld+json"
+    return ld_response, http_status
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
deleted file mode 100644
index 5a95683..0000000
--- a/gn3/db/rdf.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""RDF utilities
-
-This module is a collection of functions that handle SPARQL queries.
-
-"""
-import json
-from string import Template
-from SPARQLWrapper import SPARQLWrapper
-from pyld import jsonld  # type: ignore
-from gn3.db.constants import (
-    RDF_PREFIXES, BASE_CONTEXT
-)
-
-
-def sparql_construct_query(query: str, endpoint: str) -> dict:
-    """Query virtuoso using a CONSTRUCT query and return a json-ld
-    dictionary"""
-    sparql = SPARQLWrapper(endpoint)
-    sparql.setQuery(query)
-    results = sparql.queryAndConvert()
-    return json.loads(results.serialize(format="json-ld"))  # type: ignore
-
-
-def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
-    """Frame and then compact the results given a context"""
-    results = sparql_construct_query(query, endpoint)
-    return jsonld.compact(jsonld.frame(results, context), context)
-
-
-def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
-    """Compact the results given a context"""
-    results = sparql_construct_query(query, endpoint)
-    return jsonld.compact(results, context)
-
-
-def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
-    """Frame the results given a context"""
-    results = sparql_construct_query(query, endpoint)
-    return jsonld.frame(results, context)
-
-
-def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
-    """Fetch all the Wiki entries using the symbol"""
-    # This query uses a sub-query to fetch the latest comment by the
-    # version id.
-    query = Template("""
-$prefix
-
-CONSTRUCT {
-    ?uid rdfs:label ?symbolName;
-         gnt:reason ?reason ;
-         gnt:species ?species ;
-         dct:references ?pmid ;
-         foaf:homepage ?weburl ;
-         rdfs:comment ?comment ;
-         foaf:mbox ?email ;
-         gnt:initial ?usercode ;
-         gnt:belongsToCategory ?category ;
-         gnt:hasVersion ?versionId ;
-         dct:created ?created ;
-         dct:identifier ?identifier .
-} WHERE {
-    ?symbolId rdfs:label ?symbolName .
-    ?uid rdfs:comment ?comment ;
-         gnt:symbol ?symbolId ;
-         rdf:type gnc:GNWikiEntry ;
-         dct:created ?createTime .
-    FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
-    {
-        SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
-            ?symbolId rdfs:label ?symbolName .
-            ?uid dct:identifier ?id_ ;
-                 dct:hasVersion ?vers ;
-                 dct:identifier ?id_ ;
-                 gnt:symbol ?symbolId .
-            FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
-        }
-    }
-    ?uid dct:hasVersion ?max ;
-         dct:identifier ?id_ .
-    OPTIONAL { ?uid gnt:reason ?reason } .
-    OPTIONAL {
-        ?uid gnt:belongsToSpecies ?speciesId .
-        ?speciesId gnt:shortName ?species .
-    } .
-    OPTIONAL { ?uid dct:references ?pubmedId . } .
-    OPTIONAL { ?uid foaf:homepage ?weburl . } .
-    OPTIONAL { ?uid gnt:initial ?usercode . } .
-    OPTIONAL { ?uid gnt:mbox ?email . } .
-    OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
-    BIND (str(?version) AS ?versionId) .
-    BIND (str(?id_) AS ?identifier) .
-    BIND (str(?pubmedId) AS ?pmid) .
-    BIND (str(?createTime) AS ?created) .
-}
-""").substitute(prefix=RDF_PREFIXES, symbol=symbol,)
-    context = BASE_CONTEXT | {
-        "foaf": "http://xmlns.com/foaf/0.1/",
-        "dct": "http://purl.org/dc/terms/",
-        "categories": "gnt:belongsToCategory",
-        "web_url": "foaf:homepage",
-        "version": "gnt:hasVersion",
-        "symbol": "rdfs:label",
-        "reason": "gnt:reason",
-        "species": "gnt:species",
-        "pubmed_id": "dct:references",
-        "email": "foaf:mbox",
-        "initial": "gnt:initial",
-        "comment": "rdfs:comment",
-        "created": "dct:created",
-        "id": "dct:identifier",
-        # This points to the RDF Node which is the unique identifier
-        # for this triplet.  It's constructed using the comment-id and
-        # the comment-versionId
-        "wiki_identifier": "@id",
-    }
-    results = query_frame_and_compact(
-        query, context,
-        sparql_uri
-    )
-    data = results.get("data")
-    if not data:
-        return results
-    return results
diff --git a/gn3/db/constants.py b/gn3/db/rdf/__init__.py
index 45e3bfc..ffb75e5 100644
--- a/gn3/db/constants.py
+++ b/gn3/db/rdf/__init__.py
@@ -1,6 +1,15 @@
+"""RDF
+
+Constants for prefixes and contexts, and wrapper functions that run
+SPARQL CONSTRUCT queries and frame and/or compact the results with jsonld.
+
 """
-This module contains some constants used in other modules.
-"""
+import json
+
+from SPARQLWrapper import SPARQLWrapper
+from pyld import jsonld  # type: ignore
+
+
 PREFIXES = {
     "dcat": "http://www.w3.org/ns/dcat#",
     "dct": "http://purl.org/dc/terms/",
@@ -150,3 +159,33 @@ PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
     "species": "gnt:belongsToSpecies",
     "group": "gnt:belongsToGroup",
 }
+
+
+def sparql_construct_query(query: str, endpoint: str) -> dict:
+    """Run the CONSTRUCT `query` against the SPARQL `endpoint` and
+    return the converted result, serialised to json-ld and parsed"""
+    wrapper = SPARQLWrapper(endpoint)
+    wrapper.setQuery(query)
+    converted = wrapper.queryAndConvert()
+    return json.loads(converted.serialize(format="json-ld"))  # type: ignore
+
+
+def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
+    """Run `query` on `endpoint`, frame the result with `context`,
+    then compact the framed document against the same `context`"""
+    framed = jsonld.frame(sparql_construct_query(query, endpoint), context)
+    # "graph": True is handed to pyld as a compaction option.
+    compacted = jsonld.compact(framed, context, options={"graph": True})
+    return compacted
+
+
+def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
+    """Run `query` on `endpoint` and compact the result with `context`"""
+    document = sparql_construct_query(query, endpoint)
+    return jsonld.compact(document, context, options={"graph": True})
+
+
+def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
+    """Run `query` on `endpoint` and frame the result with `context`"""
+    document = sparql_construct_query(query, endpoint)
+    return jsonld.frame(document, context)
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py
new file mode 100644
index 0000000..f7bec47
--- /dev/null
+++ b/gn3/db/rdf/wiki.py
@@ -0,0 +1,168 @@
+"""SPARQL queries that fetch WIKI and RIF metadata.
+
+"""
+from string import Template
+from gn3.db.rdf import (BASE_CONTEXT, RDF_PREFIXES,
+                        query_frame_and_compact)
+
+
+WIKI_CONTEXT = BASE_CONTEXT | {
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "dct": "http://purl.org/dc/terms/",
+    "categories": "gnt:belongsToCategory",
+    "web_url": "foaf:homepage",
+    "version": "gnt:hasVersion",
+    "symbol": "rdfs:label",
+    "reason": "gnt:reason",
+    "species": "gnt:species",
+    "pubmed_ids": "dct:references",
+    "email": "foaf:mbox",
+    "initial": "gnt:initial",
+    "comment": "rdfs:comment",
+    "created": "dct:created",
+    "id": "dct:identifier",
+    # Alias for the RDF node (`@id`) that uniquely identifies an
+    # entry.  The node is constructed using the comment-id and the
+    # comment-versionId.
+    "wiki_identifier": "@id",
+}
+
+
+def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
+    """Fetch the latest version of each wiki entry for a gene symbol.
+
+    `symbol` is compared case-insensitively with the symbol labels.
+    The framed and compacted JSON-LD document is returned as it comes
+    back from `query_frame_and_compact`.
+    """
+    # `symbol` is spliced into a quoted SPARQL literal below, so escape
+    # the characters that would otherwise terminate or break it.
+    symbol = symbol.replace("\\", "\\\\").replace("'", "\\'")
+    symbol = symbol.replace("\n", "\\n").replace("\r", "\\r")
+    # The sub-query finds the highest version for each comment id.
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+    ?uid rdfs:label ?symbolName;
+         gnt:reason ?reason ;
+         gnt:species ?species ;
+         dct:references ?pmid ;
+         foaf:homepage ?weburl ;
+         rdfs:comment ?comment ;
+         foaf:mbox ?email ;
+         gnt:initial ?usercode ;
+         gnt:belongsToCategory ?category ;
+         gnt:hasVersion ?versionId ;
+         dct:created ?created ;
+         dct:identifier ?identifier .
+} WHERE {
+    ?symbolId rdfs:label ?symbolName .
+    ?uid rdfs:comment ?comment ;
+         gnt:symbol ?symbolId ;
+         rdf:type gnc:GNWikiEntry ;
+         dct:created ?createTime .
+    FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+    {
+        SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+            ?symbolId rdfs:label ?symbolName .
+            ?uid dct:identifier ?id_ ;
+                 dct:hasVersion ?vers ;
+                 gnt:symbol ?symbolId .
+            FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+        }
+    }
+    ?uid dct:hasVersion ?max ;
+         dct:identifier ?id_ .
+    OPTIONAL { ?uid gnt:reason ?reason } .
+    OPTIONAL {
+        ?uid gnt:belongsToSpecies ?speciesId .
+        ?speciesId gnt:shortName ?species .
+    } .
+    OPTIONAL { ?uid dct:references ?pubmedId . } .
+    OPTIONAL { ?uid foaf:homepage ?weburl . } .
+    OPTIONAL { ?uid gnt:initial ?usercode . } .
+    OPTIONAL { ?uid foaf:mbox ?email . } .
+    OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
+    BIND (str(?max) AS ?versionId) .
+    BIND (str(?id_) AS ?identifier) .
+    BIND (str(?pubmedId) AS ?pmid) .
+    BIND (str(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, symbol=symbol,)
+    return query_frame_and_compact(query, WIKI_CONTEXT, sparql_uri)
+
+
+def get_comment_history(comment_id: int, sparql_uri: str) -> dict:
+    """Get every version of a wiki comment, newest first.
+
+    `pubmed_ids` is always a list of ints and `version` an int; a
+    lone `categories` string is wrapped in a list.
+    """
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+    ?uid rdfs:label ?symbolName ;
+         gnt:reason ?reason ;
+         gnt:species ?species ;
+         dct:references ?pmid ;
+         foaf:homepage ?weburl ;
+         rdfs:comment ?comment ;
+         foaf:mbox ?email ;
+         gnt:initial ?usercode ;
+         gnt:belongsToCategory ?category ;
+         gnt:hasVersion ?versionId ;
+         dct:created ?created .
+} WHERE {
+    ?symbolId rdfs:label ?symbolName .
+    ?uid rdf:type gnc:GNWikiEntry ;
+         rdfs:comment ?comment ;
+         gnt:symbol ?symbolId ;
+         dct:created ?createTime ;
+         dct:hasVersion ?version ;
+         dct:identifier $comment_id ;
+         dct:identifier ?id_ .
+    OPTIONAL { ?uid gnt:reason ?reason_ } .
+    OPTIONAL {
+        ?uid gnt:belongsToSpecies ?speciesId .
+        ?speciesId gnt:shortName ?species_ .
+    } .
+    OPTIONAL { ?uid dct:references ?pmid . } .
+    OPTIONAL { ?uid foaf:homepage ?weburl_ . } .
+    OPTIONAL { ?uid gnt:initial ?usercode_ . } .
+    OPTIONAL { ?uid foaf:mbox ?email_ . } .
+    OPTIONAL { ?uid gnt:belongsToCategory ?category_ . } .
+    BIND (str(?version) AS ?versionId) .
+    BIND (str(?createTime) AS ?created) .
+    BIND (COALESCE(?reason_, "") AS ?reason) .
+    BIND (COALESCE(?weburl_, "") AS ?weburl) .
+    BIND (COALESCE(?usercode_, "") AS ?usercode) .
+    BIND (COALESCE(?email_, "") AS ?email) .
+    BIND (COALESCE(?species_, "") AS ?species) .
+    BIND (COALESCE(?category_, "") AS ?category) .
+}
+""").substitute(prefix=RDF_PREFIXES, comment_id=comment_id)
+    results = query_frame_and_compact(query, WIKI_CONTEXT, sparql_uri)
+    # No match: there may be no "data" at all, so fall back to [].
+    data = results.get("data") or []
+    for result in data:
+        categories = result.get("categories")
+        if categories and isinstance(categories, str):
+            result["categories"] = [categories]
+        # A single value is compacted to a scalar rather than a list.
+        pmids = result.get("pubmed_ids") or []
+        if not isinstance(pmids, list):
+            pmids = [pmids]
+        result["pubmed_ids"] = [int(pmid) for pmid in pmids]
+        result["version"] = int(result["version"])
+
+    # We manually sort the array, since the SPARQL engine does not
+    # provide a guarantee that it will support an ORDER BY clause in a
+    # CONSTRUCT. Using ORDER BY on a solution sequence for a CONSTRUCT
+    # or DESCRIBE query has no direct effect because only SELECT
+    # returns a sequence of results.  See:
+    # <https://stackoverflow.com/questions/78186393>
+    # <https://www.w3.org/TR/rdf-sparql-query/#modOrderBy>
+    results["data"] = sorted(data, key=lambda d: d["version"], reverse=True)
+    return results
diff --git a/gn3/db/wiki.py b/gn3/db/wiki.py
index abb1644..7ef5e68 100644
--- a/gn3/db/wiki.py
+++ b/gn3/db/wiki.py
@@ -9,7 +9,7 @@ class MissingDBDataException(Exception):
     """Error due to DB missing some data"""
 
 
-def get_latest_comment(connection, comment_id: str) -> int:
+def get_latest_comment(connection, comment_id: int) -> int:
     """ Latest comment is one with the highest versionId """
     cursor = connection.cursor(DictCursor)
     query = """ SELECT versionId AS version, symbol, PubMed_ID AS pubmed_ids, sp.Name AS species,
@@ -19,7 +19,7 @@ def get_latest_comment(connection, comment_id: str) -> int:
 		WHERE gr.Id = %s
 		ORDER BY versionId DESC LIMIT 1;
     """
-    cursor.execute(query, (comment_id,))
+    cursor.execute(query, (str(comment_id),))
     result = cursor.fetchone()
     result["pubmed_ids"] = [x.strip() for x in result["pubmed_ids"].split()]
     categories_query = """
@@ -36,7 +36,8 @@ def get_latest_comment(connection, comment_id: str) -> int:
 
 def get_species_id(cursor, species_name: str) -> int:
     """Find species id given species `Name`"""
-    cursor.execute("SELECT SpeciesID from Species  WHERE Name = %s", (species_name,))
+    cursor.execute(
+        "SELECT SpeciesID from Species  WHERE Name = %s", (species_name,))
     species_ids = cursor.fetchall()
     if len(species_ids) != 1:
         raise MissingDBDataException(
@@ -52,7 +53,8 @@ def get_next_comment_version(cursor, comment_id: int) -> int:
     )
     latest_version = cursor.fetchone()[0]
     if latest_version is None:
-        raise MissingDBDataException(f"No comment found with comment_id={comment_id}")
+        raise MissingDBDataException(
+            f"No comment found with comment_id={comment_id}")
     return latest_version + 1
 
 
@@ -63,17 +65,22 @@ def get_categories_ids(cursor, categories: List[str]) -> List[int]:
     for category in set(categories):
         cat_id = dict_cats.get(category.strip())
         if cat_id is None:
-            raise MissingDBDataException(f"Category with Name={category} not found")
+            raise MissingDBDataException(
+                f"Category with Name={category} not found")
         category_ids.append(cat_id)
     return category_ids
 
+
 def get_categories(cursor) -> Dict[str, int]:
+    """Get all categories"""
     cursor.execute("SELECT Name, Id from GeneCategory")
     raw_categories = cursor.fetchall()
     dict_cats = dict(raw_categories)
     return dict_cats
 
+
 def get_species(cursor) -> Dict[str, str]:
+    """Get all species"""
     cursor.execute("SELECT Name, SpeciesName from Species")
     raw_species = cursor.fetchall()
     dict_cats = dict(raw_species)