diff options
Diffstat (limited to 'gn3/db/rdf.py')
-rw-r--r-- | gn3/db/rdf.py | 126 |
1 files changed, 89 insertions, 37 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py index eb4014a..5a95683 100644 --- a/gn3/db/rdf.py +++ b/gn3/db/rdf.py @@ -4,39 +4,12 @@ This module is a collection of functions that handle SPARQL queries. """ import json - +from string import Template from SPARQLWrapper import SPARQLWrapper from pyld import jsonld # type: ignore - - -PREFIXES = { - "dcat": "http://www.w3.org/ns/dcat#", - "dct": "http://purl.org/dc/terms/", - "ex": "http://example.org/stuff/1.0/", - "fabio": "http://purl.org/spar/fabio/", - "foaf": "http://xmlns.com/foaf/0.1/", - "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=", - "genotype": "http://genenetwork.org/genotype/", - "gn": "http://genenetwork.org/id/", - "gnc": "http://genenetwork.org/category/", - "gnt": "http://genenetwork.org/term/", - "owl": "http://www.w3.org/2002/07/owl#", - "phenotype": "http://genenetwork.org/phenotype/", - "prism": "http://prismstandard.org/namespaces/basic/2.0/", - "publication": "http://genenetwork.org/publication/", - "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "skos": "http://www.w3.org/2004/02/skos/core#", - "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", - "up": "http://purl.uniprot.org/core/", - "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", - "xsd": "http://www.w3.org/2001/XMLSchema#", -} - - -RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>" - for key, value in PREFIXES.items()]) +from gn3.db.constants import ( + RDF_PREFIXES, BASE_CONTEXT +) def sparql_construct_query(query: str, endpoint: str) -> dict: @@ -51,22 +24,101 @@ def sparql_construct_query(query: str, endpoint: str) -> dict: def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict: """Frame and then compact the results given a context""" results = sparql_construct_query(query, endpoint) - if not results: - return {} return jsonld.compact(jsonld.frame(results, context), context) def query_and_compact(query: str, context: dict, endpoint: str) -> dict: """Compact the results given a context""" results = sparql_construct_query(query, endpoint) - if not results: - return {} return jsonld.compact(results, context) def query_and_frame(query: str, context: dict, endpoint: str) -> dict: """Frame the results given a context""" results = sparql_construct_query(query, endpoint) - if not results: - return {} return jsonld.frame(results, context) + + +def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict: + """Fetch all the Wiki entries using the symbol""" + # This query uses a sub-query to fetch the latest comment by the + # version id. + query = Template(""" +$prefix + +CONSTRUCT { + ?uid rdfs:label ?symbolName; + gnt:reason ?reason ; + gnt:species ?species ; + dct:references ?pmid ; + foaf:homepage ?weburl ; + rdfs:comment ?comment ; + foaf:mbox ?email ; + gnt:initial ?usercode ; + gnt:belongsToCategory ?category ; + gnt:hasVersion ?versionId ; + dct:created ?created ; + dct:identifier ?identifier . +} WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid rdfs:comment ?comment ; + gnt:symbol ?symbolId ; + rdf:type gnc:GNWikiEntry ; + dct:created ?createTime . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + { + SELECT (MAX(?vers) AS ?max) ?id_ WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid dct:identifier ?id_ ; + dct:hasVersion ?vers ; + dct:identifier ?id_ ; + gnt:symbol ?symbolId . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + } + } + ?uid dct:hasVersion ?max ; + dct:identifier ?id_ . + OPTIONAL { ?uid gnt:reason ?reason } . + OPTIONAL { + ?uid gnt:belongsToSpecies ?speciesId . + ?speciesId gnt:shortName ?species . + } . + OPTIONAL { ?uid dct:references ?pubmedId . } . + OPTIONAL { ?uid foaf:homepage ?weburl . } . + OPTIONAL { ?uid gnt:initial ?usercode . } . + OPTIONAL { ?uid gnt:mbox ?email . } . + OPTIONAL { ?uid gnt:belongsToCategory ?category . } . + BIND (str(?version) AS ?versionId) . + BIND (str(?id_) AS ?identifier) . + BIND (str(?pubmedId) AS ?pmid) . + BIND (str(?createTime) AS ?created) . +} +""").substitute(prefix=RDF_PREFIXES, symbol=symbol,) + context = BASE_CONTEXT | { + "foaf": "http://xmlns.com/foaf/0.1/", + "dct": "http://purl.org/dc/terms/", + "categories": "gnt:belongsToCategory", + "web_url": "foaf:homepage", + "version": "gnt:hasVersion", + "symbol": "rdfs:label", + "reason": "gnt:reason", + "species": "gnt:species", + "pubmed_id": "dct:references", + "email": "foaf:mbox", + "initial": "gnt:initial", + "comment": "rdfs:comment", + "created": "dct:created", + "id": "dct:identifier", + # This points to the RDF Node which is the unique identifier + # for this triplet. It's constructed using the comment-id and + # the comment-versionId + "wiki_identifier": "@id", + } + results = query_frame_and_compact( + query, context, + sparql_uri + ) + data = results.get("data") + if not data: + return results + return results |