aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/rdf
diff options
context:
space:
mode:
authorMunyoki Kilyungi2024-08-29 22:32:35 +0300
committerBonfaceKilz2024-09-05 16:39:14 +0300
commitea1880d48734b271172497fc205ba7f28706ba2a (patch)
tree10772578bf996c6746d7ac86010c7702e8dd68e9 /gn3/db/rdf
parent1cb8b2c3b242522461f7db98008a9e7e882bee9a (diff)
downloadgenenetwork3-ea1880d48734b271172497fc205ba7f28706ba2a.tar.gz
Restructure RDF module.
* gn3/api/metadata.py: Import constants from gn3.db.rdf * gn3/api/metadata_api/wiki.py: Ditto. Import "get_wiki_entries_by_symbol" from gn3.db.rdf.wiki. * gn3/db/constants.py: Delete file and move all constants ... * gn3/db/rdf.py: ... and functions ... * gn3/db/rdf/__init__.py: ... here. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/db/rdf')
-rw-r--r--gn3/db/rdf/__init__.py188
-rw-r--r--gn3/db/rdf/wiki.py91
2 files changed, 279 insertions, 0 deletions
diff --git a/gn3/db/rdf/__init__.py b/gn3/db/rdf/__init__.py
new file mode 100644
index 0000000..c763810
--- /dev/null
+++ b/gn3/db/rdf/__init__.py
@@ -0,0 +1,188 @@
+"""RDF
+
+Constants for prefixes and contexts; and wrapper functions around
+creating contexts to be used by jsonld when framing and/or compacting.
+
+"""
+import json
+
+from SPARQLWrapper import SPARQLWrapper
+from pyld import jsonld # type: ignore
+
+
+# Namespace prefix -> IRI table.  RDF_PREFIXES (defined right after this
+# dict) renders one SPARQL "PREFIX" declaration per entry, in this
+# insertion order.
+PREFIXES = {
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "dct": "http://purl.org/dc/terms/",
+ "ex": "http://example.org/stuff/1.0/",
+ "fabio": "http://purl.org/spar/fabio/",
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=",
+ "genotype": "http://genenetwork.org/genotype/",
+ "gn": "http://genenetwork.org/id/",
+ "gnc": "http://genenetwork.org/category/",
+ "gnt": "http://genenetwork.org/term/",
+ "owl": "http://www.w3.org/2002/07/owl#",
+ "phenotype": "http://genenetwork.org/phenotype/",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "publication": "http://genenetwork.org/publication/",
+ "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/",
+ "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=",
+ "up": "http://purl.uniprot.org/core/",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+}
+
+# SPARQL prologue: one "PREFIX name: <iri>" declaration per PREFIXES
+# entry, newline-separated, ready to be placed ahead of a query body.
+RDF_PREFIXES = "\n".join(
+    f"PREFIX {name}: <{iri}>" for name, iri in PREFIXES.items()
+)
+
+# JSON-LD context fragment that other contexts build on: it aliases the
+# "@graph" and "@type" keywords to "data" and "type" and declares the
+# core GeneNetwork and RDF(S) prefixes.
+BASE_CONTEXT = {
+    "data": "@graph",
+    "type": "@type",
+    "gn": "http://genenetwork.org/id/",
+    "gnc": "http://genenetwork.org/category/",
+    "gnt": "http://genenetwork.org/term/",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    # Must be the bare namespace IRI (the same value as PREFIXES["rdf"]).
+    # A trailing ">" here would stop rdf: terms from matching the real
+    # RDF namespace during expansion/compaction.
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+}
+
+# JSON-LD context mapping short dataset keys to dct/dcat/foaf/skos/xkos/
+# rdfs/gnt terms.  The prefix declarations those terms rely on ("dcat",
+# "dct", "ex", "foaf", "gnt", "rdfs", "skos", "xkos") are interleaved
+# with the term aliases in the same dict.
+DATASET_CONTEXT = {
+ "accessRights": "dct:accessRights",
+ "accessionId": "dct:identifier",
+ "acknowledgement": "gnt:hasAcknowledgement",
+ "altLabel": "skos:altLabel",
+ "caseInfo": "gnt:hasCaseInfo",
+ "classifiedUnder": "xkos:classifiedUnder",
+ "contributors": "dct:creator",
+ "contactPoint": "dcat:contactPoint",
+ "created": "dct:created",
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "dct": "http://purl.org/dc/terms/",
+ "description": "dct:description",
+ "ex": "http://example.org/stuff/1.0/",
+ "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
+ "experimentType": "gnt:hasExperimentType",
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "geoSeriesId": "gnt:hasGeoSeriesId",
+ "gnt": "http://genenetwork.org/term/",
+ "inbredSet": "gnt:belongsToGroup",
+ "label": "rdfs:label",
+ "normalization": "gnt:usesNormalization",
+ "platformInfo": "gnt:hasPlatformInfo",
+ "notes": "gnt:hasNotes",
+ "organization": "foaf:Organization",
+ "prefLabel": "skos:prefLabel",
+ "citation": "dct:isReferencedBy",
+ "GoTree": "gnt:hasGOTreeValue",
+ "platform": "gnt:usesPlatform",
+ "processingInfo": "gnt:hasDataProcessingInfo",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "specifics": "gnt:hasContentInfo",
+ "title": "dct:title",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+ "tissueInfo": "gnt:hasTissueInfo",
+ "tissue": "gnt:hasTissue",
+ "contactWebUrl": "foaf:homepage",
+ "contactName": "foaf:name",
+}
+
+# Term aliases for paginated search results.  Every value uses the "ex:"
+# prefix, which is NOT declared in this dict; a context built from it has
+# to supply "ex" itself (DATASET_SEARCH_CONTEXT does).
+SEARCH_CONTEXT = {
+ "pages": "ex:pages",
+ "hits": "ex:hits",
+ "result": "ex:result",
+ "results": "ex:items",
+ "resultItem": "ex:resultType",
+ "currentPage": "ex:currentPage",
+}
+
+# SEARCH_CONTEXT extended with dataset terms and the prefixes they need
+# (including the "ex" prefix SEARCH_CONTEXT relies on).  With "|" the
+# right-hand dict wins on duplicate keys; there are none at present.
+# Note that "inbredSet" maps to ex:belongsToInbredSet here, whereas
+# DATASET_CONTEXT maps the same key to gnt:belongsToGroup.
+DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | {
+ "classifiedUnder": "xkos:classifiedUnder",
+ "created": "dct:created",
+ "dct": "http://purl.org/dc/terms/",
+ "ex": "http://example.org/stuff/1.0/",
+ "inbredSet": "ex:belongsToInbredSet",
+ "title": "dct:title",
+ "name": "rdfs:label",
+ "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+ "type": "@type",
+ "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#",
+}
+
+# JSON-LD context for publication terms, together with the dct/fabio/
+# prism/xsd prefixes they use.
+PUBLICATION_CONTEXT = {
+ "dct": "http://purl.org/dc/terms/",
+ "fabio": "http://purl.org/spar/fabio/",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "xsd": "http://www.w3.org/2001/XMLSchema#",
+ "title": "dct:title",
+ "journal": "fabio:Journal",
+ "volume": "prism:volume",
+ "page": "fabio:page",
+ "creator": "dct:creator",
+ "abstract": "dct:abstract",
+ # "year" and "month" are expanded term definitions: besides the
+ # property IRI they declare the datatype of the value.
+ "year": {
+ "@id": "fabio:hasPublicationYear",
+ "@type": "xsd:gYear",
+ },
+ "month": {
+ "@id": "prism:publicationDate",
+ "@type": "xsd:gMonth"
+ },
+}
+
+# JSON-LD context for phenotype terms.  The three dicts are merged left
+# to right, so on duplicate keys the literal below overrides
+# PUBLICATION_CONTEXT, which in turn overrides BASE_CONTEXT.  "prism" is
+# repeated here with the same value PUBLICATION_CONTEXT already supplies.
+PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | {
+ "skos": "http://www.w3.org/2004/02/skos/core#",
+ "dcat": "http://www.w3.org/ns/dcat#",
+ "prism": "http://prismstandard.org/namespaces/basic/2.0/",
+ "traitName": "skos:altLabel",
+ "trait": "rdfs:label",
+ # NOTE(review): this uses the rdfs: prefix for altLabel while
+ # "traitName" above uses skos:altLabel -- confirm the prefix is
+ # intended and matches what the store emits.
+ "altName": "rdfs:altLabel",
+ "description": "dct:description",
+ "abbreviation": "gnt:abbreviation",
+ "labCode": "gnt:labCode",
+ "submitter": "gnt:submitter",
+ "dataset": "dcat:Distribution",
+ "contributor": "dct:contributor",
+ "mean": "gnt:mean",
+ "locus": "gnt:locus",
+ "lodScore": "gnt:lodScore",
+ "references": "dct:isReferencedBy",
+ "additive": "gnt:additive",
+ "sequence": "gnt:sequence",
+ "prefLabel": "skos:prefLabel",
+ "identifier": "dct:identifier",
+ "chromosome": "gnt:chr",
+ "mb": "gnt:mb",
+ # Same IRI as "locus" above: two aliases for gnt:locus.
+ "peakLocation": "gnt:locus",
+ "species": "gnt:belongsToSpecies",
+ "group": "gnt:belongsToGroup",
+}
+
+
+def sparql_construct_query(query: str, endpoint: str) -> dict:
+    """Run a CONSTRUCT query against virtuoso and return JSON-LD data.
+
+    The converted query result is serialized as JSON-LD and then parsed
+    back with json.loads; the parsed value is what gets returned.
+    """
+    client = SPARQLWrapper(endpoint)
+    client.setQuery(query)
+    graph = client.queryAndConvert()
+    serialized = graph.serialize(format="json-ld")  # type: ignore
+    return json.loads(serialized)
+
+
+def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict:
+    """Run the query, frame its result with the context, then compact
+    the framed document against the same context."""
+    raw = sparql_construct_query(query, endpoint)
+    framed = jsonld.frame(raw, context)
+    return jsonld.compact(framed, context)
+
+
+def query_and_compact(query: str, context: dict, endpoint: str) -> dict:
+    """Run the query and compact its result against the context."""
+    return jsonld.compact(sparql_construct_query(query, endpoint), context)
+
+
+def query_and_frame(query: str, context: dict, endpoint: str) -> dict:
+    """Run the query and frame its result with the context."""
+    return jsonld.frame(sparql_construct_query(query, endpoint), context)
diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py
new file mode 100644
index 0000000..1fc3130
--- /dev/null
+++ b/gn3/db/rdf/wiki.py
@@ -0,0 +1,91 @@
+"""Sparql queries to get metadata about WIKI and RIF metadata.
+
+"""
+from string import Template
+from gn3.db.rdf import (BASE_CONTEXT, RDF_PREFIXES,
+ query_frame_and_compact)
+
+
+def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict:
+    """Fetch all the Wiki entries for a symbol.
+
+    The symbol is compared case-insensitively (LCASE) with rdfs:label.
+    A sub-query selects MAX(dct:hasVersion) alongside each
+    dct:identifier, and the outer pattern keeps only the entries whose
+    version equals that maximum, i.e. the latest comment per id.
+
+    Args:
+        symbol: Symbol to look up.  It is escaped before being embedded
+            in the query's single-quoted string literals.
+        sparql_uri: URL of the SPARQL endpoint to query.
+
+    Returns:
+        The document produced by query_frame_and_compact, returned
+        as-is whether or not it contains a "data" member.
+    """
+    # SYMBOL ends up inside single-quoted SPARQL literals.  Escape the
+    # characters that could terminate the literal so that a crafted
+    # symbol cannot change the shape of the query.
+    safe_symbol = (symbol.replace("\\", "\\\\")
+                   .replace("'", "\\'")
+                   .replace("\n", "\\n")
+                   .replace("\r", "\\r"))
+    # ?versionId is derived from ?max, the version chosen by the
+    # sub-query; no other version variable is bound in the outer query.
+    # NOTE(review): the WHERE clause reads ?email from gnt:mbox while the
+    # CONSTRUCT template emits foaf:mbox -- confirm which predicate the
+    # store actually uses.
+    query = Template("""
+$prefix
+
+CONSTRUCT {
+ ?uid rdfs:label ?symbolName;
+ gnt:reason ?reason ;
+ gnt:species ?species ;
+ dct:references ?pmid ;
+ foaf:homepage ?weburl ;
+ rdfs:comment ?comment ;
+ foaf:mbox ?email ;
+ gnt:initial ?usercode ;
+ gnt:belongsToCategory ?category ;
+ gnt:hasVersion ?versionId ;
+ dct:created ?created ;
+ dct:identifier ?identifier .
+} WHERE {
+ ?symbolId rdfs:label ?symbolName .
+ ?uid rdfs:comment ?comment ;
+ gnt:symbol ?symbolId ;
+ rdf:type gnc:GNWikiEntry ;
+ dct:created ?createTime .
+ FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+ {
+ SELECT (MAX(?vers) AS ?max) ?id_ WHERE {
+ ?symbolId rdfs:label ?symbolName .
+ ?uid dct:identifier ?id_ ;
+ dct:hasVersion ?vers ;
+ dct:identifier ?id_ ;
+ gnt:symbol ?symbolId .
+ FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) .
+ }
+ }
+ ?uid dct:hasVersion ?max ;
+ dct:identifier ?id_ .
+ OPTIONAL { ?uid gnt:reason ?reason } .
+ OPTIONAL {
+ ?uid gnt:belongsToSpecies ?speciesId .
+ ?speciesId gnt:shortName ?species .
+ } .
+ OPTIONAL { ?uid dct:references ?pubmedId . } .
+ OPTIONAL { ?uid foaf:homepage ?weburl . } .
+ OPTIONAL { ?uid gnt:initial ?usercode . } .
+ OPTIONAL { ?uid gnt:mbox ?email . } .
+ OPTIONAL { ?uid gnt:belongsToCategory ?category . } .
+ BIND (str(?max) AS ?versionId) .
+ BIND (str(?id_) AS ?identifier) .
+ BIND (str(?pubmedId) AS ?pmid) .
+ BIND (str(?createTime) AS ?created) .
+}
+""").substitute(prefix=RDF_PREFIXES, symbol=safe_symbol)
+    context = BASE_CONTEXT | {
+        "foaf": "http://xmlns.com/foaf/0.1/",
+        "dct": "http://purl.org/dc/terms/",
+        "categories": "gnt:belongsToCategory",
+        "web_url": "foaf:homepage",
+        "version": "gnt:hasVersion",
+        "symbol": "rdfs:label",
+        "reason": "gnt:reason",
+        "species": "gnt:species",
+        "pubmed_id": "dct:references",
+        "email": "foaf:mbox",
+        "initial": "gnt:initial",
+        "comment": "rdfs:comment",
+        "created": "dct:created",
+        "id": "dct:identifier",
+        # This points to the RDF Node which is the unique identifier
+        # for this triplet. It's constructed using the comment-id and
+        # the comment-versionId
+        "wiki_identifier": "@id",
+    }
+    return query_frame_and_compact(query, context, sparql_uri)