diff options
author | Munyoki Kilyungi | 2024-08-29 22:32:35 +0300 |
---|---|---|
committer | BonfaceKilz | 2024-09-05 16:39:14 +0300 |
commit | ea1880d48734b271172497fc205ba7f28706ba2a (patch) | |
tree | 10772578bf996c6746d7ac86010c7702e8dd68e9 /gn3/db/rdf | |
parent | 1cb8b2c3b242522461f7db98008a9e7e882bee9a (diff) | |
download | genenetwork3-ea1880d48734b271172497fc205ba7f28706ba2a.tar.gz |
Restructure RDF module.
* gn3/api/metadata.py: Import constants from gn3.db.rdf
* gn3/api/metadata_api/wiki.py: Ditto. Import
"get_wiki_entries_by_symbol" from gn3.db.rdf.wiki.
* gn3/db/constants.py: Delete file and move all constants ...
* gn3/db/rdf.py: ... and functions ...
* gn3/db/rdf/__init__.py: ... here.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/db/rdf')
-rw-r--r-- | gn3/db/rdf/__init__.py | 188 | ||||
-rw-r--r-- | gn3/db/rdf/wiki.py | 91 |
2 files changed, 279 insertions, 0 deletions
diff --git a/gn3/db/rdf/__init__.py b/gn3/db/rdf/__init__.py new file mode 100644 index 0000000..c763810 --- /dev/null +++ b/gn3/db/rdf/__init__.py @@ -0,0 +1,188 @@ +"""RDF + +Constants for prefixes and contexts; and wrapper functions around +creating contexts to be used by jsonld when framing and/or compacting. + +""" +import json + +from SPARQLWrapper import SPARQLWrapper +from pyld import jsonld # type: ignore + + +PREFIXES = { + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "fabio": "http://purl.org/spar/fabio/", + "foaf": "http://xmlns.com/foaf/0.1/", + "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=", + "genotype": "http://genenetwork.org/genotype/", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "owl": "http://www.w3.org/2002/07/owl#", + "phenotype": "http://genenetwork.org/phenotype/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "publication": "http://genenetwork.org/publication/", + "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", + "up": "http://purl.uniprot.org/core/", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "xsd": "http://www.w3.org/2001/XMLSchema#", +} + +RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>" + for key, value in PREFIXES.items()]) + +BASE_CONTEXT = { + "data": "@graph", + "type": "@type", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", +} + +DATASET_CONTEXT = { + "accessRights": "dct:accessRights", + "accessionId": "dct:identifier", + "acknowledgement": "gnt:hasAcknowledgement", + "altLabel": "skos:altLabel", + "caseInfo": "gnt:hasCaseInfo", + "classifiedUnder": "xkos:classifiedUnder", + "contributors": "dct:creator", + "contactPoint": "dcat:contactPoint", + "created": "dct:created", + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "description": "dct:description", + "ex": "http://example.org/stuff/1.0/", + "experimentDesignInfo": "gnt:hasExperimentDesignInfo", + "experimentType": "gnt:hasExperimentType", + "foaf": "http://xmlns.com/foaf/0.1/", + "geoSeriesId": "gnt:hasGeoSeriesId", + "gnt": "http://genenetwork.org/term/", + "inbredSet": "gnt:belongsToGroup", + "label": "rdfs:label", + "normalization": "gnt:usesNormalization", + "platformInfo": "gnt:hasPlatformInfo", + "notes": "gnt:hasNotes", + "organization": "foaf:Organization", + "prefLabel": "skos:prefLabel", + "citation": "dct:isReferencedBy", + "GoTree": "gnt:hasGOTreeValue", + "platform": "gnt:usesPlatform", + "processingInfo": "gnt:hasDataProcessingInfo", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "specifics": "gnt:hasContentInfo", + "title": "dct:title", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "tissueInfo": "gnt:hasTissueInfo", + "tissue": "gnt:hasTissue", + "contactWebUrl": "foaf:homepage", + "contactName": "foaf:name", +} + +SEARCH_CONTEXT = { + "pages": "ex:pages", + "hits": "ex:hits", + "result": "ex:result", + "results": "ex:items", + "resultItem": "ex:resultType", + "currentPage": "ex:currentPage", +} + +DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | { + "classifiedUnder": "xkos:classifiedUnder", + "created": "dct:created", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "inbredSet": "ex:belongsToInbredSet", + "title": "dct:title", + "name": "rdfs:label", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "type": "@type", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", +} + +PUBLICATION_CONTEXT = { + "dct": "http://purl.org/dc/terms/", + "fabio": "http://purl.org/spar/fabio/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "title": "dct:title", + "journal": "fabio:Journal", + "volume": "prism:volume", + "page": "fabio:page", + "creator": "dct:creator", + "abstract": "dct:abstract", + "year": { + "@id": "fabio:hasPublicationYear", + "@type": "xsd:gYear", + }, + "month": { + "@id": "prism:publicationDate", + "@type": "xsd:gMonth" + }, +} + +PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | { + "skos": "http://www.w3.org/2004/02/skos/core#", + "dcat": "http://www.w3.org/ns/dcat#", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "traitName": "skos:altLabel", + "trait": "rdfs:label", + "altName": "rdfs:altLabel", + "description": "dct:description", + "abbreviation": "gnt:abbreviation", + "labCode": "gnt:labCode", + "submitter": "gnt:submitter", + "dataset": "dcat:Distribution", + "contributor": "dct:contributor", + "mean": "gnt:mean", + "locus": "gnt:locus", + "lodScore": "gnt:lodScore", + "references": "dct:isReferencedBy", + "additive": "gnt:additive", + "sequence": "gnt:sequence", + "prefLabel": "skos:prefLabel", + "identifier": "dct:identifier", + "chromosome": "gnt:chr", + "mb": "gnt:mb", + "peakLocation": "gnt:locus", + "species": "gnt:belongsToSpecies", + "group": "gnt:belongsToGroup", +} + + +def sparql_construct_query(query: str, endpoint: str) -> dict: + """Query virtuoso using a CONSTRUCT query and return a json-ld + dictionary""" + sparql = SPARQLWrapper(endpoint) + sparql.setQuery(query) + results = sparql.queryAndConvert() + return json.loads(results.serialize(format="json-ld")) # type: ignore + + +def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict: + """Frame and then compact the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.compact(jsonld.frame(results, context), context) + + +def query_and_compact(query: str, context: dict, endpoint: str) -> dict: + """Compact the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.compact(results, context) + + +def query_and_frame(query: str, context: dict, endpoint: str) -> dict: + """Frame the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.frame(results, context) diff --git a/gn3/db/rdf/wiki.py b/gn3/db/rdf/wiki.py new file mode 100644 index 0000000..1fc3130 --- /dev/null +++ b/gn3/db/rdf/wiki.py @@ -0,0 +1,91 @@ +"""Sparql queries to get metadata about WIKI and RIF metadata. + +""" +from string import Template +from gn3.db.rdf import (BASE_CONTEXT, RDF_PREFIXES, + query_frame_and_compact) + + +def get_wiki_entries_by_symbol(symbol: str, sparql_uri: str) -> dict: + """Fetch all the Wiki entries using the symbol""" + # This query uses a sub-query to fetch the latest comment by the + # version id. + query = Template(""" +$prefix + +CONSTRUCT { + ?uid rdfs:label ?symbolName; + gnt:reason ?reason ; + gnt:species ?species ; + dct:references ?pmid ; + foaf:homepage ?weburl ; + rdfs:comment ?comment ; + foaf:mbox ?email ; + gnt:initial ?usercode ; + gnt:belongsToCategory ?category ; + gnt:hasVersion ?versionId ; + dct:created ?created ; + dct:identifier ?identifier . +} WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid rdfs:comment ?comment ; + gnt:symbol ?symbolId ; + rdf:type gnc:GNWikiEntry ; + dct:created ?createTime . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + { + SELECT (MAX(?vers) AS ?max) ?id_ WHERE { + ?symbolId rdfs:label ?symbolName . + ?uid dct:identifier ?id_ ; + dct:hasVersion ?vers ; + dct:identifier ?id_ ; + gnt:symbol ?symbolId . + FILTER ( LCASE(?symbolName) = LCASE('$symbol') ) . + } + } + ?uid dct:hasVersion ?max ; + dct:identifier ?id_ . + OPTIONAL { ?uid gnt:reason ?reason } . + OPTIONAL { + ?uid gnt:belongsToSpecies ?speciesId . + ?speciesId gnt:shortName ?species . + } . + OPTIONAL { ?uid dct:references ?pubmedId . } . + OPTIONAL { ?uid foaf:homepage ?weburl . } . + OPTIONAL { ?uid gnt:initial ?usercode . } . + OPTIONAL { ?uid gnt:mbox ?email . } . + OPTIONAL { ?uid gnt:belongsToCategory ?category . } . + BIND (str(?version) AS ?versionId) . + BIND (str(?id_) AS ?identifier) . + BIND (str(?pubmedId) AS ?pmid) . + BIND (str(?createTime) AS ?created) . +} +""").substitute(prefix=RDF_PREFIXES, symbol=symbol,) + context = BASE_CONTEXT | { + "foaf": "http://xmlns.com/foaf/0.1/", + "dct": "http://purl.org/dc/terms/", + "categories": "gnt:belongsToCategory", + "web_url": "foaf:homepage", + "version": "gnt:hasVersion", + "symbol": "rdfs:label", + "reason": "gnt:reason", + "species": "gnt:species", + "pubmed_id": "dct:references", + "email": "foaf:mbox", + "initial": "gnt:initial", + "comment": "rdfs:comment", + "created": "dct:created", + "id": "dct:identifier", + # This points to the RDF Node which is the unique identifier + # for this triplet. It's constructed using the comment-id and + # the comment-versionId + "wiki_identifier": "@id", + } + results = query_frame_and_compact( + query, context, + sparql_uri + ) + data = results.get("data") + if not data: + return results + return results |