diff options
Diffstat (limited to 'gn3/db/rdf/__init__.py')
-rw-r--r-- | gn3/db/rdf/__init__.py | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/gn3/db/rdf/__init__.py b/gn3/db/rdf/__init__.py new file mode 100644 index 0000000..c763810 --- /dev/null +++ b/gn3/db/rdf/__init__.py @@ -0,0 +1,188 @@ +"""RDF + +Constants for prefixes and contexts; and wrapper functions around +creating contexts to be used by jsonld when framing and/or compacting. + +""" +import json + +from SPARQLWrapper import SPARQLWrapper +from pyld import jsonld # type: ignore + + +PREFIXES = { + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "fabio": "http://purl.org/spar/fabio/", + "foaf": "http://xmlns.com/foaf/0.1/", + "generif": "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=", + "genotype": "http://genenetwork.org/genotype/", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "owl": "http://www.w3.org/2002/07/owl#", + "phenotype": "http://genenetwork.org/phenotype/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "publication": "http://genenetwork.org/publication/", + "pubmed": "http://rdf.ncbi.nlm.nih.gov/pubmed/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "taxon": "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=", + "up": "http://purl.uniprot.org/core/", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "xsd": "http://www.w3.org/2001/XMLSchema#", +} + +RDF_PREFIXES = "\n".join([f"PREFIX {key}: <{value}>" + for key, value in PREFIXES.items()]) + +BASE_CONTEXT = { + "data": "@graph", + "type": "@type", + "gn": "http://genenetwork.org/id/", + "gnc": "http://genenetwork.org/category/", + "gnt": "http://genenetwork.org/term/", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#>", +} + +DATASET_CONTEXT = { + "accessRights": "dct:accessRights", + "accessionId": "dct:identifier", + "acknowledgement": "gnt:hasAcknowledgement", + "altLabel": "skos:altLabel", + "caseInfo": "gnt:hasCaseInfo", + "classifiedUnder": "xkos:classifiedUnder", + "contributors": "dct:creator", + "contactPoint": "dcat:contactPoint", + "created": "dct:created", + "dcat": "http://www.w3.org/ns/dcat#", + "dct": "http://purl.org/dc/terms/", + "description": "dct:description", + "ex": "http://example.org/stuff/1.0/", + "experimentDesignInfo": "gnt:hasExperimentDesignInfo", + "experimentType": "gnt:hasExperimentType", + "foaf": "http://xmlns.com/foaf/0.1/", + "geoSeriesId": "gnt:hasGeoSeriesId", + "gnt": "http://genenetwork.org/term/", + "inbredSet": "gnt:belongsToGroup", + "label": "rdfs:label", + "normalization": "gnt:usesNormalization", + "platformInfo": "gnt:hasPlatformInfo", + "notes": "gnt:hasNotes", + "organization": "foaf:Organization", + "prefLabel": "skos:prefLabel", + "citation": "dct:isReferencedBy", + "GoTree": "gnt:hasGOTreeValue", + "platform": "gnt:usesPlatform", + "processingInfo": "gnt:hasDataProcessingInfo", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "specifics": "gnt:hasContentInfo", + "title": "dct:title", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", + "tissueInfo": "gnt:hasTissueInfo", + "tissue": "gnt:hasTissue", + "contactWebUrl": "foaf:homepage", + "contactName": "foaf:name", +} + +SEARCH_CONTEXT = { + "pages": "ex:pages", + "hits": "ex:hits", + "result": "ex:result", + "results": "ex:items", + "resultItem": "ex:resultType", + "currentPage": "ex:currentPage", +} + +DATASET_SEARCH_CONTEXT = SEARCH_CONTEXT | { + "classifiedUnder": "xkos:classifiedUnder", + "created": "dct:created", + "dct": "http://purl.org/dc/terms/", + "ex": "http://example.org/stuff/1.0/", + "inbredSet": "ex:belongsToInbredSet", + "title": "dct:title", + "name": "rdfs:label", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "type": "@type", + "xkos": "http://rdf-vocabulary.ddialliance.org/xkos#", +} + +PUBLICATION_CONTEXT = { + "dct": "http://purl.org/dc/terms/", + "fabio": "http://purl.org/spar/fabio/", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "title": "dct:title", + "journal": "fabio:Journal", + "volume": "prism:volume", + "page": "fabio:page", + "creator": "dct:creator", + "abstract": "dct:abstract", + "year": { + "@id": "fabio:hasPublicationYear", + "@type": "xsd:gYear", + }, + "month": { + "@id": "prism:publicationDate", + "@type": "xsd:gMonth" + }, +} + +PHENOTYPE_CONTEXT = BASE_CONTEXT | PUBLICATION_CONTEXT | { + "skos": "http://www.w3.org/2004/02/skos/core#", + "dcat": "http://www.w3.org/ns/dcat#", + "prism": "http://prismstandard.org/namespaces/basic/2.0/", + "traitName": "skos:altLabel", + "trait": "rdfs:label", + "altName": "rdfs:altLabel", + "description": "dct:description", + "abbreviation": "gnt:abbreviation", + "labCode": "gnt:labCode", + "submitter": "gnt:submitter", + "dataset": "dcat:Distribution", + "contributor": "dct:contributor", + "mean": "gnt:mean", + "locus": "gnt:locus", + "lodScore": "gnt:lodScore", + "references": "dct:isReferencedBy", + "additive": "gnt:additive", + "sequence": "gnt:sequence", + "prefLabel": "skos:prefLabel", + "identifier": "dct:identifier", + "chromosome": "gnt:chr", + "mb": "gnt:mb", + "peakLocation": "gnt:locus", + "species": "gnt:belongsToSpecies", + "group": "gnt:belongsToGroup", +} + + +def sparql_construct_query(query: str, endpoint: str) -> dict: + """Query virtuoso using a CONSTRUCT query and return a json-ld + dictionary""" + sparql = SPARQLWrapper(endpoint) + sparql.setQuery(query) + results = sparql.queryAndConvert() + return json.loads(results.serialize(format="json-ld")) # type: ignore + + +def query_frame_and_compact(query: str, context: dict, endpoint: str) -> dict: + """Frame and then compact the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.compact(jsonld.frame(results, context), context) + + +def query_and_compact(query: str, context: dict, endpoint: str) -> dict: + """Compact the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.compact(results, context) + + +def query_and_frame(query: str, context: dict, endpoint: str) -> dict: + """Frame the results given a context""" + results = sparql_construct_query(query, endpoint) + return jsonld.frame(results, context) |