about | summary | refs | log | tree | commit | diff
path: root/gn3
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-10-11 17:28:09 +0300
committer: BonfaceKilz, 2023-10-27 13:45:32 +0300
commit: 50349f7e067bd894d4aa4865ded5589ee1fcd9e9 (patch)
tree: af219a41a281e565257925754b78fdcd01116639 /gn3
parent: 5e17b469b29419098eeb81a5537426f0d0b26aec (diff)
download: genenetwork3-50349f7e067bd894d4aa4865ded5589ee1fcd9e9.tar.gz
Implement "GET /metadata/datasets/:name".
* gn3/api/metadata.py: Import json, SPARQLWrapper.{JSON, JSONLD}.
(dataset): Rename this to ...
(datasets): ... this. Return a well formatted JSONLD result from a dataset.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3')
-rw-r--r-- gn3/api/metadata.py 109
-rw-r--r-- gn3/db/rdf.py 42
2 files changed, 100 insertions, 51 deletions
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index df3c4a9..877ff7c 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -1,4 +1,6 @@
"""API for fetching metadata using an API"""
+import json
+
from string import Template
from http.client import RemoteDisconnected
from urllib.error import URLError
@@ -6,29 +8,118 @@ from flask import Blueprint
from flask import jsonify
from flask import current_app
-from SPARQLWrapper import SPARQLWrapper
+from pyld import jsonld
+from SPARQLWrapper import JSON, JSONLD, SPARQLWrapper
from gn3.db.rdf import get_dataset_metadata
from gn3.db.rdf import get_publication_metadata
from gn3.db.rdf import get_phenotype_metadata
from gn3.db.rdf import get_genotype_metadata
from gn3.db.rdf import sparql_query
-from gn3.db.rdf import RDF_PREFIXES
+from gn3.db.rdf import RDF_PREFIXES, PREFIXES
metadata = Blueprint("metadata", __name__)
-@metadata.route("/dataset/<name>", methods=["GET"])
-def dataset(name):
+@metadata.route("/datasets/<name>", methods=["GET"])
+def datasets(name):
"""Fetch a dataset's metadata given it's ACCESSION_ID or NAME"""
try:
- return jsonify(
- get_dataset_metadata(
- SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT")),
- name,
- ).data
+ sparql = SPARQLWrapper(current_app.config.get("SPARQL_ENDPOINT"))
+ sparql.setQuery(Template("""
+$prefix
+
+CONSTRUCT {
+ ?dataset ?predicate ?term ;
+ rdf:type dcat:Dataset ;
+ ex:belongsToInbredSet ?inbredSetName ;
+ gnt:usesNormalization ?normalizationLabel ;
+ dcat:contactPoint ?investigatorName ;
+ xkos:classifiedUnder ?altName ;
+ ex:platform ?platform ;
+ ex:tissue ?tissue .
+ ?platform ?platformPred ?platformObject ;
+ ex:info ?platformInfo .
+ ?tissue rdfs:label ?tissueName ;
+ rdf:type gnc:tissue ;
+ ex:info ?tissueInfo .
+} WHERE {
+ ?dataset rdf:type dcat:Dataset ;
+ xkos:classifiedUnder ?inbredSet ;
+ rdfs:label "$name" .
+ OPTIONAL {
+ ?inbredSet ^skos:member gnc:Set ;
+ rdfs:label ?inbredSetName .
+ } .
+ OPTIONAL {
+ ?type ^xkos:classifiedUnder ?dataset ;
+ ^skos:member gnc:DatasetType ;
+ skos:prefLabel ?altName .
+ } .
+ OPTIONAL {
+ ?normalization ^gnt:usesNormalization ?dataset ;
+ rdfs:label ?normalizationLabel .
+ } .
+ OPTIONAL {
+ ?investigator foaf:name ?investigatorName ;
+ ^dcat:contactPoint ?dataset .
+ } .
+ OPTIONAL {
+ ?platform ^gnt:usesPlatform ?dataset ;
+ ?platformPred ?platformObject .
+ } .
+ OPTIONAL {
+ ?dataset gnt:hasPlatformInfo ?platformInfo .
+ } .
+ OPTIONAL {
+ ?dataset gnt:hasTissueInfo ?tissueInfo .
+ } .
+ OPTIONAL {
+ ?dataset gnt:hasTissue ?tissue .
+ ?tissue rdfs:label ?tissueName .
+ } .
+ FILTER (!regex(str(?predicate), '(classifiedUnder|usesNormalization|contactPoint|hasPlatformInfo|tissueInfo)', 'i')) .
+ FILTER (!regex(str(?platformPred), '(classifiedUnder|geoSeriesId|hasGoTreeValue)', 'i')) .
+}""").substitute(prefix=RDF_PREFIXES, name=name))
+ results = sparql.queryAndConvert()
+ results = json.loads(
+ results.serialize(format="json-ld")
)
+ frame = {
+ "@context": PREFIXES | {
+ "data": "@graph",
+ "type": "@type",
+ "id": "@id",
+ "inbredSet": "ex:belongsToInbredSet",
+ "description": "dct:description",
+ "created": "dct:created",
+ "normalization": "gnt:usesNormalization",
+ "classifiedUnder": "xkos:classifiedUnder",
+ "accessRights": "dct:accessRights",
+ "accessionId": "dct:identifier",
+ "title": "dct:title",
+ "label": "rdfs:label",
+ "altLabel": "skos:altLabel",
+ "prefLabel": "skos:prefLabel",
+ "contactPoint": "dcat:contactPoint",
+ "organization": "foaf:Organization",
+ "info": "ex:info",
+ "caseInfo": "gnt:hasCaseInfo",
+ "geoSeriesId": "gnt:hasGeoSeriesId",
+ "experimentDesignInfo": "gnt:hasExperimentDesignInfo",
+ "notes": "gnt:hasNotes",
+ "processingInfo": "gnt:hasDataProcessingInfo",
+ "acknowledgement": "gnt:hasAcknowledgement",
+ "tissue": "ex:tissue",
+ "platform": "ex:platform",
+ },
+ "type": "dcat:Dataset",
+ }
+ return jsonld.compact(jsonld.frame(results, frame), frame)
+ # The virtuoso server is misconfigured or it isn't running at all
+ except (RemoteDisconnected, URLError):
+ return jsonify({})
# The virtuoso server is misconfigured or it isn't running at all
except (RemoteDisconnected, URLError):
return jsonify({})
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index 472da64..c5a75f2 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -213,45 +213,3 @@ CONSTRUCT {
)[0].items():
result[key] = value
return result
-
-
-def get_genotype_metadata(
- sparql_conn: SPARQLWrapper, name: str
-):
- """Return info about a phenotype with a given NAME"""
- __metadata_query = """
-$prefix
-
-CONSTRUCT {
- ?genotype ?pPredicate ?pValue .
- ?genotype gn:speciesName ?speciesName .
- ?genotype gn:inbredSetName ?inbredSetBinomialName .
- ?genotype gn:datasetName ?datasetFullName .
-} WHERE {
- ?genotype ?pPredicate ?pValue .
- OPTIONAL {
- ?genotype gn:genotypeOfDataset ?dataset .
- ?dataset gn:fullName ?datasetFullName .
- }.
- OPTIONAL {
- ?genotype gn:genotypeOfDataset ?dataset .
- ?dataset gn:datasetOfInbredSet ?inbredSet .
- ?inbredSet gn:binomialName ?inbredSetBinomialName .
- ?inbredSet gn:inbredSetOfSpecies ?species .
- ?species gn:displayName ?speciesName .
- } .
- FILTER( ?genotype = genotype:$name ) .
- MINUS {
- ?genotype rdf:type ?pValue .
- }
-}
-"""
- result: MonadicDict = MonadicDict()
- for key, value in sparql_query(
- sparql_conn,
- Template(__metadata_query)
- .substitute(name=name,
- prefix=RDF_PREFIXES)
- )[0].items():
- result[key] = value
- return result