aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Munyoki Kilyungi, 2023-10-03 16:13:20 +0300
committer: BonfaceKilz, 2023-10-27 13:45:32 +0300
commit: 58047e6c7e3f7d4b7c3cdb859fae4e25c7e2605a (patch)
tree: bc21d6936b6db487bbbe4276826946c6f17e56fc
parent: 47a10086b305e062b0c7b32c40fe222714407084 (diff)
download: genenetwork3-58047e6c7e3f7d4b7c3cdb859fae4e25c7e2605a.tar.gz
Simplify CONSTRUCT query when fetching a dataset's metadata.
* gn3/api/metadata.py (dataset): Update docstring.
* gn3/db/rdf.py (get_dataset_metadata): Simplify CONSTRUCT query.
Also, now you can fetch metadata using either an accession_id or the
dataset's name.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--  gn3/api/metadata.py   2
-rw-r--r--  gn3/db/rdf.py        90
2 files changed, 27 insertions, 65 deletions
diff --git a/gn3/api/metadata.py b/gn3/api/metadata.py
index f21739a..df3c4a9 100644
--- a/gn3/api/metadata.py
+++ b/gn3/api/metadata.py
@@ -21,7 +21,7 @@ metadata = Blueprint("metadata", __name__)
@metadata.route("/dataset/<name>", methods=["GET"])
def dataset(name):
- """Fetch a dataset's metadata given it's ACCESSION_ID"""
+ """Fetch a dataset's metadata given it's ACCESSION_ID or NAME"""
try:
return jsonify(
get_dataset_metadata(
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index 80b32fa..78ddc17 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -89,80 +89,42 @@ def get_dataset_metadata(
sparql_conn: SPARQLWrapper, name: str
) -> MonadicDict:
"""Return info about dataset with a given NAME"""
- __metadata_query = """
+ response: MonadicDict = MonadicDict()
+ for key, value in sparql_query(
+ sparql_conn,
+ Template("""
$prefix
CONSTRUCT {
- gn:dataset ?datasetTerm ?datasetValue .
- gn:dataset ?platformName ?platform_name .
- gn:dataset gn:normalization ?normalization .
- gn:dataset gn:investigatorName ?investigatorName .
- gn:dataset gn:investigatorWebUrl ?investigatorWebUrl .
- gn:dataset gn:tissueName ?tissueName .
- gn:dataset gn:organism ?speciesDisplayName .
- gn:dataset gn:organismUrl ?ncbiReference .
- gn:dataset gn:inbredSetName ?inbredSetName .
- gn:dataset gn:geoPlatformUrl ?geoPlatform .
- gn:dataset gn:platformName ?platform_name .
+ ?dataset ?predicate ?term .
+ ?dataset gnt:classifiedUnder ?inbredSetName .
+ ?dataset gnt:usesNormalization ?normalizationLabel .
+ ?typePredicate ex:DatasetType ?typeName .
} WHERE {
- ?subClass rdf:subClassOf gn:dataset .
- ?dataset rdf:type ?subclass ;
- gn:name "$name";
- ?datasetTerm ?datasetValue .
- OPTIONAL {
- ?dataset gn:datasetOfInvestigator ?investigator .
- ?investigator foaf:name ?investigatorName .
- ?investigator foaf:homepage ?investigatorWebUrl .
- } .
- OPTIONAL{
- ?dataset gn:normalization ?normalizationType .
- ?normalizationType gn:name ?normalization .
- } .
- OPTIONAL{
- ?dataset gn:datasetOfSpecies ?species .
- ?species gn:displayName ?speciesDisplayName .
- ?species gn:organism ?ncbiReference .
- } .
- OPTIONAL {
- ?dataset gn:datasetOfInbredSet ?inbredSet .
- ?inbredSet gn:binomialName ?inbredSetName .
- ?inbredSet gn:inbredSetOfSpecies ?species .
- ?species gn:displayName ?speciesDisplayName .
- ?species gn:organism ?ncbiReference .
- } .
- OPTIONAL{
- ?dataset gn:datasetOfPlatform ?platform .
- ?platform gn:name ?platform_name .
- ?platform gn:geoPlatform ?geoPlatform .
- } .
- OPTIONAL{
- ?dataset gn:datasetOfTissue ?tissue .
- ?tissue gn:name ?tissueName .
- } .
- VALUES ?datasetTerm {
- dct:created gn:aboutCases gn:aboutDataProcessing gn:aboutPlatform
- gn:aboutTissue gn:accessionId gn:acknowledgment gn:citation
- gn:contributors gn:datasetGroup gn:datasetOfinvestigator
- gn:experimentDesign gn:geoSeries gn:name gn:notes
- gn:specifics gn:summary gn:title gn:publicationTitle
- gn:datasetStatusName gn:datasetOfOrganization
- }
-}
-"""
- response: MonadicDict = MonadicDict()
- for key, value in sparql_query(
- sparql_conn,
- Template(__metadata_query)
+ ?dataset rdf:type dcat:Dataset .
+ ?dataset ?predicate ?term .
+ ?dataset xkos:classifiedUnder ?inbredSet .
+ gnc:Set skos:member ?inbredSet .
+ ?dataset (rdfs:label|dct:identifier) "$name" .
+ ?inbredSet rdfs:label ?inbredSetName .
+ OPTIONAL {
+ ?dataset xkos:classifiedUnder ?type .
+ gnc:DatasetType skos:member ?type .
+ ?type ?typePredicate ?typeName .
+ ?type (skos:altLabel|skos:prefLabel) ?typeName .
+ } .
+ OPTIONAL {
+ ?dataset gnt:usesNormalization ?normalization .
+ ?normalization rdfs:label ?normalizationLabel .
+ }
+ FILTER (!regex(str(?predicate), '(classifiedUnder|usesNormalization)','i')) .
+}""")
.substitute(
prefix=RDF_PREFIXES,
name=name
)
)[0].items():
response[key] = value
- if isinstance(value, str) and not (
- key.endswith("Url") or key == "geoSeries"
- ):
- response[key] = value.map(get_url_local_name) # type: ignore
return response