"""RDF utilities
This module is a collection of functions that handle SPARQL queries.
"""
from typing import Tuple
from string import Template
from SPARQLWrapper import JSON, SPARQLWrapper
from pymonad.maybe import Just
from gn3.monads import MonadicDict
RDF_PREFIXES = """PREFIX dct:
PREFIX foaf:
PREFIX generif:
PREFIX gn:
PREFIX owl:
PREFIX pubmed:
PREFIX rdf:
PREFIX rdfs:
PREFIX taxon:
PREFIX up:
PREFIX xsd:
"""
def sparql_query(
sparql_conn: SPARQLWrapper, query: str
) -> Tuple[MonadicDict, ...]:
"""Run a SPARQL query and return the bound variables."""
sparql_conn.setQuery(query)
sparql_conn.setReturnFormat(JSON)
results = sparql_conn.queryAndConvert()
if _r := results["results"]["bindings"]: # type: ignore
return (*(MonadicDict(bindings) for bindings in _r),) # type: ignore
return (MonadicDict(),)
def get_dataset_metadata(
sparql_conn: SPARQLWrapper, name: str
) -> MonadicDict:
"""Return info about dataset with a given NAME"""
__metadata_query = """
PREFIX gn:
SELECT ?accession_id ?dataset_group ?status ?title ?geo_series ?specifics ?summary ?about_tissue
?about_platform ?about_data_processing ?notes ?experiment_design ?contributors ?citation ?acknowledgement
?platform_name ?tissue_name ?normalization_name ?species_name ?inbred_set_name
?name ?address ?city ?state ?zip ?phone ?email ?country ?homepage
WHERE {
?dataset gn:accessionId ?accession_id ;
rdf:type gn:dataset ;
gn:name "$name" .
OPTIONAL { ?dataset gn:aboutDataProcessing ?about_data_processing } .
OPTIONAL { ?dataset gn:aboutPlatform ?about_platform } .
OPTIONAL { ?dataset gn:aboutTissue ?about_tissue } .
OPTIONAL { ?dataset gn:acknowledgement ?acknowledgement } .
OPTIONAL { ?dataset gn:citation ?citation } .
OPTIONAL { ?dataset gn:contributors ?contributors } .
OPTIONAL { ?dataset gn:datasetGroup ?dataset_group } .
OPTIONAL { ?dataset gn:datasetStatus ?status } .
OPTIONAL { ?dataset gn:experimentDesign ?experiment_design } .
OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
OPTIONAL { ?dataset gn:notes ?notes } .
OPTIONAL { ?dataset gn:specifics ?specifics } .
OPTIONAL { ?dataset gn:summary ?summary } .
OPTIONAL { ?dataset gn:title ?title } .
OPTIONAL {
?dataset gn:normalization ?normalization .
?normalization gn:name ?normalization_name .
} .
OPTIONAL {
?dataset gn:datasetOfPlatform ?platform .
?platform gn:name ?platform_name .
} .
OPTIONAL {
?dataset gn:datasetOfTissue ?tissue .
?tissue gn:name ?tissue_name .
} .
OPTIONAL {
?dataset gn:datasetOfSpecies ?species ;
gn:datasetOfInbredSet ?inbred_set .
?species gn:name ?species_name .
?inbred_set gn:name ?inbred_set_name .
} .
OPTIONAL {
?dataset gn:datasetOfInvestigator ?investigator .
OPTIONAL { ?investigator foaf:name ?name . }
OPTIONAL { ?investigator gn:address ?address . }
OPTIONAL { ?investigator gn:city ?city . }
OPTIONAL { ?investigator gn:state ?state . }
OPTIONAL { ?investigator gn:zipCode ?zip . }
OPTIONAL { ?investigator foaf:phone ?phone . }
OPTIONAL { ?investigator foaf:mbox ?email . }
OPTIONAL { ?investigator gn:country ?country . }
OPTIONAL { ?investigator foaf:homepage ?homepage . }
}
}
"""
result: MonadicDict = MonadicDict()
for key, value in sparql_query(
sparql_conn,
Template(__metadata_query).substitute(name=name)
)[0].items():
result[key] = value.bind(lambda x: Just(x["value"]))
return result
def get_trait_metadata(
sparql_conn: SPARQLWrapper,
trait_name: str,
dataset_name: str
):
"""Return metadata about a given trait"""
__metadata_query = """
PREFIX gn:
SELECT strafter((str(?key)), "http://genenetwork.org/sampledata:") as ?key
?value WHERE {
gn:sampledata_$trait_name gn:sampledata:dataset "$dataset_name" .
gn:sampledata_$trait_name ?key ?value .
}
"""
result: MonadicDict = MonadicDict()
for _r in sparql_query(
sparql_conn,
Template(__metadata_query)
.substitute(trait_name=trait_name,
dataset_name=dataset_name)
):
_key = _r["key"].bind(lambda x: x["value"]) # type:ignore
if _key:
result[_key] = _r["value"].bind(lambda x: Just(x["value"])) # type:ignore
return result