"""RDF utilities

This module is a collection of functions that handle SPARQL queries.

"""
from typing import Tuple
from string import Template

from SPARQLWrapper import JSON, SPARQLWrapper
from pymonad.maybe import Just

from gn3.monads import MonadicDict

RDF_PREFIXES = """PREFIX dct: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX generif: <http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>
PREFIX gn: <http://genenetwork.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX pubmed: <http://rdf.ncbi.nlm.nih.gov/pubmed/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
"""

# Characters that must not appear unescaped inside a double-quoted SPARQL
# string literal, mapped to their backslash escapes.
_SPARQL_LITERAL_ESCAPES = str.maketrans({
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
})


def _escape_sparql_literal(value) -> str:
    """Return VALUE as text safe to embed between double quotes in SPARQL.

    VALUE is converted with ``str`` first, mirroring what
    ``Template.substitute`` does with non-string arguments.
    """
    return str(value).translate(_SPARQL_LITERAL_ESCAPES)


def sparql_query(
        sparql_conn: SPARQLWrapper, query: str
) -> Tuple[MonadicDict, ...]:
    """Run a SPARQL query and return the bound variables.

    QUERY is set on SPARQL_CONN, JSON is requested as the return format
    and the converted response is read at ``["results"]["bindings"]``.

    Returns a tuple holding one MonadicDict per binding row.  When there
    are no bindings, a one-element tuple containing an empty MonadicDict
    is returned, so indexing the result with ``[0]`` never fails.
    """
    sparql_conn.setQuery(query)
    sparql_conn.setReturnFormat(JSON)
    results = sparql_conn.queryAndConvert()
    if _r := results["results"]["bindings"]:  # type: ignore
        return tuple(MonadicDict(bindings) for bindings in _r)  # type: ignore
    return (MonadicDict(),)


def get_dataset_metadata(
        sparql_conn: SPARQLWrapper, name: str
) -> MonadicDict:
    """Return info about dataset with a given NAME

    Only the first row returned by the query is used.  Each entry of that
    row is reduced to its ``"value"`` field before being stored in the
    result.  NAME is escaped before it is placed in the query, so quotes,
    backslashes and line breaks are matched literally instead of altering
    the query text.
    """
    # `rdf:` and `foaf:` are declared explicitly (same IRIs as in
    # RDF_PREFIXES) so the query does not rely on the endpoint
    # predefining them.
    __metadata_query = """
PREFIX gn: <http://genenetwork.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?accession_id ?dataset_group ?status ?title ?geo_series
       ?specifics ?summary ?about_tissue ?about_platform
       ?about_data_processing ?notes ?experiment_design ?contributors
       ?citation ?acknowledgement ?platform_name ?tissue_name
       ?normalization_name ?species_name ?inbred_set_name
       ?name ?address ?city ?state ?zip ?phone ?email ?country ?homepage
WHERE {
  ?dataset gn:accessionId ?accession_id ;
           rdf:type gn:dataset ;
           gn:name "$name" .
  OPTIONAL { ?dataset gn:aboutDataProcessing ?about_data_processing } .
  OPTIONAL { ?dataset gn:aboutPlatform ?about_platform } .
  OPTIONAL { ?dataset gn:aboutTissue ?about_tissue } .
  OPTIONAL { ?dataset gn:acknowledgement ?acknowledgement } .
  OPTIONAL { ?dataset gn:citation ?citation } .
  OPTIONAL { ?dataset gn:contributors ?contributors } .
  OPTIONAL { ?dataset gn:datasetGroup ?dataset_group } .
  OPTIONAL { ?dataset gn:datasetStatus ?status } .
  OPTIONAL { ?dataset gn:experimentDesign ?experiment_design } .
  OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
  OPTIONAL { ?dataset gn:notes ?notes } .
  OPTIONAL { ?dataset gn:specifics ?specifics } .
  OPTIONAL { ?dataset gn:summary ?summary } .
  OPTIONAL { ?dataset gn:title ?title } .
  OPTIONAL {
    ?dataset gn:normalization ?normalization .
    ?normalization gn:name ?normalization_name .
  } .
  OPTIONAL {
    ?dataset gn:datasetOfPlatform ?platform .
    ?platform gn:name ?platform_name .
  } .
  OPTIONAL {
    ?dataset gn:datasetOfTissue ?tissue .
    ?tissue gn:name ?tissue_name .
  } .
  OPTIONAL {
    ?dataset gn:datasetOfSpecies ?species ;
             gn:datasetOfInbredSet ?inbred_set .
    ?species gn:name ?species_name .
    ?inbred_set gn:name ?inbred_set_name .
  } .
  OPTIONAL {
    ?dataset gn:datasetOfInvestigator ?investigator .
    OPTIONAL { ?investigator foaf:name ?name . }
    OPTIONAL { ?investigator gn:address ?address . }
    OPTIONAL { ?investigator gn:city ?city . }
    OPTIONAL { ?investigator gn:state ?state . }
    OPTIONAL { ?investigator gn:zipCode ?zip . }
    OPTIONAL { ?investigator foaf:phone ?phone . }
    OPTIONAL { ?investigator foaf:mbox ?email . }
    OPTIONAL { ?investigator gn:country ?country . }
    OPTIONAL { ?investigator foaf:homepage ?homepage . }
  }
}
"""
    result: MonadicDict = MonadicDict()
    first_row = sparql_query(
        sparql_conn,
        Template(__metadata_query).substitute(
            name=_escape_sparql_literal(name)
        )
    )[0]
    for key, value in first_row.items():
        result[key] = value.bind(lambda x: Just(x["value"]))
    return result


def get_trait_metadata(
        sparql_conn: SPARQLWrapper,
        trait_name: str,
        dataset_name: str
) -> MonadicDict:
    """Return metadata about a given trait

    Every row returned by the query contributes one entry to the result:
    the row's ``key`` binding (reduced to its ``"value"`` field) becomes
    the entry's key and the row's ``value`` binding (likewise reduced)
    becomes the entry's value.  DATASET_NAME is escaped before it is
    placed in the query.
    """
    # NOTE(review): $trait_name is spliced into a prefixed name
    # (gn:sampledata_<trait>), not a quoted literal, so literal escaping
    # does not apply to it; callers should pass only trusted trait names.
    __metadata_query = """
PREFIX gn: <http://genenetwork.org/>

SELECT strafter((str(?key)), "http://genenetwork.org/sampledata:") as ?key
       ?value
WHERE {
  gn:sampledata_$trait_name gn:sampledata:dataset "$dataset_name" .
  gn:sampledata_$trait_name ?key ?value .
}
"""
    result: MonadicDict = MonadicDict()
    for _r in sparql_query(
            sparql_conn,
            Template(__metadata_query).substitute(
                trait_name=trait_name,
                dataset_name=_escape_sparql_literal(dataset_name)
            )
    ):
        # The lambda returns the bare "value" field rather than a monad,
        # so for a bound row `_key` is that field itself.
        _key = _r["key"].bind(lambda x: x["value"])  # type:ignore
        if _key:
            result[_key] = _r["value"].bind(lambda x: Just(x["value"]))  # type:ignore
    return result