diff options
Diffstat (limited to 'gn3/db')
-rw-r--r-- | gn3/db/rdf.py | 125 |
1 files changed, 64 insertions, 61 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py index 21a5121..872fe01 100644 --- a/gn3/db/rdf.py +++ b/gn3/db/rdf.py @@ -9,7 +9,8 @@ from urllib.parse import unquote from urllib.parse import urlparse from SPARQLWrapper import JSON, SPARQLWrapper -from pymonad.maybe import Just +from pymonad.maybe import Just, Nothing + from gn3.monads import MonadicDict @@ -53,68 +54,70 @@ def get_dataset_metadata( ) -> MonadicDict: """Return info about dataset with a given NAME""" __metadata_query = """ -PREFIX gn: <http://genenetwork.org/> - -SELECT ?accession_id ?dataset_group ?status ?title ?geo_series ?specifics ?summary ?about_tissue -?about_platform ?about_data_processing ?notes ?experiment_design ?contributors ?citation ?acknowledgement -?platform_name ?tissue_name ?normalization_name ?species_name ?inbred_set_name -?name ?address ?city ?state ?zip ?phone ?email ?country ?homepage -WHERE { - ?dataset gn:accessionId ?accession_id ; - rdf:type gn:dataset ; - gn:name "$name" . - OPTIONAL { ?dataset gn:aboutDataProcessing ?about_data_processing } . - OPTIONAL { ?dataset gn:aboutPlatform ?about_platform } . - OPTIONAL { ?dataset gn:aboutTissue ?about_tissue } . - OPTIONAL { ?dataset gn:acknowledgement ?acknowledgement } . - OPTIONAL { ?dataset gn:citation ?citation } . - OPTIONAL { ?dataset gn:contributors ?contributors } . - OPTIONAL { ?dataset gn:datasetGroup ?dataset_group } . - OPTIONAL { ?dataset gn:datasetStatus ?status } . - OPTIONAL { ?dataset gn:experimentDesign ?experiment_design } . - OPTIONAL { ?dataset gn:geoSeries ?geo_series } . - OPTIONAL { ?dataset gn:notes ?notes } . - OPTIONAL { ?dataset gn:specifics ?specifics } . - OPTIONAL { ?dataset gn:summary ?summary } . - OPTIONAL { ?dataset gn:title ?title } . - OPTIONAL { - ?dataset gn:normalization ?normalization . - ?normalization gn:name ?normalization_name . - } . - OPTIONAL { - ?dataset gn:datasetOfPlatform ?platform . - ?platform gn:name ?platform_name . - } . - OPTIONAL { - ?dataset gn:datasetOfTissue ?tissue . - ?tissue gn:name ?tissue_name . - } . - OPTIONAL { - ?dataset gn:datasetOfSpecies ?species ; - gn:datasetOfInbredSet ?inbred_set . - ?species gn:name ?species_name . - ?inbred_set gn:name ?inbred_set_name . - } . - OPTIONAL { - ?dataset gn:datasetOfInvestigator ?investigator . - OPTIONAL { ?investigator foaf:name ?name . } - OPTIONAL { ?investigator gn:address ?address . } - OPTIONAL { ?investigator gn:city ?city . } - OPTIONAL { ?investigator gn:state ?state . } - OPTIONAL { ?investigator gn:zipCode ?zip . } - OPTIONAL { ?investigator foaf:phone ?phone . } - OPTIONAL { ?investigator foaf:mbox ?email . } - OPTIONAL { ?investigator gn:country ?country . } - OPTIONAL { ?investigator foaf:homepage ?homepage . } - } +$prefix + +CONSTRUCT { + gn:dataset ?datasetTerm ?datasetValue . + gn:dataset ?platformName ?platform_name . + gn:dataset gn:normalization ?normalization . + gn:dataset gn:investigatorName ?investigatorName . + gn:dataset gn:investigatorWebUrl ?investigatorWebUrl . + gn:dataset gn:tissueName ?tissueName . +} WHERE { + ?subClass rdf:subClassOf gn:dataset . + ?dataset rdf:type ?subclass ; + gn:name "$name"; + ?datasetTerm ?datasetValue . + OPTIONAL { + ?dataset gn:datasetOfInvestigator ?investigator . + ?investigator foaf:name ?investigatorName . + ?investigator foaf:homepage ?investigatorWebUrl . + } . + OPTIONAL{ + ?dataset gn:normalization ?normalizationType . + ?normalizationType gn:name ?normalization . + } . + OPTIONAL{ + ?dataset gn:datasetOfPlatform ?platform . + ?platform gn:name ?platform_name . + } . + OPTIONAL{ + ?dataset gn:datasetOfTissue ?tissue . + ?tissue gn:name ?tissueName . + } . + VALUES ?datasetTerm { + dct:created gn:aboutCases gn:aboutDataProcessing gn:aboutPlatform + gn:aboutTissue gn:accessionId gn:acknowledgment gn:citation + gn:contributors gn:datasetGroup gn:datasetOfinvestigator + gn:experimentDesign gn:geoSeries gn:name gn:notes + gn:specifics gn:summary gn:title + } } - """ +""" result: MonadicDict = MonadicDict() - for key, value in sparql_query( - sparql_conn, - Template(__metadata_query).substitute(name=name) - )[0].items(): - result[key] = value.bind(lambda x: Just(x["value"])) + results = sparql_query( + sparql_conn, + Template(__metadata_query).substitute(prefix=RDF_PREFIXES, name=name), + ) + for item in results: + predicate = (item["p"] + .map(lambda x: x["value"]) # type: ignore + .map(strip_url)) + subject = ( + item["s"] + .map(lambda x: x["value"]) # type: ignore + .map(strip_url) + .maybe(None, lambda x: x) + ) + object_ = (item["o"] + .maybe( + Nothing, + lambda x: Just(x["value"])) # type: ignore + ) + if subject == "homepage": + object_ = object_.map(strip_url) # type: ignore + if _p := predicate.maybe(None, lambda x: x): # type: ignore + result[_p] = object_ return result |