aboutsummaryrefslogtreecommitdiff
path: root/gn3/db/rdf.py
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-06-02 21:34:39 +0300
committer BonfaceKilz 2023-06-02 21:44:19 +0300
commit 0eb9201b056f91aca4aa6068b7f5e3b8a028dcc0 (patch)
tree 97534ecede67869cbdd97247613d53c29439a5fd /gn3/db/rdf.py
parent ea0dd0dc21a659105f25f22fc8624849890de99e (diff)
download genenetwork3-0eb9201b056f91aca4aa6068b7f5e3b8a028dcc0.tar.gz
Replace SELECT with CONSTRUCT when fetching a dataset's metadata
* gn3/db/rdf.py: Import pymonad.Nothing. (get_dataset_metadata): Replace SELECT with CONSTRUCT. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/db/rdf.py')
-rw-r--r-- gn3/db/rdf.py 125
1 files changed, 64 insertions, 61 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index 21a5121..872fe01 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -9,7 +9,8 @@ from urllib.parse import unquote
from urllib.parse import urlparse
from SPARQLWrapper import JSON, SPARQLWrapper
-from pymonad.maybe import Just
+from pymonad.maybe import Just, Nothing
+
from gn3.monads import MonadicDict
@@ -53,68 +54,70 @@ def get_dataset_metadata(
) -> MonadicDict:
"""Return info about dataset with a given NAME"""
__metadata_query = """
-PREFIX gn: <http://genenetwork.org/>
-
-SELECT ?accession_id ?dataset_group ?status ?title ?geo_series ?specifics ?summary ?about_tissue
-?about_platform ?about_data_processing ?notes ?experiment_design ?contributors ?citation ?acknowledgement
-?platform_name ?tissue_name ?normalization_name ?species_name ?inbred_set_name
-?name ?address ?city ?state ?zip ?phone ?email ?country ?homepage
-WHERE {
- ?dataset gn:accessionId ?accession_id ;
- rdf:type gn:dataset ;
- gn:name "$name" .
- OPTIONAL { ?dataset gn:aboutDataProcessing ?about_data_processing } .
- OPTIONAL { ?dataset gn:aboutPlatform ?about_platform } .
- OPTIONAL { ?dataset gn:aboutTissue ?about_tissue } .
- OPTIONAL { ?dataset gn:acknowledgement ?acknowledgement } .
- OPTIONAL { ?dataset gn:citation ?citation } .
- OPTIONAL { ?dataset gn:contributors ?contributors } .
- OPTIONAL { ?dataset gn:datasetGroup ?dataset_group } .
- OPTIONAL { ?dataset gn:datasetStatus ?status } .
- OPTIONAL { ?dataset gn:experimentDesign ?experiment_design } .
- OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
- OPTIONAL { ?dataset gn:notes ?notes } .
- OPTIONAL { ?dataset gn:specifics ?specifics } .
- OPTIONAL { ?dataset gn:summary ?summary } .
- OPTIONAL { ?dataset gn:title ?title } .
- OPTIONAL {
- ?dataset gn:normalization ?normalization .
- ?normalization gn:name ?normalization_name .
- } .
- OPTIONAL {
- ?dataset gn:datasetOfPlatform ?platform .
- ?platform gn:name ?platform_name .
- } .
- OPTIONAL {
- ?dataset gn:datasetOfTissue ?tissue .
- ?tissue gn:name ?tissue_name .
- } .
- OPTIONAL {
- ?dataset gn:datasetOfSpecies ?species ;
- gn:datasetOfInbredSet ?inbred_set .
- ?species gn:name ?species_name .
- ?inbred_set gn:name ?inbred_set_name .
- } .
- OPTIONAL {
- ?dataset gn:datasetOfInvestigator ?investigator .
- OPTIONAL { ?investigator foaf:name ?name . }
- OPTIONAL { ?investigator gn:address ?address . }
- OPTIONAL { ?investigator gn:city ?city . }
- OPTIONAL { ?investigator gn:state ?state . }
- OPTIONAL { ?investigator gn:zipCode ?zip . }
- OPTIONAL { ?investigator foaf:phone ?phone . }
- OPTIONAL { ?investigator foaf:mbox ?email . }
- OPTIONAL { ?investigator gn:country ?country . }
- OPTIONAL { ?investigator foaf:homepage ?homepage . }
- }
+$prefix
+
+CONSTRUCT {
+ gn:dataset ?datasetTerm ?datasetValue .
+ gn:dataset ?platformName ?platform_name .
+ gn:dataset gn:normalization ?normalization .
+ gn:dataset gn:investigatorName ?investigatorName .
+ gn:dataset gn:investigatorWebUrl ?investigatorWebUrl .
+ gn:dataset gn:tissueName ?tissueName .
+} WHERE {
+ ?subClass rdf:subClassOf gn:dataset .
+ ?dataset rdf:type ?subclass ;
+ gn:name "$name";
+ ?datasetTerm ?datasetValue .
+ OPTIONAL {
+ ?dataset gn:datasetOfInvestigator ?investigator .
+ ?investigator foaf:name ?investigatorName .
+ ?investigator foaf:homepage ?investigatorWebUrl .
+ } .
+ OPTIONAL{
+ ?dataset gn:normalization ?normalizationType .
+ ?normalizationType gn:name ?normalization .
+ } .
+ OPTIONAL{
+ ?dataset gn:datasetOfPlatform ?platform .
+ ?platform gn:name ?platform_name .
+ } .
+ OPTIONAL{
+ ?dataset gn:datasetOfTissue ?tissue .
+ ?tissue gn:name ?tissueName .
+ } .
+ VALUES ?datasetTerm {
+ dct:created gn:aboutCases gn:aboutDataProcessing gn:aboutPlatform
+ gn:aboutTissue gn:accessionId gn:acknowledgment gn:citation
+ gn:contributors gn:datasetGroup gn:datasetOfinvestigator
+ gn:experimentDesign gn:geoSeries gn:name gn:notes
+ gn:specifics gn:summary gn:title
+ }
}
- """
+"""
result: MonadicDict = MonadicDict()
- for key, value in sparql_query(
- sparql_conn,
- Template(__metadata_query).substitute(name=name)
- )[0].items():
- result[key] = value.bind(lambda x: Just(x["value"]))
+ results = sparql_query(
+ sparql_conn,
+ Template(__metadata_query).substitute(prefix=RDF_PREFIXES, name=name),
+ )
+ for item in results:
+ predicate = (item["p"]
+ .map(lambda x: x["value"]) # type: ignore
+ .map(strip_url))
+ subject = (
+ item["s"]
+ .map(lambda x: x["value"]) # type: ignore
+ .map(strip_url)
+ .maybe(None, lambda x: x)
+ )
+ object_ = (item["o"]
+ .maybe(
+ Nothing,
+ lambda x: Just(x["value"])) # type: ignore
+ )
+ if subject == "homepage":
+ object_ = object_.map(strip_url) # type: ignore
+ if _p := predicate.maybe(None, lambda x: x): # type: ignore
+ result[_p] = object_
return result