about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-06-02 21:34:39 +0300
committerBonfaceKilz2023-06-02 21:44:19 +0300
commit0eb9201b056f91aca4aa6068b7f5e3b8a028dcc0 (patch)
tree97534ecede67869cbdd97247613d53c29439a5fd
parentea0dd0dc21a659105f25f22fc8624849890de99e (diff)
downloadgenenetwork3-0eb9201b056f91aca4aa6068b7f5e3b8a028dcc0.tar.gz
Replace SELECT with CONSTRUCT when fetching a dataset's metadata
* gn3/db/rdf.py: Import pymonad.Nothing.
(get_dataset_metadata): Replace SELECT with CONSTRUCT.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--gn3/db/rdf.py125
1 files changed, 64 insertions, 61 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index 21a5121..872fe01 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -9,7 +9,8 @@ from urllib.parse import unquote
 from urllib.parse import urlparse
 
 from SPARQLWrapper import JSON, SPARQLWrapper
-from pymonad.maybe import Just
+from pymonad.maybe import Just, Nothing
+
 from gn3.monads import MonadicDict
 
 
@@ -53,68 +54,70 @@ def get_dataset_metadata(
 ) -> MonadicDict:
     """Return info about dataset with a given NAME"""
     __metadata_query = """
-PREFIX gn: <http://genenetwork.org/>
-
-SELECT ?accession_id ?dataset_group ?status ?title ?geo_series ?specifics ?summary ?about_tissue
-?about_platform ?about_data_processing ?notes ?experiment_design ?contributors ?citation ?acknowledgement
-?platform_name ?tissue_name ?normalization_name ?species_name ?inbred_set_name
-?name ?address ?city ?state ?zip ?phone ?email ?country ?homepage
-WHERE {
-  ?dataset gn:accessionId ?accession_id ;
-           rdf:type gn:dataset ;
-           gn:name "$name" .
-  OPTIONAL { ?dataset gn:aboutDataProcessing ?about_data_processing } .
-  OPTIONAL { ?dataset gn:aboutPlatform ?about_platform } .
-  OPTIONAL { ?dataset gn:aboutTissue ?about_tissue } .
-  OPTIONAL { ?dataset gn:acknowledgement ?acknowledgement } .
-  OPTIONAL { ?dataset gn:citation ?citation } .
-  OPTIONAL { ?dataset gn:contributors ?contributors } .
-  OPTIONAL { ?dataset gn:datasetGroup ?dataset_group } .
-  OPTIONAL { ?dataset gn:datasetStatus ?status } .
-  OPTIONAL { ?dataset gn:experimentDesign ?experiment_design } .
-  OPTIONAL { ?dataset gn:geoSeries ?geo_series } .
-  OPTIONAL { ?dataset gn:notes ?notes } .
-  OPTIONAL { ?dataset gn:specifics ?specifics } .
-  OPTIONAL { ?dataset gn:summary ?summary } .
-  OPTIONAL { ?dataset gn:title ?title } .
-  OPTIONAL {
-    ?dataset gn:normalization ?normalization .
-    ?normalization gn:name ?normalization_name .
-  } .
-  OPTIONAL {
-    ?dataset gn:datasetOfPlatform ?platform .
-    ?platform gn:name ?platform_name .
-  } .
-  OPTIONAL {
-    ?dataset gn:datasetOfTissue ?tissue .
-    ?tissue gn:name ?tissue_name .
-  } .
-  OPTIONAL {
-      ?dataset gn:datasetOfSpecies ?species ;
-               gn:datasetOfInbredSet ?inbred_set .
-      ?species gn:name ?species_name .
-      ?inbred_set gn:name ?inbred_set_name .
-  } .
-  OPTIONAL {
-      ?dataset gn:datasetOfInvestigator ?investigator .
-           OPTIONAL { ?investigator foaf:name ?name . }
-           OPTIONAL { ?investigator gn:address ?address . }
-           OPTIONAL { ?investigator gn:city ?city . }
-           OPTIONAL { ?investigator gn:state ?state . }
-           OPTIONAL { ?investigator gn:zipCode ?zip . }
-           OPTIONAL { ?investigator foaf:phone ?phone . }
-           OPTIONAL { ?investigator foaf:mbox ?email . }
-           OPTIONAL { ?investigator gn:country ?country . }
-           OPTIONAL { ?investigator foaf:homepage ?homepage . }
-  }
+$prefix
+
+CONSTRUCT {
+    gn:dataset ?datasetTerm ?datasetValue .
+    gn:dataset ?platformName ?platform_name .
+    gn:dataset gn:normalization ?normalization .
+    gn:dataset gn:investigatorName ?investigatorName .
+    gn:dataset gn:investigatorWebUrl ?investigatorWebUrl .
+    gn:dataset gn:tissueName ?tissueName .
+} WHERE {
+    ?subClass rdf:subClassOf gn:dataset .
+    ?dataset rdf:type ?subclass ;
+             gn:name "$name";
+             ?datasetTerm ?datasetValue .
+    OPTIONAL {
+        ?dataset gn:datasetOfInvestigator ?investigator .
+        ?investigator foaf:name ?investigatorName .
+        ?investigator foaf:homepage ?investigatorWebUrl .
+    } .
+    OPTIONAL{
+        ?dataset gn:normalization ?normalizationType .
+        ?normalizationType gn:name ?normalization .
+    } .
+    OPTIONAL{
+        ?dataset gn:datasetOfPlatform ?platform .
+        ?platform gn:name ?platform_name .
+    } .
+    OPTIONAL{
+        ?dataset gn:datasetOfTissue ?tissue .
+        ?tissue gn:name ?tissueName .
+    } .
+    VALUES ?datasetTerm {
+        dct:created gn:aboutCases gn:aboutDataProcessing gn:aboutPlatform
+        gn:aboutTissue gn:accessionId gn:acknowledgment gn:citation
+        gn:contributors gn:datasetGroup gn:datasetOfinvestigator
+        gn:experimentDesign gn:geoSeries gn:name gn:notes
+        gn:specifics gn:summary gn:title
+    }
 }
-    """
+"""
     result: MonadicDict = MonadicDict()
-    for key, value in sparql_query(
-            sparql_conn,
-            Template(__metadata_query).substitute(name=name)
-    )[0].items():
-        result[key] = value.bind(lambda x: Just(x["value"]))
+    results = sparql_query(
+        sparql_conn,
+        Template(__metadata_query).substitute(prefix=RDF_PREFIXES, name=name),
+    )
+    for item in results:
+        predicate = (item["p"]
+                     .map(lambda x: x["value"]) # type: ignore
+                     .map(strip_url))
+        subject = (
+            item["s"]
+            .map(lambda x: x["value"]) # type: ignore
+            .map(strip_url)
+            .maybe(None, lambda x: x)
+        )
+        object_ = (item["o"]
+                   .maybe(
+                       Nothing,
+                       lambda x: Just(x["value"])) # type: ignore
+                   )
+        if subject == "homepage":
+            object_ = object_.map(strip_url) # type: ignore
+        if _p := predicate.maybe(None, lambda x: x):  # type: ignore
+            result[_p] = object_
     return result