about summary refs log tree commit diff
diff options
context:
space:
mode:
author Munyoki Kilyungi 2023-06-06 00:45:38 +0300
committer BonfaceKilz 2023-06-06 00:47:58 +0300
commit 2a025c10fb287068c8e8e93eb95c91fef4ca55cb (patch)
tree 358576e4f5353c09c0bb8067edf3f5021671336f
parent e374aeaa7b09a4fe1337a715e17418240c58032c (diff)
download genenetwork3-2a025c10fb287068c8e8e93eb95c91fef4ca55cb.tar.gz
Match how dataset metadata is fetched with GN1
* gn3/db/rdf.py (sparql_query): Parse CONSTRUCTS and SELECTS
differently.
(strip_url): Rename to ...
(get_url_local_name): ... this.
(get_dataset_metadata): Fetch extra fields in RDF.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r-- gn3/db/rdf.py 89
1 files changed, 54 insertions, 35 deletions
diff --git a/gn3/db/rdf.py b/gn3/db/rdf.py
index 96aa042..d03493c 100644
--- a/gn3/db/rdf.py
+++ b/gn3/db/rdf.py
@@ -9,7 +9,7 @@ from urllib.parse import unquote
 from urllib.parse import urlparse
 
 from SPARQLWrapper import JSON, SPARQLWrapper
-from pymonad.maybe import Just, Nothing
+from pymonad.maybe import Just
 
 from gn3.monads import MonadicDict
 
@@ -30,18 +30,29 @@ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
 
 
 def sparql_query(
-        sparql_conn: SPARQLWrapper, query: str
+        sparql_conn: SPARQLWrapper, query: str,
 ) -> Tuple[MonadicDict, ...]:
     """Run a SPARQL query and return the bound variables."""
     sparql_conn.setQuery(query)
     sparql_conn.setReturnFormat(JSON)
-    results = sparql_conn.queryAndConvert()
-    if _r := results["results"]["bindings"]:  # type: ignore
-        return (*(MonadicDict(bindings) for bindings in _r),)  # type: ignore
-    return (MonadicDict(),)
-
-
-def strip_url(string: str) -> str:
+    parsed_response = MonadicDict()
+    results = sparql_conn.queryAndConvert()["results"]["bindings"]  # type: ignore
+    if results:
+        for result in results:
+            if "s" in result:  # A CONSTRUCT
+                parsed_response[
+                        get_url_local_name(
+                            result["p"]["value"]  # type: ignore
+                        )
+                    ] = Just(result["o"]["value"])  # type: ignore
+            elif "key" in result:  # A SELECT
+                parsed_response[
+                    result["key"]  # type: ignore
+                ] = Just(result["value"])  # type: ignore
+    return (parsed_response,)
+
+
+def get_url_local_name(string: str) -> str:
     """Get the last item after a '/" from a URL"""
     if string.startswith("http"):
         url = urlparse(string)
@@ -63,6 +74,11 @@ CONSTRUCT {
     gn:dataset gn:investigatorName ?investigatorName .
     gn:dataset gn:investigatorWebUrl ?investigatorWebUrl .
     gn:dataset gn:tissueName ?tissueName .
+    gn:dataset gn:organism ?speciesDisplayName .
+    gn:dataset gn:organismUrl ?ncbiReference .
+    gn:dataset gn:inbredSetName ?inbredSetName .
+    gn:dataset gn:geoPlatform ?geoPlatform .
+    gn:dataset gn:platformName ?platform_name .
 } WHERE {
     ?subClass rdf:subClassOf gn:dataset .
     ?dataset rdf:type ?subclass ;
@@ -78,8 +94,21 @@ CONSTRUCT {
         ?normalizationType gn:name ?normalization .
     } .
     OPTIONAL{
+        ?dataset gn:datasetOfSpecies ?species .
+        ?species gn:displayName ?speciesDisplayName .
+        ?species gn:organism ?ncbiReference .
+    } .
+    OPTIONAL {
+        ?dataset gn:datasetOfInbredSet ?inbredSet .
+        ?inbredSet gn:binomialName ?inbredSetName .
+        ?inbredSet gn:inbredSetOfSpecies ?species .
+        ?species gn:displayName ?speciesDisplayName .
+        ?species gn:organism ?ncbiReference .
+    } .
+    OPTIONAL{
         ?dataset gn:datasetOfPlatform ?platform .
         ?platform gn:name ?platform_name .
+        ?platform gn:geoPlatform ?geoPlatform .
     } .
     OPTIONAL{
         ?dataset gn:datasetOfTissue ?tissue .
@@ -90,35 +119,25 @@ CONSTRUCT {
         gn:aboutTissue gn:accessionId gn:acknowledgment gn:citation
         gn:contributors gn:datasetGroup gn:datasetOfinvestigator
         gn:experimentDesign gn:geoSeries gn:name gn:notes
-        gn:specifics gn:summary gn:title
+        gn:specifics gn:summary gn:title gn:publicationTitle
+        gn:datasetStatusName gn:datasetOfOrganization
     }
 }
 """
-    result: MonadicDict = MonadicDict()
-    results = sparql_query(
-        sparql_conn,
-        Template(__metadata_query).substitute(prefix=RDF_PREFIXES, name=name),
-    )
-    for item in results:
-        predicate = (item["p"]
-                     .map(lambda x: x["value"]) # type: ignore
-                     .map(strip_url))
-        subject = (
-            item["s"]
-            .map(lambda x: x["value"]) # type: ignore
-            .map(strip_url)
-            .maybe(None, lambda x: x)
-        )
-        object_ = (item["o"]
-                   .maybe(
-                       Nothing,
-                       lambda x: Just(x["value"])) # type: ignore
-                   )
-        if subject == "homepage":
-            object_ = object_.map(strip_url) # type: ignore
-        if _p := predicate.maybe(None, lambda x: x):  # type: ignore
-            result[_p] = object_
-    return result
+    response: MonadicDict = MonadicDict()
+    for key, value in sparql_query(
+            sparql_conn,
+            Template(__metadata_query)
+            .substitute(
+                prefix=RDF_PREFIXES,
+                name=name
+            )
+    )[0].items():
+        if key.endswith("Url"):
+            response[key] = value
+        else:
+            response[key] = value.map(get_url_local_name)
+    return response
 
 
 def get_trait_metadata(