about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2023-08-06 11:13:23 +0200
committerPjotr Prins2023-08-06 11:13:23 +0200
commit6cad296ed1c3f4ffbc621c645cc53e2392d9faff (patch)
treef51be710f274fdb6a498147042a5f2be45b3a8e7
parenta00edcbc54026ea956ca41029474ff0e6a838616 (diff)
downloadgn-guile-6cad296ed1c3f4ffbc621c645cc53e2392d9faff.tar.gz
SPARQL: move into separate module
-rw-r--r--gn/cache/memoize.scm4
-rw-r--r--gn/db/sparql.scm196
-rwxr-xr-xweb/webserver.scm179
3 files changed, 202 insertions, 177 deletions
diff --git a/gn/cache/memoize.scm b/gn/cache/memoize.scm
index e7ccc33..42b2163 100644
--- a/gn/cache/memoize.scm
+++ b/gn/cache/memoize.scm
@@ -7,6 +7,10 @@
 ;; https://lispdreams.wordpress.com/2016/04/08/lisp-memoization-techniques/
 
 (define-module (gn cache memoize)
+  #:use-module (srfi srfi-1)
+  #:use-module (srfi srfi-13) ; SRFI-13 is the string library; NOTE(review): hash tables are SRFI-69 / Guile core, confirm this import is needed
+  #:use-module (srfi srfi-11) ; let-values
+
   #:export (memoize
             memoize2))
 
diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm
new file mode 100644
index 0000000..6c3b5b2
--- /dev/null
+++ b/gn/db/sparql.scm
@@ -0,0 +1,196 @@
+(define-module (gn db sparql)
+  #:use-module (json)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 format)
+  #:use-module (ice-9 iconv)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 string-fun)
+  #:use-module (gn cache memoize)
+  #:use-module (web client)
+  #:use-module (web uri)
+
+  #:export (memo-sparql-species
+            memo-sparql-species-meta
+            memo-sparql-wd-species-info
+            compile-species
+            get-rows
+            tsv->scm
+            )
+)
+
+(define (gn-sparql-endpoint-url) ; endpoint queried by sparql-species and sparql-species-meta
+  "https://sparql.genenetwork.org/sparql") ; sole body form, so this string is the return value, not a docstring
+
+(define (wd-sparql-endpoint-url) ; endpoint queried by sparql-wd-species-info
+  "https://query.wikidata.org/sparql") ; sole body form, so this string is the return value, not a docstring
+
+(define (wdt-taxon-name) "wdt:P225") ; predicate spliced in front of ?taxon by sparql-wd-species-info
+
+(define (sparql-exec endpoint-url query)
+  "Send QUERY to ENDPOINT-URL asking for SPARQL JSON results; return the body decoded as a UTF-8 string"
+  (let ((url (string-append endpoint-url "?default-graph-uri=&query=" (uri-encode query)
+                            "&format=application%2Fsparql-results%2Bjson")))
+    (call-with-values (lambda () (http-request url)) ; two values: the response object, then its body
+      (lambda (response body) (bytevector->string body "UTF-8")))))
+
+(define (sparql-tsv endpoint-url query)
+  "Run the SPARQL QUERY against ENDPOINT-URL requesting tab-separated values and return the response body, e.g.
+(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"SELECT DISTINCT * where { wd:Q158695 wdt:P225 ?o . } limit 5\"))
+"
+  ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2
+  (receive (response body) ; http-get yields the response object first, then the body
+      (http-get (string-append endpoint-url "?query=" (uri-encode query)) #:headers '((Accept . "text/tab-separated-values")(user-agent . "curl/7.74.0")))
+    body))
+
+(define (unpack field response)
+  "Look up FIELD in the association list RESPONSE (keys compared with equal?) and return its value"
+  (let ((entry (assoc field response))) (cdr entry)))
+
+(define (sparql-names response)
+  "Return the \"vars\" entry stored under \"head\" in a parsed SPARQL RESPONSE"
+  (let ((head (unpack "head" response))) (unpack "vars" head)))
+
+(define (sparql-results response)
+  "Return the \"bindings\" entry stored under \"results\" in a parsed SPARQL RESPONSE"
+  (let ((results (unpack "results" response))) (unpack "bindings" results)))
+
+(define (sparql-scm endpoint-url query)
+  "Run QUERY on ENDPOINT-URL and return two values: the variable names and the result bindings"
+  (let* ((raw (sparql-exec endpoint-url query)) (response (json-string->scm raw)))
+    (values (sparql-names response) (sparql-results response))))
+
+(define (tsv->scm text)
+  "Split a TSV string into fields. Returns two values: the header (list of names) and the list of rows; both are '() when TEXT has no lines"
+  (let ((lines (delete "" (string-split text #\newline)))) ; drop blank lines, e.g. the one after a trailing newline
+    (let ((table (map (lambda (line) (string-split line #\tab)) lines)))
+      (if (null? table) (values '() '()) (values (car table) (cdr table))))))
+
+#!
+(define-values (names res) (sparql-species-meta))
+(define table (get-rows names res))
+(define recs '())
+(define h (compile-species recs table))
+(assoc "http://genenetwork.org/species_drosophila_melanogaster" h)
+(assoc-ref h "http://genenetwork.org/species_drosophila_melanogaster") ;; note switch!
+(define d (car h))
+(assoc-ref (list d) "http://genenetwork.org/species_drosophila_melanogaster")
+
+(scm->json #(1  (("2" . 3))))
+;; [1,{"2":3}]
+(scm->json #("http://genenetwork.org/species_drosophila_melanogaster" (("http://genenetwork.org/menuName" . "Drosophila") ("http://genenetwork.org/binomialName" . "Drosophila melanogaster") )))
+;; ["http://genenetwork.org/species_drosophila_melanogaster",{"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}]
+l
+;; (("http://genenetwork.org/menuName" "Drosophila") ("http://genenetwork.org/name" "Drosophila") ("http://genenetwork.org/binomialName" "Drosophila melanogaster"))
+(scm->json (map (lambda (i) (cons (car i) (car (cdr i)))) l))
+;; {"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/name":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}
+
+
+curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="SELECT DISTINCT * where {
+  wd:Q158695 wdt:P225 ?o .
+} limit 5"
+{
+  "head" : {
+    "vars" : [ "o" ]
+  },
+  "results" : {
+    "bindings" : [ {
+      "o" : {
+        "type" : "literal",
+        "value" : "Arabidopsis thaliana"
+      }
+    } ]
+  }
+}
+!#
+
+(define (sparql-wd-species-info species)
+  "Query Wikidata for the entity SPECIES and return the TSV response, e.g.:
+
+   (sparql-wd-species-info \"Q158695\")
+
+selects ?taxon, ?ncbi and the English ?descr for wd:Q158695 (limit 5)
+"
+  (let ((query (string-append "
+SELECT DISTINCT ?taxon ?ncbi ?descr where {
+    wd:" species " " (wdt-taxon-name) " ?taxon ;
+               wdt:P685 ?ncbi ;
+      schema:description ?descr .
+    ?species wdt:P685 ?ncbi .
+    FILTER (lang(?descr)='en')
+} limit 5
+
+")))
+    (sparql-tsv (wd-sparql-endpoint-url) query)))
+
+(define memo-sparql-wd-species-info ; exported in place of the raw sparql-wd-species-info
+  (memoize sparql-wd-species-info)) ; memoize comes from (gn cache memoize); presumably caches per argument, confirm there
+
+#!
+gn:Mus_musculus rdf:type gnc:species .
+gn:Mus_musculus gnt:name "Mouse" .
+gn:Mus_musculus rdfs:label "Mouse (Mus musculus, mm10)" .
+gn:Mus_musculus gnt:binomialName "Mus musculus" .
+gn:Mus_musculus gnt:family "Vertebrates" .
+gn:Mus_musculus gnt:organism taxon:10090 .
+!#
+
+(define (sparql-species) ; => the two values of sparql-scm for every subject typed gnc:species
+  (let ((endpoint (gn-sparql-endpoint-url))) (sparql-scm endpoint "
+PREFIX gn: <http://genenetwork.org/id/>
+PREFIX gnc: <http://genenetwork.org/category/>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+SELECT DISTINCT ?species WHERE {
+    ?species rdf:type gnc:species .
+}")))
+
+(define memo-sparql-species ; exported in place of the raw sparql-species
+  (memoize2 sparql-species)) ; memoize2 comes from (gn cache memoize); presumably the variant for two return values, confirm there
+
+(define (sparql-species-meta)
+  "Ask the GeneNetwork endpoint for ?species ?p ?o of subjects typed gnc:species; returns the two values of sparql-scm (names, bindings)"
+  (let ((endpoint (gn-sparql-endpoint-url))) (sparql-scm endpoint "
+PREFIX gn: <http://genenetwork.org/id/>
+PREFIX gnc: <http://genenetwork.org/category/>
+PREFIX gnt: <http://genenetwork.org/term/>
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+
+SELECT ?species ?p ?o WHERE {
+   MINUS { ?species rdf:type ?o . }
+{
+  SELECT DISTINCT ?species ?p ?o WHERE {
+    ?species rdf:type gnc:species .
+    ?species ?p ?o .
+   }}}")))
+
+(define memo-sparql-species-meta ; exported in place of the raw sparql-species-meta
+  (memoize2 sparql-species-meta)) ; memoize2 comes from (gn cache memoize); presumably the variant for two return values, confirm there
+
+
+(define (get-values names row)
+  "For each name in the array NAMES, take that binding out of ROW and return its \"value\" field, in order"
+  (map (lambda (name) (let ((binding (unpack name row))) (unpack "value" binding))) (array->list names)))
+
+(define (get-rows names results)
+  "Turn the array RESULTS into a list of rows, each row holding its values ordered as in NAMES"
+  (let ((row->values (lambda (row) (get-values names row)))) (map row->values (array->list results))))
+
+;; from the triples first harvest the species URIs, followed by creating records of information
+
+(define (compile-species recs rows)
+  "Fold subject/predicate/value ROWS into RECS, an alist keyed by subject whose entries are alists of predicate -> rest of the row"
+  (let loop ([remaining rows]
+             [acc recs])
+    (if (null? remaining)
+        acc
+        (let* ([row (car remaining)]
+               [subject (car row)]
+               [predicate (car (cdr row))]
+               [value (cdr (cdr row))] ; the tail of the row, i.e. a list rather than a bare value
+               [entry (assoc subject acc)] ; existing record for this subject, if any
+               [fields (if entry (cdr entry) '())])
+          ;; assoc-set! either updates the matching pair in place or returns
+          ;; a list with a new pair in front, so its result is always kept
+          (loop (cdr remaining)
+                (assoc-set! acc subject
+                            (assoc-set! fields predicate value)))))))
diff --git a/web/webserver.scm b/web/webserver.scm
index 910024f..657d82e 100755
--- a/web/webserver.scm
+++ b/web/webserver.scm
@@ -18,7 +18,6 @@
  ;; (ice-9 source)
  (srfi srfi-1)
  (srfi srfi-11) ; let-values
- (srfi srfi-13) ; hash table for memoize
  (srfi srfi-26)
  (web http)
  (web client)
@@ -26,7 +25,8 @@
  (web response)
  (web uri)
  (fibers web server)
- (gn cache memoize))
+ (gn cache memoize)
+ (gn db sparql))
 
 (define get-version
   "2.0")
@@ -34,11 +34,6 @@
 (define (base-url)
   "https://genenetwork.org")
 
-(define (gn-sparql-endpoint-url)
-  "https://sparql.genenetwork.org/sparql")
-
-(define (wd-sparql-endpoint-url)
-  "https://query.wikidata.org/sparql")
 
 (define (prefix)
   "Build the API URL including version"
@@ -73,7 +68,6 @@
 (define (mk-predicate postfix)
   (mk-html (string-append "predicate" "/" postfix)))
 
-(define (wdt-taxon-name) "wdt:P225")
 
 (define info `(
   ("name" . "GeneNetwork REST API")
@@ -92,175 +86,6 @@
      (,(mk-url "datasets")."Get a list of datasets")))))
 
 
-(define (sparql-exec endpoint-url query)
-  "Execute raw SPARQL query returning response as a UTF8 string"
-  (bytevector->string (receive (response-status response-body)
-                          (http-request (string-append endpoint-url "?default-graph-uri=&query=" (uri-encode query) "&format=application%2Fsparql-results%2Bjson"))
-
-                         response-body) "UTF-8"))
-
-(define (sparql-tsv endpoint-url query)
-  "Execute raw SPARQL query returning response as a UTF8 string, e.g.
-(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\"))
-"
-  ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2
-  (receive (response-status response-body)
-                          (http-get (pk (string-append endpoint-url "?query=" (uri-encode query))) #:headers '((Accept . "text/tab-separated-values")(user-agent . "curl/7.74.0")))
-                         response-body))
-
-(define (unpack field response)
-  "Helper to get nested JSON field from SPARQL response"
-  (cdr (assoc field response)))
-
-(define (sparql-names response)
-  "Helper to get the names part of a SPARQL query"
-  (unpack "vars" (unpack "head" response)))
-
-(define (sparql-results response)
-  "Helper to get the results part of a SPARQL query"
-  (unpack "bindings" (unpack "results" response)))
-
-(define (sparql-scm endpoint-url query)
-  "Return dual S-exp 'resultset' of varnames and results"
-  (let ((response (json-string->scm (sparql-exec endpoint-url query))))
-   (values (sparql-names response) (sparql-results response))))
-
-(define (tsv->scm text)
-  "Split a TSV string into a list of fields. Returns list of names header) and rows"
-  (let ([lst (map (lambda (f) (string-split f #\tab) ) (delete "" (string-split text #\newline)))])
-    (values (car lst) (cdr lst))
-  ))
-
-#!
-(define-values (names res) (sparql-species-meta))
-(define table (get-rows names res))
-(define recs '())
-(define h (compile-species recs table))
-(assoc "http://genenetwork.org/species_drosophila_melanogaster" h)
-(assoc-ref h "http://genenetwork.org/species_drosophila_melanogaster") ;; note switch!
-(define d (car h))
-(assoc-ref (list d) "http://genenetwork.org/species_drosophila_melanogaster")
-
-(scm->json #(1  (("2" . 3))))
-;; [1,{"2":3}]
-(scm->json #("http://genenetwork.org/species_drosophila_melanogaster" (("http://genenetwork.org/menuName" . "Drosophila") ("http://genenetwork.org/binomialName" . "Drosophila melanogaster") )))
-;; ["http://genenetwork.org/species_drosophila_melanogaster",{"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}]
-l
-;; (("http://genenetwork.org/menuName" "Drosophila") ("http://genenetwork.org/name" "Drosophila") ("http://genenetwork.org/binomialName" "Drosophila melanogaster"))
-(scm->json (map (lambda (i) (cons (car i) (car (cdr i)))) l))
-;; {"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/name":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}
-
-
-curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="SELECT DISTINCT * where {
-  wd:Q158695 wdt:P225 ?o .
-} limit 5"
-{
-  "head" : {
-    "vars" : [ "o" ]
-  },
-  "results" : {
-    "bindings" : [ {
-      "o" : {
-        "type" : "literal",
-        "value" : "Arabidopsis thaliana"
-      }
-    } ]
-  }
-}
-!#
-
-(define (sparql-wd-species-info species)
-  "Returns wikidata entry for species, e.g.:
-
-   (sparql-wd-species-info \"Q158695\") generates something like
-
-SELECT DISTINCT * where {  wd:Q158695 wdt:P225 ?o . } limit 10
-
-"
-  (sparql-tsv (wd-sparql-endpoint-url) (string-append "
-SELECT DISTINCT ?taxon ?ncbi ?descr where {
-    wd:" species " " (wdt-taxon-name) " ?taxon ;
-               wdt:P685 ?ncbi ;
-      schema:description ?descr .
-    ?species wdt:P685 ?ncbi .
-    FILTER (lang(?descr)='en')
-} limit 5
-
-")))
-
-(define memo-sparql-wd-species-info
-  (memoize sparql-wd-species-info))
-
-#!
-gn:Mus_musculus rdf:type gnc:species .
-gn:Mus_musculus gnt:name "Mouse" .
-gn:Mus_musculus rdfs:label "Mouse (Mus musculus, mm10)" .
-gn:Mus_musculus gnt:binomialName "Mus musculus" .
-gn:Mus_musculus gnt:family "Vertebrates" .
-gn:Mus_musculus gnt:organism taxon:10090 .
-!#
-
-(define (sparql-species)
-  (sparql-scm (gn-sparql-endpoint-url) "
-PREFIX gn: <http://genenetwork.org/id/>
-PREFIX gnc: <http://genenetwork.org/category/>
-PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-
-SELECT DISTINCT ?species WHERE {
-    ?species rdf:type gnc:species .
-}"))
-
-(define memo-sparql-species
-  (memoize2 sparql-species))
-
-(define (sparql-species-meta)
-  "Return values names recs"
-  (sparql-scm (gn-sparql-endpoint-url) "
-PREFIX gn: <http://genenetwork.org/id/>
-PREFIX gnc: <http://genenetwork.org/category/>
-PREFIX gnt: <http://genenetwork.org/term/>
-PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
-
-SELECT ?species ?p ?o WHERE {
-   MINUS { ?species rdf:type ?o . }
-{
-  SELECT DISTINCT ?species ?p ?o WHERE {
-    ?species rdf:type gnc:species .
-    ?species ?p ?o .
-   }}}"))
-
-(define memo-sparql-species-meta
-  (memoize2 sparql-species-meta))
-
-
-(define (get-values names row)
-  "Get values by name from a resultset row"
-  (map (lambda (n) (unpack "value" (unpack n row))) (array->list names)))
-
-(define (get-rows names results)
-  "Format results as a list of values ordered by names"
-  (map (lambda (row) (get-values names row)) (array->list results)))
-
-;; from the triples first harvest the species URIs, followed by creating records of information
-
-(define (compile-species recs rows)
-  "Compile a matrix of species triples into records"
-  (for-each (lambda (r)
-		(let* ([s (car r)]
-		       [v (cdr (cdr r))]
-		       [p (car (cdr r))]
-		       [nrec '()]
-		       [kv (assoc s recs)]) ; find record to fill based on subject
-		  (if (not kv)
-		      (set! nrec '())
-		      (set! nrec (cdr kv))
-		      )
-		  (set! nrec (assoc-set! nrec p v))
-		  (set! recs (assoc-set! recs s nrec))
-		  ))
-		rows)
-  recs)
-
 ;; result should be a vector of list of pair
 (define (species-digest recs)
   (map (lambda (r)