From 3395ec582bb6f0a3c9f79f264789c77986dd9878 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 6 Aug 2023 10:19:32 +0200 Subject: Init new project for GN on guile. --- web/webserver.scm | 463 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 463 insertions(+) create mode 100755 web/webserver.scm (limited to 'web/webserver.scm') diff --git a/web/webserver.scm b/web/webserver.scm new file mode 100755 index 0000000..ce754d8 --- /dev/null +++ b/web/webserver.scm @@ -0,0 +1,463 @@ +#!/usr/bin/env guile \ +-e main -s +!# +;; Minimal web server can be started from command line. Current example routes: +;; +;; localhost:8080/ +;; + +(use-modules + (json) + (ice-9 match) + (ice-9 format) + (ice-9 iconv) + (ice-9 receive) + (ice-9 string-fun) + ;; (ice-9 debugger) + ;; (ice-9 breakpoints) + ;; (ice-9 source) + (srfi srfi-1) + (srfi srfi-11) ; let-values + (srfi srfi-13) ; hash table for memoize + (srfi srfi-26) + (web http) + (web client) + (web request) + (web response) + (web uri) + (fibers web server)) + +;; basically lifted from https://lispdreams.wordpress.com/2016/04/08/lisp-memoization-techniques/ + +(define (memoize f) + "Simple memoize just uses alists at this point and does not expire" + (let ((result-table '())) + (lambda (. args) + (let ((cache-value (assoc args (pk result-table)))) + (if (pk cache-value) + (cdr cache-value) + (let ((result (apply f args))) + (set! result-table + (alist-cons args result result-table)) result)))))) + +(define (memoize2 f) + "Simple memoize functions that returns values pair and uses alists at this point and does not expire" + (let ((result-table '())) + (lambda (. args) + (let ((c (assoc args (pk result-table)))) + (if (pk c) + (values (car (cdr c)) (car (cdr (cdr c)))) + (let-values (((r1 r2) (apply f args))) + (set! result-table + (alist-cons args (list r1 r2) result-table)) (values r1 r2))))))) + +(define get-version + "2.0") + +(define (base-url) + "https://genenetwork.org") + +(define (gn-sparql-endpoint-url) + "https://sparql.genenetwork.org/sparql") + +(define (wd-sparql-endpoint-url) + "https://query.wikidata.org/sparql") + +(define (prefix) + "Build the API URL including version" + (string-append (base-url) "/api/" get-version)) + +(define* (mk-url postfix #:optional (ext "")) + "Add the path to the API URL" + (string-append (prefix) "/" postfix ext)) + +(define (mk-html path) + "Create a pointer to HTML documentation" + (string-append (base-url) "/" path ".html")) + +(define (mk-doc path) + "Create a pointer to HTML documentation" + (mk-html (string-append "doc/" path))) + +(define (mk-meta path) + "Create a meta URL for the API path" + (mk-url path ".meta.json")) + +(define (mk-rec path) + "Create a JSON URL for the API path" + (mk-url path ".json")) + +(define (mk-term postfix) + (mk-html (string-append "term" "/" postfix))) + +(define (mk-id postfix) + (mk-html (string-append "id" "/" postfix))) + +(define (mk-predicate postfix) + (mk-html (string-append "predicate" "/" postfix))) + +(define (wdt-taxon-name) "wdt:P225") + +(define info `( + ("name" . "GeneNetwork REST API") + ("version" . ,get-version) + ("comment" . "This is the official REST API for the GeneNetwork service hosted at https://genenetwork.org/") + ("license" . (("source code" . "AGPL"))) + ("note" . "work in progress (WIP)") + ("prefix" . ,(prefix)) + ("links". (("species" . ,(mk-meta "species")))))) + +(define info-meta `( + ("doc" . ,(mk-html "info")) + ("API" . + ((,(mk-url "species")."Get a list of all species") + (,(mk-url "mouse")."Get information on mouse") + (,(mk-url "datasets")."Get a list of datasets"))))) + + +(define (sparql-exec endpoint-url query) + "Execute raw SPARQL query returning response as a UTF8 string" + (bytevector->string (receive (response-status response-body) + (http-request (string-append endpoint-url "?default-graph-uri=&query=" (uri-encode query) "&format=application%2Fsparql-results%2Bjson")) + + response-body) "UTF-8")) + +(define (sparql-tsv endpoint-url query) + "Execute raw SPARQL query returning response as a UTF8 string, e.g. +(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")) +" + ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2 + (receive (response-status response-body) + (http-get (pk (string-append endpoint-url "?query=" (uri-encode query))) #:headers '((Accept . "text/tab-separated-values")(user-agent . "curl/7.74.0"))) + response-body)) + +(define (unpack field response) + "Helper to get nested JSON field from SPARQL response" + (cdr (assoc field response))) + +(define (sparql-names response) + "Helper to get the names part of a SPARQL query" + (unpack "vars" (unpack "head" response))) + +(define (sparql-results response) + "Helper to get the results part of a SPARQL query" + (unpack "bindings" (unpack "results" response))) + +(define (sparql-scm endpoint-url query) + "Return dual S-exp 'resultset' of varnames and results" + (let ((response (json-string->scm (sparql-exec endpoint-url query)))) + (values (sparql-names response) (sparql-results response)))) + +(define (tsv->scm text) + "Split a TSV string into a list of fields. Returns list of names header) and rows" + (let ([lst (map (lambda (f) (string-split f #\tab) ) (delete "" (string-split text #\newline)))]) + (values (car lst) (cdr lst)) + )) + +#! +(define-values (names res) (sparql-species-meta)) +(define table (get-rows names res)) +(define recs '()) +(define h (compile-species recs table)) +(assoc "http://genenetwork.org/species_drosophila_melanogaster" h) +(assoc-ref h "http://genenetwork.org/species_drosophila_melanogaster") ;; note switch! +(define d (car h)) +(assoc-ref (list d) "http://genenetwork.org/species_drosophila_melanogaster") + +(scm->json #(1 (("2" . 3)))) +;; [1,{"2":3}] +(scm->json #("http://genenetwork.org/species_drosophila_melanogaster" (("http://genenetwork.org/menuName" . "Drosophila") ("http://genenetwork.org/binomialName" . "Drosophila melanogaster") ))) +;; ["http://genenetwork.org/species_drosophila_melanogaster",{"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}] +l +;; (("http://genenetwork.org/menuName" "Drosophila") ("http://genenetwork.org/name" "Drosophila") ("http://genenetwork.org/binomialName" "Drosophila melanogaster")) +(scm->json (map (lambda (i) (cons (car i) (car (cdr i)))) l)) +;; {"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/name":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"} + + +curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="SELECT DISTINCT * where { + wd:Q158695 wdt:P225 ?o . +} limit 5" +{ + "head" : { + "vars" : [ "o" ] + }, + "results" : { + "bindings" : [ { + "o" : { + "type" : "literal", + "value" : "Arabidopsis thaliana" + } + } ] + } +} +!# + +(define (sparql-wd-species-info species) + "Returns wikidata entry for species, e.g.: + + (sparql-wd-species-info \"Q158695\") generates something like + +SELECT DISTINCT * where { wd:Q158695 wdt:P225 ?o . } limit 10 + +" + (sparql-tsv (wd-sparql-endpoint-url) (string-append " +SELECT DISTINCT ?taxon ?ncbi ?descr where { + wd:" species " " (wdt-taxon-name) " ?taxon ; + wdt:P685 ?ncbi ; + schema:description ?descr . + ?species wdt:P685 ?ncbi . + FILTER (lang(?descr)='en') +} limit 5 + +"))) + +(define memo-sparql-wd-species-info + (memoize sparql-wd-species-info)) + +#! +gn:Mus_musculus rdf:type gnc:species . +gn:Mus_musculus gnt:name "Mouse" . +gn:Mus_musculus rdfs:label "Mouse (Mus musculus, mm10)" . +gn:Mus_musculus gnt:binomialName "Mus musculus" . +gn:Mus_musculus gnt:family "Vertebrates" . +gn:Mus_musculus gnt:organism taxon:10090 . +!# + +(define (sparql-species) + (sparql-scm (gn-sparql-endpoint-url) " +PREFIX gn: +PREFIX gnc: +PREFIX rdf: + +SELECT DISTINCT ?species WHERE { + ?species rdf:type gnc:species . +}")) + +(define memo-sparql-species + (memoize2 sparql-species)) + +(define (sparql-species-meta) + "Return values names recs" + (sparql-scm (gn-sparql-endpoint-url) " +PREFIX gn: +PREFIX gnc: +PREFIX gnt: +PREFIX rdf: + +SELECT ?species ?p ?o WHERE { + MINUS { ?species rdf:type ?o . } +{ + SELECT DISTINCT ?species ?p ?o WHERE { + ?species rdf:type gnc:species . + ?species ?p ?o . + }}}")) + +(define memo-sparql-species-meta + (memoize2 sparql-species-meta)) + + +(define (get-values names row) + "Get values by name from a resultset row" + (map (lambda (n) (unpack "value" (unpack n row))) (array->list names))) + +(define (get-rows names results) + "Format results as a list of values ordered by names" + (map (lambda (row) (get-values names row)) (array->list results))) + +;; from the triples first harvest the species URIs, followed by creating records of information + +(define (compile-species recs rows) + "Compile a matrix of species triples into records" + (for-each (lambda (r) + (let* ([s (car r)] + [v (cdr (cdr r))] + [p (car (cdr r))] + [nrec '()] + [kv (assoc s recs)]) ; find record to fill based on subject + (if (not kv) + (set! nrec '()) + (set! nrec (cdr kv)) + ) + (set! nrec (assoc-set! nrec p v)) + (set! recs (assoc-set! recs s nrec)) + )) + rows) + recs) + +;; result should be a vector of list of pair +(define (species-digest recs) + (map (lambda (r) + (let* ([k (car r)] + [v (cdr r)]) + ; with key use (cons k (map (lambda (i) (cons (car i) (car (cdr i)))) v)) + (map (lambda (i) (cons (url-parse-id (car i)) (car (cdr i)))) v) + )) + recs ) + ) + +(define (get-species) + (receive (names res) (memo-sparql-species-meta) + (let* ([table (get-rows names res)] + [recs '()] + [h (compile-species recs table)]) + (species-digest h)) + )) + +; (define (wd-species-info wd) +; ) + +(define (url-parse-id uri) + (if uri + (car (reverse (string-split uri #\057))) + "unknown" + )) + +(define (strip-lang s) + "Strip quotes and language tag (@en) from RDF entries" + (list->string (match (string->list s) + [(#\"rest ... #\") rest] + [(#\"rest ... #\" #\@ #\e #\n) rest] + [rest rest])) + ) + +(define (normalize-id str) + ;; (string-replace-substring (string-downcase str) " " "_") + (string-replace-substring str " " "_") + ) + +(define (get-expanded-taxon id) + "Get information on a specific species, e.g. mouse" + #f + ) + +(define (get-expanded-species) + "Here we add information related to each species" + (map (lambda (rec) + (let ([wd-id (url-parse-id (assoc-ref rec "22-rdf-syntax-ns#isDefinedBy"))] + [short-name (normalize-id (assoc-ref rec "name"))]) + (if (string=? wd-id "unknown") + rec + ; wikidata query: + (receive (names row) (tsv->scm (memo-sparql-wd-species-info wd-id)) + (match (pk (car row)) + ((taxonomy-name ncbi descr) + (let ([ncbi-id (strip-lang ncbi)] + [taxonomy-lnk (string-replace-substring (strip-lang taxonomy-name) " " "_")]) + (cons `("id" . ,short-name) + (cons `("wikidata" . ,wd-id) + (cons `("taxonomy-id" . ,ncbi-id) + (cons `("ncbi-url" . ,(string-append "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=" ncbi-id)) + (cons `("uniprot-url" . ,(string-append "https://www.uniprot.org/taxonomy/" ncbi-id)) + (cons `("wikidata-url" . ,(string-append "http://www.wikidata.org/entity/" wd-id)) + (cons `("wikispecies-url" . ,(string-append "https://species.wikimedia.org/wiki/" taxonomy-lnk)) + (cons `("taxonomy-name" . ,(strip-lang taxonomy-name)) + ; (cons `("shortname" . ,shortname) - problematic + (cons `("description" . ,(strip-lang descr)) + rec))))))))))) + ) + ))) + ) (get-species) +)) + +(define (get-species-api-str) + (scm->json-string #("https://genenetwork.org/api/v2/mouse/" + "https://genenetwork.org/api/v2/rat/"))) + +(define (get-species-shortnames recs) + (map (lambda r (assoc-ref (car r) "shortName")) recs)) + +(define (get-species-links recs) + "Return a list of short names and expand them to URIs" + (map (lambda r + (let ([shortname (assoc-ref (car r) "shortName")]) + (cons shortname (mk-rec shortname)))) recs) + ) + +(define (get-species-rec) + (list->vector (get-expanded-species))) + +(define (get-species-meta) + (let ([recs (get-expanded-species)]) + `(("comment" . "Get information on species") + ("doc" . ,(mk-doc "species")) + ("meta" . ,(mk-meta "species")) + ("rec" . ,(mk-rec "species")) + ("links" . ,(get-species-links recs))))) + +;; ---- REST API web server handler + +(define (not-found2 request) + (values (build-response #:code 404) + (string-append "Resource X not found: " + (uri->string (request-uri request))))) + +(define (not-found uri) + (list (build-response #:code 404) + (string-append "Resource not found: " (uri->string uri)))) + +(define (render-json json) + (list '((content-type . (application/json))) + (lambda (port) + (scm->json json port)))) + +(define (render-json-string2 json) + (list '((content-type . (text/plain))) + (lambda (port) + ;; (display "ThthxST" port) + (format port "~a" "foo") + ))) + +(define (controller request body) + (match-lambda + (('GET) + (render-json info)) + (('GET "version") + (render-json get-version)) + (('GET "species.json") + (render-json (get-species-rec))) + (('GET "species.meta.json") + (render-json (get-species-meta))) + (('GET "species") + (render-json (get-species-meta))) + (('GET id) + (let ([names (get-species-shortnames (get-expanded-species))]) + (match (string->list id) + [(name ... #\. #\j #\s #\o #\n) (render-json (list->string name))] + [rest (render-json "WIP")]))) + (_ (not-found (request-uri request))) + )) + +(define (request-path-components request) + (split-and-decode-uri-path (uri-path (request-uri request)))) + +(define (handler request body) + (format #t "~a ~a\n" + (request-method request) + (uri-path (request-uri request))) + (apply values + ((controller request body) + (cons (request-method request) + (request-path-components request))))) + +(define (start-web-server address port) + (format (current-error-port) + "GN REST API web server listening on http://~a:~a/~%" + address port) + ;; Wrap handler in another function to support live hacking via the + ;; REPL. If handler is passed as is and is then redefined via the + ;; REPL, the web server will still be using the old handler. The + ;; only way to update the handler reference held by the web server + ;; would be to restart the web server. + (run-server (cut handler <> <>) + #:addr (inet-pton AF_INET address) + #:port port)) + +(define (main args) + (write (string-append "Starting Guile REST API " get-version " server!")) + (write args) + (newline) + (let ((listen (inexact->exact (string->number (car (cdr args)))))) + (display `("listening on" ,listen)) + ;; (write listen) + ;; (run-server hello-world-handler 'http `(#:port ,listen)))) + (start-web-server "127.0.0.1" listen))) -- cgit v1.2.3