diff options
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | .guix/modules/gn-guile.scm | 2 | ||||
| -rw-r--r-- | README.md | 43 | ||||
| -rwxr-xr-x | bin/gn-guile | 38 | ||||
| -rw-r--r-- | gn/data/dataset.scm | 61 | ||||
| -rw-r--r-- | gn/data/genotype.scm | 1 | ||||
| -rw-r--r-- | gn/data/strains.scm | 2 | ||||
| -rw-r--r-- | gn/db/mysql.scm | 14 | ||||
| -rw-r--r-- | gn/db/sources/wikidata.scm | 50 | ||||
| -rwxr-xr-x[-rw-r--r--] | gn/db/sparql.scm | 141 | ||||
| -rw-r--r-- | gn/runner/gemma.scm | 15 | ||||
| -rw-r--r-- | manifest.scm | 31 | ||||
| -rwxr-xr-x | scripts/lmdb-publishdata-export.scm | 47 | ||||
| -rwxr-xr-x | scripts/precompute/list-traits-to-compute.scm | 4 | ||||
| -rw-r--r-- | web/README.md | 1 | ||||
| -rw-r--r-- | web/config.scm | 86 | ||||
| -rw-r--r-- | web/view/brand/aging.scm | 7 | ||||
| -rw-r--r-- | web/view/brand/msk.scm | 2 | ||||
| -rw-r--r-- | web/view/doc.scm | 2 | ||||
| -rw-r--r-- | web/view/view.scm | 12 | ||||
| -rw-r--r-- | web/webserver.scm | 197 |
21 files changed, 591 insertions, 166 deletions
diff --git a/.gitignore b/.gitignore index 5f81cf8..26235cb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ pheno.txt GWA.json K.json .aider* +.config/** diff --git a/.guix/modules/gn-guile.scm b/.guix/modules/gn-guile.scm index 03f2b14..87e059e 100644 --- a/.guix/modules/gn-guile.scm +++ b/.guix/modules/gn-guile.scm @@ -8,7 +8,7 @@ ;; (define-module (gn-guile) - #:use-module ((gn packages guile) #:select (gn-guile) #:prefix gn:) + #:use-module ((gn-machines genenetwork) #:select (gn-guile) #:prefix gn:) #:use-module (guix gexp) #:use-module (guix utils) #:use-module (guix packages) diff --git a/README.md b/README.md index d3326bd..2577c00 100644 --- a/README.md +++ b/README.md @@ -25,31 +25,60 @@ The current repository lives at git clone tux02.genenetwork.org:/home/git/public/gn-guile ``` -GNU Guile allows you to develop against a live running web server using emacs-geiser. To try this fire up the web server from the `web` directory as +or + +``` +git remote add gn git.genenetwork.org:/home/git/public/gn-guile +``` + +GNU Guile allows you to develop against a live running web server using emacs-geiser on port 1970. To try this fire up the web server from the `web` directory as ```sh -guix shell --container --network --file=guix.scm -- guile -L . --fresh-auto-compile --listen=1970 -e main web/webserver.scm 8091 +guix shell -L ~/guix-bioinformatics --container --network --development --file=guix.scm -- guile -L . --fresh-auto-compile --listen=1970 -e main bin/gn-guile 8091 ``` +By default the root points to the API: + +curl http://127.0.0.1:8091 + +We also have some services + +curl http://127.0.0.1:8091/home/msk +curl http://127.0.0.1:8091/home/aging + The `--container` option runs the code in an isolated container, and the `--network` option connects that container's networking to the host to allow you to access the running service. 
+If you get an error `no code for module (gn packages guile)` add the appropriate guix-bioinformatics repo to the load path (`-L` switch). Note that, instead of hard-wiring guix-bioinformatics, the recommended way is to use a guix channel as defined in [guix-channel](./.guix-channel). + +We recommend checking the Guix documentation for manifests, channels and guix.scm definitions. + +To run a standalone server you should run without the listener on port 1970: + +``` +guix shell -L ~/guix-bioinformatics --container --network --file=guix.scm -- guile -L . --fresh-auto-compile -e main bin/gn-guile 8091 +``` + ## Welcome to the world of interactive Lisp programming Next fire up emacs with `emacs-geiser-guile` and connect to the running web server with `M-x geiser-connect` and the port `1970`. Now you can not only inspect procedures, but also update any prodedure on the live server using `C-M-x` on code, call and get updated output from the webserver! No need to save/reload files and all that. Note that you may have to try different versions of guile+emacs to succeed. +Also, these days it may be a better bet to use Andrew Tropin's ares and arei, which are very powerful alternatives to geiser (with a more Common Lisp-style interactive experience): + +=> https://git.sr.ht/~abcdw/guile-ares-rs +=> https://git.sr.ht/~abcdw/emacs-arei + + # Tools Some tooling and scripts that run independently are stored in `./scripts`. 
+# Forwarding a MySQL port +If there is no DB locally, you may want to forward a MySQL port with something like: -# Development - -``` -git remote add gn git.genenetwork.org:/home/git/public/gn-guile -``` +ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org # Topics diff --git a/bin/gn-guile b/bin/gn-guile new file mode 100755 index 0000000..47bd259 --- /dev/null +++ b/bin/gn-guile @@ -0,0 +1,38 @@ +#!/usr/bin/env sh +# -*- mode: scheme; -*- +exec guile --no-auto-compile -e main -s "$0" "$@" +!# + +;;; gn-guile --- GN Guile web service +;;; Copyright © 2026 Frederick M. Muriithi <fredmanglis@gmail.com> +;;; +;;; This file is part of gn-guile +;;; +;;; gn-guile is free software: you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation, either version 3 of the License, or +;;; (at your option) any later version. +;;; +;;; gn-guile is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with gn-guile. If not, see <https://www.gnu.org/licenses/>. 
+ +(use-modules (config) + (web config) + (web gn-uri) + (web webserver)) + +(define (main args) + (write (string-append "Starting Guile REST API " get-version " server!")) + (write args) + (newline) + (let* ((options (parse-cli-options args)) + (listen (option-ref options 'port))) + (when (option-ref options 'write) + (options-write options)) + (display `("listening on" ,listen)) + (start-web-server "127.0.0.1" listen (cli-options->gn-guile-config options)))) diff --git a/gn/data/dataset.scm b/gn/data/dataset.scm index c28cf25..afe75ba 100644 --- a/gn/data/dataset.scm +++ b/gn/data/dataset.scm @@ -4,14 +4,21 @@ #:use-module (ice-9 iconv) #:use-module (ice-9 receive) #:use-module (ice-9 string-fun) + #:use-module (srfi srfi-1) #:use-module (dbi dbi) #:use-module (gn db mysql) + #:use-module (gn data genotype) #:use-module (gn data group) #:use-module (gn util convert) #:use-module (web gn-uri) + #:use-module (rnrs base) ; for assert #:export ( dataset-name + get-bxd-publish-list + get-bxd-publish-values-list + get-bxd-publish-name-value-dict + get-bxd-publish-dataid-name-value-dict )) (define (get-dataset db probesetfreeze-id) @@ -22,3 +29,57 @@ (define (dataset-name db probesetfreeze-id) (assoc-ref (get-dataset db probesetfreeze-id) "Name")) + +(define (get-dataid-from-publishxrefid id) + "Get the internal dataid from publishxref - which is the same as used in the GN2 web interface" + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Id,PhenotypeId,DataId FROM PublishXRef WHERE Id=" id " AND InbredSetId=1 LIMIT 1"))] + (dbi-query db query) + (pk (int-to-string (assoc-ref (get-row db) "DataId"))))))) + +(define (get-bxd-publish-list) + (call-with-db + (lambda (db) + (let [(query "SELECT Id,PhenotypeId,DataId FROM PublishXRef WHERE InbredSetId=1")] + (dbi-query db query) + (get-rows db '()))))) + +(define* (get-bxd-publish-values-list dataid #:optional used-for-mapping?) + "Returns dict of name values , e.g. 
[{\"Name\":\"C57BL/6J\",\"value\":9.136},{\"Name\":\"DBA/2J\",\"value\":4.401},{\"Name\":\"BXD9\",\"value\":4.36}, ... used-for-mapping? skips the founders and maybe other unmappable inds. Note, currently unused." + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Strain.Name, PublishData.value FROM Strain, PublishData WHERE PublishData.Id=" dataid " and Strain.Id=StrainID;"))] + (dbi-query db query) + (if used-for-mapping? + (remove null? (pk (get-rows-apply db + (lambda (r) + (if (string-contains (assoc-ref r "Name") "BXD") + `(("Name" . ,(assoc-ref r "Name")) ("value" . ,(assoc-ref r "value"))) + '() ) ;; return empty on no match + ) '()))) + (get-rows db '()) + ))))) + +(define* (get-bxd-publish-dataid-name-value-dict dataid #:optional used-for-mapping?) + "Returns dict of name values, e.g. (((\"C57BL/6J\" . 9.136) (\"DBA/2J\" . 4.401) (\"BXD9\" . 4.36) ... used-for-mapping? skips the founders and maybe other unmappable inds." + (call-with-db + (lambda (db) + (let [(query (string-append "SELECT Strain.Name, PublishData.value FROM Strain, PublishData WHERE PublishData.Id=" dataid " and Strain.Id=StrainID;"))] + (dbi-query db query) + (if used-for-mapping? + (remove null? (pk (get-rows-apply db + (lambda (r) + (if (string-contains (assoc-ref r "Name") "BXD") + `(,(assoc-ref r "Name") . ,(assoc-ref r "value")) + '() ) ;; return empty on no match + ) '()))) + (remove null? (pk (get-rows-apply db + (lambda (r) + `(,(assoc-ref r "Name") . ,(assoc-ref r "value")) + ) '()))) + ))))) + +(define* (get-bxd-publish-name-value-dict id #:optional used-for-mapping?) 
+ "Same as above function, but starting from data id" + (get-bxd-publish-dataid-name-value-dict (get-dataid-from-publishxrefid id) used-for-mapping?)) diff --git a/gn/data/genotype.scm b/gn/data/genotype.scm index c7cb63c..5574382 100644 --- a/gn/data/genotype.scm +++ b/gn/data/genotype.scm @@ -16,6 +16,7 @@ )) (define (geno-inds-bxd fn) + "Returns information from GN's BXD.json, note it fetches the first geno file info, now BXD.8.geno" (let [(js (call-with-input-file fn (lambda (port) (json->scm port))))] diff --git a/gn/data/strains.scm b/gn/data/strains.scm index e5f839b..07b69ff 100644 --- a/gn/data/strains.scm +++ b/gn/data/strains.scm @@ -25,7 +25,7 @@ "Return assoc list of tuples of strain id+names: ((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)... -used-for-mapping? will say whether the strains/individuals are used for mapping. Always True, FIXME +optional key used-for-mapping? will say whether the strains/individuals are used for mapping. " (call-with-db (lambda (db) diff --git a/gn/db/mysql.scm b/gn/db/mysql.scm index ccd414a..8da7b60 100644 --- a/gn/db/mysql.scm +++ b/gn/db/mysql.scm @@ -32,22 +32,26 @@ ;; (display "===> OPENING DB") ;; (newline) (let [(db (dbi-open "mysql" "webqtlout:webqtlout:db_webqtl:tcp:127.0.0.1:3306"))] - (ensure db) + (ensure db "Can't open connection") db ))) (define (call-with-db thunk) - (thunk (db-open))) + (let* [(db (db-open)) + (result (thunk db))] + (dbi-close db) + result)) -(define (ensure db) +(define (ensure db msg1) "Use DBI-style handle to report an error. On error the program will stop." (match (dbi-get_status db) ((stat . msg) (if (= stat 0) #t (begin - (display msg) + (display "SQL Connection ERROR! ") + (display (string-append msg1 " - " msg)) (newline) - (assert stat)))))) + (assert #f)))))) (define (has-result? 
db) "Return #t or #f if result is valid" diff --git a/gn/db/sources/wikidata.scm b/gn/db/sources/wikidata.scm index 7397426..954ce93 100644 --- a/gn/db/sources/wikidata.scm +++ b/gn/db/sources/wikidata.scm @@ -1,10 +1,38 @@ #! -Wikidata queries +Wikidata queries, initially lifted over from the gn3 gene-alias code (that was written in Racket). +Note you can take a SPARQL query and push it into https://query.wikidata.org/. E.g. generate a query and +copy paste into the query service: + +scheme@(guile-user) [3]> (display (wikidata-query-geneids "Shh")) +``` +SELECT DISTINCT ?wikidata_id + WHERE { + ?wikidata_id wdt:P31 wd:Q7187; + wdt:P703 ?species . + VALUES (?species) { (wd:Q15978631 ) ( wd:Q83310 ) ( wd:Q184224 ) } . + ?wikidata_id rdfs:label "Shh"@en . + } +``` + +It is possible to run queries through curl with + +``` +curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query=" + SELECT DISTINCT ?alias + WHERE { + wd:Q24420953 rdfs:label ?name ; + skos:altLabel ?alias . + FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\"). + }" +``` !# (define-module (gn db sources wikidata) + #:export (wikidata-query-geneids + wikidata-query-gene-aliases + ) ) (define ps-encoded-by "ps:P702") @@ -14,16 +42,24 @@ Wikidata queries (define wd-mouse "wd:Q83310") (define wd-rat "wd:Q184224") (define wd-gene "wd:Q7187") +(define wd-shh-rat "wd:Q24420953") -(define (wikidata_query_geneids gene_name) - "Return the wikidata identifiers pointing to genes of listed species" +(define (wikidata-query-geneids gene_name) + "SPARQL query to get the wikidata identifiers pointing to genes of listed species, e.g. 'Shh'" (string-append "SELECT DISTINCT ?wikidata_id WHERE { ?wikidata_id " wdt-instance-of " " wd-gene "; " wdt-in-taxon " ?species . VALUES (?species) { (" wd-human " ) ( " wd-mouse" ) ( " wd-rat" ) } . - ?wikidata_id rdfs:label \"" gene_name "\"@en . 
- } -" - )) + ?wikidata_id rdfs:label \"" gene_name "\"@en .}")) + +(define (wikidata-query-gene-aliases wikidata_id) + "SPARQL query to get a list of gene aliases based on a wikidata identifier, e.g. for Q24420953. This +version supports the expanded id only, so <http://www.wikidata.org/entity/Q24420953> including the <,>." + (string-append + "SELECT DISTINCT ?stripped_alias + WHERE { " wikidata_id " rdfs:label ?name ; + skos:altLabel ?alias . + BIND (STR(?alias) AS ?stripped_alias) . + FILTER(LANG(?name) = \"en\" && LANG(?alias) = \"en\").}")) diff --git a/gn/db/sparql.scm b/gn/db/sparql.scm index f03389b..fbcd2cc 100644..100755 --- a/gn/db/sparql.scm +++ b/gn/db/sparql.scm @@ -8,25 +8,29 @@ the case. !# (define-module (gn db sparql) - #:use-module (json) - #:use-module (ice-9 match) + #:use-module (gn cache memoize) + #:use-module (gn db sources wikidata) #:use-module (ice-9 format) #:use-module (ice-9 iconv) + #:use-module (ice-9 match) #:use-module (ice-9 receive) #:use-module (ice-9 string-fun) + #:use-module (json) + #:use-module (srfi srfi-1) #:use-module (web client) + #:use-module (web http) + #:use-module (web gn-uri) #:use-module (web request) + #:use-module (web response) #:use-module (web uri) - #:use-module (gn cache memoize) - #:use-module (gn db sources wikidata) - #:use-module (web gn-uri) - #:export (memo-sparql-species memo-sparql-species-meta sparql-species-meta sparql-groups-meta sparql-group-info memo-sparql-wd-species-info + memo-sparql-wd-gene-aliases + memo-sparql-wd-geneids compile-species compile-groups-meta get-rows @@ -34,9 +38,11 @@ the case. strip-lang make-table make-pairs - ) -) + sparql-http-get + sparql-by-term)) +(define virtuoso-endpoint + (or (getenv "SPARQL-ENDPOINT") "http://localhost:8890/sparql/")) (define (strip-lang s) "Strip quotes and language tag (@en) from RDF entries" @@ -55,9 +61,9 @@ the case. 
(define (gn-sparql-prefix query) (string-append " -PREFIX gn: <http://genenetwork.org/id/> -PREFIX gnt: <http://genenetwork.org/term/> -PREFIX gnc: <http://genenetwork.org/category/> +PREFIX gn: <http://rdf.genenetwork.org/v1/id/> +PREFIX gnt: <http://rdf.genenetwork.org/v1/term/> +PREFIX gnc: <http://rdf.genenetwork.org/v1/category/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " query)) @@ -73,7 +79,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> (define (sparql-tsv endpoint-url query) "Execute raw SPARQL query returning response as a UTF8 string, e.g. -(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")) +(tsv->scm (sparql-tsv (wd-sparql-endpoint-url) \"wd:Q158695\")). + +Note this procedure works for wikidata, but not for gn! " ; GET /sparql?query=SELECT%20DISTINCT%20%2A%20where%20%7B%0A%20%20wd%3AQ158695%20wdt%3AP225%20%3Fo%20.%0A%7D%20limit%205 HTTP/2 (receive (response-status response-body) @@ -93,7 +101,9 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> (unpack "bindings" (unpack "results" response))) (define (sparql-scm endpoint-url query) - "Return dual S-exp 'resultset' of varnames and results" + "Return dual S-exp 'resultset' of varnames and results. + +Note this procedure works for GN, but does not yet work for wikidata" (let ((response (json-string->scm (sparql-exec endpoint-url (gn-sparql-prefix query))))) (values (sparql-names response) (sparql-results response)))) @@ -101,46 +111,7 @@ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> (define (tsv->scm text) "Split a TSV string into a list of fields. Returns list of names header) and rows" (let ([lst (map (lambda (f) (string-split f #\tab) ) (delete "" (string-split text #\newline)))]) - (values (car lst) (cdr lst)) - )) - -#! 
-(define-values (names res) (sparql-species-meta)) -(define table (get-rows names res)) -(define recs '()) -(define h (compile-species recs table)) -(assoc "http://genenetwork.org/species_drosophila_melanogaster" h) -(assoc-ref h "http://genenetwork.org/id/Drosophila_melanogaster") -(define d (car h)) -(assoc-ref (list d) "http://genenetwork.org/species_drosophila_melanogaster") - -(scm->json #(1 (("2" . 3)))) -;; [1,{"2":3}] -(scm->json #("http://genenetwork.org/species_drosophila_melanogaster" (("http://genenetwork.org/menuName" . "Drosophila") ("http://genenetwork.org/binomialName" . "Drosophila melanogaster") ))) -;; ["http://genenetwork.org/species_drosophila_melanogaster",{"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"}] -l -;; (("http://genenetwork.org/menuName" "Drosophila") ("http://genenetwork.org/name" "Drosophila") ("http://genenetwork.org/binomialName" "Drosophila melanogaster")) -(scm->json (map (lambda (i) (cons (car i) (car (cdr i)))) l)) -;; {"http://genenetwork.org/menuName":"Drosophila","http://genenetwork.org/name":"Drosophila","http://genenetwork.org/binomialName":"Drosophila melanogaster"} - - -curl -G https://query.wikidata.org/sparql -H "Accept: application/json; charset=utf-8" --data-urlencode query="SELECT DISTINCT * where { - wd:Q158695 wdt:P225 ?o . -} limit 5" -{ - "head" : { - "vars" : [ "o" ] - }, - "results" : { - "bindings" : [ { - "o" : { - "type" : "literal", - "value" : "Arabidopsis thaliana" - } - } ] - } -} -!# + (values (car lst) (cdr lst)))) (define (sparql-wd-species-info species) "Returns wikidata entry for species, e.g.: @@ -161,9 +132,47 @@ SELECT DISTINCT ?taxon ?ncbi ?descr where { "))) +(define (flatten lst) + (cond ((null? lst) '()) + ((pair? 
lst) (append (flatten (car lst)) (flatten (cdr lst)))) + (else (list lst)))) + +(define (remove-quotes s) + (substring s 1 (- (string-length s) 1))) + (define memo-sparql-wd-species-info (memoize sparql-wd-species-info)) +(define (sparql-wd-geneids gene-name) + "Return a list of expanded wikidata ids, e.g. +(\"<http://www.wikidata.org/entity/Q14860079>\" \"<http://www.wikidata.org/entity/Q24420953>\")" + (receive (type values) + (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-geneids gene-name))) + (map (lambda (item) (car item)) values) ;; flatten list + )) + +(define memo-sparql-wd-geneids + (memoize sparql-wd-geneids)) + +(define (sparql-wd-gene-aliases geneids) + "Returns a flattened and dedpulicated list of geneids with +(sparql-wd-gene-aliases '(\"Q14860079\" \"Q24420953\")) +" + (let* ([aliases + (map (lambda (geneid) + (receive (type values) + (tsv->scm (sparql-tsv (wd-sparql-endpoint-url) (wikidata-query-gene-aliases (pk geneid)))) + (map (lambda (item) (car item)) values) ;; flatten list)) + ) + ) geneids)] + [rm-quotes-aliases (map (lambda (s) (remove-quotes s)) (flatten aliases))] + ) + (delete-duplicates rm-quotes-aliases))) + +(define memo-sparql-wd-gene-aliases + (memoize sparql-wd-gene-aliases)) + + #! gn:Mus_musculus rdf:type gnc:species . gn:Mus_musculus gnt:name "Mouse" . @@ -281,3 +290,29 @@ dump-species-metadata.ttl:gn:Axbxa gnt:belongsToSpecies gn:Mus_musculus . " gnid " ?key ?value . # FILTER ( !EXISTS{ " gnid " gnt:hasTissue ?value }) }"))) + + +(define* (sparql-http-get endpoint-url query #:optional (mime-type "text/microdata+html")) + (receive (response-status response-body) + (http-request + (format #f "~a?default-graph-uri=&query=~a&format=~a" + endpoint-url (uri-encode query) (uri-encode mime-type)) + #:method 'GET) + (values + (build-response + #:code (response-code response-status) + #:headers `((content-type . 
,(parse-header 'content-type mime-type)))) + response-body))) + +(define (sparql-by-term prefix val) + (let ((url-alist '((gn . "<http://rdf.genenetwork.org/v1/id/>") + (gnc . "<http://rdf.genenetwork.org/v1/category/>") + (gnt . "<http://rdf.genenetwork.org/v1/term/>")))) + (format #f "PREFIX ~a: ~a + +CONSTRUCT { + ~a:~a ?p ?o . +} FROM <http://rdf.genenetwork.org/v1> +WHERE { + ~a:~a ?p ?o . +}" prefix (assoc-ref url-alist prefix) prefix val prefix val))) diff --git a/gn/runner/gemma.scm b/gn/runner/gemma.scm index 9a5c0fc..c577305 100644 --- a/gn/runner/gemma.scm +++ b/gn/runner/gemma.scm @@ -10,11 +10,24 @@ #:use-module (rnrs base) #:export ( - write-pheno-file + gemma-pheno-txt invoke-gemma-wrapper-loco run-gemma )) +(define (gemma-pheno-txt family traits) + "Return a list of values for GEMMA" + (assert (string=? family "BXD")) ; only supported right now + (define bxd-inds (geno-inds-bxd "BXD.json")) + (assert (= 235 (length bxd-inds))) + (map (lambda (ind) + (let [(value (assoc-ref traits ind))] + (if value + (format #f "~a" value) + "NA\n") + )) + bxd-inds)) + (define (write-pheno-file fn traits) (define bxd-inds (geno-inds-bxd "BXD.json")) (assert (= 235 (length bxd-inds))) diff --git a/manifest.scm b/manifest.scm new file mode 100644 index 0000000..aa2b3be --- /dev/null +++ b/manifest.scm @@ -0,0 +1,31 @@ +;; please do not remove +;; ----------------------- +;; Move these dependencies to `propagated-inputs' for the `gn-guile' package in +;; guix-bioinformatics and remove this file. +;; +;; This will help avoid inconsistencies in the list of dependencies when doing +;; development and when deploying. 
+;; ----------------------- +(specifications->manifest + '("coreutils" + "guile" + "guile-dbi" + "guile-dbd-mysql" + "guile-fibers" + "guile-json" + "guile-gnutls" + "guile-readline" + "guile-redis" + "openssl" + "nss-certs" + "gemma" + "parallel" + "tar" + "xz" + "python" + "python-lmdb" + "python-cffi" + "guile-gcrypt" + "guile-hashing" + "time" + "gemma-gn2")) diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm index 2c1b4f3..8427112 100755 --- a/scripts/lmdb-publishdata-export.scm +++ b/scripts/lmdb-publishdata-export.scm @@ -154,12 +154,7 @@ dataset-trait combinations, and saves strain values to LMDB files in (match row ((("Name" . dataset-name) ("Id" . trait-id)) - (let* ((md5-hash - (md5->string (md5 (string->bytevector (format #f "~a-~a" dataset-name trait-id) - (make-transcoder (utf-8-codec)))))) - (data-dir (assq-ref settings 'output-dir)) - (md5-hash-dir (format #f "~a/~a" data-dir md5-hash)) - (data-query (format #f "SELECT + (let* ((data-query (format #f "SELECT JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data, MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash FROM @@ -181,25 +176,27 @@ WHERE PublishFreeze.confidentiality < 1 ORDER BY LENGTH(Strain.Name), Strain.Name" dataset-name trait-id))) - (match (call-with-target-database - settings - (lambda (db2) (sql-find db2 data-query))) - ((("data" . data) - ("md5hash" . dataset-hash)) - (let ((lmdb-dir (string-join data-dir "/" md5-hash "-" dataset-hash))) - (log-msg - 'INFO (format #f "Writing ~a-~a to: ~a" dataset-name trait-id lmdb-dir)) - (unless (file-exists? 
data-dir) - (mkdir data-dir)) - (lmdb-save (string-join data-dir "/index") - (string-join (list dataset-name "-" trait-id)) - (string-join (list md5-hash "-" dataset-hash))) - (vector-for-each - (lambda (_ x) - (match x - (#(strain value) - (lmdb-save lmdb-dir strain value)))) - (json-string->scm data))))))))) + (match (call-with-target-database + settings + (lambda (db2) (sql-find db2 data-query))) + ((("data" . data) + ("md5hash" . md5-hash)) + (let* ((trait-name (format #f "~a~a" dataset-name trait-id)) + (base-dir (assq-ref settings 'output-dir)) + (out (format #f "~a-~a" trait-name + (substring md5-hash 0 12))) + (out-dir (format #f "~a/~a" base-dir out))) + (log-msg + 'INFO (format #f "Writing ~a to: ~a" trait-name out-dir)) + (unless (file-exists? out-dir) + (mkdir out-dir)) + (lmdb-save (format #f "~a/index" base-dir) trait-name out) + (vector-for-each + (lambda (_ x) + (match x + (#(strain value) + (lmdb-save out-dir strain value)))) + (json-string->scm data))))))))) db "SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm index 9f900d1..102a6fa 100755 --- a/scripts/precompute/list-traits-to-compute.scm +++ b/scripts/precompute/list-traits-to-compute.scm @@ -15,6 +15,10 @@ You may want to forward a mysql port if there is no DB locally ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org +ignore IPv6 message: + + bind [::1]:3306: Cannot assign requested address + test connection with mysql client: mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;" diff --git a/web/README.md b/web/README.md new file mode 100644 index 0000000..fc7e158 --- /dev/null +++ b/web/README.md @@ -0,0 +1 @@ +Run the webserver from one directory up. 
diff --git a/web/config.scm b/web/config.scm new file mode 100644 index 0000000..9b3b9c2 --- /dev/null +++ b/web/config.scm @@ -0,0 +1,86 @@ +;;; Copyright © 2026 Frederick M Muriithi <fredmanglis@gmail.com> + +(define-module (web config) + #:use-module (srfi srfi-9 gnu) + + #:use-module (config) + #:use-module (config api) + #:use-module (config parser sexp) + + #:export (<gn-guile-config> + gn-guile-config-port + gn-guile-config-gn-docs-remote-url + gn-guile-config-gn-docs-local-checkout + gn-guile-config-gn-docs-working-branch + + parse-cli-options + cli-options->gn-guile-config)) + +(define-immutable-record-type <gn-guile-config> + (gn-guile-config port gn-docs-remote-url gn-docs-local-checkout + gn-docs-working-branch) + gn-guile-config? + (port gn-guile-config-port) + (gn-docs-remote-url gn-guile-config-gn-docs-remote-url) + (gn-docs-local-checkout gn-guile-config-gn-docs-local-checkout) + (gn-docs-working-branch gn-guile-config-gn-docs-working-branch)) + + +(define string->exact (compose inexact->exact string->number)) + + +(define (user-port? parsed) + (and (positive? parsed) (>= parsed 1024) (<= parsed 49151))) + + +(define (parse-cli-options cmd-line) + "Read configuration values from files and command-line options and convert them to appropriate data types." + (let ((config + (configuration (name 'gn-guile) + (synopsis "gn-guile web service: provide services + to main Genenetwork service.") + (description "gn-guile web service is a small +service, written in GNU Guile, that provides some functionality to the main +Genenetwork service in the background. This is not meant for direct user +interaction.") + (keywords + (list (switch (name 'write) + (default #f) + (test boolean?) 
+ (character #f) + (synopsis "Write the settings to configuration file(s)") + (description "When this option is present, the configuration values, provided as command line option, will be written to the file path(s) that has/have been specified.")) + (setting (name 'port) + (default 8091) + (test user-port?) + (handler string->exact) + (character #\p) + (synopsis "Port number that the service will listen on")) + (setting (name 'gn-docs-remote-url) + (default "git@git.genenetwork.org:/home/git/public/gn-docs") + (test string?) + (character #\r) + (synopsis "Remote URI for gn-docs repository")) + (setting (name 'gn-docs-local-checkout) + (default (string-append (dirname (getcwd)) "/gn-guile-files/gn-docs")) + (test file-exists?) + (character #\c) + (synopsis "Path where gn-docs is checked out")) + (setting (name 'gn-docs-working-branch) + (default "non-existent") + (test string?) + (character #\b) + (synopsis "Branch to push/pull from")))) + (parser sexp-parser) + (directory (list (in-home ".config/gn-guile/") + (in-cwd ".config/")))))) + (getopt-config-auto cmd-line config))) + + +(define (cli-options->gn-guile-config cli-options) + "Extract specific values from guile-config's <codex> object into gn-guile's custom configuration object." 
+ (gn-guile-config + (option-ref cli-options 'port) + (option-ref cli-options 'gn-docs-remote-url) + (option-ref cli-options 'gn-docs-local-checkout) + (option-ref cli-options 'gn-docs-working-branch))) diff --git a/web/view/brand/aging.scm b/web/view/brand/aging.scm index 19db4d7..f1c48c9 100644 --- a/web/view/brand/aging.scm +++ b/web/view/brand/aging.scm @@ -45,15 +45,14 @@ )) ,@head) (body - ;; (header (p "TEST")) (main (@ (class "container")) (h1 ,title) (article - (img (@ (src "/static/images/ole-farmer.jpg") (alt "ol farmer by hohumhobo is licensed under CC BY 2.0") (width "400") (align "right"))) -,info) + ;; (img (@ (src "/static/images/ole-farmer.jpg") (alt "ol farmer by hohumhobo is licensed under CC BY 2.0") (width "400") (align "right"))) + ,info) (footer (hr) - (p "Copyright © 2005-2023 " + (p "Copyright © 2005-2025 " (a (@ (href "https://genenetwork.org/")) "GeneNetwork Webservices") " | GeneNetwork and this website runs fully on free software. See status and download the " (a (@ (href "https://ci.genenetwork.org/")) "source code") "."))) diff --git a/web/view/brand/msk.scm b/web/view/brand/msk.scm index 69c1253..4cbcec4 100644 --- a/web/view/brand/msk.scm +++ b/web/view/brand/msk.scm @@ -51,7 +51,7 @@ (p ,info) (footer (hr) - (p "Copyright © 2005-2023 " + (p "Copyright © 2005-2025 " (a (@ (href "https://genenetwork.org/")) "GeneNetwork Webservices") " | GeneNetwork and this website runs fully on free software. 
See status and download the " (a (@ (href "https://ci.genenetwork.org/")) "source code") "."))) diff --git a/web/view/doc.scm b/web/view/doc.scm index 71112eb..cec4400 100644 --- a/web/view/doc.scm +++ b/web/view/doc.scm @@ -44,7 +44,7 @@ ,(scm->json-string body #:pretty #t)) ; (p ,(parse-html "<b>some raw really <i>text</i> here</b>")) (footer - (p "Copyright © 2005—2023 by the GeneNetwork community with a touch of " (span (@ (class "lambda")) "λ") "!") + (p "Copyright © 2005—2025 by the GeneNetwork community with a touch of " (span (@ (class "lambda")) "λ") "!") (p "This is free software. Download the " (a (@ (href "https://ci.genenetwork.org/")) "source code") ".")) diff --git a/web/view/view.scm b/web/view/view.scm index 4300863..a7592ad 100644 --- a/web/view/view.scm +++ b/web/view/view.scm @@ -15,7 +15,7 @@ #:export (view-brand)) -(define (view-aging) +(define (view-aging-home) (aging-html #:info `( ,(markdown-github->sxml "genenetwork/gn-docs/general/brand/aging/home.md") @@ -45,7 +45,15 @@ data to benefit from the power of integrated datasets, please contact:") (define* (view-brand path) (match path - ("aging" (view-aging)) + ("aging/um-het3" (aging-html #:info + `(,(markdown-github->sxml "genenetwork/gn-docs/general/brand/aging/home.md")))) + ("aging/UM-HET3" (aging-html #:info + `(,(markdown-github->sxml "genenetwork/gn-docs/general/brand/aging/home.md")))) + ("aging/UMHET-3" (aging-html #:info + `(,(markdown-github->sxml "genenetwork/gn-docs/general/brand/aging/home.md")))) + ("aging/umhet-3" (aging-html #:info + `(,(markdown-github->sxml "genenetwork/gn-docs/general/brand/aging/home.md")))) + ("aging" (view-aging-home)) ("gnqa" (default-gn-template "genenetwork/gn-docs/general/brand/gnqa/gnqa.md" "GeneNetwork Question and Answer System")) diff --git a/web/webserver.scm b/web/webserver.scm index d2a8c8d..8c909a5 100644 --- a/web/webserver.scm +++ b/web/webserver.scm @@ -1,37 +1,42 @@ -(use-modules (json) - (ice-9 match) - (ice-9 format) - (ice-9 iconv) - 
(ice-9 receive) - (ice-9 string-fun) - (ice-9 exceptions) - (srfi srfi-1) - (srfi srfi-11) - (srfi srfi-19) - (srfi srfi-26) - (rnrs io ports) - (rnrs bytevectors) - (web http) - (web client) - (web request) - (web response) - (web uri) - (fibers web server) - (gn cache memoize) - (web gn-uri) - (gn db sparql) - (gn data species) - (gn data group) - (web sxml) - (web view view) - (web view doc) - (web view markdown)) - -(define +current-repo-path+ - (getenv "CURRENT_REPO_PATH")) - -(define +cgit-repo-path+ - (getenv "CGIT_REPO_PATH")) +(define-module (web webserver) + #:use-module (json) + #:use-module (ice-9 match) + #:use-module (ice-9 format) + #:use-module (ice-9 iconv) + #:use-module (ice-9 receive) + #:use-module (ice-9 string-fun) + #:use-module (ice-9 exceptions) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-11) + #:use-module (srfi srfi-13) + #:use-module (srfi srfi-19) + #:use-module (srfi srfi-26) + #:use-module (rnrs io ports) + #:use-module (rnrs bytevectors) + #:use-module (web http) + #:use-module (web client) + #:use-module (web request) + #:use-module (web response) + #:use-module (web uri) + #:use-module (web server) + #:use-module (gn cache memoize) + #:use-module (web gn-uri) + #:use-module (gn db sparql) + #:use-module (gn data dataset) + #:use-module (gn data species) + #:use-module (gn data group) + #:use-module (gn runner gemma) + #:use-module (web sxml) + #:use-module (web config) + #:use-module (web view view) + #:use-module (web view doc) + #:use-module (web view markdown) + #:export (start-web-server)) + +(define (get-extension filename) + (let ((dot-pos (string-rindex filename #\.))) + (if dot-pos + (substring filename dot-pos) ""))) (define +info+ `(("name" . "GeneNetwork REST API") ("version" . ,get-version) @@ -56,6 +61,20 @@ otherwise search for set/group data" (if taxoninfo taxoninfo (cdr (get-group-data id))))) +(define (get-bxd-publish) + "Return a list of published datasets by their record ID. 
We add the dataset ID and phenotype ID for quick reference" + (list->vector (get-bxd-publish-list))) + +(define* (get-bxd-publish-dataid-values dataid #:optional used-for-mapping?) + (get-bxd-publish-dataid-name-value-dict dataid used-for-mapping?)) + +(define* (get-bxd-publish-values dataid #:optional used-for-mapping?) + (get-bxd-publish-name-value-dict dataid used-for-mapping?)) + +(define (get-gene-aliases genename) + "Return a vector of aliases for genename." + (list->vector (memo-sparql-wd-gene-aliases (memo-sparql-wd-geneids genename)))) + (define (not-found2 request) (values (build-response #:code 404) (string-append "Resource X not found: " @@ -79,7 +98,7 @@ otherwise search for set/group data" ("html" text/html))) (define (file-extension file-name) - (last (string-split file-name #\.))) + (last (string-split file-name #\.))) ;; FIXME: does not handle files with multiple dots (define* (render-static-image file-name #:key (extra-headers '())) @@ -123,6 +142,11 @@ otherwise search for set/group data" (lambda (port) (sxml->html (view-brand path) port)))) +(define (render-string str) + (list '((content-type application/txt)) + (lambda (port) + (put-string port str)))) + (define (render-json json) (list '((content-type application/json)) (lambda (port) @@ -154,7 +178,7 @@ otherwise search for set/group data" (cons (string->symbol (uri-decode key)) (uri-decode value)))) -(define (edit-file-handler repo request) +(define (edit-file-handler local-repo working-branch request) (catch 'file-error (lambda () (let* ((query (uri-query (request-uri request))) @@ -165,8 +189,12 @@ otherwise search for set/group data" (query-path (assoc-ref params 'file_path))) (if query-path - (build-json-response 200 - (fetch-file repo query-path)) + (begin + (git-invoke local-repo "fetch" "origin" working-branch) + (git-invoke local-repo "reset" "--hard" + (string-append "origin/" working-branch)) + (build-json-response 200 + (fetch-file local-repo query-path))) (throw 'file-error 
"Please provide a valid file path in the query")))) (lambda (key . args) @@ -175,6 +203,24 @@ otherwise search for set/group data" `(("error" . ,key) ("msg" . ,msg))))))) +(define (render-sparql request prefix val) + (let* ((mime (negotiate-mime request)) + (resp-mime (if (or (string-contains (symbol->string mime) "html") + (string-contains (symbol->string mime) "microdata")) + 'text/html + mime))) + (receive (sparql-header sparql-resp) + (sparql-http-get + (or (getenv "SPARQL-ENDPOINT") "http://localhost:8890/sparql/") + (sparql-by-term prefix val) + (symbol->string mime)) + (list `((content-type ,resp-mime)) + (lambda (port) + (let ((resp (if (string? sparql-resp) + sparql-resp + (utf8->string sparql-resp)))) + (put-string port resp))))))) + (define (invalid-data? data target) (if (string? (assoc-ref data target)) (if (string-null? (assoc-ref data target)) @@ -185,7 +231,7 @@ otherwise search for set/group data" (format #f "The Key *** ~a *** is missing in your Json Data" target)))) -(define (commit-file-handler repo request body) +(define (commit-file-handler repo-checkout remote-url request body) (catch 'system-error (lambda () (let* ((post-data (decode-request-json body)) @@ -202,14 +248,14 @@ otherwise search for set/group data" (build-json-response 200 ((lambda () (let ((message - (commit-file +current-repo-path+ + (commit-file repo-checkout file-name content commit-message username email prev-commit))) - (git-invoke +current-repo-path+ "push" +cgit-repo-path+) + (git-invoke repo-checkout "push" remote-url) message)))))) (lambda (key . args) (let ((msg (car args))) @@ -217,7 +263,15 @@ otherwise search for set/group data" `(("error" . ,key) ("msg" . ,msg))))))) -(define (controller request body) +(define (negotiate-mime request) + (let* ((headers (request-headers request)) + (accept (caar (assoc-ref headers 'accept)))) + (if (or (eq? (string->symbol "*/*") accept) + (eq? 
(string->symbol "text/html") accept)) + 'application/x-nice-microdata + accept))) + +(define (controller request body config) (match-lambda (('GET) (render-json +info+)) @@ -231,6 +285,24 @@ otherwise search for set/group data" (render-static-image (string-append (dirname (current-filename)) "/static/images/" fn))) (('GET "home" path) (render-brand path)) ; branding route for /home/aging, /home/msk etc + (('GET "home" "aging" path) + (render-brand (string-append "aging/" path))) ; branding route subs of /home/aging/... + (('GET "dataset" "bxd-publish" "list") + (render-json (get-bxd-publish))) + (('GET "dataset" "bxd-publish" "dataid" "values" page) + (match (get-extension page) + (".json" + (render-json (get-bxd-publish-dataid-values (basename page ".json")))) + (else (display "ERROR: unknown file extension")))) + (('GET "dataset" "bxd-publish" "values" page) + (match (get-extension page) + (".json" + (render-json (get-bxd-publish-values (basename page ".json")))) + ;; (".tsv" (render-string "TEST1\nTEST2")) + ;; (".gemma" (render-string (string-join (gemma-pheno-txt "BXD" (get-bxd-publish-values (basename page ".gemma"))) ""))) + (else (display "ERROR: unknown file extension")))) + (('GET "dataset" "bxd-publish" "mapping" "values" (string-append dataid ".json")) + (render-json (get-bxd-publish-values dataid #t))) (('GET "doc" "species.html") (render-doc "doc" "species.html" (get-species-meta))) @@ -247,6 +319,8 @@ otherwise search for set/group data" (('GET "doc" path ... 
page) ;; serve documents from /doc/ (render-doc path page)) + (('GET "gene" "aliases" genename) + (render-json (get-gene-aliases genename))) (('GET "species.json") (render-json (get-species-data))) (('GET "species.meta.json") @@ -254,9 +328,13 @@ otherwise search for set/group data" (('GET "species") (render-json (get-species-meta))) (('GET "edit") - (edit-file-handler +current-repo-path+ request)) + (edit-file-handler (gn-guile-config-gn-docs-local-checkout config) + (gn-guile-config-gn-docs-working-branch config) + request)) (('POST "commit") - (commit-file-handler +current-repo-path+ request body)) + (commit-file-handler (gn-guile-config-gn-docs-local-checkout config) + (gn-guile-config-gn-docs-remote-url config) + request body)) (('GET id) (let ((names (get-species-shortnames (get-expanded-species)))) (match (string->list id) @@ -280,21 +358,31 @@ otherwise search for set/group data" #\n) (render-json (get-id-data (list->string name)))) (rest (render-json "NOP"))))) + ;; RDF End-points + (('GET "v1" "id" id) + (render-sparql request 'gn id)) + + (('GET "v1" "category" category) + (render-sparql request 'gnc category)) + + (('GET "v1" "term" term) + (render-sparql request 'gnt term)) + (_ (not-found (request-uri request))))) (define (request-path-components request) (split-and-decode-uri-path (uri-path (request-uri request)))) -(define (handler request body) +(define (handler request body config) (format #t "~a ~a\n" (request-method request) (uri-path (request-uri request))) (apply values - ((controller request body) + ((controller request body config) (cons (request-method request) (request-path-components request))))) -(define (start-web-server address port) +(define (start-web-server address port config) (format (current-error-port) "GN REST API web server listening on http://~a:~a/~%" address port) ;; Wrap handler in another function to support live hacking via the @@ -302,14 +390,7 @@ otherwise search for set/group data" ;; REPL, the web server will still 
be using the old handler. The ;; only way to update the handler reference held by the web server ;; would be to restart the web server. - (run-server (cut handler <> <>) - #:addr (inet-pton AF_INET address) - #:port port)) - -(define (main args) - (write (string-append "Starting Guile REST API " get-version " server!")) - (write args) - (newline) - (let ((listen (inexact->exact (string->number (car (cdr args)))))) - (display `("listening on" ,listen)) - (start-web-server "127.0.0.1" listen))) + (run-server (cut handler <> <> config) + 'http + (list #:addr (inet-pton AF_INET address) + #:port port))) |
