diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | .guix-channel | 8 | ||||
-rw-r--r-- | .guix/modules/gn-guile.scm | 34 | ||||
-rwxr-xr-x | gn-guile.sh | 3 | ||||
l---------[-rw-r--r--] | guix.scm | 80 | ||||
-rwxr-xr-x | scripts/lmdb-publishdata-export.scm | 229 | ||||
-rw-r--r-- | web/css/gn-template-style.css | 39 | ||||
-rw-r--r-- | web/templates/genenetwork.scm | 18 | ||||
-rw-r--r-- | web/view/view.scm | 4 | ||||
-rw-r--r-- | web/webserver.scm | 18 |
10 files changed, 346 insertions, 88 deletions
@@ -2,3 +2,4 @@ BXD.* pheno.txt GWA.json K.json +.aider* diff --git a/.guix-channel b/.guix-channel new file mode 100644 index 0000000..6e0982a --- /dev/null +++ b/.guix-channel @@ -0,0 +1,8 @@ +(channel + (version 0) + (directory ".guix/modules") + (dependencies + (channel + (name guix-bioinformatics) + (url "https://git.genenetwork.org/guix-bioinformatics") + (branch "master"))))
\ No newline at end of file diff --git a/.guix/modules/gn-guile.scm b/.guix/modules/gn-guile.scm new file mode 100644 index 0000000..03f2b14 --- /dev/null +++ b/.guix/modules/gn-guile.scm @@ -0,0 +1,34 @@ +;; To use this file to build HEAD of gn-guile: +;; +;; guix build -f guix.scm +;; +;; To get a development container +;; +;; guix shell -C -D -f guix.scm +;; + +(define-module (gn-guile) + #:use-module ((gn packages guile) #:select (gn-guile) #:prefix gn:) + #:use-module (guix gexp) + #:use-module (guix utils) + #:use-module (guix packages) + #:use-module (guix git-download)) + +(define %source-dir (dirname (dirname (current-source-directory)))) + +(define vcs-file? + (or (git-predicate %source-dir) + (const #t))) + +(define-public gn-guile + (package + (inherit gn:gn-guile) + (source + (local-file "../.." + "gn-guile-checkout" + #:recursive? #t + #:select? vcs-file?)))) + +;; Add definition for tests should you need it, here. + +gn-guile diff --git a/gn-guile.sh b/gn-guile.sh new file mode 100755 index 0000000..9341b26 --- /dev/null +++ b/gn-guile.sh @@ -0,0 +1,3 @@ +#! @SHELL@ + +guile -e main web/webserver.scm "$@" @@ -1,79 +1 @@ -;; To use this file to build HEAD of gn-guile: -;; -;; guix build -f guix.scm -;; -;; To get a development container -;; -;; guix shell -C -D -f guix.scm -;; - -(define-module (gn-guile-package) - #:use-module ((guix licenses) #:prefix license:) - #:use-module (guix gexp) - #:use-module (guix packages) - #:use-module (guix git-download) - #:use-module (guix build-system guile) - #:use-module (guix utils) - #:use-module (gnu packages build-tools) - #:use-module (gnu packages package-management) - #:use-module ((gnu packages bash) #:select (bash-minimal)) - #:use-module ((gnu packages bioinformatics) #:select (gemma)) - #:use-module ((gnu packages certs) #:select (nss-certs)) - #:use-module ((gnu packages guile) #:select (guile-json-4 guile-3.0 guile-readline)) - #:use-module ((gnu packages guile-xyz) #:select (guile-dbi guile-dbd-mysql guile-fibers guile-redis guile-hashing guile-commonmark)) - #:use-module ((gnu packages parallel) #:select (parallel)) - #:use-module ((gnu packages perl) #:select (perl)) - #:use-module ((gnu packages tls) #:select (guile-gnutls openssl)) - #:use-module (srfi srfi-1) - #:use-module (ice-9 popen) - #:use-module (ice-9 rdelim)) - -(define %source-dir (dirname (current-filename))) - -(define %git-commit - (read-string (open-pipe "git show HEAD | head -1 | cut -d ' ' -f 2" OPEN_READ))) - -(define-public gn-guile-git - (package - (name "gn-guile-git") - (version (git-version "4.0.0-" "HEAD" %git-commit)) - (source (local-file %source-dir - #:recursive? #t - #:select? (or (git-predicate (dirname (current-source-directory))) - (const #t)))) - (build-system guile-build-system) - (propagated-inputs - (list guile-3.0 guile-dbi guile-dbd-mysql guile-fibers guile-gnutls guile-readline - guile-commonmark guile-redis openssl nss-certs gemma parallel guile-hashing - guile-json-4)) - (arguments - (list - #:phases - #~(modify-phases %standard-phases - ;; When using the guile-build-system, guild doesn't - ;; correctly set the GUILE_LOAD_PATH for the various guile - ;; packages in the build phase leading to build failures. - (add-before 'build 'augment-GUILE_LOAD_PATH - (lambda* (#:key outputs #:allow-other-keys) - (let* ((guile-version (target-guile-effective-version)) - (guile-dbi-scm (string-append #$guile-dbi "/share/guile/site/2.2"))) - ;; guild uses this: "\\.(scm|sls)$" regexp to try and - ;; compile all scm files in this repository. We don't - ;; need to compile guix.scm - (delete-file "guix.scm") - (setenv "GUILE_LOAD_PATH" - (string-append - guile-dbi-scm ":" - (format - #f "~{~a:~}" - (map (lambda (package) - (format #f "~a/share/guile/site/~a" - package guile-version)) - (list #$guile-fibers #$guile-commonmark #$guile-json-4 #$guile-hashing))) - #$(getenv "GUILE_LOAD_PATH"))))))))) - (home-page "https://git.genenetwork.com/gn-guile") - (synopsis "Next generation GN code in guile") - (description "Use of guile.") - (license license:gpl3))) - -gn-guile-git +.guix/modules/gn-guile.scm
\ No newline at end of file diff --git a/scripts/lmdb-publishdata-export.scm b/scripts/lmdb-publishdata-export.scm new file mode 100755 index 0000000..8427112 --- /dev/null +++ b/scripts/lmdb-publishdata-export.scm @@ -0,0 +1,229 @@ +#! /usr/bin/env guile +!# +;; To run this script run: +;; +;; $ guix shell guile guile-hashing guile3-dbi guile3-dbd-mysql \ +;; guile-lmdb -- guile lmdb-publishdata-export.scm +;; conn.scm +;; +;; Example conn.scm: +;; ((sql-username . "webqtlout") +;; (sql-password . "xxxx") +;; (sql-database . "db_webqtl") +;; (sql-host . "localhost") +;; (sql-port . 3306) +;; (output-dir . "/tmp/data") +;; (log-file . "/tmp/export.log")) + +(use-modules (dbi dbi) + (rnrs bytevectors) + (system foreign) + (ice-9 match) + (srfi srfi-1) + (srfi srfi-26) + (srfi srfi-43) + (rnrs io ports) + (hashing md5) + ((lmdb lmdb) #:prefix mdb:) + (ice-9 format) + (ice-9 exceptions) + (json) + (logging logger) + (logging rotating-log) + (logging port-log) + (oop goops)) + + +;; Set up logging +(define* (setup-logging #:key (log-file "lmdb-dump-log")) + "Initialize the logging system with rotating file logs and error port output. + Creates a new logger, adds rotating and error port handlers, + sets it as the default logger, and opens the log for writing." + (let ((lgr (make <logger>)) + (rotating (make <rotating-log> + #:num-files 3 + #:size-limit 1024 + #:file-name log-file)) + (err (make <port-log> #:port (current-error-port)))) + ;; add the handlers to our logger + (add-handler! lgr rotating) + (add-handler! lgr err) + ;; make this the application's default logger + (set-default-logger! lgr) + (open-log! lgr))) + +(define (shutdown-logging) + "Properly shutdown the logging system. + Flushes any pending log messages, closes the log handlers, + and removes the default logger reference." + (flush-log) ;; since no args, it uses the default + (close-log!) ;; since no args, it uses the default + (set-default-logger! #f)) + +(define (call-with-database backend connection-string proc) + "Execute PROC with an open database connection. BACKEND is the +database type (e.g. \"mysql\"). CONNECTION-STRING is the database +connection string." + (let ((db #f)) + (dynamic-wind + (lambda () + (set! db (dbi-open backend connection-string))) + (cut proc db) + (lambda () + (when db + (dbi-close db)))))) + +(define (call-with-target-database connection-settings proc) + "Connect to the target database using CONNECTION-SETTINGS and execute +PROC." + (call-with-database "mysql" (string-join + (list (assq-ref connection-settings 'sql-username) + (assq-ref connection-settings 'sql-password) + (assq-ref connection-settings 'sql-database) + "tcp" + (assq-ref connection-settings 'sql-host) + (number->string + (assq-ref connection-settings 'sql-port))) + ":") + proc)) + +(define* (lmdb-save path key value) + "Save a NUM with KEY to PATH." + (mdb:with-env-and-txn + (path) (env txn) + (let ((dbi (mdb:dbi-open txn #f 0))) + (mdb:put txn dbi key + (if (number? value) + (number->string value) + value))))) + +(define (sql-exec db statement) + "Execute an SQL STATEMENT on database connection DB. Throws an error +if the statement execution fails." + (dbi-query db statement) + (database-check-status db)) + +(define (sql-fold proc init db statement) + "Fold over SQL query results." + (sql-exec db statement) + (let loop ((result init)) + (let ((row (dbi-get_row db))) + (if row + (loop (proc row result)) + result)))) + +(define (sql-for-each proc db statement) + "Apply PROC to each row returned by STATEMENT." + (sql-fold (lambda (row _) + (proc row)) + #f db statement)) + +(define (sql-map proc db statement) + "Map PROC over rows returned by STATEMENT." + (sql-fold (lambda (row result) + (cons (proc row) result)) + (list) db statement)) + +(define (sql-find db statement) + (sql-exec db statement) + (dbi-get_row db)) + +(define (database-check-status db) + "Check the status of the last database operation on DB. Throws an +error if the status code is non-zero." + (match (dbi-get_status db) + ((code . str) + (unless (zero? code) + (error str))))) + +(define* (save-dataset-values settings) + "Main function to extract and save dataset values. Queries the +database for datasets and their values, computes MD5 hashes for +dataset-trait combinations, and saves strain values to LMDB files in +/export5/lmdb-data-hashes/." + (dynamic-wind + (lambda () + (setup-logging #:log-file (assq-ref settings 'log-file)) + (log-msg 'INFO "Starting dataset value extraction")) + (lambda () + (call-with-target-database + settings + (lambda (db) + (sql-for-each + (lambda (row) + (match row + ((("Name" . dataset-name) + ("Id" . trait-id)) + (let* ((data-query (format #f "SELECT +JSON_ARRAYAGG(JSON_ARRAY(Strain.Name, PublishData.Value)) AS data, + MD5(JSON_ARRAY(Strain.Name, PublishData.Value)) as md5hash +FROM + PublishData + INNER JOIN Strain ON PublishData.StrainId = Strain.Id + INNER JOIN PublishXRef ON PublishData.Id = PublishXRef.DataId + INNER JOIN PublishFreeze ON PublishXRef.InbredSetId = PublishFreeze.InbredSetId +LEFT JOIN PublishSE ON + PublishSE.DataId = PublishData.Id AND + PublishSE.StrainId = PublishData.StrainId +LEFT JOIN NStrain ON + NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId +WHERE + PublishFreeze.Name = \"~a\" AND + PublishXRef.Id = ~a AND + PublishFreeze.public > 0 AND + PublishData.value IS NOT NULL AND + PublishFreeze.confidentiality < 1 +ORDER BY + LENGTH(Strain.Name), Strain.Name" dataset-name trait-id))) + (match (call-with-target-database + settings + (lambda (db2) (sql-find db2 data-query))) + ((("data" . data) + ("md5hash" . md5-hash)) + (let* ((trait-name (format #f "~a~a" dataset-name trait-id)) + (base-dir (assq-ref settings 'output-dir)) + (out (format #f "~a-~a" trait-name + (substring md5-hash 0 12))) + (out-dir (format #f "~a/~a" base-dir out))) + (log-msg + 'INFO (format #f "Writing ~a to: ~a" trait-name out-dir)) + (unless (file-exists? out-dir) + (mkdir out-dir)) + (lmdb-save (format #f "~a/index" base-dir) trait-name out) + (vector-for-each + (lambda (_ x) + (match x + (#(strain value) + (lmdb-save out-dir strain value)))) + (json-string->scm data))))))))) + db + "SELECT DISTINCT PublishFreeze.Name, PublishXRef.Id FROM +PublishData INNER JOIN Strain ON PublishData.StrainId = Strain.Id +INNER JOIN PublishXRef ON PublishData.Id = PublishXRef.DataId +INNER JOIN PublishFreeze ON PublishXRef.InbredSetId = PublishFreeze.InbredSetId +LEFT JOIN PublishSE ON + PublishSE.DataId = PublishData.Id AND + PublishSE.StrainId = PublishData.StrainId +LEFT JOIN NStrain ON + NStrain.DataId = PublishData.Id AND + NStrain.StrainId = PublishData.StrainId +WHERE + PublishFreeze.public > 0 AND + PublishFreeze.confidentiality < 1 +ORDER BY + PublishFreeze.Id, PublishXRef.Id")))) + (lambda () + (shutdown-logging)))) + +(define main + (match-lambda* + ((_ connection-settings-file) + (save-dataset-values (call-with-input-file connection-settings-file + read))) + ((arg0 _ ...) + (display (format "Usage: ~a CONNECTION-SETTINGS-FILE~%" arg0) + (current-error-port)) + (exit #f)))) + +(apply main (command-line)) diff --git a/web/css/gn-template-style.css b/web/css/gn-template-style.css new file mode 100644 index 0000000..38893c6 --- /dev/null +++ b/web/css/gn-template-style.css @@ -0,0 +1,39 @@ +* { + box-sizing: border-box; +} + +body { + margin: 0.7em; + display: grid; + grid-template-columns: 9fr 1fr; + grid-gap: 20px; + + font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; + font-style: normal; + font-size: 20px; +} + +#header { + grid-column-start: 1; + grid-column-end: 3; + + background-color: #336699; + color: #FFFFFF; + border-radius: 3px; + min-height: 30px; +} + +#header #header-text { + padding-left: 0.2em; +} + +#main { + grid-column-start: 1; + grid-column-end: 2; + + max-width: 650px; +} + +#main img { + max-width: 650px; +} diff --git a/web/templates/genenetwork.scm b/web/templates/genenetwork.scm new file mode 100644 index 0000000..64e9852 --- /dev/null +++ b/web/templates/genenetwork.scm @@ -0,0 +1,18 @@ +(define-module (web templates genenetwork) + #:use-module (web view markdown) + + #:export (default-gn-template)) + +(define* (default-gn-template path #:optional (title "Default Page Template")) + "Render `PATH' with a default template and styling that fits in with + GeneNetwork's look and feel." + `(html + (head + (meta (@ (charset "UTF-8"))) + (meta (@ (name "viewport") (content "width=device-width, initial-scale=1.0"))) + (title ,title) + (link (@ (rel "stylesheet") (type "text/css") + (href "/css/gn-template-style.css")))) + (body + (header (@ (id "header")) (span (@ (id "header-text")) "GeneNetwork")) + (main (@ (id "main")) ,(markdown-github->sxml path))))) diff --git a/web/view/view.scm b/web/view/view.scm index 4584cf8..4300863 100644 --- a/web/view/view.scm +++ b/web/view/view.scm @@ -10,6 +10,7 @@ #:use-module (web view markdown) #:use-module (web view brand msk) #:use-module (web view brand aging) + #:use-module (web templates genenetwork) #:export (view-brand)) @@ -45,6 +46,9 @@ data to benefit from the power of integrated datasets, please contact:") (define* (view-brand path) (match path ("aging" (view-aging)) + ("gnqa" (default-gn-template + "genenetwork/gn-docs/general/brand/gnqa/gnqa.md" + "GeneNetwork Question and Answer System")) ( _ (msk-html #:info `( ,(markdown-github->sxml "genenetwork/gn-docs/general/brand/msk/home.md") diff --git a/web/webserver.scm b/web/webserver.scm index 6f40614..d2a8c8d 100644 --- a/web/webserver.scm +++ b/web/webserver.scm @@ -87,9 +87,9 @@ otherwise search for set/group data" (modified (and stat (make-time time-utc 0 (stat:mtime stat))))) - (list `((content-type ,(assoc-ref file-mime-types - (file-extension file-name))) - (last-modified ,(time-utc->date modified))) + (list `((content-type . ,(assoc-ref file-mime-types + (file-extension file-name))) + (last-modified . ,(time-utc->date modified))) (call-with-input-file file-name get-bytevector-all)))) @@ -100,9 +100,9 @@ otherwise search for set/group data" (modified (and stat (make-time time-utc 0 (stat:mtime stat))))) - (list `((content-type ,(assoc-ref file-mime-types - (file-extension path))) - (last-modified ,(time-utc->date modified))) + (list `((content-type . ,(assoc-ref file-mime-types + (file-extension path))) + (last-modified . ,(time-utc->date modified))) (call-with-input-file path get-bytevector-all)))) @@ -224,11 +224,11 @@ otherwise search for set/group data" (('GET "version") (render-json get-version)) (('GET "css" fn) - (render-static-file (string-append "css/" fn))) + (render-static-file (string-append (dirname (current-filename)) "/css/" fn))) (('GET "map" fn) - (render-static-file (string-append "css/" fn))) + (render-static-file (string-append (dirname (current-filename)) "/css/" fn))) (('GET "static" "images" fn) - (render-static-image (string-append "static/images/" fn))) + (render-static-image (string-append (dirname (current-filename)) "/static/images/" fn))) (('GET "home" path) (render-brand path)) ; branding route for /home/aging, /home/msk etc (('GET "doc" "species.html") |