;;; genenetwork-machines --- Guix configuration for genenetwork machines
;;; Copyright © 2024 Arun Isaac
;;; Copyright © 2024 Frederick M. Muriithi
;;; Copyright © 2024 Pjotr Prins
;;;
;;; This file is part of genenetwork-machines.
;;;
;;; genenetwork-machines is free software: you can redistribute it
;;; and/or modify it under the terms of the GNU General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; genenetwork-machines is distributed in the hope that it will be
;;; useful, but WITHOUT ANY WARRANTY; without even the implied
;;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
;;; See the GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with genenetwork-machines.  If not, see
;;; <https://www.gnu.org/licenses/>.

(define-module (genenetwork services genenetwork)
  #:use-module ((gn packages genenetwork)
                #:select (genenetwork2 genenetwork3 gn-auth gn-uploader))
  #:use-module ((gnu packages web) #:select (nginx))
  #:use-module ((gnu packages admin) #:select (shadow shepherd))
  #:use-module ((gnu packages python) #:select (python))
  #:use-module (gnu services)
  #:use-module (gnu services web)
  #:use-module (gnu services mcron)
  #:use-module (gnu system file-systems)
  #:use-module (gnu system shadow)
  #:use-module (guix build python-build-system)
  #:use-module (guix diagnostics)
  #:use-module (guix gexp)
  #:use-module (guix i18n)
  #:use-module (guix packages)
  #:use-module (guix profiles)
  #:use-module (guix records)
  #:use-module (forge environment)
  #:use-module (forge nginx)
  #:use-module (forge gunicorn)
  #:use-module (forge socket)
  #:use-module (srfi srfi-1)
  #:use-module (ice-9 match)
  #:export (genenetwork-service-type
            genenetwork-configuration
            genenetwork-configuration?
            genenetwork-configuration-genenetwork2 ; gn2 guix package used from guix-bioinformatics
            genenetwork-configuration-genenetwork3 ; gn3 guix package used from guix-bioinformatics
            genenetwork-configuration-server-name ; outside DNS
            ;; NOTE(review): no accessor named
            ;; `genenetwork-configuration-port' is defined in this file,
            ;; so this export appears to be unbound.  Kept for interface
            ;; compatibility; confirm and either add the field or drop it.
            genenetwork-configuration-port ; external port
            genenetwork-configuration-gn2-port ; internal port
            genenetwork-configuration-gn3-port ; internal port
            genenetwork-configuration-auth-db ; RW auth DB
            genenetwork-configuration-xapian-db ; RO search index, unless you want to regenerate inside VM
            genenetwork-configuration-genotype-files ; RO genotype files
            genenetwork-configuration-gn3-data-directory ; RO
            genenetwork-configuration-sparql-endpoint
            genenetwork-configuration-gn-sourcecode-directory ; used for mounting local source code inside VM (dev/debug)
            genenetwork-configuration-gn2-secrets ; RO GN2 secrets so we don't need to regenerate every startup
            genenetwork-configuration-gn3-secrets ; RO GN3 secrets so we don't need to regenerate every startup
            gn-uploader-service-type
            gn-uploader-configuration
            gn-uploader-configuration?
            gn-uploader-configuration-server-name
            gn-uploader-configuration-port
            gn-uploader-configuration-secrets))

;; Configuration of the combined GeneNetwork deployment: the
;; genenetwork2, genenetwork3 and gn-auth gunicorn apps, the nginx
;; reverse proxies in front of them, and the periodic mcron jobs.
(define-record-type* <genenetwork-configuration>
  genenetwork-configuration make-genenetwork-configuration
  genenetwork-configuration?
  (genenetwork2 genenetwork-configuration-genenetwork2
                (default genenetwork2))
  (genenetwork3 genenetwork-configuration-genenetwork3
                (default genenetwork3))
  (gn-auth genenetwork-configuration-gn-auth
           (default gn-auth))
  (server-name genenetwork-configuration-server-name
               (default "genenetwork.org"))
  (gn-auth-server-name genenetwork-configuration-gn-auth-server-name
                       (default "auth.genenetwork.org"))
  (gn2-port genenetwork-configuration-gn2-port
            (default 8082))
  (gn3-port genenetwork-configuration-gn3-port
            (default 8083))
  (gn-auth-port genenetwork-configuration-gn-auth-port
                (default 8084))
  (gn3-alias-server-port genenetwork-gn3-alias-server-port
                         (default 8000))
  (sql-uri genenetwork-configuration-sql-uri
           (default "mysql://username:password@localhost/database"))
  (auth-db genenetwork-configuration-auth-db
           (default "/var/genenetwork/auth.db"))
  (llm-db-path genenetwork-configuration-llm-db-path
               (default "/var/genenetwork/llm.db"))
  (xapian-db genenetwork-configuration-xapian-db
             (default "/var/genenetwork/xapian"))
  (genotype-files genenetwork-configuration-genotype-files
                  (default "/var/genenetwork/genotype-files"))
  (sparql-endpoint genenetwork-configuration-sparql-endpoint
                   (default "http://localhost:8081/sparql"))
  (gn-sourcecode-directory genenetwork-configuration-gn-sourcecode-directory
                           (default "/var/empty"))
  (gn3-data-directory genenetwork-configuration-gn3-data-directory
                      (default "/var/genenetwork"))
  (gn2-secrets genenetwork-configuration-gn2-secrets
               (default "/etc/genenetwork"))
  (gn3-secrets genenetwork-configuration-gn3-secrets
               (default "/etc/genenetwork/gn3-secrets.py"))
  (gn-auth-secrets genenetwork-configuration-gn-auth-secrets
                   (default "/etc/genenetwork"))
  (log-level genenetwork-configuration-log-level
             (default 'warning)
             (sanitize sanitize-log-level)))

;; Configuration of the gn-uploader gunicorn app and its nginx server
;; block.
(define-record-type* <gn-uploader-configuration>
  gn-uploader-configuration make-gn-uploader-configuration
  gn-uploader-configuration?
  (gn-uploader gn-uploader-configuration-gn-uploader
               (default gn-uploader))
  ;; The accessor is named to match the module's export list, which
  ;; exports `gn-uploader-configuration-server-name'.
  (server-name gn-uploader-configuration-server-name
               (default "upload.genenetwork.org"))
  (port gn-uploader-configuration-port
        (default 8085))
  (sql-uri gn-uploader-configuration-sql-uri
           (default "mysql://username:password@localhost/database"))
  (data-directory gn-uploader-configuration-data-directory
                  (default "/var/genenetwork"))
  (secrets gn-uploader-configuration-secrets
           (default "/etc/genenetwork/gn-uploader-secrets.py"))
  (auth-server-url gn-uploader-configuration-auth-server-url
                   (default "https://auth.genenetwork.org"))
  (gn2-server-url gn-uploader-configuration-gn2-server-url
                  (default "https://genenetwork.org"))
  (log-level gn-uploader-configuration-log-level
             (default 'warning)
             (sanitize sanitize-log-level)))

(define (sanitize-log-level log-level)
  "Return @var{log-level} unchanged if it is one of the symbols
@code{fatal}, @code{error}, @code{warning}, @code{info}, @code{debug} or
@code{trace}.  Otherwise, report an error and exit via @code{leave}."
  ;; NOTE(review): the accepted value is upcased and passed to gunicorn's
  ;; `--log-level'; `fatal' and `trace' may not be level names gunicorn
  ;; recognises---confirm against the gunicorn documentation.
  (case log-level
    ((fatal error warning info debug trace)
     log-level)
    (else
     (leave (G_ "Log level ~a is invalid. It must be one of the following symbols---fatal, error, warning, info, debug or trace.~%")
            log-level))))

;; System group and account named "genenetwork"; the account has no
;; login shell and /var/empty as its home directory.
(define %genenetwork-accounts
  (list (user-group
         (name "genenetwork")
         (system? #t))
        (user-account
         (name "genenetwork")
         (group "genenetwork")
         (system? #t)
         (comment "GeneNetwork user")
         (home-directory "/var/empty")
         (shell (file-append shadow "/sbin/nologin")))))

(define (build-xapian-index-cron-gexp config)
  "Return a G-expression that rebuilds the Xapian search index of the
genenetwork service described by @var{config}, a
@code{<genenetwork-configuration>} record.

The index is only rebuilt when no build directory exists yet and
@command{index-genenetwork is-data-modified} exits with status zero.  The
new index is created in the @file{build} sub-directory of the Xapian
directory, the files found there are then moved into the Xapian directory
while the @code{gunicorn-genenetwork3} shepherd service is stopped, and
finally the build directory is deleted and the index files are given mode
644."
  (let* ((sql-uri (genenetwork-configuration-sql-uri config))
         (genenetwork3 (genenetwork-configuration-genenetwork3 config))
         (xapian-directory (genenetwork-configuration-xapian-db config))
         (sparql-endpoint (genenetwork-configuration-sparql-endpoint config))
         (xapian-build-directory (string-append xapian-directory "/build"))
         (herd (file-append shepherd "/bin/herd"))
         (index-genenetwork (file-append genenetwork3 "/bin/index-genenetwork"))
         (gn3-profile (profile
                       (content (package->development-manifest genenetwork3))
                       (allow-collisions? #t)))
         (python3-version (python-version (package-version python))))
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils)
                       (srfi srfi-26))

          ;; Use GN3 in container, not newly cloned GN3 to avoid
          ;; inconsistencies between versions
          (setenv "PYTHONPATH"
                  (string-append #$(file-append genenetwork3
                                                "/lib/python"
                                                python3-version
                                                "/site-packages")
                                 ":"
                                 #$gn3-profile
                                 "/lib/python"
                                 #$python3-version
                                 "/site-packages"))
          (when (and
                 ;; not currently building an index
                 (not (file-exists? #$xapian-build-directory))
                 ;; data has been modified
                 (zero? (status:exit-val
                         (system* #$index-genenetwork
                                  "is-data-modified"
                                  #$xapian-directory
                                  #$sql-uri
                                  #$sparql-endpoint))))
            ;; The outer `dynamic-wind' guarantees that the build
            ;; directory is cleaned up even if building or swapping
            ;; the index fails.
            (dynamic-wind
              (const #t)
              ;; build the index
              (lambda ()
                (invoke #$index-genenetwork
                        "create-xapian-index"
                        #$xapian-build-directory
                        #$sql-uri
                        #$sparql-endpoint)
                ;; The inner `dynamic-wind' guarantees that GN3 is
                ;; started again even if replacing the index fails.
                (dynamic-wind
                  ;; stop GN3: Here there is magic!!!
                  ;; The name `gunicorn-genenetwork3' is magical.  It is
                  ;; not set here nor at the point of call, rather, it is
                  ;; set in a dependency of the system (forge), thereby
                  ;; creating a coupling between this g-expression and
                  ;; whatever forge is doing.  We need to figure out a way
                  ;; to pass in the service name as part of the call to
                  ;; break that coupling
                  (cut invoke #$herd "stop" "gunicorn-genenetwork3")
                  ;; replace old index
                  (lambda ()
                    (for-each (lambda (file)
                                (rename-file file
                                             (string-append #$xapian-directory
                                                            "/"
                                                            (basename file))))
                              (find-files #$xapian-build-directory)))
                  ;; restart GN3
                  (cut invoke #$herd "start" "gunicorn-genenetwork3")))
              (lambda ()
                ;; delete build directory
                (delete-file-recursively #$xapian-build-directory)
                ;; set up correct permissions
                (for-each (lambda (file)
                            (chmod file #o644))
                          (find-files #$xapian-directory)))))))))

(define (make-non-human-data-public-cron-gexp config)
  "Return a G-expression that runs the Python module
@code{scripts.batch_assign_data_to_default_admin} with the auth database
and SQL URI of @var{config}, a @code{<genenetwork-configuration>} record.
@env{PYTHONPATH} is set to the site-packages directories of the gn-auth
package and of a profile built from its development manifest."
  (match-record config <genenetwork-configuration>
    (gn-auth sql-uri auth-db)
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils))

          (setenv "PYTHONPATH"
                  (string-append
                   #$(file-append gn-auth
                                  "/lib/python"
                                  (python-version (package-version python))
                                  "/site-packages")
                   ":"
                   #$(profile
                      (content (package->development-manifest gn-auth))
                      (allow-collisions? #t))
                   "/lib/python"
                   #$(python-version (package-version python))
                   "/site-packages"))
          (invoke #$(file-append python "/bin/python3")
                  "-m" "scripts.batch_assign_data_to_default_admin"
                  #$auth-db
                  #$sql-uri)))))

(define (genenetwork-activation config)
  "Return an activation G-expression for the genenetwork service described
by @var{config}, a @code{<genenetwork-configuration>} record.  It hands
ownership of the secrets and of the directory holding the auth database to
the respective gunicorn service users, and restricts the secrets files to
mode 600."
  (match-record config <genenetwork-configuration>
    (gn2-secrets gn3-secrets gn-auth-secrets auth-db)
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils))

          ;; Let gn-auth own its secrets and everything in the
          ;; directory containing the auth database.
          (for-each (lambda (file)
                      (chown file
                             (passwd:uid (getpw "gunicorn-gn-auth"))
                             (passwd:gid (getpw "gunicorn-gn-auth"))))
                    (append (list #$gn-auth-secrets)
                            (find-files #$(dirname auth-db)
                                        #:directories? #t)
                            (find-files #$gn-auth-secrets
                                        #:directories? #t)))
          ;; Let each service user own their own secrets files.
          (for-each (lambda (file)
                      (chown file
                             (passwd:uid (getpw "gunicorn-genenetwork2"))
                             (passwd:gid (getpw "gunicorn-genenetwork2"))))
                    (append (list #$gn2-secrets)
                            (find-files #$gn2-secrets
                                        #:directories? #t)))
          (chown #$gn3-secrets
                 (passwd:uid (getpw "gunicorn-genenetwork3"))
                 (passwd:gid (getpw "gunicorn-genenetwork3")))
          ;; Set owner-only permissions on secrets files.
          (for-each (lambda (file)
                      (chmod file #o600))
                    (append (list #$gn3-secrets)
                            (find-files #$gn2-secrets
                                        #:directories? #f)
                            (find-files #$gn-auth-secrets
                                        #:directories? #f)))))))

(define (configuration-file-gexp alist)
  "Return a G-expression that constructs a configuration file of
key-value pairs.  @var{alist} is an association list mapping keys to their
values.  Keys must be strings.  Values may be strings, G-expressions or
numbers."
  #~(begin
      (use-modules (ice-9 match))

      (call-with-output-file #$output
        (lambda (port)
          (for-each (match-lambda
                      ((key value)
                       (display key port)
                       (display " = " port)
                       ;; Numbers are written bare; everything else is
                       ;; wrapped in double quotes.
                       (cond
                        ((number? value)
                         (display value port))
                        (else
                         (display "\"" port)
                         (display value port)
                         (display "\"" port)))
                       (newline port)))
                    '#$alist)))))

(define (genenetwork-gunicorn-apps config)
  "Return a list of gunicorn apps to run the genenetwork server described
by @var{config}, a @code{<genenetwork-configuration>} object."
  (match-record config <genenetwork-configuration>
    (genenetwork2 genenetwork3 gn-auth server-name gn-auth-server-name
                  gn2-port gn3-port gn-auth-port sql-uri auth-db xapian-db
                  genotype-files sparql-endpoint gn-sourcecode-directory
                  gn3-data-directory gn2-secrets gn3-secrets gn-auth-secrets
                  llm-db-path log-level)
    ;; If we mapped only the mysqld.sock socket file, it would break
    ;; when the external mysqld server is restarted.
    (let* ((database-mapping (file-system-mapping
                              (source "/run/mysqld")
                              (target source)
                              (writable? #t)))
           (gn2-profile (profile
                         (content (package->development-manifest genenetwork2))
                         (allow-collisions? #t)))
           (gn2-conf (computed-file "gn2.conf"
                                    (configuration-file-gexp
                                     `(("GN2_SECRETS" ,(string-append gn2-secrets "/gn2-secrets.py"))
                                       ("GEMMA_COMMAND" ,(file-append gn2-profile "/bin/gemma"))
                                       ("GEMMA_WRAPPER_COMMAND" ,(file-append gn2-profile "/bin/gemma-wrapper"))
                                       ("GENENETWORK_FILES" ,genotype-files)
                                       ("GN3_LOCAL_URL" ,(string-append "http://localhost:"
                                                                        (number->string gn3-port)))
                                       ("GN_SERVER_URL" ,(string-append "https://" server-name "/api3/"))
                                       ("AUTH_SERVER_URL" ,(string-append "https://" gn-auth-server-name "/"))
                                       ("JS_GUIX_PATH" ,(file-append gn2-profile "/share/genenetwork2/javascript"))
                                       ("PLINK_COMMAND" ,(file-append gn2-profile "/bin/plink2"))
                                       ("SQL_URI" ,sql-uri)
                                       ("AI_SEARCH_ENABLED" "True")))))
           (gn3-conf (computed-file "gn3.conf"
                                    (configuration-file-gexp
                                     `(("AUTH_DB" ,auth-db)
                                       ("DATA_DIR" ,gn3-data-directory)
                                       ("SOURCE_DIR" ,gn-sourcecode-directory)
                                       ("SPARQL_ENDPOINT" ,sparql-endpoint)
                                       ("SQL_URI" ,sql-uri)
                                       ("XAPIAN_DB_PATH" ,xapian-db)
                                       ("GENOTYPE_FILES" ,genotype-files)
                                       ("REAPER_COMMAND" ,(file-append gn2-profile "/bin/qtlreaper"))
                                       ("LLM_DB_PATH" ,llm-db-path)))))
           (gn-auth-conf (computed-file "gn-auth.conf"
                                        (configuration-file-gexp
                                         `(("GN_AUTH_SECRETS" ,(string-append gn-auth-secrets "/gn-auth-secrets.py"))
                                           ("AUTH_DB" ,auth-db)
                                           ("SQL_URI" ,sql-uri)
                                           ("CLIENTS_SSL_PUBLIC_KEYS_DIR" ,(string-append gn-auth-secrets "/clients-public-keys"))
                                           ("SSL_PRIVATE_KEY" ,(string-append gn-auth-secrets "/gn-auth-ssl-private-key.pem")))))))
      (list (gunicorn-app
             (name "genenetwork2")
             (package genenetwork2)
             (sockets (list (forge-ip-socket
                             (port gn2-port))))
             (wsgi-app-module "gn2.wsgi")
             (workers 20)
             (timeout 1200)
             (environment-variables
              (list (environment-variable
                     (name "GN2_PROFILE")
                     (value gn2-profile))
                    (environment-variable
                     (name "TMPDIR")
                     (value "/tmp"))
                    (environment-variable
                     (name "GN2_SETTINGS")
                     (value gn2-conf))
                    (environment-variable
                     (name "HOME")
                     (value "/tmp"))))
             (mappings
              (list database-mapping
                    (file-system-mapping
                     (source genotype-files)
                     (target source))
                    (file-system-mapping
                     (source gn-sourcecode-directory)
                     (target source))
                    (file-system-mapping ; GN2 and GN3 need to share TMPDIR
                     (source "/tmp")
                     (target "/tmp")
                     (writable? #t))
                    (file-system-mapping
                     (source gn2-conf)
                     (target source))
                    (file-system-mapping
                     (source gn2-profile)
                     (target source))
                    (file-system-mapping
                     (source gn2-secrets)
                     (target source)
                     (writable? #t))))
             (extra-cli-arguments
              (list "--log-level" (string-upcase (symbol->string log-level)))))
            (gunicorn-app
             (name "genenetwork3")
             (package genenetwork3)
             (sockets (list (forge-ip-socket
                             (port gn3-port))))
             (wsgi-app-module "gn3.app:create_app()")
             (workers 20)
             ;; gunicorn's default 30 second timeout is insufficient
             ;; for Fahamu AI endpoints and results in worker timeout
             ;; errors.
             (timeout 1200)
             (environment-variables
              (list (environment-variable
                     (name "GN3_CONF")
                     (value gn3-conf))
                    (environment-variable
                     (name "TMPDIR")
                     (value "/tmp"))
                    (environment-variable
                     (name "GN3_SECRETS")
                     (value gn3-secrets))
                    (environment-variable
                     (name "HOME")
                     (value "/tmp"))))
             (mappings
              (list database-mapping
                    (file-system-mapping
                     (source gn3-conf)
                     (target source))
                    (file-system-mapping
                     (source gn3-secrets)
                     (target source))
                    (file-system-mapping
                     (source gn-sourcecode-directory)
                     (target source))
                    (file-system-mapping
                     (source genotype-files)
                     (target source))
                    (file-system-mapping
                     (source gn3-data-directory)
                     (target source)) ; Rqtl uses this
                    (file-system-mapping ; GN2 and GN3 need to share TMPDIR
                     (source "/tmp")
                     (target "/tmp")
                     (writable? #t))
                    (file-system-mapping
                     (source xapian-db)
                     (target source))
                    (file-system-mapping
                     (source llm-db-path)
                     (target source)
                     (writable? #t))))
             (extra-cli-arguments
              (list "--log-level" (string-upcase (symbol->string log-level)))))
            (gunicorn-app
             (name "gn-auth")
             (package gn-auth)
             (sockets (list (forge-ip-socket
                             (port gn-auth-port))))
             (wsgi-app-module "gn_auth:create_app()")
             (workers 20)
             (environment-variables
              (list (environment-variable
                     (name "GN_AUTH_CONF")
                     (value gn-auth-conf))
                    (environment-variable
                     (name "HOME")
                     (value "/tmp"))
                    (environment-variable
                     (name "AUTHLIB_INSECURE_TRANSPORT")
                     (value "true"))))
             (mappings
              (list database-mapping
                    (file-system-mapping
                     (source gn-auth-conf)
                     (target source))
                    (file-system-mapping
                     (source auth-db)
                     (target source)
                     (writable? #t))
                    (file-system-mapping
                     (source gn-auth-secrets)
                     (target source)
                     (writable? #t))))
             (extra-cli-arguments
              (list "--log-level" (string-upcase (symbol->string log-level)))))))))

(define (genenetwork-nginx-server-blocks config)
  "Return a list of @code{<nginx-server-configuration>} records specifying
reverse proxies for the genenetwork service described by @var{config}, a
@code{<genenetwork-configuration>} record."
  (match-record config <genenetwork-configuration>
    (server-name gn-auth-server-name gn2-port gn3-port gn-auth-port
                 gn3-alias-server-port)
    (list (nginx-server-configuration
           (server-name (list server-name))
           (locations
            (list (nginx-location-configuration
                   (uri "/")
                   (body (list (string-append "proxy_pass http://localhost:"
                                              (number->string gn2-port)
                                              ";")
                               "proxy_set_header Host $host;"
                               "proxy_read_timeout 20m;"
                               "proxy_set_header X-Forwarded-Proto $scheme;")))
                  (nginx-location-configuration
                   (uri "/api3/")
                   (body (list "rewrite /api3/(.*) /api/$1 break;"
                               (string-append "proxy_pass http://localhost:"
                                              (number->string gn3-port)
                                              ";")
                               "proxy_set_header Host $host;")))
                  (nginx-location-configuration
                   (uri "/gn3/")
                   (body (list "rewrite /gn3/(.*) /$1 break;"
                               (string-append "proxy_pass http://localhost:"
                                              (number->string gn3-alias-server-port)
                                              ";")
                               "proxy_redirect off;"
                               "proxy_set_header Host $host;"))))))
          (nginx-server-configuration
           (server-name (list gn-auth-server-name))
           (locations
            (list (nginx-location-configuration
                   (uri "/")
                   (body (list (string-append "proxy_pass http://localhost:"
                                              (number->string gn-auth-port)
                                              ";")
                               "proxy_set_header Host $host;")))))))))

(define (genenetwork-mcron-jobs config)
  "Return the list of mcron job G-expressions for the genenetwork service
described by @var{config}: one rebuilding the Xapian index as root, and one
running the gn-auth data assignment script as @code{gunicorn-gn-auth}.  Both
are scheduled with @code{(next-hour)}."
  (list #~(job '(next-hour)
               #$(program-file "build-xapian-index-cron-gexp"
                               (build-xapian-index-cron-gexp config))
               #:user "root")
        #~(job '(next-hour)
               #$(program-file "make-non-human-data-public-cron-gexp"
                               (make-non-human-data-public-cron-gexp config))
               #:user "gunicorn-gn-auth")))

(define genenetwork-service-type
  (service-type
   (name 'genenetwork)
   (description "Run GeneNetwork")
   (extensions
    (list (service-extension account-service-type
                             (const %genenetwork-accounts))
          (service-extension activation-service-type
                             genenetwork-activation)
          (service-extension gunicorn-service-type
                             genenetwork-gunicorn-apps)
          (service-extension forge-nginx-service-type
                             genenetwork-nginx-server-blocks)
          (service-extension mcron-service-type
                             genenetwork-mcron-jobs)))
   (default-value (genenetwork-configuration))))

(define (gn-uploader-activation config)
  "Return an activation G-expression for the gn-uploader service described
by @var{config}, a @code{<gn-uploader-configuration>} record.  It makes the
@code{gunicorn-gn-uploader} user own the secrets file and the data
directory tree, and sets the secrets file to mode 600."
  (match-record config <gn-uploader-configuration>
    (secrets data-directory)
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils))

          ;; Let service user own their own secrets files.
          (chown #$secrets
                 (passwd:uid (getpw "gunicorn-gn-uploader"))
                 (passwd:gid (getpw "gunicorn-gn-uploader")))
          ;; Set owner-only permissions on secrets files.
          (for-each (lambda (file)
                      (chmod file #o600))
                    (list #$secrets))
          ;; Let gn-uploader service own its data-directory
          (for-each (lambda (file)
                      (chown file
                             (passwd:uid (getpw "gunicorn-gn-uploader"))
                             (passwd:gid (getpw "gunicorn-gn-uploader"))))
                    (append (list #$data-directory)
                            (find-files #$data-directory
                                        #:directories? #t)))))))

(define (gn-uploader-gunicorn-app config)
  "Return a list containing the gunicorn app that runs the gn-uploader
service described by @var{config}, a @code{<gn-uploader-configuration>}
record."
  (match-record config <gn-uploader-configuration>
    (gn-uploader sql-uri port data-directory secrets log-level
                 auth-server-url gn2-server-url)
    ;; If we mapped only the mysqld.sock socket file, it would break
    ;; when the external mysqld server is restarted.
    (let ((database-mapping (file-system-mapping
                             (source "/run/mysqld")
                             (target source)
                             (writable? #t)))
          (gn-uploader-conf (computed-file "gn-uploader.conf"
                                           (configuration-file-gexp
                                            `(("UPLOADER_SECRETS" ,secrets)
                                              ("SQL_URI" ,sql-uri)
                                              ("UPLOAD_FOLDER" ,(string-append data-directory "/uploads"))
                                              ("AUTH_SERVER_URL" ,auth-server-url)
                                              ("GN2_SERVER_URL" ,gn2-server-url)))))
          (gn-uploader-profile (profile
                                (content (package->development-manifest gn-uploader))
                                (allow-collisions? #t))))
      (list (gunicorn-app
             (name "gn-uploader")
             (package gn-uploader)
             (sockets (list (forge-ip-socket
                             (port port))))
             (wsgi-app-module "scripts.qcapp_wsgi:app")
             (workers 20)
             (environment-variables
              (list (environment-variable
                     (name "UPLOADER_CONF")
                     (value gn-uploader-conf))
                    (environment-variable
                     (name "HOME")
                     (value "/tmp"))
                    (environment-variable
                     (name "GN_UPLOADER_ENVIRONMENT")
                     (value gn-uploader-profile))))
             (mappings
              (list database-mapping
                    (file-system-mapping
                     (source gn-uploader-conf)
                     (target source))
                    (file-system-mapping
                     (source secrets)
                     (target source))
                    (file-system-mapping
                     (source data-directory)
                     (target source)
                     (writable? #t))
                    (file-system-mapping
                     (source gn-uploader-profile)
                     (target source))))
             (extra-cli-arguments
              (list "--log-level" (string-upcase (symbol->string log-level)))))))))

(define (gn-uploader-nginx-server-block config)
  "Return a list containing the @code{<nginx-server-configuration>} record
for the gn-uploader service described by @var{config}, a
@code{<gn-uploader-configuration>} record.  @file{/static} is served from
the @file{uploader} directory of the gn-uploader package's site-packages;
everything else is proxied to the gunicorn app on the configured port."
  (match-record config <gn-uploader-configuration>
    (server-name port gn-uploader)
    (list (nginx-server-configuration
           (server-name (list server-name))
           (locations
            (list (nginx-location-configuration
                   (uri "/static")
                   (body (list #~(string-append
                                  "root "
                                  #$(file-append gn-uploader
                                                 "/lib/python"
                                                 (python-version (package-version python))
                                                 "/site-packages/uploader;")))))
                  (nginx-location-configuration
                   (uri "/")
                   (body (list (string-append "proxy_pass http://localhost:"
                                              (number->string port)
                                              ";")
                               "proxy_set_header Host $host;"
                               "proxy_set_header X-Forwarded-Proto $scheme;"
                               "client_max_body_size 500M;")))))))))

(define gn-uploader-service-type
  (service-type
   (name 'gn-uploader)
   (description "GeneNetwork data uploader service.")
   (extensions
    (list (service-extension activation-service-type
                             gn-uploader-activation)
          (service-extension gunicorn-service-type
                             gn-uploader-gunicorn-app)
          (service-extension forge-nginx-service-type
                             gn-uploader-nginx-server-block)))
   (default-value (gn-uploader-configuration))))