;;; genenetwork-machines --- Guix configuration for genenetwork machines
;;; Copyright © 2024 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2024 Frederick M. Muriithi <fredmanglis@gmail.com>
;;; Copyright © 2024 Pjotr Prins <pjotr.public01@thebird.nl>
;;; Copyright © 2024 Munyoki Kilyungi <me@bonfacemunyoki.com>
;;; Copyright © 2024 Alexander Kabui <alexanderkabui@gmail.com>
;;;
;;; This file is part of genenetwork-machines.
;;;
;;; genenetwork-machines is free software: you can redistribute it
;;; and/or modify it under the terms of the GNU General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; genenetwork-machines is distributed in the hope that it will be
;;; useful, but WITHOUT ANY WARRANTY; without even the implied
;;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
;;; See the GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with genenetwork-machines.  If not, see
;;; <https://www.gnu.org/licenses/>.

;; Module header: pulls in the GeneNetwork packages, the Guix service
;; and record machinery and the forge helpers used below, and exports
;; the two service types together with their configuration records.
(define-module (genenetwork services genenetwork)
  #:use-module ((gn packages genenetwork) #:select (genenetwork2 genenetwork3 gn-auth gn-uploader))
  #:use-module ((gnu packages admin) #:select (shadow))
  #:use-module (gnu services)
  #:use-module (gnu services web)
  #:use-module (gnu system file-systems)
  #:use-module (gnu system shadow)
  #:use-module (guix gexp)
  #:use-module (guix profiles)
  #:use-module (guix records)
  #:use-module (forge environment)
  #:use-module (forge nginx)
  #:use-module (forge gunicorn)
  #:use-module (forge socket)
  #:use-module (srfi srfi-1)
  #:use-module (ice-9 match)
  #:export (genenetwork-service-type
            genenetwork-configuration
            genenetwork-configuration?
            genenetwork-configuration-genenetwork2   ; gn2 guix package used from guix-bioinformatics
            genenetwork-configuration-genenetwork3   ; gn3 guix package used from guix-bioinformatics
            genenetwork-configuration-server-name    ; outside DNS
            ;; NOTE(review): <genenetwork-configuration> as defined in
            ;; this file has no `port' field, so the next name appears
            ;; to be exported without a definition -- confirm, then
            ;; either define it or drop the export.
            genenetwork-configuration-port           ; external port
            genenetwork-configuration-gn2-port       ; internal port
            genenetwork-configuration-gn3-port       ; internal port
            genenetwork-configuration-auth-db        ; RW auth DB
            genenetwork-configuration-xapian-db      ; RO search index, unless you want to regenerate inside VM
            genenetwork-configuration-genotype-files ; RO genotype files
            genenetwork-configuration-gn3-data-directory ; RO
            genenetwork-configuration-sparql-endpoint
            genenetwork-configuration-gn-sourcecode-directory ; used for mounting local source code inside VM (dev/debug)
            genenetwork-configuration-gn2-secrets ; RO GN2 secrets so we don't need to regenerate every startup
            genenetwork-configuration-gn3-secrets ; RO GN3 secrets so we don't need to regenerate every startup
            genenetwork-configuration-ssl-path
            gn-uploader-service-type
            gn-uploader-configuration
            gn-uploader-configuration?
            gn-uploader-configuration-server-name
            gn-uploader-configuration-port
            gn-uploader-configuration-secrets))

;; Configuration record for `genenetwork-service-type'.  One record
;; describes the three gunicorn applications (genenetwork2,
;; genenetwork3 and gn-auth) and the nginx reverse proxies in front of
;; them.  All path fields are host paths; the ones an application needs
;; are mapped into its environment at the same location.
(define-record-type* <genenetwork-configuration>
  genenetwork-configuration make-genenetwork-configuration
  genenetwork-configuration?
  ;; Packages run by the three gunicorn apps.
  (genenetwork2 genenetwork-configuration-genenetwork2
                (default genenetwork2))
  (genenetwork3 genenetwork-configuration-genenetwork3
                (default genenetwork3))
  (gn-auth genenetwork-configuration-gn-auth
           (default gn-auth))
  ;; nginx server name for genenetwork2 (and genenetwork3 under
  ;; /api3/); also used to build GN_SERVER_URL in gn2.conf.
  (server-name genenetwork-configuration-server-name
               (default "genenetwork.org"))
  ;; nginx server name for gn-auth; also used to build AUTH_SERVER_URL
  ;; in gn2.conf.
  (gn-auth-server-name genenetwork-configuration-gn-auth-server-name
                       (default "auth.genenetwork.org"))
  ;; Local TCP ports the gunicorn apps listen on and nginx proxies to.
  (gn2-port genenetwork-configuration-gn2-port
            (default 8082))
  (gn3-port genenetwork-configuration-gn3-port
            (default 8083))
  (gn-auth-port genenetwork-configuration-gn-auth-port
                (default 8084))
  ;; Written as SQL_URI into gn2.conf, gn3.conf and gn-auth.conf.
  (sql-uri genenetwork-configuration-sql-uri
           (default "mysql://username:password@localhost/database"))
  ;; Written as AUTH_DB for genenetwork3 and gn-auth.  Everything under
  ;; the directory containing this file is chowned to the
  ;; "gunicorn-gn-auth" user on activation.
  (auth-db genenetwork-configuration-auth-db
           (default "/var/genenetwork/auth.db"))
  ;; Written as XAPIAN_DB_PATH for genenetwork3.
  (xapian-db genenetwork-configuration-xapian-db
             (default "/var/genenetwork/xapian"))
  ;; Written as GENENETWORK_FILES for genenetwork2; mapped into both
  ;; genenetwork2 and genenetwork3.
  (genotype-files genenetwork-configuration-genotype-files
                  (default "/var/genenetwork/genotype-files"))
  ;; Written as SPARQL_ENDPOINT for genenetwork3.
  (sparql-endpoint genenetwork-configuration-sparql-endpoint
                   (default "http://localhost:8081/sparql"))
  ;; Written as SOURCE_DIR for genenetwork3; mapped into both
  ;; genenetwork2 and genenetwork3.
  (gn-sourcecode-directory genenetwork-configuration-gn-sourcecode-directory
                      (default "/var/empty"))
  ;; Written as DATA_DIR for genenetwork3.
  (gn3-data-directory genenetwork-configuration-gn3-data-directory
                      (default "/var/genenetwork"))
  ;; Secrets files.  On activation each is chowned to the gunicorn user
  ;; of its application and set to mode 600.
  (gn2-secrets genenetwork-configuration-gn2-secrets
               (default "/etc/genenetwork/gn2-secrets.py"))
  (gn3-secrets genenetwork-configuration-gn3-secrets
               (default "/etc/genenetwork/gn3-secrets.py"))
  (gn-auth-secrets genenetwork-configuration-gn-auth-secrets
                   (default "/etc/genenetwork/gn-auth-secrets.py"))
  ;; Written as LLM_DB_PATH for genenetwork3; chowned to the
  ;; "gunicorn-genenetwork3" user on activation.
  (llm-db genenetwork-configuration-llm-db
	  (default "/var/genenetwork/llm.db"))
  ;; Directory holding the SSL key files referenced from the generated
  ;; configuration files; made mode 777 recursively on activation.
  (ssl-path genenetwork-configuration-ssl-path
	    (default "/var/ssl")))

;; Configuration record for `gn-uploader-service-type'.
(define-record-type* <gn-uploader-configuration>
  gn-uploader-configuration make-gn-uploader-configuration
  gn-uploader-configuration?
  ;; Package run by the gunicorn app.
  (gn-uploader gn-uploader-configuration-gn-uploader
               (default gn-uploader))
  ;; nginx server name.  The accessor is named
  ;; `gn-uploader-configuration-server-name' because that is the name
  ;; the module exports; under any other name the export is unbound.
  (server-name gn-uploader-configuration-server-name
               (default "upload.genenetwork.org"))
  ;; Local TCP port the gunicorn app listens on and nginx proxies to.
  (port gn-uploader-configuration-port
        (default 8085))
  ;; Written as SQL_URI into gn-uploader.conf.
  (sql-uri gn-uploader-configuration-sql-uri
           (default "mysql://username:password@localhost/database"))
  ;; Chowned to the "gunicorn-gn-uploader" user on activation and
  ;; mapped writable into the app; UPLOAD_FOLDER is its "uploads"
  ;; subdirectory.
  (data-directory gn-uploader-configuration-data-directory
                  (default "/var/genenetwork"))
  ;; Secrets file; written as QCAPP_SECRETS, chowned to the service
  ;; user and set to mode 600 on activation.
  (secrets gn-uploader-configuration-secrets
           (default "/etc/genenetwork/gn-uploader-secrets.py"))
  ;; Passed to gunicorn as the argument of "--log-level".
  (log-level gn-uploader-log-level (default "WARNING")))

;; System group and system user, both named "genenetwork", that
;; `genenetwork-service-type' hands to the account service.  The user
;; has no usable home directory and a nologin shell.
(define %genenetwork-accounts
  (let ((account-name "genenetwork"))
    (list (user-group
           (system? #t)
           (name account-name))
          (user-account
           (system? #t)
           (name account-name)
           (group account-name)
           (comment "GeneNetwork user")
           (shell (file-append shadow "/sbin/nologin"))
           (home-directory "/var/empty")))))

(define (genenetwork-activation config)
  "Return an activation G-expression that fixes ownership and
permissions of the secrets files, the auth database directory, the LLM
database and the SSL directory named in @var{config}, a
@code{<genenetwork-configuration>} record."
  (match-record config <genenetwork-configuration>
    (gn2-secrets gn3-secrets gn-auth-secrets auth-db llm-db ssl-path)
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils))

          (let ((give-to
                 ;; Make FILE belong to the uid and gid of the account
                 ;; called USER-NAME.
                 (lambda (user-name file)
                   (let ((entry (getpw user-name)))
                     (chown file
                            (passwd:uid entry)
                            (passwd:gid entry))))))
            ;; The gn-auth user gets its secrets file and everything
            ;; under the directory that contains the auth database.
            (for-each (lambda (file)
                        (give-to "gunicorn-gn-auth" file))
                      (cons #$gn-auth-secrets
                            (find-files #$(dirname auth-db)
                                        #:directories? #t)))
            ;; Everyone can read, write and execute.
            ;; NOTE(review): this also makes any private key files
            ;; under the SSL directory world-writable -- confirm that
            ;; this is intended.
            (for-each (lambda (file)
                        (chmod file #o777))
                      (find-files #$ssl-path #:directories? #t))
            ;; Let each service user own their own secrets files.
            (give-to "gunicorn-genenetwork2" #$gn2-secrets)
            (give-to "gunicorn-genenetwork3" #$gn3-secrets)
            ;; This must come after the recursive chown above: with
            ;; the default paths the LLM database sits in the same
            ;; directory as the auth database.
            (give-to "gunicorn-genenetwork3" #$llm-db)
            ;; Set owner-only permissions on secrets files.
            (for-each (lambda (file)
                        (chmod file #o600))
                      (list #$gn2-secrets
                            #$gn3-secrets
                            #$gn-auth-secrets)))))))

(define (configuration-file-gexp alist)
  "Return a G-expression that writes a configuration file of
@code{KEY = VALUE} lines, one per entry, to its output.  @var{alist}
is a list of two-element lists, each holding a key and its value.
Keys must be strings.  Values may be strings, G-expressions or
numbers: numbers are written bare, everything else is wrapped in
double quotes as is (no escaping is performed)."
  #~(begin
      (use-modules (ice-9 match))

      (call-with-output-file #$output
        (lambda (port)
          (for-each (lambda (entry)
                      (match entry
                        ((key value)
                         (if (number? value)
                             (format port "~a = ~a~%" key value)
                             (format port "~a = \"~a\"~%" key value)))))
                    '#$alist)))))

(define (genenetwork-gunicorn-apps config)
  "Return the list of @code{<gunicorn-app>} records---genenetwork2,
genenetwork3 and gn-auth, in that order---that run the genenetwork
server described by @var{config}, a @code{<genenetwork-configuration>}
record.  Each app gets a generated configuration file and the
file-system mappings listed for it; every mapping exposes a host path
at the same location inside the app's environment."
  (match-record config <genenetwork-configuration>
    (genenetwork2 genenetwork3 gn-auth
     server-name gn-auth-server-name
     gn2-port gn3-port gn-auth-port
     sql-uri auth-db xapian-db genotype-files sparql-endpoint
     gn-sourcecode-directory gn3-data-directory
     gn2-secrets gn3-secrets gn-auth-secrets llm-db ssl-path)
    (let* (;; Small constructors for the records repeated below.
           (read-only (lambda (path)
                        (file-system-mapping
                         (source path)
                         (target path))))
           (read-write (lambda (path)
                         (file-system-mapping
                          (source path)
                          (target path)
                          (writable? #t))))
           (env (lambda (variable setting)
                  (environment-variable
                   (name variable)
                   (value setting))))
           (listen-on (lambda (tcp-port)
                        (list (forge-ip-socket
                               (port tcp-port)))))
           (ssl-file (lambda (file-name)
                       (string-append ssl-path file-name)))
           ;; If we mapped only the mysqld.sock socket file, it would
           ;; break when the external mysqld server is restarted.
           (database-mapping (read-write "/run/mysqld"))
           ;; GN2 and GN3 need to share TMPDIR.
           (shared-tmp (read-write "/tmp"))
           (auth-public-key (ssl-file "/gn-auth-ssl-public-key.pem"))
           (gn2-profile (profile
                         (content (package->development-manifest genenetwork2))
                         (allow-collisions? #t)))
           (gn2-conf
            (computed-file
             "gn2.conf"
             (configuration-file-gexp
              (list (list "GEMMA_COMMAND"
                          (file-append gn2-profile "/bin/gemma"))
                    (list "GEMMA_WRAPPER_COMMAND"
                          (file-append gn2-profile "/bin/gemma-wrapper"))
                    (list "GENENETWORK_FILES" genotype-files)
                    (list "GN2_SECRETS" gn2-secrets)
                    (list "GN3_LOCAL_URL"
                          (string-append "http://localhost:"
                                         (number->string gn3-port)))
                    (list "SSL_PRIVATE_KEY"
                          (ssl-file "/gn2-ssl-private-key.pem"))
                    (list "AUTH_SERVER_SSL_PUBLIC_KEY" auth-public-key)
                    (list "GN_SERVER_URL"
                          (string-append "https://" server-name "/api3/"))
                    (list "AUTH_SERVER_URL"
                          (string-append "https://" gn-auth-server-name "/"))
                    (list "JS_GUIX_PATH"
                          (file-append gn2-profile
                                       "/share/genenetwork2/javascript"))
                    (list "PLINK_COMMAND"
                          (file-append gn2-profile "/bin/plink2"))
                    (list "SQL_URI" sql-uri)))))
           (gn3-conf
            (computed-file
             "gn3.conf"
             (configuration-file-gexp
              (list (list "AUTH_DB" auth-db)
                    (list "DATA_DIR" gn3-data-directory)
                    (list "SOURCE_DIR" gn-sourcecode-directory)
                    (list "SPARQL_ENDPOINT" sparql-endpoint)
                    (list "AUTH_SERVER_SSL_PUBLIC_KEY" auth-public-key)
                    (list "SQL_URI" sql-uri)
                    (list "LLM_DB_PATH" llm-db)
                    (list "XAPIAN_DB_PATH" xapian-db)))))
           (gn-auth-conf
            (computed-file
             "gn-auth.conf"
             (configuration-file-gexp
              (list (list "AUTH_DB" auth-db)
                    (list "SQL_URI" sql-uri)
                    (list "CLIENTS_SSL_PUBLIC_KEYS_DIR"
                          (ssl-file "/clients-public-keys"))
                    (list "SSL_PRIVATE_KEY"
                          (ssl-file "/gn-auth-ssl-private-key.pem"))
                    (list "GN_AUTH_SECRETS" gn-auth-secrets))))))
      (list
       (gunicorn-app
        (name "genenetwork2")
        (package genenetwork2)
        (sockets (listen-on gn2-port))
        (wsgi-app-module "gn2.wsgi")
        (workers 20)
        (timeout 1200)
        (environment-variables
         (list (env "GN2_PROFILE" gn2-profile)
               (env "TMPDIR" "/tmp")
               (env "GN2_SETTINGS" gn2-conf)
               (env "HOME" "/tmp")))
        ;; NOTE(review): gn2.conf points at key files under the SSL
        ;; directory, yet that directory is not mapped here, while the
        ;; LLM database (not mentioned in gn2.conf) is -- confirm that
        ;; this is intended.
        (mappings
         (list database-mapping
               (read-only genotype-files)
               (read-only gn-sourcecode-directory)
               shared-tmp
               (read-only gn2-conf)
               (read-only gn2-profile)
               (read-only gn2-secrets)
               (read-write llm-db))))
       (gunicorn-app
        (name "genenetwork3")
        (package genenetwork3)
        (sockets (listen-on gn3-port))
        (wsgi-app-module "gn3.app:create_app()")
        (workers 20)
        ;; gunicorn's default 30 second timeout is insufficient for
        ;; Fahamu AI endpoints and results in worker timeout errors.
        (timeout 1200)
        (environment-variables
         (list (env "GN3_CONF" gn3-conf)
               (env "TMPDIR" "/tmp")
               (env "GN3_SECRETS" gn3-secrets)
               (env "HOME" "/tmp")))
        (mappings
         (list database-mapping
               (read-only gn3-conf)
               (read-only gn3-secrets)
               (read-only gn-sourcecode-directory)
               (read-only genotype-files)
               (read-only gn3-data-directory) ; Rqtl uses this
               shared-tmp
               (read-only xapian-db)
               (read-only auth-db)
               (read-write llm-db)
               (read-write ssl-path))))
       (gunicorn-app
        (name "gn-auth")
        (package gn-auth)
        (sockets (listen-on gn-auth-port))
        (wsgi-app-module "gn_auth:create_app()")
        (workers 20)
        (environment-variables
         (list (env "GN_AUTH_CONF" gn-auth-conf)
               (env "HOME" "/tmp")
               (env "AUTHLIB_INSECURE_TRANSPORT" "true")))
        (mappings
         (list database-mapping
               (read-only gn-auth-conf)
               (read-write auth-db)
               (read-only gn-auth-secrets)
               ;; Writable because a read-only mapping gave:
               ;; OSError: [Errno 30] Read-only file system: '/var/ssl/clients-public-keys'
               (read-write ssl-path))))))))

(define (genenetwork-nginx-server-blocks config)
  "Return a list of two @code{<nginx-server-configuration>} records
acting as reverse proxies for the genenetwork service described by
@var{config}, a @code{<genenetwork-configuration>} record.  The first
serves genenetwork2 at @code{/} and genenetwork3 at @code{/api3/}
(rewritten to @code{/api/}); the second serves gn-auth."
  (match-record config <genenetwork-configuration>
    (server-name gn-auth-server-name gn2-port gn3-port gn-auth-port)
    (let* ((proxy-pass (lambda (backend-port)
                         (string-append "proxy_pass http://localhost:"
                                        (number->string backend-port) ";")))
           (forward-host "proxy_set_header Host $host;")
           (gn2-location
            (nginx-location-configuration
             (uri "/")
             (body (list (proxy-pass gn2-port)
                         forward-host
                         "proxy_read_timeout 20m;"
                         "proxy_set_header X-Forwarded-Proto $scheme;"))))
           (gn3-location
            (nginx-location-configuration
             (uri "/api3/")
             (body (list "rewrite /api3/(.*) /api/$1 break;"
                         (proxy-pass gn3-port)
                         forward-host))))
           (gn-auth-location
            (nginx-location-configuration
             (uri "/")
             (body (list (proxy-pass gn-auth-port)
                         forward-host)))))
      (list (nginx-server-configuration
             (server-name (list server-name))
             (locations (list gn2-location gn3-location)))
            (nginx-server-configuration
             (server-name (list gn-auth-server-name))
             (locations (list gn-auth-location)))))))

;; Service type running GeneNetwork.  It extends the account service
;; with the "genenetwork" user and group, the activation service with
;; the ownership/permission fixes, the gunicorn service with the three
;; applications, and the forge nginx service with their reverse proxies.
(define genenetwork-service-type
  (service-type
   (name 'genenetwork)
   (extensions
    (list (service-extension account-service-type
                             ;; The accounts do not depend on the
                             ;; configuration.
                             (lambda (_) %genenetwork-accounts))
          (service-extension activation-service-type
                             genenetwork-activation)
          (service-extension gunicorn-service-type
                             genenetwork-gunicorn-apps)
          (service-extension forge-nginx-service-type
                             genenetwork-nginx-server-blocks)))
   (default-value (genenetwork-configuration))
   (description "Run GeneNetwork")))

(define (gn-uploader-activation config)
  "Return an activation G-expression that gives the
@code{gunicorn-gn-uploader} user ownership of the secrets file and the
data directory named in @var{config}, a
@code{<gn-uploader-configuration>} record, and restricts the secrets
file to owner-only access."
  (match-record config <gn-uploader-configuration>
    (secrets data-directory)
    (with-imported-modules '((guix build utils))
      #~(begin
          (use-modules (guix build utils))
          (let* ((service-user (getpw "gunicorn-gn-uploader"))
                 (uid (passwd:uid service-user))
                 (gid (passwd:gid service-user)))
            ;; Let service user own their own secrets files.
            (chown #$secrets uid gid)
            ;; Set owner-only permissions on secrets files.
            (chmod #$secrets #o600)
            ;; Let gn-uploader service own its data-directory
            (chown #$data-directory uid gid))))))

(define (gn-uploader-gunicorn-app config)
  "Return a one-element list holding the @code{<gunicorn-app>} record
that runs the uploader described by @var{config}, a
@code{<gn-uploader-configuration>} record."
  (match-record config <gn-uploader-configuration>
    (gn-uploader sql-uri port data-directory secrets log-level)
    (let* ((read-only (lambda (path)
                        (file-system-mapping
                         (source path)
                         (target path))))
           (read-write (lambda (path)
                         (file-system-mapping
                          (source path)
                          (target path)
                          (writable? #t))))
           (env (lambda (variable setting)
                  (environment-variable
                   (name variable)
                   (value setting))))
           ;; If we mapped only the mysqld.sock socket file, it would
           ;; break when the external mysqld server is restarted.
           (database-mapping (read-write "/run/mysqld"))
           (upload-folder (string-append data-directory "/uploads"))
           (gn-uploader-conf
            (computed-file
             "gn-uploader.conf"
             (configuration-file-gexp
              (list (list "QCAPP_SECRETS" secrets)
                    (list "SQL_URI" sql-uri)
                    (list "UPLOAD_FOLDER" upload-folder))))))
      (list (gunicorn-app
             (name "gn-uploader")
             (package gn-uploader)
             (sockets (list (forge-ip-socket
                             (port port))))
             (wsgi-app-module "scripts.qcapp_wsgi:app")
             (workers 20)
             (environment-variables
              (list (env "QCAPP_CONF" gn-uploader-conf)
                    (env "HOME" "/tmp")))
             (mappings
              (list database-mapping
                    (read-only gn-uploader-conf)
                    (read-only secrets)
                    (read-write data-directory)))
             (extra-cli-arguments (list "--log-level" log-level)))))))

(define (gn-uploader-nginx-server-block config)
  "Return a one-element list holding the
@code{<nginx-server-configuration>} record that reverse-proxies the
uploader described by @var{config}, a
@code{<gn-uploader-configuration>} record.  Request bodies of up to
500M are accepted."
  (match-record config <gn-uploader-configuration>
    (server-name port)
    (let* ((upstream (string-append "proxy_pass http://localhost:"
                                    (number->string port) ";"))
           (root-location
            (nginx-location-configuration
             (uri "/")
             (body (list upstream
                         "proxy_set_header Host $host;"
                         "client_max_body_size 500M;")))))
      (list (nginx-server-configuration
             (server-name (list server-name))
             (locations (list root-location)))))))

;; Service type running the GeneNetwork data uploader.  It extends the
;; activation service with the ownership/permission fixes, the gunicorn
;; service with the uploader application, and the forge nginx service
;; with its reverse proxy.
(define gn-uploader-service-type
  (service-type
   (name 'gn-uploader)
   (description "GeneNetwork data uploader service.")
   (extensions
    (list (service-extension activation-service-type
                             gn-uploader-activation)
          (service-extension gunicorn-service-type
                             gn-uploader-gunicorn-app)
          (service-extension forge-nginx-service-type
                             gn-uploader-nginx-server-block)))
   ;; The default must be a <gn-uploader-configuration>: every extension
   ;; above destructures its argument with `match-record' against that
   ;; record type, so a <genenetwork-configuration> cannot work here.
   (default-value (gn-uploader-configuration))))