diff options
Diffstat (limited to 'genenetwork-development.scm')
-rw-r--r-- | genenetwork-development.scm | 557 |
1 files changed, 439 insertions, 118 deletions
diff --git a/genenetwork-development.scm b/genenetwork-development.scm index 5fafc12..130a610 100644 --- a/genenetwork-development.scm +++ b/genenetwork-development.scm @@ -21,18 +21,21 @@ ;;; <https://www.gnu.org/licenses/>. (use-modules (gnu) - ((gn packages genenetwork) #:select (genenetwork2 genenetwork3 gn-auth)) + ((gn packages genenetwork) #:select (genenetwork2 genenetwork3 gn-auth gn-libs)) (gn services databases) + ((gn packages guile) #:select (gn-guile)) (gnu build linux-container) ((gnu packages admin) #:select (shepherd shadow)) - ((gnu packages base) #:select (gnu-make tar)) + ((gnu packages base) #:select (gnu-make tar coreutils-minimal)) ((gnu packages bash) #:select (bash)) ((gnu packages bioinformatics) #:select (ccwl) #:prefix guix:) ((gnu packages certs) #:select (nss-certs)) ((gnu packages check) #:select (python-pylint)) + ((gnu packages curl) #:select (curl)) ((gnu packages ci) #:select (laminar)) ((gnu packages compression) #:select (gzip)) - ((gnu packages databases) #:select (mariadb virtuoso-ose)) + ((gnu packages databases) #:select (mariadb redis)) + ((gn packages databases) #:select (virtuoso-ose)); restore guix's virtuoso-ose once changes are upstreamed. ((gnu packages gnupg) #:select (guile-gcrypt)) ((gnu packages graphviz) #:select (graphviz)) ((gnu packages guile) #:select (guile-3.0 guile-git guile-zlib)) @@ -64,6 +67,7 @@ (guix packages) (guix profiles) (guix records) + (guix search-paths) (guix store) (guix utils) (forge acme) @@ -120,7 +124,11 @@ be imported into G-expressions." 
(gn3-repository genenetwork-configuration-gn3-repository (default "https://github.com/genenetwork/genenetwork3")) (gn-auth-repository genenetwork-configuration-gn-auth-repository - (default "https://git.genenetwork.org/gn-auth")) + (default "https://git.genenetwork.org/gn-auth")) + (gn-libs-repository genenetwork-configuration-gn-libs-repository + (default "https://git.genenetwork.org/gn-libs")) + (gn-guile-repository genenetwork-configuration-gn-guile-repository + (default "https://git.genenetwork.org/gn-guile")) (gn2-port genenetwork-configuration-gn2-port (default 8082)) (gn3-port genenetwork-configuration-gn3-port @@ -144,7 +152,13 @@ be imported into G-expressions." (auth-db-path genenetwork-auth-db-path (default "/export/data/genenetwork-sqlite/auth.db")) (llm-db-path genenetwork-llm-db-path - (default "/export/data/genenetwork-sqlite/llm.db"))) + (default "/export/data/genenetwork-sqlite/llm.db")) + (lmdb-data-path genenetwork-lmdb-data-path + (default "/export/data/lmdb")) + (gn-guile-port genenetwork-configuration-gn-guile-port + (default 8091)) + (gn-doc-git-checkout genenetwork-configuration-gn-doc-git-checkout + (default "/export/data/gn-docs"))) ;;; @@ -182,11 +196,12 @@ described by CONFIG, a <genenetwork-configuration> object. TEST-COMMAND is a list of strings specifying the command to be executed." (match-record config <genenetwork-configuration> - (gn2-repository gn3-repository gn3-port genotype-files) + (gn2-repository gn3-repository gn-libs-repository gn3-port genotype-files) (with-imported-modules '((guix build utils)) (with-packages (list bash coreutils git-minimal nss-certs) #~(begin - (use-modules (guix build utils)) + (use-modules (guix build utils) + (srfi srfi-26)) (define (hline) "Print a horizontal line 50 '=' characters long." @@ -199,9 +214,19 @@ executed." 
(invoke "git" "log" "--max-count" "1") (hline)) + (define (call-with-temporary-directory proc) + (let ((tmp-dir (mkdtemp "/tmp/gn.XXXXXX"))) + (dynamic-wind + (const #t) + (cut proc tmp-dir) + (cut delete-file-recursively tmp-dir)))) + (invoke "git" "clone" "--depth" "1" #$gn3-repository) (with-directory-excursion "genenetwork3" (show-head-commit)) + (invoke "git" "clone" "--depth" "1" #$gn-libs-repository) + (with-directory-excursion "gn-libs" + (show-head-commit)) (invoke "git" "clone" "--depth" "1" #$gn2-repository) (with-directory-excursion "genenetwork2" (show-head-commit)) @@ -222,9 +247,15 @@ executed." (setenv "GN3_LOCAL_URL" (string-append "http://localhost:" (number->string #$gn3-port))) (setenv "GENENETWORK_FILES" #$genotype-files) (setenv "HOME" "/tmp") - (setenv "SQL_URI" "mysql://webqtlout:webqtlout@localhost/db_webqtl") + (setenv "SQL_URI" "mysql://webqtlout:webqtlout@localhost/db_webqtl?unix_socket=/run/mysqld/mysqld.sock&charset=utf8") (chdir "genenetwork2") - (apply invoke '#$test-command)))))) + ;; XXXX: FIXME: R/Qtl tests fail because files are generated in + ;; the "/tmp" directory. Currently, "/tmp" is mapped by gn2/gn3 + ;; so tests will fail because of permission issues. + (call-with-temporary-directory + (lambda (tmp-dir) + (setenv "TMPDIR" tmp-dir) + (apply invoke '#$test-command)))))))) (define %xapian-directory "/export/data/genenetwork-xapian") @@ -252,7 +283,7 @@ genenetwork3 source from the latest commit of @var{project}." (setenv "PYTHONPATH" (getcwd)) (invoke "./scripts/index-genenetwork" "create-xapian-index" xapian-build-directory - "mysql://webqtlout:webqtlout@localhost/db_webqtl" + "mysql://webqtlout:webqtlout@localhost/db_webqtl?unix_socket=/run/mysqld/mysqld.sock&charset=utf8" "http://localhost:9082/sparql") ;; Stop genenetwork3, replace old xapian index and ;; start genenetwork3. @@ -297,7 +328,7 @@ genenetwork3 source from the latest commit of @var{project}." 
(system* (string-append gn3-dir "/scripts/index-genenetwork") "is-data-modified" #$%xapian-directory - "mysql://webqtlout:webqtlout@localhost/db_webqtl" + "mysql://webqtlout:webqtlout@localhost/db_webqtl?unix_socket=/run/mysqld/mysqld.sock" "http://localhost:9082/sparql")))) (setenv "LAMINAR_REASON" "Nightly xapian index rebuild") (invoke #$(file-append laminar "/bin/laminarc") @@ -307,7 +338,7 @@ genenetwork3 source from the latest commit of @var{project}." "Return forge projects for genenetwork described by CONFIG, a <genenetwork-configuration> object." (match-record config <genenetwork-configuration> - (gn2-repository gn3-repository gn-auth-repository gn2-port) + (gn2-repository gn3-repository gn-auth-repository gn-libs-repository gn2-port gn-guile-port gn-guile-repository) (list (forge-project (name "genenetwork2") (repository gn2-repository) @@ -380,6 +411,33 @@ genenetwork3 source from the latest commit of @var{project}." (trigger? #f)))) (ci-jobs-trigger 'webhook)) (forge-project + (name "gn-libs") + (repository gn-libs-repository) + (ci-jobs (list (forge-laminar-job + (name "gn-libs") + (run (guix-channel-job-gexp + (list (channel + (name 'gn-libs) + (url (forge-project-repository this-forge-project)) + (branch "main"))) + #:variables (list (variable-specification + (module '(gn-libs)) + (name 'gn-libs))) + #:guix-daemon-uri %guix-daemon-uri))))) + (ci-jobs-trigger 'webhook)) + (forge-project + (name "gn-guile") + (repository gn-guile-repository) + (ci-jobs (list (forge-laminar-job + (name "gn-guile") + (run (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + (invoke #$sudo + #$(file-append shepherd "/bin/herd") + "restart" "gn-guile"))))))) + (ci-jobs-trigger 'webhook)) + (forge-project (name "gn-auth") (repository gn-auth-repository) (ci-jobs (list (forge-laminar-job @@ -421,7 +479,7 @@ genenetwork3 source from the latest commit of @var{project}." 
"Return a G-expression that runs the latest genenetwork2 development server described by CONFIG, a <genenetwork-configuration> object." (match-record config <genenetwork-configuration> - (gn2-repository gn3-repository gn2-port gn3-port gn2-secrets genotype-files) + (gn2-repository gn3-repository gn2-port gn3-port gn2-secrets genotype-files gn-guile-port) (with-packages (list coreutils git-minimal gunicorn nss-certs) (with-imported-modules '((guix build utils)) #~(begin @@ -453,31 +511,29 @@ server described by CONFIG, a <genenetwork-configuration> object." (string-append (getcwd) "/genenetwork3")) ;; Set other environment variables required by ;; genenetwork2. - (setenv "SERVER_PORT" #$(number->string gn2-port)) (setenv "GN2_PROFILE" #$(profile (content (package->development-manifest genenetwork2)) (allow-collisions? #t))) - (setenv "GN_SERVER_URL" "https://cd.genenetwork.org/api3/") - (setenv "GN3_LOCAL_URL" - #$(string-append "http://localhost:" - (number->string gn3-port))) - (setenv "GENENETWORK_FILES" #$genotype-files) - (setenv "SQL_URI" "mysql://webqtlout:webqtlout@localhost/db_webqtl") - (setenv "HOME" "/tmp") - (setenv "NO_REDIS" "no-redis") - (setenv "RUST_BACKTRACE" "1") - + (setenv "REQUESTS_CA_BUNDLE" (string-append + (getenv "GN2_PROFILE") + "/etc/ssl/certs/ca-certificates.crt")) (setenv "GN2_SETTINGS" #$(mixed-text-file "gn2.conf" "GN2_SECRETS=\"" gn2-secrets "/gn2-secrets.py\"\n" + "AI_SEARCH_ENABLED=True\n" + "TEST_FEATURE_SWITCH=True\n" "GN3_LOCAL_URL=\"" (string-append "http://localhost:" (number->string gn3-port)) "\"\n" + "GN_GUILE_SERVER_URL=\"" + (string-append "http://localhost:" + (number->string gn-guile-port)) + "\"\n" "GN_SERVER_URL=\"https://cd.genenetwork.org/api3/\"\n" "AUTH_SERVER_URL=\"https://auth-cd.genenetwork.org/\"\n" - "SQL_URI=\"mysql://webqtlout:webqtlout@localhost/db_webqtl\"\n" + "SQL_URI=\"mysql://webqtlout:webqtlout@localhost/db_webqtl?unix_socket=/run/mysqld/mysqld.sock\"\n" "SSL_PRIVATE_KEY=\"" gn2-secrets 
"/gn2-ssl-private-key.pem\"\n" "AUTH_SERVER_SSL_PUBLIC_KEY=\"" gn2-secrets "/gn-auth-ssl-public-key.pem\"\n")) @@ -490,7 +546,7 @@ server described by CONFIG, a <genenetwork-configuration> object." "Return a G-expression that runs the latest genenetwork3 development server described by CONFIG, a <genenetwork-configuration> object." (match-record config <genenetwork-configuration> - (gn3-repository gn3-port gn3-secrets sparql-endpoint data-directory xapian-db-path auth-db-path llm-db-path) + (gn3-repository gn3-port gn3-secrets sparql-endpoint data-directory xapian-db-path auth-db-path llm-db-path lmdb-data-path) (with-manifest (package->development-manifest genenetwork3) (with-packages (list git-minimal nss-certs) (with-imported-modules '((guix build utils)) @@ -511,16 +567,29 @@ server described by CONFIG, a <genenetwork-configuration> object." ;; Clone the latest genenetwork3 repository. (invoke "git" "clone" "--depth" "1" #$gn3-repository) + (setenv "GN3_PROFILE" #$(profile + (content (package->development-manifest genenetwork3)) + (allow-collisions? #t))) + (setenv "REQUESTS_CA_BUNDLE" (string-append + (getenv "GN3_PROFILE") + "/etc/ssl/certs/ca-certificates.crt")) ;; Configure genenetwork3. (setenv "GN3_CONF" #$(mixed-text-file "gn3.conf" "SPARQL_ENDPOINT=\"" sparql-endpoint "\"\n" "DATA_DIR=\"" data-directory "\"\n" + "LMDB_DATA_PATH=\"" lmdb-data-path "\"\n" + "AUTH_SERVER_URL=\"https://auth-cd.genenetwork.org/\"\n" "XAPIAN_DB_PATH=\"" xapian-db-path "\"\n" "AUTH_DB=\"" auth-db-path "\"\n" "LLM_DB_PATH=\"" llm-db-path "\"\n")) (setenv "HOME" "/tmp") (setenv "GN3_SECRETS" #$gn3-secrets) + (setenv "RSCRIPT" #$(file-append + (profile + (content (package->development-manifest genenetwork3)) + (allow-collisions? #t)) + "/bin/Rscript")) ;; Run genenetwork3. (with-directory-excursion "genenetwork3" (show-head-commit) @@ -559,6 +628,13 @@ server described by CONFIG, a <genenetwork-configuration> object." ;; Clone the latest gn-auth repository. 
(invoke "git" "clone" "--depth" "1" #$gn-auth-repository) ;; Configure gn-auth. + (setenv "GN_AUTH_PROFILE" #$(profile + (content (package->development-manifest gn-auth)) + (allow-collisions? #t))) + (setenv "REQUESTS_CA_BUNDLE" (string-append + (getenv "GN_AUTH_PROFILE") + "/etc/ssl/certs/ca-certificates.crt")) + (setenv "GN_AUTH_CONF" #$(mixed-text-file "gn-auth.conf" "AUTH_DB=\"" auth-db-path "\"\n" @@ -575,40 +651,177 @@ server described by CONFIG, a <genenetwork-configuration> object." "--workers" "8" "gn_auth.wsgi:app")))))))) +(define (gn-guile-gexp gn-guile-port) + (with-packages + (list coreutils git-minimal nss-certs) + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (define (hline) + "Print a horizontal line 50 '=' characters long." + (display (make-string 50 #\=)) + (newline) + (force-output)) + + (define (show-head-commit) + (hline) + (invoke "git" "log" "--max-count" "1") + (hline)) + ;; KLUDGE: Here we set all the certificates properly. In gn-guile, + ;; we make request to external services. Here's an example: + ;; curl http://localhost:8091/gene/aliases/Shh + ;; + ;; Without certs, we run into: + ;; 2025-07-22 08:27:11 GET /gene/aliases/Shh + ;; [...] + ;; 2025-07-22 08:27:19 signer-not-found invalid + (setenv "GN_GUILE_PROFILE" #$(profile + (content (package->development-manifest gn-guile)) + (allow-collisions? #t))) + (setenv "SSL_CERT_DIR" (string-append + (getenv "GN_GUILE_PROFILE") + "/etc/ssl/certs")) + (setenv "SSL_CERT_FILE" (string-append + (getenv "GN_GUILE_PROFILE") + "/etc/ssl/certs/ca-certificates.crt")) + (setenv "GIT_SSL_CAINFO" (getenv "SSL_CERT_FILE")) + (setenv "CURL_CA_BUNDLE" (getenv "SSL_CERT_FILE")) + (setenv "REQUESTS_CA_BUNDLE" (getenv "SSL_CERT_FILE")) + + (let ((current-repo-path (string-append (getcwd) "/gn-docs"))) + (when (file-exists? 
current-repo-path) + (delete-file-recursively current-repo-path)) + (setenv "CURRENT_REPO_PATH" current-repo-path) + (invoke #$(file-append git-minimal "/bin/git") + "clone" "--depth" "1" (getenv "CGIT_REPO_PATH"))) + (invoke "git" "clone" "--depth" "1" "https://git.genenetwork.org/gn-guile") + + ;; We have a gn-guile-dev wrapper script that sets a "./" in the + ;; GN_GUILE_LOAD_PATH hence allowing this to be run from the gn-guile + ;; directory. This allows gn-guile to be run from the latest + ;; upstream commits without pinning to guix. + (with-directory-excursion "gn-guile" + (show-head-commit) + (invoke #$(file-append gn-guile "/bin/gn-guile-dev") + (number->string #$gn-guile-port))))))) + (define (genenetwork-shepherd-services config) "Return shepherd services to run the genenetwork development server described by CONFIG, a <genenetwork-configuration> object." (match-record config <genenetwork-configuration> - (gn2-port gn3-port gn-auth-port genotype-files data-directory xapian-db-path gn2-secrets auth-db-path gn-auth-secrets llm-db-path) + (gn2-port gn3-port gn-auth-port genotype-files data-directory xapian-db-path gn2-secrets auth-db-path gn-auth-secrets llm-db-path lmdb-data-path gn-doc-git-checkout gn-guile-port) (list (shepherd-service + (documentation "Run gn-guile server.") + (provision '(gn-guile)) + (requirement '(networking)) + (modules '((ice-9 match) + (srfi srfi-1))) + (start + (let* ((gn-guile-settings + `(("CGIT_REPO_PATH" ,gn-doc-git-checkout) + ("LC_ALL" "en_US.UTF-8") + ("GIT_COMMITTER_NAME" "genenetwork") + ("GIT_COMMITTER_EMAIL" "no-reply@git.genenetwork.org")))) + #~(make-forkexec-constructor + (list #$(least-authority-wrapper + (program-file "gn-guile" + (gn-guile-gexp gn-guile-port)) + #:name "gn-guile-pola-wrapper" + #:preserved-environment-variables + (map first gn-guile-settings) + #:mappings (list (file-system-mapping + (source gn-doc-git-checkout) + (target source) + (writable? 
#t))) + #:namespaces (delq 'net %namespaces)) + "127.0.0.1" #$(number->string gn-guile-port)) + #:user "genenetwork" + #:group "genenetwork" + #:environment-variables + (map (match-lambda + ((spec value) + (string-append spec "=" value))) + '#$gn-guile-settings) + #:log-file "/var/log/cd/gn-guile.log"))) + (stop #~(make-kill-destructor))) + (shepherd-service (documentation "Run GeneNetwork 2 development server.") (provision '(genenetwork2)) ;; FIXME: The genenetwork2 service should depend on redis. (requirement '(networking genenetwork3)) - (start #~(make-forkexec-constructor - (list #$(least-authority-wrapper - (program-file "genenetwork2" - (genenetwork2-cd-gexp config)) - #:name "genenetwork2-pola-wrapper" - ;; If we mapped only the mysqld.sock - ;; socket file, it would break when the - ;; external mysqld server is restarted. - #:mappings (list (file-system-mapping - (source genotype-files) - (target source)) - (file-system-mapping - (source "/run/mysqld") - (target source) - (writable? #t)) - (file-system-mapping - (source gn2-secrets) - (target source) - (writable? #t))) - #:namespaces (delq 'net %namespaces)) - "127.0.0.1" #$(number->string gn2-port)) - #:user "genenetwork" - #:group "genenetwork" - #:log-file "/var/log/cd/genenetwork2.log")) + (modules '((guix search-paths) + (ice-9 match) + (srfi srfi-1))) + (start + (let* ((gn2-manifest (packages->manifest (list genenetwork2))) + (gn2-profile (profile + (content gn2-manifest) + (allow-collisions? 
#t))) + (gn2-settings + `(("SERVER_PORT" ,(number->string gn2-port)) + ("GENENETWORK_FILES" ,genotype-files) + ("HOME" "/tmp") + ("LC_ALL" "en_US.UTF-8") + ("NO_REDIS" "no-redis") + ("RUST_BACKTRACE" "1")))) + (with-imported-modules (source-module-closure '((guix search-paths))) + #~(make-forkexec-constructor + (list #$(least-authority-wrapper + (program-file "genenetwork2" + (genenetwork2-cd-gexp config)) + #:name "genenetwork2-pola-wrapper" + #:preserved-environment-variables + (append '("REQUESTS_CA_BUNDLE") + (map first gn2-settings) + (map search-path-specification-variable + (manifest-search-paths gn2-manifest))) + ;; If we mapped only the mysqld.sock + ;; socket file, it would break when the + ;; external mysqld server is restarted. + #:mappings (list (file-system-mapping + (source genotype-files) + (target source)) + (file-system-mapping + (source "/run/mysqld") + (target source) + (writable? #t)) + ;; XXXX: FIXME: R/Qtl generates + ;; files in "/tmp" and + ;; "/tmp/gn2". These files are + ;; accessed by gn3 for R/Qtl + ;; mapping + (file-system-mapping + (source "/tmp") + (target source) + (writable? #t)) + (file-system-mapping + (source gn2-secrets) + (target source) + (writable? #t))) + #:namespaces (delq 'net %namespaces)) + "127.0.0.1" #$(number->string gn2-port)) + #:user "genenetwork" + #:group "genenetwork" + #:environment-variables + (append + (list (string-append "REQUESTS_CA_BUNDLE=" + #$(file-append gn2-profile "/etc/ssl/certs/ca-certificates.crt"))) + (map (match-lambda + ((spec . 
value) + (string-append (search-path-specification-variable spec) + "=" + value))) + (evaluate-search-paths + (map sexp->search-path-specification + '#$(map search-path-specification->sexp + (manifest-search-paths gn2-manifest))) + (list #$gn2-profile))) + (map (match-lambda + ((spec value) + (string-append spec "=" value))) + '#$gn2-settings)) + #:log-file "/var/log/cd/genenetwork2.log")))) (stop #~(make-kill-destructor))) (shepherd-service (documentation "Run GeneNetwork 3 development server.") @@ -627,6 +840,19 @@ described by CONFIG, a <genenetwork-configuration> object." (target source) (writable? #t)) (file-system-mapping + (source lmdb-data-path) + (target source) + (writable? #t)) + ;; XXXX: FIXME: R/Qtl generates + ;; files in "/tmp" and + ;; "/tmp/gn2". These files are + ;; accessed by gn3 for R/Qtl + ;; mapping + (file-system-mapping + (source "/tmp") + (target source) + (writable? #t)) + (file-system-mapping (source data-directory) (target source)) (file-system-mapping @@ -711,6 +937,8 @@ described by CONFIG, a <genenetwork-configuration> object." (cons* #$gn3-secrets (append (find-files #$gn2-secrets #:directories? #t) + (find-files "/export/data/gn-docs" + #:directories? #t) (find-files #$(dirname auth-db-path) #:directories? #t) (find-files #$gn-auth-secrets @@ -744,10 +972,6 @@ described by CONFIG, a <genenetwork-configuration> object." ;;; transform-genenetwork-database ;;; -;; Path to genenetwork database dump export directory that has lots of -;; free space -(define %transform-genenetwork-database-export-directory - "/export/genenetwork-database-dump") ;; Unreleased version of ccwl that is required by ;; transform-genenetwork-database for its graphql library. @@ -804,57 +1028,54 @@ described by CONFIG, a <genenetwork-configuration> object." 
(description "run64 is a SRFI-64 test runner for Scheme.") (license license:gpl3+))) -(define (transform-genenetwork-database project) +;; Connection settings for Virtuoso and MySQL used to load data into Virtuoso +(define %connection-settings + "/etc/genenetwork/conf/gn-transform-database/conn.scm") + +;; Path to where the data directory from which virtuoso loads all the files +(define %virtuoso-data-dir "/var/lib/data") + +(define (transform-genenetwork-database-gexp connection-settings virtuoso-data-dir repository) (with-imported-modules '((guix build utils)) (with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql guile-dbi guile-hashing guile-libyaml guile-sparql - guile-zlib nss-certs virtuoso-ose) - #~(begin - (use-modules (guix build utils) - (srfi srfi-26) - (ice-9 threads)) - - (invoke "git" "clone" - "--depth" "1" - #$(forge-project-repository project) - ".") - (invoke "make" "-j" (number->string (current-processor-count))) - (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory - "/conn.scm")) - (dump-directory #$(string-append %transform-genenetwork-database-export-directory - "/dump"))) - (when (file-exists? dump-directory) - (delete-file-recursively dump-directory)) - (mkdir-p dump-directory) - ;; Dump data to RDF. - (invoke "./pre-inst-env" "./dump.scm" - connection-settings-file - dump-directory) - ;; Validate dumped RDF, sending the error output to - ;; oblivion because we don't want to print out potentially - ;; sensitive data. - (with-error-to-file "/dev/null" - (cut invoke - #$(file-append raptor2 "/bin/rapper") - "--input" "turtle" - "--count" - (string-append dump-directory "/dump.ttl"))) - ;; Load RDF into virtuoso. - (invoke "./pre-inst-env" "./load-rdf.scm" - connection-settings-file - (string-append dump-directory "/dump.ttl")) - ;; Visualize schema and archive results. 
- (invoke "./pre-inst-env" "./visualize-schema.scm" - connection-settings-file) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg"))))))) + guile-zlib nss-certs virtuoso-ose raptor2) + #~(begin + (use-modules (guix build utils) + (srfi srfi-26) + (ice-9 threads)) + (setenv "LC_ALL" "en_US.UTF-8") + (let ((build-directory (string-append #$virtuoso-data-dir + "/build"))) + ;; Only run this job if the build directory does not + ;; exist. This ensures that no other process is + ;; running this. + (unless (file-exists? build-directory) + (invoke "git" "clone" "--depth" "1" #$repository ".") + (invoke "make" "-j" (number->string (current-processor-count))) + (invoke "./generate-ttl-files.scm" "--settings" + #$connection-settings "--output" build-directory) + ;; First clear all the files in our virtuoso directory + (for-each (lambda (file) + (unless (string-suffix? "build" (dirname file)) + (delete-file file))) + (find-files #$virtuoso-data-dir "\\.ttl")) + ;; Move data into the container's virtuoso data directory + (copy-recursively build-directory #$virtuoso-data-dir) + ;; Load RDF into virtuoso. + (invoke "./pre-inst-env" "./load-rdf.scm" #$connection-settings) + ;; Visualize schema and archive results. 
+ (invoke "./pre-inst-env" "./visualize-schema.scm" #$connection-settings) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")) + (delete-file-recursively build-directory))))))) (define transform-genenetwork-database-project (forge-project (name "transform-genenetwork-database") - (repository "/home/git/public/gn-transform-databases/") + (repository "/home/git/public/gn-transform-databases") (ci-jobs (list (forge-laminar-job (name "transform-genenetwork-database-tests") (run (guix-channel-job-gexp @@ -866,7 +1087,10 @@ described by CONFIG, a <genenetwork-configuration> object." #:guix-daemon-uri %guix-daemon-uri))) (forge-laminar-job (name "transform-genenetwork-database") - (run (transform-genenetwork-database this-forge-project))))))) + (run (transform-genenetwork-database-gexp + %connection-settings + %virtuoso-data-dir + "https://git.genenetwork.org/gn-transform-databases"))))))) ;;; @@ -884,6 +1108,7 @@ described by CONFIG, a <genenetwork-configuration> object." #~(begin (use-modules (guix build utils)) + (setenv "LC_ALL" "en_US.UTF-8") (invoke #$(file-append tissue "/bin/tissue") "pull" "issues.genenetwork.org")))))))) (ci-jobs-trigger 'webhook))) @@ -1137,6 +1362,20 @@ gn-auth." ";") "proxy_set_header Host $host;"))))))) +(define set-build-directory-permissions-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + + (for-each (lambda (file) + (chown file + (passwd:uid (getpw "laminar")) + (passwd:gid (getpw "laminar")))) + (append (find-files #$%xapian-directory + #:directories? #t) + (find-files #$%virtuoso-data-dir + #:directories? #t)))))) + ;; Port on which webhook is listening (define %webhook-port 9091) ;; Port on which genenetwork2 is listening @@ -1148,6 +1387,84 @@ gn-auth." 
;; Port on which virtuoso's SPARQL endpoint is listening (define %virtuoso-sparql-port 9082) + +;; KLUDGE: There's a bug in shepherd with syslogd that has since been fixed in +;; shepherd 1.0.5. See: +;; https://lists.gnu.org/archive/html/emacs-bug-tracker/2025-03/msg00231.html +;; We can't immediately upgrade to shepherd 1.0.5 since it bumps up Python to +;; 3.11. Delete this after upgrading shepherd. +(define-record-type* <redis-configuration> + redis-configuration make-redis-configuration + redis-configuration? + (redis redis-configuration-redis ;file-like + (default redis)) + (bind redis-configuration-bind + (default "127.0.0.1")) + (port redis-configuration-port + (default 6379)) + (working-directory redis-configuration-working-directory + (default "/var/lib/redis")) + (config-file redis-configuration-config-file + (default #f))) + +(define (default-redis.conf bind port working-directory) + (mixed-text-file "redis.conf" + "bind " bind "\n" + "port " (number->string port) "\n" + "dir " working-directory "\n" + "daemonize no\n")) + +(define %redis-accounts + (list (user-group (name "redis") (system? #t)) + (user-account + (name "redis") + (group "redis") + (system? 
#t) + (comment "Redis server user") + (home-directory "/var/empty") + (shell (file-append shadow "/sbin/nologin"))))) + +(define redis-activation + (match-lambda + (($ <redis-configuration> redis bind port working-directory config-file) + #~(begin + (use-modules (guix build utils) + (ice-9 match)) + (let ((user (getpwnam "redis"))) + (mkdir-p #$working-directory) + (chown #$working-directory (passwd:uid user) (passwd:gid user))))))) + +(define redis-shepherd-service + (match-lambda + (($ <redis-configuration> redis bind port working-directory config-file) + (let ((config-file + (or config-file + (default-redis.conf bind port working-directory)))) + (list (shepherd-service + (provision '(redis)) + (documentation "Run the Redis daemon.") + (requirement '(user-processes)) ; Removed syslogd + (actions (list (shepherd-configuration-action config-file))) + (start #~(make-forkexec-constructor + '(#$(file-append redis "/bin/redis-server") + #$config-file) + #:user "redis" + #:group "redis")) + (stop #~(make-kill-destructor)))))))) + +(define custom-redis-service-type + (service-type + (name 'custom-redis) + (extensions + (list (service-extension shepherd-root-service-type + redis-shepherd-service) + (service-extension activation-service-type + redis-activation) + (service-extension account-service-type + (const %redis-accounts)))) + (default-value (redis-configuration)) + (description "Run a customized Redis daemon without syslogd dependency."))) + (operating-system (host-name "genenetwork-development") (timezone "UTC") @@ -1157,13 +1474,14 @@ gn-auth." (targets (list "/dev/sdX")))) (file-systems %base-file-systems) (users %base-user-accounts) - (packages %base-packages) + (packages (cons* curl coreutils-minimal %base-packages)) (sudoers-file (mixed-text-file "sudoers" "@include " %sudoers-specification ;; Permit the laminar user to restart genenetwork2 ;; and genenetwork3. 
"\nlaminar ALL = NOPASSWD: " + (file-append shepherd "/bin/herd") " restart gn-guile, " (file-append shepherd "/bin/herd") " restart genenetwork2, " (file-append shepherd "/bin/herd") " start genenetwork3, " (file-append shepherd "/bin/herd") " stop genenetwork3, " @@ -1194,6 +1512,15 @@ gn-auth." (jobs (list #~(job '(next-hour) #$(program-file "build-xapian-index-cron" build-xapian-index-cron-gexp) + #:user "laminar") + ;; Run cron once a week at midnight on Sunday morning + ;; Verify using: https://crontab.guru/#0_0_*_*_0 + #~(job "0 0 * * 0" + #$(program-file "update-virtuoso" + (transform-genenetwork-database-gexp + %connection-settings + %virtuoso-data-dir + "https://git.genenetwork.org/gn-transform-databases")) #:user "laminar"))))) (simple-service 'install-laminar-template activation-service-type @@ -1206,13 +1533,13 @@ gn-auth." (socket (forge-ip-socket (ip "127.0.0.1") (port %webhook-port))))) - (service redis-service-type) + (service custom-redis-service-type) (service virtuoso-service-type (virtuoso-configuration (number-of-buffers 4000000) (maximum-dirty-buffers 3000000) (server-port 9081) - (dirs-allowed "/var/lib/data") + (dirs-allowed (list "/var/lib/data")) (http-server-port %virtuoso-sparql-port))) (service genenetwork-service-type (genenetwork-configuration @@ -1230,18 +1557,7 @@ gn-auth." (xapian-db-path %xapian-directory))) (simple-service 'set-build-directory-permissions activation-service-type - (with-imported-modules '((guix build utils)) - #~(begin - (use-modules (guix build utils)) - - (for-each (lambda (file) - (chown file - (passwd:uid (getpw "laminar")) - (passwd:gid (getpw "laminar")))) - (append (find-files #$%xapian-directory - #:directories? #t) - (find-files #$%transform-genenetwork-database-export-directory - #:directories? #t)))))) + set-build-directory-permissions-gexp) (service tissue-service-type (tissue-configuration (socket @@ -1250,8 +1566,12 @@ gn-auth." 
(hosts (list (tissue-host (name "issues.genenetwork.org") - (user "laminar") - (upstream-repository "https://github.com/genenetwork/gn-gemtext-threads")))))) + (projects (list (tissue-project + (name "issues.genenetwork.org") + (user "laminar") + (base-path "/") + (upstream-repository + "https://github.com/genenetwork/gn-gemtext-threads"))))))))) (service forge-nginx-service-type (forge-nginx-configuration (http-listen (forge-ip-socket @@ -1265,7 +1585,8 @@ gn-auth." %genenetwork2-port %genenetwork3-port) (laminar-reverse-proxy-server-block "localhost:9089" %webhook-port - (list 'gn-bioinformatics)) + (list 'gn-bioinformatics + 'guix-bioinformatics)) (tissue-reverse-proxy-server-block) (gn-auth-reverse-proxy-server-block))))) (service acme-service-type |