diff options
Diffstat (limited to 'genenetwork-development.scm')
-rw-r--r-- | genenetwork-development.scm | 133 |
1 files changed, 72 insertions, 61 deletions
diff --git a/genenetwork-development.scm b/genenetwork-development.scm index 5fafc12..5894cef 100644 --- a/genenetwork-development.scm +++ b/genenetwork-development.scm @@ -471,6 +471,7 @@ server described by CONFIG, a <genenetwork-configuration> object." "GN2_SETTINGS" #$(mixed-text-file "gn2.conf" "GN2_SECRETS=\"" gn2-secrets "/gn2-secrets.py\"\n" + "AI_SEARCH_ENABLED=True\n" "GN3_LOCAL_URL=\"" (string-append "http://localhost:" (number->string gn3-port)) @@ -516,6 +517,7 @@ server described by CONFIG, a <genenetwork-configuration> object." #$(mixed-text-file "gn3.conf" "SPARQL_ENDPOINT=\"" sparql-endpoint "\"\n" "DATA_DIR=\"" data-directory "\"\n" + "AUTH_SERVER_URL=\"https://auth-cd.genenetwork.org/\"\n" "XAPIAN_DB_PATH=\"" xapian-db-path "\"\n" "AUTH_DB=\"" auth-db-path "\"\n" "LLM_DB_PATH=\"" llm-db-path "\"\n")) @@ -744,10 +746,6 @@ described by CONFIG, a <genenetwork-configuration> object." ;;; transform-genenetwork-database ;;; -;; Path to genenetwork database dump export directory that has lots of -;; free space -(define %transform-genenetwork-database-export-directory - "/export/genenetwork-database-dump") ;; Unreleased version of ccwl that is required by ;; transform-genenetwork-database for its graphql library. @@ -804,57 +802,53 @@ described by CONFIG, a <genenetwork-configuration> object." 
(description "run64 is a SRFI-64 test runner for Scheme.") (license license:gpl3+))) -(define (transform-genenetwork-database project) +;; Path to the file holding the Virtuoso and MySQL connection settings used to load data into Virtuoso +(define %connection-settings + "/etc/genenetwork/conf/gn-transform-database/conn.scm") + +;; Path to the data directory from which virtuoso loads all the files +(define %virtuoso-data-dir "/var/lib/data") +;; Return a G-expression that clones REPOSITORY, builds it, runs ./generate-ttl-files.scm with CONNECTION-SETTINGS, copies the output into VIRTUOSO-DATA-DIR, loads it into virtuoso and writes the schema graphs to $ARCHIVE. +(define (transform-genenetwork-database-gexp connection-settings virtuoso-data-dir repository) (with-imported-modules '((guix build utils)) (with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql guile-dbi guile-hashing guile-libyaml guile-sparql - guile-zlib nss-certs virtuoso-ose) - #~(begin - (use-modules (guix build utils) - (srfi srfi-26) - (ice-9 threads)) - - (invoke "git" "clone" - "--depth" "1" - #$(forge-project-repository project) - ".") - (invoke "make" "-j" (number->string (current-processor-count))) - (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory - "/conn.scm")) - (dump-directory #$(string-append %transform-genenetwork-database-export-directory - "/dump"))) - (when (file-exists? dump-directory) - (delete-file-recursively dump-directory)) - (mkdir-p dump-directory) - ;; Dump data to RDF. - (invoke "./pre-inst-env" "./dump.scm" - connection-settings-file - dump-directory) - ;; Validate dumped RDF, sending the error output to - ;; oblivion because we don't want to print out potentially - ;; sensitive data. - (with-error-to-file "/dev/null" - (cut invoke - #$(file-append raptor2 "/bin/rapper") - "--input" "turtle" - "--count" - (string-append dump-directory "/dump.ttl"))) - ;; Load RDF into virtuoso. - (invoke "./pre-inst-env" "./load-rdf.scm" - connection-settings-file - (string-append dump-directory "/dump.ttl")) - ;; Visualize schema and archive results. 
- (invoke "./pre-inst-env" "./visualize-schema.scm" - connection-settings-file) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg"))))))) + guile-zlib nss-certs virtuoso-ose raptor2) + #~(begin + (use-modules (guix build utils) + (srfi srfi-26) + (ice-9 threads)) + (setenv "LC_ALL" "en_US.UTF-8") + (let ((build-directory (string-append #$virtuoso-data-dir + "/build"))) + ;; Only run this job if the build directory does not + ;; exist; its presence is taken to mean that another run is in + ;; progress. NOTE(review): the directory is deleted only as the last step below; presumably a failing step leaves it behind and blocks later runs -- TODO confirm. + (unless (file-exists? build-directory) + (invoke "git" "clone" "--depth" "1" #$repository ".") + (invoke "make" "-j" (number->string (current-processor-count))) + (invoke "./generate-ttl-files.scm" "--settings" + #$connection-settings "--output" build-directory) + ;; First clear the files matching ".ttl" in our virtuoso directory. NOTE(review): build-directory lives under this directory, so the freshly generated files may be deleted too if find-files recurses; ".ttl" also looks like an unanchored regexp -- TODO confirm. + (for-each (lambda (file) + (delete-file file)) + (find-files #$virtuoso-data-dir ".ttl")) + ;; Copy the generated data into the container's virtuoso data directory + (copy-recursively build-directory #$virtuoso-data-dir) + ;; Load RDF into virtuoso. + (invoke "./pre-inst-env" "./load-rdf.scm" #$connection-settings) + ;; Visualize schema and archive results. 
+ (invoke "./pre-inst-env" "./visualize-schema.scm" #$connection-settings) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")) + (delete-file-recursively build-directory))))))) (define transform-genenetwork-database-project (forge-project (name "transform-genenetwork-database") - (repository "/home/git/public/gn-transform-databases/") + (repository "https://git.genenetwork.org/gn-transform-databases") (ci-jobs (list (forge-laminar-job (name "transform-genenetwork-database-tests") (run (guix-channel-job-gexp @@ -866,7 +860,11 @@ described by CONFIG, a <genenetwork-configuration> object." #:guix-daemon-uri %guix-daemon-uri))) (forge-laminar-job (name "transform-genenetwork-database") - (run (transform-genenetwork-database this-forge-project))))))) + (run (transform-genenetwork-database-gexp + %connection-settings + %virtuoso-data-dir + "https://git.genenetwork.org/gn-transform-databases"))))) + (ci-jobs-trigger 'webhook))) ;;; @@ -884,6 +882,7 @@ described by CONFIG, a <genenetwork-configuration> object." #~(begin (use-modules (guix build utils)) + (setenv "LC_ALL" "en_US.UTF-8") (invoke #$(file-append tissue "/bin/tissue") "pull" "issues.genenetwork.org")))))))) (ci-jobs-trigger 'webhook))) @@ -1137,6 +1136,20 @@ gn-auth." ";") "proxy_set_header Host $host;"))))))) +(define set-build-directory-permissions-gexp + (with-imported-modules '((guix build utils)) + #~(begin + (use-modules (guix build utils)) + ;; Give the "laminar" user and its primary group ownership of every file and directory found under the xapian and virtuoso data directories. + (for-each (lambda (file) + (chown file + (passwd:uid (getpw "laminar")) + (passwd:gid (getpw "laminar")))) + (append (find-files #$%xapian-directory + #:directories? #t) + (find-files #$%virtuoso-data-dir + #:directories? #t)))))) + ;; Port on which webhook is listening (define %webhook-port 9091) ;; Port on which genenetwork2 is listening @@ -1194,6 +1207,15 @@ gn-auth." 
(jobs (list #~(job '(next-hour) #$(program-file "build-xapian-index-cron" build-xapian-index-cron-gexp) + #:user "laminar") + ;; Run cron once a week at midnight on Sunday morning + ;; Verify using: https://crontab.guru/#0_0_*_*_0 + #~(job "0 0 * * 0" + #$(program-file "update-virtuoso" + (transform-genenetwork-database-gexp + %connection-settings + %virtuoso-data-dir + "https://git.genenetwork.org/gn-transform-databases")) #:user "laminar"))))) (simple-service 'install-laminar-template activation-service-type @@ -1230,18 +1252,7 @@ gn-auth." (xapian-db-path %xapian-directory))) (simple-service 'set-build-directory-permissions activation-service-type - (with-imported-modules '((guix build utils)) - #~(begin - (use-modules (guix build utils)) - - (for-each (lambda (file) - (chown file - (passwd:uid (getpw "laminar")) - (passwd:gid (getpw "laminar")))) - (append (find-files #$%xapian-directory - #:directories? #t) - (find-files #$%transform-genenetwork-database-export-directory - #:directories? #t)))))) + set-build-directory-permissions-gexp) (service tissue-service-type (tissue-configuration (socket |