aboutsummaryrefslogtreecommitdiff
path: root/genenetwork-development.scm
diff options
context:
space:
mode:
Diffstat (limited to 'genenetwork-development.scm')
-rw-r--r--genenetwork-development.scm133
1 files changed, 72 insertions, 61 deletions
diff --git a/genenetwork-development.scm b/genenetwork-development.scm
index 5fafc12..5894cef 100644
--- a/genenetwork-development.scm
+++ b/genenetwork-development.scm
@@ -471,6 +471,7 @@ server described by CONFIG, a <genenetwork-configuration> object."
"GN2_SETTINGS"
#$(mixed-text-file "gn2.conf"
"GN2_SECRETS=\"" gn2-secrets "/gn2-secrets.py\"\n"
+ "AI_SEARCH_ENABLED=True\n"
"GN3_LOCAL_URL=\""
(string-append "http://localhost:"
(number->string gn3-port))
@@ -516,6 +517,7 @@ server described by CONFIG, a <genenetwork-configuration> object."
#$(mixed-text-file "gn3.conf"
"SPARQL_ENDPOINT=\"" sparql-endpoint "\"\n"
"DATA_DIR=\"" data-directory "\"\n"
+ "AUTH_SERVER_URL=\"https://auth-cd.genenetwork.org/\"\n"
"XAPIAN_DB_PATH=\"" xapian-db-path "\"\n"
"AUTH_DB=\"" auth-db-path "\"\n"
"LLM_DB_PATH=\"" llm-db-path "\"\n"))
@@ -744,10 +746,6 @@ described by CONFIG, a <genenetwork-configuration> object."
;;; transform-genenetwork-database
;;;
-;; Path to genenetwork database dump export directory that has lots of
-;; free space
-(define %transform-genenetwork-database-export-directory
- "/export/genenetwork-database-dump")
;; Unreleased version of ccwl that is required by
;; transform-genenetwork-database for its graphql library.
@@ -804,57 +802,53 @@ described by CONFIG, a <genenetwork-configuration> object."
(description "run64 is a SRFI-64 test runner for Scheme.")
(license license:gpl3+)))
-(define (transform-genenetwork-database project)
+;; Path to the file holding the Virtuoso and MySQL connection settings used to load data into Virtuoso
+(define %connection-settings
+ "/etc/genenetwork/conf/gn-transform-database/conn.scm")
+
+;; Path to the data directory from which Virtuoso loads all its files
+(define %virtuoso-data-dir "/var/lib/data")
+
+(define (transform-genenetwork-database-gexp connection-settings virtuoso-data-dir repository)
(with-imported-modules '((guix build utils))
(with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql
guile-dbi guile-hashing guile-libyaml guile-sparql
- guile-zlib nss-certs virtuoso-ose)
- #~(begin
- (use-modules (guix build utils)
- (srfi srfi-26)
- (ice-9 threads))
-
- (invoke "git" "clone"
- "--depth" "1"
- #$(forge-project-repository project)
- ".")
- (invoke "make" "-j" (number->string (current-processor-count)))
- (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory
- "/conn.scm"))
- (dump-directory #$(string-append %transform-genenetwork-database-export-directory
- "/dump")))
- (when (file-exists? dump-directory)
- (delete-file-recursively dump-directory))
- (mkdir-p dump-directory)
- ;; Dump data to RDF.
- (invoke "./pre-inst-env" "./dump.scm"
- connection-settings-file
- dump-directory)
- ;; Validate dumped RDF, sending the error output to
- ;; oblivion because we don't want to print out potentially
- ;; sensitive data.
- (with-error-to-file "/dev/null"
- (cut invoke
- #$(file-append raptor2 "/bin/rapper")
- "--input" "turtle"
- "--count"
- (string-append dump-directory "/dump.ttl")))
- ;; Load RDF into virtuoso.
- (invoke "./pre-inst-env" "./load-rdf.scm"
- connection-settings-file
- (string-append dump-directory "/dump.ttl"))
- ;; Visualize schema and archive results.
- (invoke "./pre-inst-env" "./visualize-schema.scm"
- connection-settings-file)
- (invoke #$(file-append graphviz "/bin/dot")
- "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
- (invoke #$(file-append graphviz "/bin/dot")
- "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")))))))
+ guile-zlib nss-certs virtuoso-ose raptor2)
+ #~(begin
+ (use-modules (guix build utils)
+ (srfi srfi-26)
+ (ice-9 threads))
+ (setenv "LC_ALL" "en_US.UTF-8")
+ (let ((build-directory (string-append #$virtuoso-data-dir
+ "/build")))
+ ;; Only run this job if the build directory does not
+ ;; exist. Its presence is taken to mean that another
+ ;; process is already running this job.
+ (unless (file-exists? build-directory)
+ (invoke "git" "clone" "--depth" "1" #$repository ".")
+ (invoke "make" "-j" (number->string (current-processor-count)))
+ (invoke "./generate-ttl-files.scm" "--settings"
+ #$connection-settings "--output" build-directory)
+ ;; First clear all the files in our virtuoso directory
+ (for-each (lambda (file)
+ (delete-file file))
+ (find-files #$virtuoso-data-dir ".ttl"))
+ ;; Move data into the container's virtuoso data directory
+ (copy-recursively build-directory #$virtuoso-data-dir)
+ ;; Load RDF into virtuoso.
+ (invoke "./pre-inst-env" "./load-rdf.scm" #$connection-settings)
+ ;; Visualize schema and archive results.
+ (invoke "./pre-inst-env" "./visualize-schema.scm" #$connection-settings)
+ (invoke #$(file-append graphviz "/bin/dot")
+ "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
+ (invoke #$(file-append graphviz "/bin/dot")
+ "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg"))
+ (delete-file-recursively build-directory)))))))
(define transform-genenetwork-database-project
(forge-project
(name "transform-genenetwork-database")
- (repository "/home/git/public/gn-transform-databases/")
+ (repository "https://git.genenetwork.org/gn-transform-databases")
(ci-jobs (list (forge-laminar-job
(name "transform-genenetwork-database-tests")
(run (guix-channel-job-gexp
@@ -866,7 +860,11 @@ described by CONFIG, a <genenetwork-configuration> object."
#:guix-daemon-uri %guix-daemon-uri)))
(forge-laminar-job
(name "transform-genenetwork-database")
- (run (transform-genenetwork-database this-forge-project)))))))
+ (run (transform-genenetwork-database-gexp
+ %connection-settings
+ %virtuoso-data-dir
+ "https://git.genenetwork.org/gn-transform-databases")))))
+ (ci-jobs-trigger 'webhook)))
;;;
@@ -884,6 +882,7 @@ described by CONFIG, a <genenetwork-configuration> object."
#~(begin
(use-modules (guix build utils))
+ (setenv "LC_ALL" "en_US.UTF-8")
(invoke #$(file-append tissue "/bin/tissue")
"pull" "issues.genenetwork.org"))))))))
(ci-jobs-trigger 'webhook)))
@@ -1137,6 +1136,20 @@ gn-auth."
";")
"proxy_set_header Host $host;")))))))
+(define set-build-directory-permissions-gexp
+ (with-imported-modules '((guix build utils))
+ #~(begin
+ (use-modules (guix build utils))
+
+ (for-each (lambda (file)
+ (chown file
+ (passwd:uid (getpw "laminar"))
+ (passwd:gid (getpw "laminar"))))
+ (append (find-files #$%xapian-directory
+ #:directories? #t)
+ (find-files #$%virtuoso-data-dir
+ #:directories? #t))))))
+
;; Port on which webhook is listening
(define %webhook-port 9091)
;; Port on which genenetwork2 is listening
@@ -1194,6 +1207,15 @@ gn-auth."
(jobs (list #~(job '(next-hour)
#$(program-file "build-xapian-index-cron"
build-xapian-index-cron-gexp)
+ #:user "laminar")
+ ;; Run cron once a week at midnight on Sunday morning
+ ;; Verify using: https://crontab.guru/#0_0_*_*_0
+ #~(job "0 0 * * 0"
+ #$(program-file "update-virtuoso"
+ (transform-genenetwork-database-gexp
+ %connection-settings
+ %virtuoso-data-dir
+ "https://git.genenetwork.org/gn-transform-databases"))
#:user "laminar")))))
(simple-service 'install-laminar-template
activation-service-type
@@ -1230,18 +1252,7 @@ gn-auth."
(xapian-db-path %xapian-directory)))
(simple-service 'set-build-directory-permissions
activation-service-type
- (with-imported-modules '((guix build utils))
- #~(begin
- (use-modules (guix build utils))
-
- (for-each (lambda (file)
- (chown file
- (passwd:uid (getpw "laminar"))
- (passwd:gid (getpw "laminar"))))
- (append (find-files #$%xapian-directory
- #:directories? #t)
- (find-files #$%transform-genenetwork-database-export-directory
- #:directories? #t))))))
+ set-build-directory-permissions-gexp)
(service tissue-service-type
(tissue-configuration
(socket