aboutsummaryrefslogtreecommitdiff
path: root/genenetwork-development.scm
diff options
context:
space:
mode:
authorMunyoki Kilyungi2024-10-28 09:54:49 +0300
committerMunyoki Kilyungi2024-11-12 11:45:16 +0300
commit7478f59d12705043439c9428777e969780be6bbf (patch)
tree46fb60a83494b68264d0712666b47dfdb6562b58 /genenetwork-development.scm
parent7306f1127df9d4193adfbfa51295615f13d32b55 (diff)
downloadgn-machines-7478f59d12705043439c9428777e969780be6bbf.tar.gz
Refactor ttl and data-loading into Virtuoso in ci job.
* genenetwork-development.scm (transform-genenetwork-database): Use a script to generate and validate ttl files. (%connection-settings): New variable. Contains the sparql and mysql login credentials. (%virtuoso-data-dir): New variable. (transform-genenetwork-database-project): Update how the transform-genenetwork-database gexp is called. Add web hook.
Diffstat (limited to 'genenetwork-development.scm')
-rw-r--r--genenetwork-development.scm90
1 files changed, 45 insertions, 45 deletions
diff --git a/genenetwork-development.scm b/genenetwork-development.scm
index 6f3cfa4..1e098ef 100644
--- a/genenetwork-development.scm
+++ b/genenetwork-development.scm
@@ -806,57 +806,53 @@ described by CONFIG, a <genenetwork-configuration> object."
(description "run64 is a SRFI-64 test runner for Scheme.")
(license license:gpl3+)))
-(define (transform-genenetwork-database project)
+;; Path to the file holding the connection settings for Virtuoso and MySQL, used to load data into Virtuoso
+(define %connection-settings
+ "/etc/genenetwork/conf/gn-transform-database/conn.scm")
+
+;; Path to the data directory from which Virtuoso loads all its files
+(define %virtuoso-data-dir "/var/lib/data")
+
+(define (transform-genenetwork-database connection-settings virtuoso-data-dir project)
+  "Return the CI build steps (a G-expression wrapped in
+`with-imported-modules' and `with-packages') that clone PROJECT's
+repository, build it, generate TTL files into a \"build\" directory under
+VIRTUOSO-DATA-DIR, replace the .ttl files in VIRTUOSO-DATA-DIR with the
+newly generated ones, load the RDF into Virtuoso and archive SVG
+renderings of the SQL and RDF schemas under $ARCHIVE.
+CONNECTION-SETTINGS is the file name handed to the project's scripts as
+their connection settings.  The steps are skipped when the build
+directory already exists."
(with-imported-modules '((guix build utils))
(with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql
guile-dbi guile-hashing guile-libyaml guile-sparql
- guile-zlib nss-certs virtuoso-ose)
- #~(begin
- (use-modules (guix build utils)
- (srfi srfi-26)
- (ice-9 threads))
-
- (invoke "git" "clone"
- "--depth" "1"
- #$(forge-project-repository project)
- ".")
- (invoke "make" "-j" (number->string (current-processor-count)))
- (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory
- "/conn.scm"))
- (dump-directory #$(string-append %transform-genenetwork-database-export-directory
- "/dump")))
- (when (file-exists? dump-directory)
- (delete-file-recursively dump-directory))
- (mkdir-p dump-directory)
- ;; Dump data to RDF.
- (invoke "./pre-inst-env" "./dump.scm"
- connection-settings-file
- dump-directory)
- ;; Validate dumped RDF, sending the error output to
- ;; oblivion because we don't want to print out potentially
- ;; sensitive data.
- (with-error-to-file "/dev/null"
- (cut invoke
- #$(file-append raptor2 "/bin/rapper")
- "--input" "turtle"
- "--count"
- (string-append dump-directory "/dump.ttl")))
- ;; Load RDF into virtuoso.
- (invoke "./pre-inst-env" "./load-rdf.scm"
- connection-settings-file
- (string-append dump-directory "/dump.ttl"))
- ;; Visualize schema and archive results.
- (invoke "./pre-inst-env" "./visualize-schema.scm"
- connection-settings-file)
- (invoke #$(file-append graphviz "/bin/dot")
- "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
- (invoke #$(file-append graphviz "/bin/dot")
- "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")))))))
+                       guile-zlib nss-certs virtuoso-ose raptor2)
+      #~(begin
+          (use-modules (guix build utils)
+                       (ice-9 threads))
+          (setenv "LC_ALL" "en_US.UTF-8")
+          (let ((build-directory (string-append #$virtuoso-data-dir
+                                                "/build")))
+            ;; Only run this job if the build directory does not
+            ;; exist.  Its presence is taken to mean that another
+            ;; process is already running this job.
+            ;; NOTE(review): nothing here creates the build directory;
+            ;; presumably ./generate-ttl-files.scm does, in which case
+            ;; the check does not cover the clone/make phase -- confirm.
+            (if (file-exists? build-directory)
+                (format (current-error-port)
+                        "~a exists; another run may be in progress, skipping.~%"
+                        build-directory)
+                (dynamic-wind
+                  (const #t)
+                  (lambda ()
+                    (invoke "git" "clone" "--depth" "1"
+                            #$(forge-project-repository project) ".")
+                    (invoke "make" "-j"
+                            (number->string (current-processor-count)))
+                    ;; NOTE(review): unlike the scripts below, this one
+                    ;; is not run through ./pre-inst-env -- confirm that
+                    ;; this is intended.
+                    (invoke "./generate-ttl-files.scm" "--settings"
+                            #$connection-settings "--output" build-directory)
+                    ;; First clear the stale .ttl files in our virtuoso
+                    ;; directory.  `find-files' recurses, and the build
+                    ;; directory lives inside the virtuoso directory, so
+                    ;; the freshly generated files must be excluded or
+                    ;; they would be deleted before being copied below.
+                    (for-each delete-file
+                              (find-files
+                               #$virtuoso-data-dir
+                               (lambda (file stat)
+                                 (and (string-suffix? ".ttl" file)
+                                      (not (string-prefix?
+                                            (string-append build-directory "/")
+                                            file))))))
+                    ;; Move data into the container's virtuoso data directory
+                    (copy-recursively build-directory #$virtuoso-data-dir)
+                    ;; Load RDF into virtuoso.
+                    (invoke "./pre-inst-env" "./load-rdf.scm"
+                            #$connection-settings)
+                    ;; Visualize schema and archive results.
+                    (invoke "./pre-inst-env" "./visualize-schema.scm"
+                            #$connection-settings)
+                    (invoke #$(file-append graphviz "/bin/dot")
+                            "-Tsvg" "sql.dot"
+                            (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
+                    (invoke #$(file-append graphviz "/bin/dot")
+                            "-Tsvg" "rdf.dot"
+                            (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")))
+                  ;; Always remove the build directory, even when a step
+                  ;; above fails; a leftover directory would make every
+                  ;; later run skip itself.
+                  (lambda ()
+                    (when (file-exists? build-directory)
+                      (delete-file-recursively build-directory))))))))))
(define transform-genenetwork-database-project
(forge-project
(name "transform-genenetwork-database")
- (repository "/home/git/public/gn-transform-databases/")
+ (repository "https://git.genenetwork.org/gn-transform-databases")
(ci-jobs (list (forge-laminar-job
(name "transform-genenetwork-database-tests")
(run (guix-channel-job-gexp
@@ -868,7 +864,11 @@ described by CONFIG, a <genenetwork-configuration> object."
#:guix-daemon-uri %guix-daemon-uri)))
(forge-laminar-job
(name "transform-genenetwork-database")
- (run (transform-genenetwork-database this-forge-project)))))))
+ (run (transform-genenetwork-database
+ %connection-settings
+ %virtuoso-data-dir
+ this-forge-project)))))
+ (ci-jobs-trigger 'webhook)))
;;;