From 7478f59d12705043439c9428777e969780be6bbf Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 28 Oct 2024 09:54:49 +0300 Subject: Refactor ttl and data-loading into Virtuoso in ci job. * genenetwork-development.scm (transform-genenetwork-database): Use a script to generate and validate ttl files. (%connection-settings): New variable. Contains the sparql and mysql login credentials. (%virtuoso-data-dir): New variable. (transform-genenetwork-database-project): Update how the transform-genenetwork-database gexp is called. Add web hook. --- genenetwork-development.scm | 90 ++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/genenetwork-development.scm b/genenetwork-development.scm index 6f3cfa4..1e098ef 100644 --- a/genenetwork-development.scm +++ b/genenetwork-development.scm @@ -806,57 +806,53 @@ described by CONFIG, a object." (description "run64 is a SRFI-64 test runner for Scheme.") (license license:gpl3+))) -(define (transform-genenetwork-database project) +;; Connection settings for Virtuoso and MySQL used to load data into Virtuoso +(define %connection-settings + "/etc/genenetwork/conf/gn-transform-database/conn.scm") + +;; Path to where the data directory from which virtuoso loads all the files +(define %virtuoso-data-dir "/var/lib/data") + +(define (transform-genenetwork-database connection-settings virtuoso-data-dir project) (with-imported-modules '((guix build utils)) (with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql guile-dbi guile-hashing guile-libyaml guile-sparql - guile-zlib nss-certs virtuoso-ose) - #~(begin - (use-modules (guix build utils) - (srfi srfi-26) - (ice-9 threads)) - - (invoke "git" "clone" - "--depth" "1" - #$(forge-project-repository project) - ".") - (invoke "make" "-j" (number->string (current-processor-count))) - (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory - "/conn.scm")) - (dump-directory #$(string-append %transform-genenetwork-database-export-directory - "/dump"))) - (when (file-exists? dump-directory) - (delete-file-recursively dump-directory)) - (mkdir-p dump-directory) - ;; Dump data to RDF. - (invoke "./pre-inst-env" "./dump.scm" - connection-settings-file - dump-directory) - ;; Validate dumped RDF, sending the error output to - ;; oblivion because we don't want to print out potentially - ;; sensitive data. - (with-error-to-file "/dev/null" - (cut invoke - #$(file-append raptor2 "/bin/rapper") - "--input" "turtle" - "--count" - (string-append dump-directory "/dump.ttl"))) - ;; Load RDF into virtuoso. - (invoke "./pre-inst-env" "./load-rdf.scm" - connection-settings-file - (string-append dump-directory "/dump.ttl")) - ;; Visualize schema and archive results. - (invoke "./pre-inst-env" "./visualize-schema.scm" - connection-settings-file) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) - (invoke #$(file-append graphviz "/bin/dot") - "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg"))))))) + guile-zlib nss-certs virtuoso-ose raptor2) + #~(begin + (use-modules (guix build utils) + (srfi srfi-26) + (ice-9 threads)) + (setenv "LC_ALL" "en_US.UTF-8") + (let ((build-directory (string-append #$virtuoso-data-dir + "/build"))) + ;; Only run this job if the build directory does not + ;; exists. This ensures that no other process is + ;; running this. + (unless (file-exists? build-directory) + (invoke "git" "clone" "--depth" "1" #$(forge-project-repository project) ".") + (invoke "make" "-j" (number->string (current-processor-count))) + (invoke "./generate-ttl-files.scm" "--settings" + #$connection-settings "--output" build-directory) + ;; First clear all the files in our virtuoso directory + (for-each (lambda (file) + (delete-file file)) + (find-files #$virtuoso-data-dir ".ttl")) + ;; Move data into the container's virtuoso data directory + (copy-recursively build-directory #$virtuoso-data-dir) + ;; Load RDF into virtuoso. + (invoke "./pre-inst-env" "./load-rdf.scm" #$connection-settings) + ;; Visualize schema and archive results. + (invoke "./pre-inst-env" "./visualize-schema.scm" #$connection-settings) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg")) + (invoke #$(file-append graphviz "/bin/dot") + "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")) + (delete-file-recursively build-directory))))))) (define transform-genenetwork-database-project (forge-project (name "transform-genenetwork-database") - (repository "/home/git/public/gn-transform-databases/") + (repository "https://git.genenetwork.org/gn-transform-databases") (ci-jobs (list (forge-laminar-job (name "transform-genenetwork-database-tests") (run (guix-channel-job-gexp @@ -868,7 +864,11 @@ described by CONFIG, a object." #:guix-daemon-uri %guix-daemon-uri))) (forge-laminar-job (name "transform-genenetwork-database") - (run (transform-genenetwork-database this-forge-project))))))) + (run (transform-genenetwork-database + %connection-settings + %virtuoso-data-dir + this-forge-project))))) + (ci-jobs-trigger 'webhook))) ;;; -- cgit v1.2.3