about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2024-10-28 09:54:49 +0300
committerMunyoki Kilyungi2024-11-12 11:45:16 +0300
commit7478f59d12705043439c9428777e969780be6bbf (patch)
tree46fb60a83494b68264d0712666b47dfdb6562b58
parent7306f1127df9d4193adfbfa51295615f13d32b55 (diff)
downloadgn-machines-7478f59d12705043439c9428777e969780be6bbf.tar.gz
Refactor ttl and data-loading into Virtuoso in ci job.
* genenetwork-development.scm (transform-genenetwork-database): Use a script
to generate and validate ttl files.
(%connection-settings): New variable.  Path to the file containing the
sparql and mysql login credentials.
(%virtuoso-data-dir): New variable.
(transform-genenetwork-database-project): Update how the
transform-genenetwork-database gexp is called.  Add web hook.
-rw-r--r--genenetwork-development.scm90
1 files changed, 45 insertions, 45 deletions
diff --git a/genenetwork-development.scm b/genenetwork-development.scm
index 6f3cfa4..1e098ef 100644
--- a/genenetwork-development.scm
+++ b/genenetwork-development.scm
@@ -806,57 +806,53 @@ described by CONFIG, a <genenetwork-configuration> object."
     (description "run64 is a SRFI-64 test runner for Scheme.")
     (license license:gpl3+)))
 
-(define (transform-genenetwork-database project)
+;; Connection settings for Virtuoso and MySQL used to load data into Virtuoso
+(define %connection-settings
+  "/etc/genenetwork/conf/gn-transform-database/conn.scm")
+
+;; Path to the data directory from which Virtuoso loads all the files
+(define %virtuoso-data-dir "/var/lib/data")
+
+(define (transform-genenetwork-database connection-settings virtuoso-data-dir project)
   (with-imported-modules '((guix build utils))
     (with-packages (list ccwl git-minimal gnu-make guile-3.0 guile-dbd-mysql
                          guile-dbi guile-hashing guile-libyaml guile-sparql
-                         guile-zlib nss-certs virtuoso-ose)
-      #~(begin
-          (use-modules (guix build utils)
-                       (srfi srfi-26)
-                       (ice-9 threads))
-
-          (invoke "git" "clone"
-                  "--depth" "1"
-                  #$(forge-project-repository project)
-                  ".")
-          (invoke "make" "-j" (number->string (current-processor-count)))
-          (let ((connection-settings-file #$(string-append %transform-genenetwork-database-export-directory
-                                                           "/conn.scm"))
-                (dump-directory #$(string-append %transform-genenetwork-database-export-directory
-                                                 "/dump")))
-            (when (file-exists? dump-directory)
-              (delete-file-recursively dump-directory))
-            (mkdir-p dump-directory)
-            ;; Dump data to RDF.
-            (invoke "./pre-inst-env" "./dump.scm"
-                    connection-settings-file
-                    dump-directory)
-            ;; Validate dumped RDF, sending the error output to
-            ;; oblivion because we don't want to print out potentially
-            ;; sensitive data.
-            (with-error-to-file "/dev/null"
-              (cut invoke
-                   #$(file-append raptor2 "/bin/rapper")
-                   "--input" "turtle"
-                   "--count"
-                   (string-append dump-directory "/dump.ttl")))
-            ;; Load RDF into virtuoso.
-            (invoke "./pre-inst-env" "./load-rdf.scm"
-                    connection-settings-file
-                    (string-append dump-directory "/dump.ttl"))
-            ;; Visualize schema and archive results.
-            (invoke "./pre-inst-env" "./visualize-schema.scm"
-                    connection-settings-file)
-            (invoke #$(file-append graphviz "/bin/dot")
-                    "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
-            (invoke #$(file-append graphviz "/bin/dot")
-                    "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg")))))))
+                         guile-zlib nss-certs virtuoso-ose raptor2)
+     #~(begin
+         (use-modules (guix build utils)
+                      (srfi srfi-26)
+                      (ice-9 threads))
+         (setenv "LC_ALL" "en_US.UTF-8")
+         (let ((build-directory (string-append #$virtuoso-data-dir
+                                               "/build")))
+           ;; Only run this job if the build directory does not
+           ;; exist.  This ensures that no other process is
+           ;; running this job.
+           (unless (file-exists? build-directory)
+             (invoke "git" "clone" "--depth" "1" #$(forge-project-repository project) ".")
+             (invoke "make" "-j" (number->string (current-processor-count)))
+             (invoke "./generate-ttl-files.scm" "--settings"
+                     #$connection-settings "--output" build-directory)
+             ;; First clear all files matching ".ttl" in our virtuoso directory
+             (for-each (lambda (file)
+                         (delete-file file))
+                       (find-files #$virtuoso-data-dir ".ttl"))
+             ;; Move data into the container's virtuoso data directory
+             (copy-recursively build-directory #$virtuoso-data-dir)
+             ;; Load RDF into virtuoso.
+             (invoke "./pre-inst-env" "./load-rdf.scm" #$connection-settings)
+             ;; Visualize schema and archive results.
+             (invoke "./pre-inst-env" "./visualize-schema.scm" #$connection-settings)
+             (invoke #$(file-append graphviz "/bin/dot")
+                     "-Tsvg" "sql.dot" (string-append "-o" (getenv "ARCHIVE") "/sql.svg"))
+             (invoke #$(file-append graphviz "/bin/dot")
+                     "-Tsvg" "rdf.dot" (string-append "-o" (getenv "ARCHIVE") "/rdf.svg"))
+             (delete-file-recursively build-directory)))))))
 
 (define transform-genenetwork-database-project
   (forge-project
    (name "transform-genenetwork-database")
-   (repository "/home/git/public/gn-transform-databases/")
+   (repository "https://git.genenetwork.org/gn-transform-databases")
    (ci-jobs (list (forge-laminar-job
                    (name "transform-genenetwork-database-tests")
                    (run (guix-channel-job-gexp
@@ -868,7 +864,11 @@ described by CONFIG, a <genenetwork-configuration> object."
                          #:guix-daemon-uri %guix-daemon-uri)))
                   (forge-laminar-job
                    (name "transform-genenetwork-database")
-                   (run (transform-genenetwork-database this-forge-project)))))))
+                   (run (transform-genenetwork-database
+                         %connection-settings
+                         %virtuoso-data-dir
+                         this-forge-project)))))
+   (ci-jobs-trigger 'webhook)))
 
 
 ;;;