about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-xload-rdf.scm193
-rw-r--r--manifest.scm4
2 files changed, 196 insertions, 1 deletions
diff --git a/load-rdf.scm b/load-rdf.scm
new file mode 100755
index 0000000..f6e0f62
--- /dev/null
+++ b/load-rdf.scm
@@ -0,0 +1,193 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (rnrs io ports)
+             (srfi srfi-1)
+             (srfi srfi-26)
+             (srfi srfi-71)
+             (srfi srfi-171)
+             (ice-9 match)
+             (ice-9 popen)
+             (hashing md5)
+             ((web client) #:select (http-head open-socket-for-uri))
+             (web request)
+             (web response)
+             (web uri))
+
+(define %graph-uri
+  "http://genenetwork.org")
+
+(define (random-cnonce len)
+  "Return a random hexadecimal string LEN characters long."
+  (list->string (map (lambda _
+                       (let ((i (random 16)))
+                         (integer->char
+                          (if (< i 10)
+                              (+ i (char->integer #\0))
+                              (+ (- i 10)
+                                 (char->integer #\a))))))
+                     (iota len))))
+
+(define (md5-digest-authorization method uri username password)
+  "Return a digest authorization header to access URI with METHOD,
+USERNAME and PASSWORD."
+  (let* ((response response-body (http-head uri))
+         (challenge (or (find (match-lambda
+                                (('digest . challenge)
+                                 (let ((algorithm (assq-ref challenge 'algorithm)))
+                                   (or (not algorithm)
+                                       (string=? algorithm "MD5")))))
+                              (assq-ref (response-headers response)
+                                        'www-authenticate))
+                        (error "No MD5 authentication challenges advertised")))
+         (realm (assq-ref challenge 'realm))
+         (nonce (assq-ref challenge 'nonce))
+         (qop (assq-ref challenge 'qop))
+         ;; Hard-code the request counter (nc) to "1".
+         (nc "1")
+         (cnonce (random-cnonce 8)))
+    `(digest (username . ,username)
+             (realm . ,realm)
+             (nonce . ,nonce)
+             (uri . ,(uri-path uri))
+             (algorithm . "MD5")
+             (qop . ,qop)
+             (nc . ,nc)
+             (cnonce . ,cnonce)
+             (response . ,(md5->string
+                           (md5 (string->bytevector
+                                 (string-join
+                                  (list (md5->string
+                                         (md5 (string->bytevector (string-join
+                                                                   (list username realm password)
+                                                                   ":")
+                                                                  (make-transcoder (utf-8-codec)))))
+                                        nonce
+                                        nc
+                                        cnonce
+                                        qop
+                                        (md5->string
+                                         (md5 (string->bytevector (string-join (list (symbol->string method)
+                                                                                     (uri-path uri))
+                                                                               ":")
+                                                                  (make-transcoder (utf-8-codec))))))
+                                  ":")
+                                 (make-transcoder (utf-8-codec))))))
+             (opaque . ,(assq-ref challenge 'opaque)))))
+
+(define* (http-upload-file method uri file #:key username password)
+  "Upload FILE to URI using METHOD. USERNAME and PASSWORD, if
+provided, are the username and password to authenticate with."
+  (let ((authorization-headers
+         (if username
+             `((authorization . ,(md5-digest-authorization method uri username password)))
+             '())))
+    (call-with-port (open-socket-for-uri uri)
+      (lambda (port)
+        (let ((request (write-request (build-request
+                                       uri
+                                       #:method method
+                                       #:headers `((connection close)
+                                                   (content-length . ,(stat:size (stat file)))
+                                                   ,@authorization-headers)
+                                       #:port port)
+                                      port)))
+          (when file
+            (call-with-input-file file
+              (lambda (in)
+                (port-transduce (tmap (cut write-request-body request <>))
+                                (const #t)
+                                get-bytevector-some
+                                in)
+                (force-output (request-port request)))))
+          (read-response port))))))
+
+(define (call-with-pipe proc mode program . args)
+  "Execute PROGRAM ARGS ... in a subprocess with a pipe of MODE to
+it. Call PROC with a port to that pipe. Close the pipe once PROC
+exits, even if it exits non-locally. Return the value returned by
+PROC."
+  (let ((port #f))
+    (dynamic-wind (lambda () (set! port (apply open-pipe* mode program args)))
+                  (cut proc port)
+                  (lambda ()
+                    (let ((return-value (status:exit-val (close-pipe port))))
+                      (unless (and return-value
+                                   (zero? return-value))
+                        (error "Invocation of program failed" (cons program args))))))))
+
+(define (put-graph sparql-endpoint username password rdf-file graph)
+  "Load RDF-FILE into GRAPH at SPARQL-ENDPOINT, a SPARQL 1.1 Graph
+Store HTTP Protocol endpoint, authenticating with USERNAME and
+PASSWORD. The PUT method is used, and therefore, any existing data in
+the graph is deleted."
+  (let ((response (http-upload-file 'PUT
+                                    (build-uri (uri-scheme sparql-endpoint)
+                                               #:host (uri-host sparql-endpoint)
+                                               #:port (uri-port sparql-endpoint)
+                                               #:path (uri-path sparql-endpoint)
+                                               #:query (string-append "graph-uri=" (uri-encode graph)))
+                                    rdf-file
+                                    #:username username
+                                    #:password password)))
+    ;; Fail if response code is not 2xx.
+    (unless (= (quotient (response-code response)
+                         100)
+               2)
+      (error "Putting graph failed" response))))
+
+(define (delete-graph port graph)
+  "Delete GRAPH from virtuoso connecting to virtuoso on PORT."
+  ;; We do this with SQL because doing it with SPARQL is too
+  ;; slow. Note that this does not delete free-text index data, if
+  ;; any. See
+  ;; http://vos.openlinksw.com/owiki/wiki/VOS/VirtTipsAndTricksGuideDeleteLargeGraphs
+  (call-with-pipe
+   (lambda (port)
+     (format port
+             "DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');"
+             graph))
+   OPEN_WRITE
+   "isql" "-S" (number->string port)))
+
+(define (time-thunk thunk)
+  "Run THUNK and return the time taken in seconds."
+  (let ((start-time (current-time)))
+    (thunk)
+    (- (current-time) start-time)))
+
+(define main
+  (match-lambda*
+    ((_ connection-settings-file rdf-file)
+     (let ((connection-settings
+            (call-with-input-file connection-settings-file
+              read)))
+       ;; Delete existing data. We do not rely on the implicit
+       ;; deletion in the PUT method of the SPARQL 1.1 Graph Store
+       ;; HTTP Protocol because that is too slow.
+       (format (current-output-port)
+               "Existing virtuoso data deleted in ~a seconds~%"
+               (time-thunk
+                (cut delete-graph
+                     (assq-ref connection-settings 'virtuoso-port)
+                     %graph-uri)))
+       ;; Load data into virtuoso.
+       (format (current-output-port)
+               "~a loaded into virtuoso in ~a seconds~%"
+               rdf-file
+               (time-thunk
+                (cut put-graph
+                     (build-uri
+                      (assq-ref connection-settings 'sparql-scheme)
+                      #:host (assq-ref connection-settings 'sparql-host)
+                      #:port (assq-ref connection-settings 'sparql-port)
+                      #:path "/sparql-graph-crud-auth")
+                     (assq-ref connection-settings 'virtuoso-username)
+                     (assq-ref connection-settings 'virtuoso-password)
+                     rdf-file
+                     %graph-uri)))))
+    ((arg0 _ ...)
+     (format (current-error-port) "Usage: ~a CONNECTION-SETTINGS-FILE RDF-FILE~%" arg0)
+     (exit #f))))
+
+(apply main (command-line))
diff --git a/manifest.scm b/manifest.scm
index 9e7c150..c8cd15a 100644
--- a/manifest.scm
+++ b/manifest.scm
@@ -6,6 +6,7 @@
 
 (use-modules (gnu packages autotools)
              ((gnu packages bioinformatics) #:prefix guix:)
+             ((gnu packages databases) #:select (virtuoso-ose))
              (gnu packages graphviz)
              (gnu packages guile)
              ((gnu packages guile-xyz) #:select (guile-sparql) #:prefix guix:)
@@ -71,4 +72,5 @@
  (list guile-3.0 guile-dbi guile-dbd-mysql
        ;; We abuse (ccwl graphviz) as a library to visualize the database
        ;; schema. Hence we need ccwl and guile-libyaml.
-       ccwl graphviz guile-libyaml guile-sparql run64))
+       ccwl graphviz guile-hashing guile-libyaml guile-sparql
+       run64 virtuoso-ose))