aboutsummaryrefslogtreecommitdiff
#! /usr/bin/env guile
!#

(use-modules (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (srfi srfi-71)
             (srfi srfi-171)
             (ice-9 ftw)
             (ice-9 match)
             (ice-9 popen)
             (hashing md5)
             ((web client) #:select (http-head open-socket-for-uri))
             (web request)
             (web response)
             (web uri))

;; IRI of the named graph this script manages. main passes it to
;; delete-graph (as the graph to delete) and to bulk-load-data (as the
;; graph to load into).
(define %graph-uri
  "http://genenetwork.org")

(define (call-with-pipe proc mode program . args)
  "Run PROGRAM with ARGS in a subprocess connected through a pipe
opened in MODE (e.g. OPEN_READ or OPEN_WRITE), and call PROC with the
port for that pipe. The pipe is closed when PROC finishes, including
when it exits non-locally. Return whatever PROC returns. Raise an
error if the subprocess does not terminate with exit status zero."
  ;; The pipe port is created in the entry guard so that it is only
  ;; opened once control actually enters the dynamic extent.
  (define pipe #f)
  (define (open-pipe!)
    (set! pipe (apply open-pipe* mode program args)))
  (define (close-and-check!)
    ;; status:exit-val yields #f when the process did not exit
    ;; normally (e.g. it was killed by a signal); treat that as a
    ;; failure too.
    (let ((exit-code (status:exit-val (close-pipe pipe))))
      (when (or (not exit-code)
                (not (zero? exit-code)))
        (error "Invocation of program failed" (cons program args)))))
  (dynamic-wind open-pipe!
                (lambda () (proc pipe))
                close-and-check!))


(define (delete-graph port password graph)
  "Remove all quads of GRAPH from the virtuoso server listening on
localhost at PORT. PASSWORD is sent to isql via SET PWD, written in
quoted (write) syntax. The statements are piped to the standard input
of an `isql' subprocess."
  ;; The deletion is done in SQL rather than SPARQL because SPARQL is
  ;; too slow for this. Free-text index data, if any, is not deleted
  ;; by this statement. See
  ;; http://vos.openlinksw.com/owiki/wiki/VOS/VirtTipsAndTricksGuideDeleteLargeGraphs
  (define statements
    "SET DSN=localhost:~a;
SET PWD=~s;
DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');")
  (define (send-statements isql-input)
    (format isql-input statements port password graph))
  (call-with-pipe send-statements OPEN_WRITE "isql"))

(define (empty-load-queue port password)
  "Empty the virtuoso bulk load queue by deleting every row from
DB.DBA.load_list on the server listening on localhost at PORT.
PASSWORD is sent to isql via SET PWD, written in quoted (write)
syntax. The statements are piped to the standard input of an `isql'
subprocess."
  (define statements
    "SET DSN=localhost:~a;
SET PWD=~s;
DELETE FROM DB.DBA.load_list;")
  (define (send-statements isql-input)
    (format isql-input statements port password))
  (call-with-pipe send-statements OPEN_WRITE "isql"))

(define (bulk-load-data port password graph)
  "Bulk load data into GRAPH on the virtuoso server listening on
localhost at PORT. The script sent to `isql' calls ld_dir for the
'*.ttl' files under /var/lib/data with GRAPH as the target, then
rdf_loader_run, then CHECKPOINT. PASSWORD is sent via SET PWD, written
in quoted (write) syntax."
  (define statements
    "SET DSN=localhost:~a;
SET PWD=~s;
ld_dir('/var/lib/data', '*.ttl', '~a');
rdf_loader_run();
CHECKPOINT;
")
  (define (send-statements isql-input)
    (format isql-input statements port password graph))
  (call-with-pipe send-statements OPEN_WRITE "isql"))

(define (index-data port password)
  "Index all text data for quicker search on the virtuoso server
listening on localhost at PORT. The script sent to `isql' calls
DB.DBA.RDF_OBJ_FT_RULE_ADD with the rule name 'All', then
DB.DBA.VT_INC_INDEX_DB_DBA_RDF_OBJ, then quits. PASSWORD is sent via
SET PWD, written in quoted (write) syntax."
  (define statements
    "SET DSN=localhost:~a;
SET PWD=~s;
DB.DBA.RDF_OBJ_FT_RULE_ADD (null, null, 'All');
DB.DBA.VT_INC_INDEX_DB_DBA_RDF_OBJ();
quit;
")
  (define (send-statements isql-input)
    (format isql-input statements port password))
  (call-with-pipe send-statements OPEN_WRITE "isql"))

(define (time-thunk thunk)
  "Call THUNK with no arguments, discard its result, and return the
number of seconds that elapsed, computed as the difference between two
readings of (current-time)."
  (define began (current-time))
  (thunk)
  (- (current-time) began))

;; Entry point. Expects the full command line: the program name
;; followed by exactly one argument, the path to a connection settings
;; file. That file is read with `read' and must contain an association
;; list with the keys `virtuoso-port' and `virtuoso-password'. The
;; existing graph is deleted, the load queue is emptied, the data is
;; bulk loaded and finally indexed; the time taken by each step is
;; printed to the current output port. With any other number of
;; arguments, a usage message is printed to the current error port and
;; the script exits with a failure status.
(define main
  (match-lambda*
    ((_ connection-settings-file)
     (let* ((connection-settings
             (call-with-input-file connection-settings-file
               read))
            (port (assq-ref connection-settings 'virtuoso-port))
            (password (assq-ref connection-settings 'virtuoso-password)))
       ;; Delete existing data. We do not rely on the implicit
       ;; deletion in the PUT method of the SPARQL 1.1 Graph Store
       ;; HTTP Protocol because that is too slow.
       (format (current-output-port)
               "Existing virtuoso data deleted in ~a seconds~%"
               (time-thunk
                (cut delete-graph port password %graph-uri)))
       ;; Delete the load queue
       (format (current-output-port)
               "Existing DB.LOAD queue deleted in ~a seconds~%"
               (time-thunk
                (cut empty-load-queue port password)))
       ;; Bulk load data
       (format (current-output-port)
               "Existing virtuoso data uploaded in ~a seconds~%"
               (time-thunk
                (cut bulk-load-data port password %graph-uri)))
       ;; Index the data. The format string must consume the elapsed
       ;; time: passing an argument that no directive consumes makes
       ;; simple-format signal a "superfluous arguments" error.
       (format (current-output-port)
               "Data indexed in ~a seconds~%"
               (time-thunk
                (cut index-data port password)))))
    ((arg0 _ ...)
     (format (current-error-port) "Usage: ~a CONNECTION-SETTINGS-FILE~%" arg0)
     (exit #f))))

;; Run the script: pass the whole command line to main, whose first
;; argument is therefore the program name.
(apply main (command-line))