#! /usr/bin/env guile
!#

(use-modules (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (srfi srfi-71)
             (srfi srfi-171)
             (ice-9 ftw)
             (ice-9 match)
             (ice-9 popen)
             (hashing md5)
             ((web client) #:select (http-head open-socket-for-uri))
             (web request)
             (web response)
             (web uri))

;; URI of the RDF graph that this script deletes and reloads.
(define %graph-uri
  "http://genenetwork.org")

(define (call-with-pipe proc mode program . args)
  "Execute PROGRAM ARGS ... in a subprocess with a pipe of MODE to it.
Call PROC with a port to that pipe. Close the pipe once PROC exits,
even if it exits non-locally. Return the value returned by PROC.
Raise an error if PROGRAM does not exit with status 0."
  (let ((port #f))
    (dynamic-wind
      (lambda ()
        (set! port (apply open-pipe* mode program args)))
      (cut proc port)
      (lambda ()
        ;; status:exit-val returns #f when the process did not exit
        ;; normally, hence the `and' guard before `zero?'.
        (let ((return-value (status:exit-val (close-pipe port))))
          (unless (and return-value
                       (zero? return-value))
            (error "Invocation of program failed" (cons program args))))))))

(define (delete-graph port password graph)
  "Delete GRAPH from virtuoso connecting to virtuoso on PORT
authenticating as the dba user with PASSWORD."
  ;; We do this with SQL because doing it with SPARQL is too
  ;; slow. Note that this does not delete free-text index data, if
  ;; any. See
  ;; http://vos.openlinksw.com/owiki/wiki/VOS/VirtTipsAndTricksGuideDeleteLargeGraphs
  (call-with-pipe
   (lambda (out)
     ;; One statement per line of the script piped to isql.
     (format out
             "SET DSN=localhost:~a;~%SET PWD=~s;~%DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');~%"
             port password graph))
   OPEN_WRITE
   "isql"))

(define (empty-load-queue port password)
  "Empty the virtuoso bulk load queue (the DB.DBA.load_list table)
connecting to virtuoso on PORT authenticating with PASSWORD."
  (call-with-pipe
   (lambda (out)
     (format out
             "SET DSN=localhost:~a;~%SET PWD=~s;~%DELETE FROM DB.DBA.load_list;~%"
             port password))
   OPEN_WRITE
   "isql"))

(define (bulk-load-data port password graph)
  "Bulk load the *.ttl files in /var/lib/data into GRAPH connecting to
virtuoso on PORT authenticating with PASSWORD. Run a CHECKPOINT once
the loader has finished."
  (call-with-pipe
   (lambda (out)
     (format out
             "SET DSN=localhost:~a;~%SET PWD=~s;~%ld_dir('/var/lib/data', '*.ttl', '~a');~%rdf_loader_run();~%CHECKPOINT;~%"
             port password graph))
   OPEN_WRITE
   "isql"))

(define (index-data port password)
  "Index all text data for quicker search connecting to virtuoso on
PORT authenticating with PASSWORD."
  (call-with-pipe
   (lambda (out)
     (format out
             "SET DSN=localhost:~a;~%SET PWD=~s;~%DB.DBA.RDF_OBJ_FT_RULE_ADD (null, null, 'All');~%DB.DBA.VT_INC_INDEX_DB_DBA_RDF_OBJ();~%quit;~%"
             port password))
   OPEN_WRITE
   "isql"))

(define (time-thunk thunk)
  "Run THUNK and return the time taken in seconds."
  (let ((start-time (current-time)))
    (thunk)
    (- (current-time) start-time)))

;; Entry point. Expects the command line (PROGRAM CONNECTION-SETTINGS-FILE).
;; CONNECTION-SETTINGS-FILE is read as a single datum and queried with
;; assq-ref for the keys virtuoso-port and virtuoso-password. With any
;; other number of arguments, print a usage message and exit with
;; failure.
(define main
  (match-lambda*
    ((_ connection-settings-file)
     (let* ((connection-settings
             (call-with-input-file connection-settings-file read))
            (port (assq-ref connection-settings 'virtuoso-port))
            (password (assq-ref connection-settings 'virtuoso-password)))
       ;; Delete existing data. We do not rely on the implicit
       ;; deletion in the PUT method of the SPARQL 1.1 Graph Store
       ;; HTTP Protocol because that is too slow.
       (format (current-output-port)
               "Existing virtuoso data deleted in ~a seconds~%"
               (time-thunk (cut delete-graph port password %graph-uri)))
       ;; Delete the load queue
       (format (current-output-port)
               "Existing DB.LOAD queue deleted in ~a seconds~%"
               (time-thunk (cut empty-load-queue port password)))
       ;; Bulk load data
       (format (current-output-port)
               "Data bulk loaded into virtuoso in ~a seconds~%"
               (time-thunk (cut bulk-load-data port password %graph-uri)))
       ;; Index the data. The message needs a ~a directive to consume
       ;; the elapsed time passed as argument.
       (format (current-output-port)
               "Data indexed in ~a seconds~%"
               (time-thunk (cut index-data port password)))))
    ((arg0 _ ...)
     (format (current-error-port)
             "Usage: ~a CONNECTION-SETTINGS-FILE~%"
             arg0)
     (exit #f))))

(apply main (command-line))