aboutsummaryrefslogtreecommitdiff
path: root/load-rdf.scm
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-24 12:44:06 +0300
committerMunyoki Kilyungi2023-08-24 12:44:06 +0300
commit8ccc7ba28046ed43ac7b8b5831b0eebb5a9b78c9 (patch)
treee134f19cdb97446b1bec5d783ea44a4c0031a5b3 /load-rdf.scm
parentc619876af9633ba5c3193599bc0ff4891c9af1f7 (diff)
downloadgn-transform-databases-8ccc7ba28046ed43ac7b8b5831b0eebb5a9b78c9.tar.gz
Delete http methods for adding data
* load-rdf.scm (random-cnonce, md5-digest-authorization, http-upload-file, put-graph, delete-graph): Delete (main): Remove call to put-graph. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'load-rdf.scm')
-rwxr-xr-xload-rdf.scm152
1 files changed, 15 insertions, 137 deletions
diff --git a/load-rdf.scm b/load-rdf.scm
index b5ed5ac..6f469b8 100755
--- a/load-rdf.scm
+++ b/load-rdf.scm
@@ -18,91 +18,6 @@
(define %graph-uri
"http://genenetwork.org")
-(define (random-cnonce len)
- "Return a random hexadecimal string LEN characters long."
- (list->string (map (lambda _
- (let ((i (random 16)))
- (integer->char
- (if (< i 10)
- (+ i (char->integer #\0))
- (+ (- i 10)
- (char->integer #\a))))))
- (iota len))))
-
-(define (md5-digest-authorization method uri username password)
- "Return a digest authorization header to access URI with METHOD,
-USERNAME and PASSWORD."
- (let* ((response response-body (http-head uri))
- (challenge (or (find (match-lambda
- (('digest . challenge)
- (let ((algorithm (assq-ref challenge 'algorithm)))
- (or (not algorithm)
- (string=? algorithm "MD5")))))
- (assq-ref (response-headers response)
- 'www-authenticate))
- (error "No MD5 authentication challenges advertised")))
- (realm (assq-ref challenge 'realm))
- (nonce (assq-ref challenge 'nonce))
- (qop (assq-ref challenge 'qop))
- ;; Hard-code the request counter (nc) to "1".
- (nc "1")
- (cnonce (random-cnonce 8)))
- `(digest (username . ,username)
- (realm . ,realm)
- (nonce . ,nonce)
- (uri . ,(uri-path uri))
- (algorithm . "MD5")
- (qop . ,qop)
- (nc . ,nc)
- (cnonce . ,cnonce)
- (response . ,(md5->string
- (md5 (string->bytevector
- (string-join
- (list (md5->string
- (md5 (string->bytevector (string-join
- (list username realm password)
- ":")
- (make-transcoder (utf-8-codec)))))
- nonce
- nc
- cnonce
- qop
- (md5->string
- (md5 (string->bytevector (string-join (list (symbol->string method)
- (uri-path uri))
- ":")
- (make-transcoder (utf-8-codec))))))
- ":")
- (make-transcoder (utf-8-codec))))))
- (opaque . ,(assq-ref challenge 'opaque)))))
-
-(define* (http-upload-file method uri file #:key username password)
- "Upload FILE to URI using METHOD. USERNAME and PASSWORD, if
-provided, are the username and password to authenticate with."
- (let ((authorization-headers
- (if username
- `((authorization . ,(md5-digest-authorization method uri username password)))
- '())))
- (call-with-port (open-socket-for-uri uri)
- (lambda (port)
- (let ((request (write-request (build-request
- uri
- #:method method
- #:headers `((connection close)
- (content-length . ,(stat:size (stat file)))
- ,@authorization-headers)
- #:port port)
- port)))
- (when file
- (call-with-input-file file
- (lambda (in)
- (port-transduce (tmap (cut write-request-body request <>))
- (const #t)
- get-bytevector-some
- in)
- (force-output (request-port request)))))
- (read-response port))))))
-
(define (call-with-pipe proc mode program . args)
"Execute PROGRAM ARGS ... in a subprocess with a pipe of MODE to
it. Call PROC with a port to that pipe. Close the pipe once PROC
@@ -117,31 +32,6 @@ PROC."
(zero? return-value))
(error "Invocation of program failed" (cons program args))))))))
-(define* (put-graph sparql-endpoint username password rdf-file graph #:optional retry?)
- "Load RDF-FILE into GRAPH at SPARQL-ENDPOINT, a SPARQL 1.1 Graph
-Store HTTP Protocol endpoint, authenticating with USERNAME and
-PASSWORD. Note that when the PUT method is used, any existing data in
-the graph is deleted. Therefore, a POST method is used."
- (let ((response (http-upload-file 'POST
- (build-uri (uri-scheme sparql-endpoint)
- #:host (uri-host sparql-endpoint)
- #:port (uri-port sparql-endpoint)
- #:path (uri-path sparql-endpoint)
- #:query (string-append "graph-uri=" (uri-encode graph)))
- rdf-file
- #:username username
- #:password password)))
- ;; Fail if response code is not 2xx.
- (unless (= (quotient (response-code response)
- 100)
- 2)
- ;; When uploading a large RDF file into a clean virtuoso, it
- ;; fails for the first time, and succeeds only the second
- ;; time. It is unclear why. So, perform this ugly hack of trying
- ;; twice.
- (if retry?
- (put-graph sparql-endpoint username password rdf-file graph #f)
- (error "Putting graph failed" response)))))
(define (delete-graph port password graph)
"Delete GRAPH from virtuoso connecting to virtuoso on PORT
@@ -155,7 +45,6 @@ authenticating as the dba user with PASSWORD."
(format out
"SET DSN=localhost:~a;
SET PWD=~s;
-log_enable(3, 1);
DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');"
port
password
@@ -185,32 +74,21 @@ DELETE FROM rdf_quad WHERE g = iri_to_id ('~a');"
(assq-ref connection-settings 'virtuoso-port)
(assq-ref connection-settings 'virtuoso-password)
%graph-uri)))
- ;; Load data into virtuoso.
- (let ((host (assq-ref connection-settings 'sparql-host))
- (port (assq-ref connection-settings 'sparql-port))
- (path "/sparql-graph-crud-auth")
- (username (assq-ref connection-settings 'virtuoso-username))
- (password (assq-ref connection-settings 'virtuoso-password)))
- (ftw rdf-file
- (lambda (filename statinfo flag)
- (begin
- (when (eq? 'regular (stat:type statinfo))
- (format (current-output-port)
- "~a loaded into virtuoso in ~a seconds~%"
- filename
- (time-thunk
- (cut put-graph
- (build-uri
- (assq-ref connection-settings 'sparql-scheme)
- #:host host
- #:port port
- #:path path)
- username
- password
- filename
- %graph-uri
- #t))))
- #t))))))
+ ;; Delete the load queue
+ (format (current-output-port)
+ "Existing virtuoso data deleted in ~a seconds~%"
+ (time-thunk
+ (cut empty-load-queue
+ (assq-ref connection-settings 'virtuoso-port)
+ (assq-ref connection-settings 'virtuoso-password))))
+ ;; Bulk load data
+ (format (current-output-port)
+ "Existing virtuoso data deleted in ~a seconds~%"
+ (time-thunk
+ (cut bulk-load-data
+ (assq-ref connection-settings 'virtuoso-port)
+ (assq-ref connection-settings 'virtuoso-password)
+ %graph-uri)))))
((arg0 _ ...)
(format (current-error-port) "Usage: ~a CONNECTION-SETTINGS-FILE RDF-FILE-OR-RDF-DIR~%" arg0)
(exit #f))))