about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--transform/special-forms.scm356
-rw-r--r--transform/strings.scm13
2 files changed, 207 insertions, 162 deletions
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index 8de4966..0c07a0a 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -6,6 +6,7 @@
   #:use-module (transform sql)
   #:use-module (transform table)
   #:use-module (transform triples)
+  #:use-module (transform strings)
   #:export (translate-forms
             collect-forms
             collect-keys
@@ -22,36 +23,47 @@
             emit-short-turtle
             define-transformer))
 
+(define (emittable-object? o)
+  (cond
+    ((null? o) #f)
+    ((not o)   #f)
+    ((and (string? o) (string-blank? o)) #f)
+    (else #t)))
+
 (define (emit-short-turtle subject po-alist)
   (let loop ((pairs po-alist) (first? #t))
     (match pairs
       (((p . o) rest ...)
-       ;; subject only on first line
-       (when first?
-         (format #t "~a " subject))
-       (when (not first?)
-         (format #t "\t"))   ; indent following lines
-
-       (match o
-         ((? symbol?)
-          (format #t "~a ~a" p (symbol->string o)))
-         ((or (? (lambda (el) (and (string? el)
-                                   (string-match "^\\(.*\\)$" el))))
-              (? (lambda (el) (and (string? el)
-                                   (string-match "^\\[.*\\]$" el)))))
-          (format #t "~a ~s" p o))
-         (_
-          (format #t "~a \"~a\"" p o)))
-
-       (if (null? rest)
-           (format #t " .~%")   ; last triple
-           (format #t " ;~%"))  ; continuation
-
-       (loop rest #f))
-
+       (if (not (emittable-object? o))
+           (loop rest first?)   ; skip malformed or empty objects
+           (begin
+             ;; subject only once
+             (when first?
+               (format #t "~a " subject))
+             (when (not first?)
+               (format #t "\t"))
+
+             ;; emit predicate–object
+             (match o
+               ((? symbol?)
+                (format #t "~a ~a" p (symbol->string o)))
+               ((? string?)
+                (format #t "~a \"~a\"" p o))
+               (_
+                (format #t "~a ~s" p o)))
+
+             ;; separator depends on *remaining emittable pairs*
+             (if (any (match-lambda
+                        ((p . o) (emittable-object? o)))
+                      rest)
+                 (format #t " ;~%")
+                 (format #t " .~%"))
+
+             (loop rest #f))))
       (() #f))))
 
 
+
 (define (key->assoc-ref alist x)
   "Recursively translate (key k) forms in source X to (assoc-ref ALIST
 k) forms."
@@ -407,57 +419,68 @@ must be remedied."
          #`(define* (name db #:key
                           (metadata? #f)
                           (data? #t)
-                          (documentation? #f))
-             (when metadata?
-               #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
-                        (subject-type (any (lambda (predicate)
-                                             (syntax-case predicate (rdf:type)
-                                               ((_ rdf:type type) #'type)
-                                               (_ #f)))
-                                           #'(predicate-clauses ...))))
-                    (map (lambda (predicate-clause)
-                           (syntax-case predicate-clause ()
-                             ((_ predicate _)
-                              ;; Dump metadata about the transform itself.
-                              #`(begin
-                                  (scm->triples
-                                   (map-alist '()
-	        		     (set rdf:type 'gn-id:transform)
-	        		     (set gn-term:createsPredicate 'predicate)
-	        		     (filter-set gn-term:forSubjectType #,subject-type)
-	        		     (multiset gn-term:dependsOn
-	        			       '#,(map (lambda (field)
-	        					 (match (syntax->datum field)
-	        					   ((table-name column-name _ ...)
-	        					    (datum->syntax
-	        					     x (column-id (symbol->string table-name)
-	        							  (symbol->string column-name))))
-	        					   (((query alias))
-	        					    (datum->syntax
-	        					     x (column-id query (symbol->string alias))))))
-	        				       (collect-fields predicate-clause))))
-                                   #,(id table (syntax->datum #'predicate)))
-                                  ;; Automatically create domain triples
-                                  ;; for predicates.
-                                  (when #,subject-type
-                                    (triple 'predicate 'rdfs:domain #,subject-type))))
-                             (_ (error "Invalid predicate clause:" predicate-clause))))
-                         #'(predicate-clauses ...))))
-             (when documentation?
-               (format #t "~%## '~a'~%~%" (syntax->datum #'name))
-               #,(syntax-case #'schema-triples-clause (schema-triples)
-                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
-                    #`(begin
-                        (when (not (list 'triple-subject ...))
-                          (format #t "## Schema Triples:~%~%```text~%")
-                          (for-each (lambda (s p o)
-                                      (format #t "~a -> ~a -> ~a~%" s p o))
-                                    (list 'triple-subject ...)
-                                    (list 'triple-predicate ...)
-                                    (list 'triple-object ...))
-                          (format #t "```"))))
-                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-               (format #t "## Generated Triples:
+                          (documentation? #f)
+                          (limit #f)
+                          (offset #f))
+             (let* ((base-sql
+                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                   (primary-table other-tables ...)
+                                   tables-raw ...))
+                    (sql
+		      (if (and limit offset)
+			  (format #f "~a LIMIT ~a OFFSET ~a"
+				  base-sql limit offset)
+			  base-sql)))
+               (when metadata?
+                 #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
+                          (subject-type (any (lambda (predicate)
+                                               (syntax-case predicate (rdf:type)
+                                                 ((_ rdf:type type) #'type)
+                                                 (_ #f)))
+                                             #'(predicate-clauses ...))))
+                      (map (lambda (predicate-clause)
+                             (syntax-case predicate-clause ()
+                               ((_ predicate _)
+                                ;; Dump metadata about the transform itself.
+                                #`(begin
+                                    (scm->triples
+                                     (map-alist '()
+	        		       (set rdf:type 'gn-id:transform)
+	        		       (set gn-term:createsPredicate 'predicate)
+	        		       (filter-set gn-term:forSubjectType #,subject-type)
+	        		       (multiset gn-term:dependsOn
+	        			         '#,(map (lambda (field)
+	        					   (match (syntax->datum field)
+	        					     ((table-name column-name _ ...)
+	        					      (datum->syntax
+	        					       x (column-id (symbol->string table-name)
+	        							    (symbol->string column-name))))
+	        					     (((query alias))
+	        					      (datum->syntax
+	        					       x (column-id query (symbol->string alias))))))
+	        				         (collect-fields predicate-clause))))
+                                     #,(id table (syntax->datum #'predicate)))
+                                    ;; Automatically create domain triples
+                                    ;; for predicates.
+                                    (when #,subject-type
+                                      (triple 'predicate 'rdfs:domain #,subject-type))))
+                               (_ (error "Invalid predicate clause:" predicate-clause))))
+                           #'(predicate-clauses ...))))
+               (when documentation?
+                 (format #t "~%## '~a'~%~%" (syntax->datum #'name))
+                 #,(syntax-case #'schema-triples-clause (schema-triples)
+                     ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                      #`(begin
+                          (when (not (list 'triple-subject ...))
+                            (format #t "## Schema Triples:~%~%```text~%")
+                            (for-each (lambda (s p o)
+                                        (format #t "~a -> ~a -> ~a~%" s p o))
+                                      (list 'triple-subject ...)
+                                      (list 'triple-predicate ...)
+                                      (list 'triple-object ...))
+                            (format #t "```"))))
+                     (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+                 (format #t "## Generated Triples:
 
 The following SQL query was executed:
 
@@ -469,67 +492,64 @@ The above query results to triples that have the form:
 
 ```text
 "
-                       (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                     (primary-table other-tables ...)
-                                     tables-raw ...))
-               (for-each (match-lambda
-                           ((predicate . object)
-                            (format #t "~a -> ~a -> ~a ~%"
-                                    (if (symbol? #,(field->datum #'subject))
-                                        (symbol->string #,(field->datum #'subject))
-                                        #,(field->datum #'subject))
-                                    predicate
-                                    (if (symbol? object)
-                                        (symbol->string object)
-                                        object))))
-                         (map-alist
-                             '()
-                           #,@(field->datum #'(predicate-clauses ...))))
-               (format #t "```~%Here's an example query:~%~%```sparql~%")
-               (documentation?)
-               (newline)
-               (let* ((result
-                       (map-alist (sql-find
-                                   db
-                                   (format #f "~a LIMIT 1"
-                                           (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                         (primary-table other-tables ...)
-                                                         tables-raw ...)))
-        		 #,@(field->key #'(predicate-clauses ...))))
-                      (first-n (list-head result
-                                          (let ((n
-                                                 (min 4 (truncate
-                                                         (+ (exact-integer-sqrt (length result)) 1)))))
-                                            (if (< n 3)
-                                                (length result)
-                                                n)))))
-                 (format #t "SELECT * WHERE { ~%")
+                         (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                       (primary-table other-tables ...)
+                                       tables-raw ...))
                  (for-each (match-lambda
                              ((predicate . object)
-                              (match object
-                                        ((or (?  symbol? object)
-                                             (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
-                                         (format #t "    ?s ~a ~a .~%" predicate object))
-                                        ((and (? string? object)
-                                              (? (lambda (el) (not (string-null? el))) object))
-                                         (format #t "    ?s ~a \"~a\" .~%" predicate object))
-                                        (_ ""))))
-                           first-n)
-                 (format #t "    ?s ?p ?o .~%}~%```~%"))
-               (format #t "~%Expected Result:~%~%```rdf~%")
-               (sql-for-each (lambda (row)
-                               (scm->triples
-                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
-                                #,(field->assoc-ref #'row #'subject)
-                                (lambda (s p o)
-                                  (triple s p o))))
-                             db
-                             (format #f "~a LIMIT 1"
-                                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                   (primary-table other-tables ...)
-                                                   tables-raw ...)))
-               (format #t "```~%~%"))
-             (when data?
+                              (format #t "~a -> ~a -> ~a ~%"
+                                      (if (symbol? #,(field->datum #'subject))
+                                          (symbol->string #,(field->datum #'subject))
+                                          #,(field->datum #'subject))
+                                      predicate
+                                      (if (symbol? object)
+                                          (symbol->string object)
+                                          object))))
+                           (map-alist
+                               '()
+                             #,@(field->datum #'(predicate-clauses ...))))
+                 (format #t "```~%Here's an example query:~%~%```sparql~%")
+                 (documentation?)
+                 (newline)
+                 (let* ((result
+                         (map-alist (sql-find
+                                     db
+                                     (format #f "~a LIMIT 1"
+                                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                           (primary-table other-tables ...)
+                                                           tables-raw ...)))
+        		   #,@(field->key #'(predicate-clauses ...))))
+                        (first-n (list-head result
+                                            (let ((n
+                                                   (min 4 (truncate
+                                                           (+ (exact-integer-sqrt (length result)) 1)))))
+                                              (if (< n 3)
+                                                  (length result)
+                                                  n)))))
+                   (format #t "SELECT * WHERE { ~%")
+                   (for-each (match-lambda
+                               ((predicate . object)
+                                (match object
+                                  ((or (?  symbol? object)
+                                       (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                                   (format #t "    ?s ~a ~a .~%" predicate object))
+                                  ((and (? string? object)
+                                        (? (lambda (el) (not (string-null? el))) object))
+                                   (format #t "    ?s ~a \"~a\" .~%" predicate object))
+                                  (_ ""))))
+                             first-n)
+                   (format #t "    ?s ?p ?o .~%}~%```~%"))
+                 (format #t "~%Expected Result:~%~%```rdf~%")
+                 (sql-for-each (lambda (row)
+                                 (scm->triples
+                                  (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                  #,(field->assoc-ref #'row #'subject)
+                                  (lambda (s p o)
+                                    (triple s p o))))
+                               db
+                               (format #f "~a LIMIT 1" base-sql))
+                 (format #t "```~%~%"))
+               (when data?
                #,(syntax-case #'schema-triples-clause (schema-triples)
                    ((schema-triples (triple-subject triple-predicate triple-object) ...)
                     #`(for-each triple
@@ -537,16 +557,14 @@ The above query results to triples that have the form:
                                 (list 'triple-predicate ...)
                                 (list 'triple-object ...)))
                    (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-	       (sql-for-each  (lambda (row)
-                                (let* ((subject-val #,(field->assoc-ref #'row #'subject))
-                                       (po-alist
-                                        (map-alist row #,@(field->key #'(predicate-clauses ...)))))
-                                  (emit-short-turtle subject-val po-alist)))
-                             db
-                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                           (primary-table other-tables ...)
-                                           tables-raw ...)))
-             )))
+	       (sql-for-each
+		  (lambda (row)
+                    (let* ((subject-val #,(field->assoc-ref #'row #'subject))
+                           (po-alist
+                            (map-alist row #,@(field->key #'(predicate-clauses ...)))))
+                      (emit-short-turtle subject-val po-alist)))
+                  db
+                  sql))))))
       (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
 
 (define (get-keyword-value args keyword default)
@@ -565,8 +583,14 @@ The above query results to triples that have the form:
             (prefixes (assoc-ref alist 'prefixes))
             (inputs (assoc-ref alist 'inputs))
             (outputs (assoc-ref alist 'outputs))
-            (rdf-path (get-keyword-value outputs #:rdf ""))
-            (doc-path (get-keyword-value outputs #:documentation "")))
+            (total-rows (assoc-ref alist 'total-rows))
+            (rows-per-chunk (assoc-ref alist 'rows-per-chunk))
+            (chunking? (and total-rows rows-per-chunk))
+            (chunks (if chunking?
+                        (ceiling (/ total-rows rows-per-chunk))
+                        1))
+            (rdf-path (get-keyword-value outputs #:rdf #f))
+            (doc-path (get-keyword-value outputs #:documentation #f)))
        (call-with-target-database
         connection
         (lambda (db)
@@ -592,20 +616,30 @@ The above query results to triples that have the form:
 
           ;; Dumping the actual data
           (when rdf-path
-            (with-output-to-file
-                rdf-path
-              (lambda ()
-                ;; Add the prefixes
-                (for-each
-                 (match-lambda
-                   ((k v)
-                    (begin
-                      (prefix k v))))
-                 prefixes)
-                (newline)
-                (for-each
-                 (lambda (proc)
-                   (proc db #:metadata? table-metadata?))
-                 inputs))
-              #:encoding "UTF-8"))))))))
+            (do ((i 0 (+ i 1)))
+                ((>= i chunks))
+              (let* ((offset (* i (or rows-per-chunk 0)))
+                     (out-file
+                      (if (= chunks 1)
+                          rdf-path
+                          (string-append (path-without-extension rdf-path)
+                                         "." (number->string (+ i 1)) ".ttl"))))
+                (with-output-to-file
+                    out-file
+                  (lambda ()
+                    ;; Add the prefixes
+                    (for-each
+                     (match-lambda
+                       ((k v)
+                        (begin
+                          (prefix k v))))
+                     prefixes)
+                    (newline)
+                    (for-each
+                     (lambda (proc)
+                       (proc db #:metadata? table-metadata?
+                             #:limit rows-per-chunk
+                             #:offset offset))
+                     inputs))
+                  #:encoding "UTF-8"))))))))))
 
diff --git a/transform/strings.scm b/transform/strings.scm
index 7b62349..8b4ee45 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -2,6 +2,7 @@
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-19)
   #:use-module (ice-9 match)
+  #:use-module (ice-9 rdelim)
   #:use-module (ice-9 string-fun)
   #:use-module (ice-9 textual-ports)
   #:export (string-blank?
@@ -18,11 +19,21 @@
             normalize-string-field
             fix-email-id
             blank-p
-            investigator-attributes->id))
+            investigator-attributes->id
+            path-without-extension))
 
 (define (blank-p str)
   (if (string-blank? str) #f str))
 
+(define (path-without-extension path)
+  (let* ((dir (dirname path))                ; directory part
+	 (base (basename path))              ; filename part
+	 (dot-pos (string-rindex base #\.))) ; last dot position
+    (string-append dir "/"		     ; reconstruct path
+		   (if dot-pos
+		       (substring base 0 dot-pos) ; strip extension
+		       base))))
+
 (define (lower-case-and-replace-spaces str)
   (string-map
    (lambda (c)