about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2026-02-10 09:41:51 +0300
committerMunyoki Kilyungi2026-02-10 09:41:51 +0300
commit9dd449a2615fd18968c2dc84142f809241b498ad (patch)
treebce49bd55d05c43c4955144f3a89150f563800dd
parent576ace2d3155411c42d483d0091efb3ca3386b17 (diff)
downloadgn-transform-databases-9dd449a2615fd18968c2dc84142f809241b498ad.tar.gz
Give "with-documentation" & "define-transformer" chunking ability.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rw-r--r--transform/special-forms.scm297
1 files changed, 159 insertions, 138 deletions
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index 425061d..d8abf2c 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -419,57 +419,68 @@ must be remedied."
          #`(define* (name db #:key
                           (metadata? #f)
                           (data? #t)
-                          (documentation? #f))
-             (when metadata?
-               #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
-                        (subject-type (any (lambda (predicate)
-                                             (syntax-case predicate (rdf:type)
-                                               ((_ rdf:type type) #'type)
-                                               (_ #f)))
-                                           #'(predicate-clauses ...))))
-                    (map (lambda (predicate-clause)
-                           (syntax-case predicate-clause ()
-                             ((_ predicate _)
-                              ;; Dump metadata about the transform itself.
-                              #`(begin
-                                  (scm->triples
-                                   (map-alist '()
-	        		     (set rdf:type 'gn-id:transform)
-	        		     (set gn-term:createsPredicate 'predicate)
-	        		     (filter-set gn-term:forSubjectType #,subject-type)
-	        		     (multiset gn-term:dependsOn
-	        			       '#,(map (lambda (field)
-	        					 (match (syntax->datum field)
-	        					   ((table-name column-name _ ...)
-	        					    (datum->syntax
-	        					     x (column-id (symbol->string table-name)
-	        							  (symbol->string column-name))))
-	        					   (((query alias))
-	        					    (datum->syntax
-	        					     x (column-id query (symbol->string alias))))))
-	        				       (collect-fields predicate-clause))))
-                                   #,(id table (syntax->datum #'predicate)))
-                                  ;; Automatically create domain triples
-                                  ;; for predicates.
-                                  (when #,subject-type
-                                    (triple 'predicate 'rdfs:domain #,subject-type))))
-                             (_ (error "Invalid predicate clause:" predicate-clause))))
-                         #'(predicate-clauses ...))))
-             (when documentation?
-               (format #t "~%## '~a'~%~%" (syntax->datum #'name))
-               #,(syntax-case #'schema-triples-clause (schema-triples)
-                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
-                    #`(begin
-                        (when (not (list 'triple-subject ...))
-                          (format #t "## Schema Triples:~%~%```text~%")
-                          (for-each (lambda (s p o)
-                                      (format #t "~a -> ~a -> ~a~%" s p o))
-                                    (list 'triple-subject ...)
-                                    (list 'triple-predicate ...)
-                                    (list 'triple-object ...))
-                          (format #t "```"))))
-                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-               (format #t "## Generated Triples:
+                          (documentation? #f)
+                          (limit #f)
+                          (offset #f))
+             (let* ((base-sql
+                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                   (primary-table other-tables ...)
+                                   tables-raw ...))
+                    (sql
+		      (if (and limit offset)
+			  (format #f "~a LIMIT ~a OFFSET ~a"
+				  base-sql limit offset)
+			  base-sql)))
+               (when metadata?
+                 #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
+                          (subject-type (any (lambda (predicate)
+                                               (syntax-case predicate (rdf:type)
+                                                 ((_ rdf:type type) #'type)
+                                                 (_ #f)))
+                                             #'(predicate-clauses ...))))
+                      (map (lambda (predicate-clause)
+                             (syntax-case predicate-clause ()
+                               ((_ predicate _)
+                                ;; Dump metadata about the transform itself.
+                                #`(begin
+                                    (scm->triples
+                                     (map-alist '()
+	        		       (set rdf:type 'gn-id:transform)
+	        		       (set gn-term:createsPredicate 'predicate)
+	        		       (filter-set gn-term:forSubjectType #,subject-type)
+	        		       (multiset gn-term:dependsOn
+	        			         '#,(map (lambda (field)
+	        					   (match (syntax->datum field)
+	        					     ((table-name column-name _ ...)
+	        					      (datum->syntax
+	        					       x (column-id (symbol->string table-name)
+	        							    (symbol->string column-name))))
+	        					     (((query alias))
+	        					      (datum->syntax
+	        					       x (column-id query (symbol->string alias))))))
+	        				         (collect-fields predicate-clause))))
+                                     #,(id table (syntax->datum #'predicate)))
+                                    ;; Automatically create domain triples
+                                    ;; for predicates.
+                                    (when #,subject-type
+                                      (triple 'predicate 'rdfs:domain #,subject-type))))
+                               (_ (error "Invalid predicate clause:" predicate-clause))))
+                           #'(predicate-clauses ...))))
+               (when documentation?
+                 (format #t "~%## '~a'~%~%" (syntax->datum #'name))
+                 #,(syntax-case #'schema-triples-clause (schema-triples)
+                     ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                      #`(begin
+                          (when (not (list 'triple-subject ...))
+                            (format #t "## Schema Triples:~%~%```text~%")
+                            (for-each (lambda (s p o)
+                                        (format #t "~a -> ~a -> ~a~%" s p o))
+                                      (list 'triple-subject ...)
+                                      (list 'triple-predicate ...)
+                                      (list 'triple-object ...))
+                            (format #t "```"))))
+                     (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+                 (format #t "## Generated Triples:
 
 The following SQL query was executed:
 
@@ -481,67 +492,64 @@ The above query results to triples that have the form:
 
 ```text
 "
-                       (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                     (primary-table other-tables ...)
-                                     tables-raw ...))
-               (for-each (match-lambda
-                           ((predicate . object)
-                            (format #t "~a -> ~a -> ~a ~%"
-                                    (if (symbol? #,(field->datum #'subject))
-                                        (symbol->string #,(field->datum #'subject))
-                                        #,(field->datum #'subject))
-                                    predicate
-                                    (if (symbol? object)
-                                        (symbol->string object)
-                                        object))))
-                         (map-alist
-                             '()
-                           #,@(field->datum #'(predicate-clauses ...))))
-               (format #t "```~%Here's an example query:~%~%```sparql~%")
-               (documentation?)
-               (newline)
-               (let* ((result
-                       (map-alist (sql-find
-                                   db
-                                   (format #f "~a LIMIT 1"
-                                           (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                         (primary-table other-tables ...)
-                                                         tables-raw ...)))
-        		 #,@(field->key #'(predicate-clauses ...))))
-                      (first-n (list-head result
-                                          (let ((n
-                                                 (min 4 (truncate
-                                                         (+ (exact-integer-sqrt (length result)) 1)))))
-                                            (if (< n 3)
-                                                (length result)
-                                                n)))))
-                 (format #t "SELECT * WHERE { ~%")
+                         (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                       (primary-table other-tables ...)
+                                       tables-raw ...))
                  (for-each (match-lambda
                              ((predicate . object)
-                              (match object
-                                        ((or (?  symbol? object)
-                                             (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
-                                         (format #t "    ?s ~a ~a .~%" predicate object))
-                                        ((and (? string? object)
-                                              (? (lambda (el) (not (string-null? el))) object))
-                                         (format #t "    ?s ~a \"~a\" .~%" predicate object))
-                                        (_ ""))))
-                           first-n)
-                 (format #t "    ?s ?p ?o .~%}~%```~%"))
-               (format #t "~%Expected Result:~%~%```rdf~%")
-               (sql-for-each (lambda (row)
-                               (scm->triples
-                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
-                                #,(field->assoc-ref #'row #'subject)
-                                (lambda (s p o)
-                                  (triple s p o))))
-                             db
-                             (format #f "~a LIMIT 1"
-                                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                                   (primary-table other-tables ...)
-                                                   tables-raw ...)))
-               (format #t "```~%~%"))
-             (when data?
+                              (format #t "~a -> ~a -> ~a ~%"
+                                      (if (symbol? #,(field->datum #'subject))
+                                          (symbol->string #,(field->datum #'subject))
+                                          #,(field->datum #'subject))
+                                      predicate
+                                      (if (symbol? object)
+                                          (symbol->string object)
+                                          object))))
+                           (map-alist
+                               '()
+                             #,@(field->datum #'(predicate-clauses ...))))
+                 (format #t "```~%Here's an example query:~%~%```sparql~%")
+                 (documentation?)
+                 (newline)
+                 (let* ((result
+                         (map-alist (sql-find
+                                     db
+                                     (format #f "~a LIMIT 1"
+                                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                           (primary-table other-tables ...)
+                                                           tables-raw ...)))
+        		   #,@(field->key #'(predicate-clauses ...))))
+                        (first-n (list-head result
+                                            (let ((n
+                                                   (min 4 (truncate
+                                                           (+ (exact-integer-sqrt (length result)) 1)))))
+                                              (if (< n 3)
+                                                  (length result)
+                                                  n)))))
+                   (format #t "SELECT * WHERE { ~%")
+                   (for-each (match-lambda
+                               ((predicate . object)
+                                (match object
+                                  ((or (?  symbol? object)
+                                       (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                                   (format #t "    ?s ~a ~a .~%" predicate object))
+                                  ((and (? string? object)
+                                        (? (lambda (el) (not (string-null? el))) object))
+                                   (format #t "    ?s ~a \"~a\" .~%" predicate object))
+                                  (_ ""))))
+                             first-n)
+                   (format #t "    ?s ?p ?o .~%}~%```~%"))
+                 (format #t "~%Expected Result:~%~%```rdf~%")
+                 (sql-for-each (lambda (row)
+                                 (scm->triples
+                                  (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                  #,(field->assoc-ref #'row #'subject)
+                                  (lambda (s p o)
+                                    (triple s p o))))
+                               db
+                               (format #f "~a LIMIT 1" base-sql))
+                 (format #t "```~%~%"))
+               (when data?
                #,(syntax-case #'schema-triples-clause (schema-triples)
                    ((schema-triples (triple-subject triple-predicate triple-object) ...)
                     #`(for-each triple
@@ -549,16 +557,14 @@ The above query results to triples that have the form:
                                 (list 'triple-predicate ...)
                                 (list 'triple-object ...)))
                    (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
-	       (sql-for-each  (lambda (row)
-                                (let* ((subject-val #,(field->assoc-ref #'row #'subject))
-                                       (po-alist
-                                        (map-alist row #,@(field->key #'(predicate-clauses ...)))))
-                                  (emit-short-turtle subject-val po-alist)))
-                             db
-                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
-                                           (primary-table other-tables ...)
-                                           tables-raw ...)))
-             )))
+	       (sql-for-each
+		  (lambda (row)
+                    (let* ((subject-val #,(field->assoc-ref #'row #'subject))
+                           (po-alist
+                            (map-alist row #,@(field->key #'(predicate-clauses ...)))))
+                      (emit-short-turtle subject-val po-alist)))
+                  db
+                  sql))))))
       (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
 
 (define (get-keyword-value args keyword default)
@@ -577,8 +583,14 @@ The above query results to triples that have the form:
             (prefixes (assoc-ref alist 'prefixes))
             (inputs (assoc-ref alist 'inputs))
             (outputs (assoc-ref alist 'outputs))
-            (rdf-path (get-keyword-value outputs #:rdf ""))
-            (doc-path (get-keyword-value outputs #:documentation "")))
+            (total-rows (assoc-ref alist 'total-rows))
+            (rows-per-chunk (assoc-ref alist 'rows-per-chunk))
+            (chunking? (and total-rows rows-per-chunk))
+            (chunks (if chunking?
+                        (ceiling (/ total-rows rows-per-chunk))
+                        1))
+            (rdf-path (get-keyword-value outputs #:rdf #f))
+            (doc-path (get-keyword-value outputs #:documentation #f)))
        (call-with-target-database
         connection
         (lambda (db)
@@ -604,20 +616,29 @@ The above query results to triples that have the form:
 
           ;; Dumping the actual data
           (when rdf-path
-            (with-output-to-file
-                rdf-path
-              (lambda ()
-                ;; Add the prefixes
-                (for-each
-                 (match-lambda
-                   ((k v)
-                    (begin
-                      (prefix k v))))
-                 prefixes)
-                (newline)
-                (for-each
-                 (lambda (proc)
-                   (proc db #:metadata? table-metadata?))
-                 inputs))
-              #:encoding "UTF-8"))))))))
+            (do ((i 0 (+ i 1)))
+                ((>= i chunks))
+              (let* ((offset (* i (or rows-per-chunk 0)))
+                     (out-file
+                      (if (= chunks 1)
+                          rdf-path
+                          (string-append rdf-path "." (number->string (+ i 1)) ".ttl"))))
+                (with-output-to-file
+                    out-file
+                  (lambda ()
+                    ;; Add the prefixes
+                    (for-each
+                     (match-lambda
+                       ((k v)
+                        (begin
+                          (prefix k v))))
+                     prefixes)
+                    (newline)
+                    (for-each
+                     (lambda (proc)
+                       (proc db #:metadata? table-metadata?
+                             #:limit rows-per-chunk
+                             #:offset offset))
+                     inputs))
+                  #:encoding "UTF-8"))))))))))