diff options
| author | Munyoki Kilyungi | 2026-02-10 09:41:51 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2026-02-10 09:41:51 +0300 |
| commit | 9dd449a2615fd18968c2dc84142f809241b498ad (patch) | |
| tree | bce49bd55d05c43c4955144f3a89150f563800dd | |
| parent | 576ace2d3155411c42d483d0091efb3ca3386b17 (diff) | |
| download | gn-transform-databases-9dd449a2615fd18968c2dc84142f809241b498ad.tar.gz | |
Give "with-documentation" & "define-transformer" chunking ability.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
| -rw-r--r-- | transform/special-forms.scm | 297 |
1 files changed, 159 insertions, 138 deletions
diff --git a/transform/special-forms.scm b/transform/special-forms.scm index 425061d..d8abf2c 100644 --- a/transform/special-forms.scm +++ b/transform/special-forms.scm @@ -419,57 +419,68 @@ must be remedied." #`(define* (name db #:key (metadata? #f) (data? #t) - (documentation? #f)) - (when metadata? - #,@(let ((table (symbol->string (syntax->datum #'primary-table))) - (subject-type (any (lambda (predicate) - (syntax-case predicate (rdf:type) - ((_ rdf:type type) #'type) - (_ #f))) - #'(predicate-clauses ...)))) - (map (lambda (predicate-clause) - (syntax-case predicate-clause () - ((_ predicate _) - ;; Dump metadata about the transform itself. - #`(begin - (scm->triples - (map-alist '() - (set rdf:type 'gn-id:transform) - (set gn-term:createsPredicate 'predicate) - (filter-set gn-term:forSubjectType #,subject-type) - (multiset gn-term:dependsOn - '#,(map (lambda (field) - (match (syntax->datum field) - ((table-name column-name _ ...) - (datum->syntax - x (column-id (symbol->string table-name) - (symbol->string column-name)))) - (((query alias)) - (datum->syntax - x (column-id query (symbol->string alias)))))) - (collect-fields predicate-clause)))) - #,(id table (syntax->datum #'predicate))) - ;; Automatically create domain triples - ;; for predicates. - (when #,subject-type - (triple 'predicate 'rdfs:domain #,subject-type)))) - (_ (error "Invalid predicate clause:" predicate-clause)))) - #'(predicate-clauses ...)))) - (when documentation? - (format #t "~%## '~a'~%~%" (syntax->datum #'name)) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(begin - (when (not (list 'triple-subject ...)) - (format #t "## Schema Triples:~%~%```text~%") - (for-each (lambda (s p o) - (format #t "~a -> ~a -> ~a~%" s p o)) - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...)) - (format #t "```")))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (format #t "## Generated Triples: + (documentation? #f) + (limit #f) + (offset #f)) + (let* ((base-sql + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) + (sql + (if (and limit offset) + (format #f "~a LIMIT ~a OFFSET ~a" + base-sql limit offset) + base-sql))) + (when metadata? + #,@(let ((table (symbol->string (syntax->datum #'primary-table))) + (subject-type (any (lambda (predicate) + (syntax-case predicate (rdf:type) + ((_ rdf:type type) #'type) + (_ #f))) + #'(predicate-clauses ...)))) + (map (lambda (predicate-clause) + (syntax-case predicate-clause () + ((_ predicate _) + ;; Dump metadata about the transform itself. + #`(begin + (scm->triples + (map-alist '() + (set rdf:type 'gn-id:transform) + (set gn-term:createsPredicate 'predicate) + (filter-set gn-term:forSubjectType #,subject-type) + (multiset gn-term:dependsOn + '#,(map (lambda (field) + (match (syntax->datum field) + ((table-name column-name _ ...) + (datum->syntax + x (column-id (symbol->string table-name) + (symbol->string column-name)))) + (((query alias)) + (datum->syntax + x (column-id query (symbol->string alias)))))) + (collect-fields predicate-clause)))) + #,(id table (syntax->datum #'predicate))) + ;; Automatically create domain triples + ;; for predicates. + (when #,subject-type + (triple 'predicate 'rdfs:domain #,subject-type)))) + (_ (error "Invalid predicate clause:" predicate-clause)))) + #'(predicate-clauses ...)))) + (when documentation? + (format #t "~%## '~a'~%~%" (syntax->datum #'name)) + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(begin + (when (not (list 'triple-subject ...)) + (format #t "## Schema Triples:~%~%```text~%") + (for-each (lambda (s p o) + (format #t "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...)) + (format #t "```")))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (format #t "## Generated Triples: The following SQL query was executed: @@ -481,67 +492,64 @@ The above query results to triples that have the form: ```text " - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...)) - (for-each (match-lambda - ((predicate . object) - (format #t "~a -> ~a -> ~a ~%" - (if (symbol? #,(field->datum #'subject)) - (symbol->string #,(field->datum #'subject)) - #,(field->datum #'subject)) - predicate - (if (symbol? object) - (symbol->string object) - object)))) - (map-alist - '() - #,@(field->datum #'(predicate-clauses ...)))) - (format #t "```~%Here's an example query:~%~%```sparql~%") - (documentation?) - (newline) - (let* ((result - (map-alist (sql-find - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - #,@(field->key #'(predicate-clauses ...)))) - (first-n (list-head result - (let ((n - (min 4 (truncate - (+ (exact-integer-sqrt (length result)) 1))))) - (if (< n 3) - (length result) - n))))) - (format #t "SELECT * WHERE { ~%") + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) (for-each (match-lambda ((predicate . object) - (match object - ((or (? symbol? object) - (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) - (format #t " ?s ~a ~a .~%" predicate object)) - ((and (? string? object) - (? (lambda (el) (not (string-null? el))) object)) - (format #t " ?s ~a \"~a\" .~%" predicate object)) - (_ "")))) - first-n) - (format #t " ?s ?p ?o .~%}~%```~%")) - (format #t "~%Expected Result:~%~%```rdf~%") - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject) - (lambda (s p o) - (triple s p o)))) - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - (format #t "```~%~%")) - (when data? + (format #t "~a -> ~a -> ~a ~%" + (if (symbol? #,(field->datum #'subject)) + (symbol->string #,(field->datum #'subject)) + #,(field->datum #'subject)) + predicate + (if (symbol? object) + (symbol->string object) + object)))) + (map-alist + '() + #,@(field->datum #'(predicate-clauses ...)))) + (format #t "```~%Here's an example query:~%~%```sparql~%") + (documentation?) + (newline) + (let* ((result + (map-alist (sql-find + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + #,@(field->key #'(predicate-clauses ...)))) + (first-n (list-head result + (let ((n + (min 4 (truncate + (+ (exact-integer-sqrt (length result)) 1))))) + (if (< n 3) + (length result) + n))))) + (format #t "SELECT * WHERE { ~%") + (for-each (match-lambda + ((predicate . object) + (match object + ((or (? symbol? object) + (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + (format #t " ?s ~a ~a .~%" predicate object)) + ((and (? string? object) + (? (lambda (el) (not (string-null? el))) object)) + (format #t " ?s ~a \"~a\" .~%" predicate object)) + (_ "")))) + first-n) + (format #t " ?s ?p ?o .~%}~%```~%")) + (format #t "~%Expected Result:~%~%```rdf~%") + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject) + (lambda (s p o) + (triple s p o)))) + db + (format #f "~a LIMIT 1" base-sql)) + (format #t "```~%~%")) + (when data? #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(for-each triple @@ -549,16 +557,14 @@ The above query results to triples that have the form: (list 'triple-predicate ...) (list 'triple-object ...))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (sql-for-each (lambda (row) - (let* ((subject-val #,(field->assoc-ref #'row #'subject)) - (po-alist - (map-alist row #,@(field->key #'(predicate-clauses ...))))) - (emit-short-turtle subject-val po-alist))) - db - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - ))) + (sql-for-each + (lambda (row) + (let* ((subject-val #,(field->assoc-ref #'row #'subject)) + (po-alist + (map-alist row #,@(field->key #'(predicate-clauses ...))))) + (emit-short-turtle subject-val po-alist))) + db + sql)))))) (_ (error "Invalid define-transformer syntax:" (syntax->datum x)))))) (define (get-keyword-value args keyword default) @@ -577,8 +583,14 @@ The above query results to triples that have the form: (prefixes (assoc-ref alist 'prefixes)) (inputs (assoc-ref alist 'inputs)) (outputs (assoc-ref alist 'outputs)) - (rdf-path (get-keyword-value outputs #:rdf "")) - (doc-path (get-keyword-value outputs #:documentation ""))) + (total-rows (assoc-ref alist 'total-rows)) + (rows-per-chunk (assoc-ref alist 'rows-per-chunk)) + (chunking? (and total-rows rows-per-chunk)) + (chunks (if chunking? + (ceiling (/ total-rows rows-per-chunk)) + 1)) + (rdf-path (get-keyword-value outputs #:rdf #f)) + (doc-path (get-keyword-value outputs #:documentation #f))) (call-with-target-database connection (lambda (db) @@ -604,20 +616,29 @@ The above query results to triples that have the form: ;; Dumping the actual data (when rdf-path - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes) - (newline) - (for-each - (lambda (proc) - (proc db #:metadata? table-metadata?)) - inputs)) - #:encoding "UTF-8")))))))) + (do ((i 0 (+ i 1))) + ((>= i chunks)) + (let* ((offset (* i (or rows-per-chunk 0))) + (out-file + (if (= chunks 1) + rdf-path + (string-append rdf-path "." (number->string (+ i 1)) ".ttl")))) + (with-output-to-file + out-file + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:metadata? table-metadata? + #:limit rows-per-chunk + #:offset offset)) + inputs)) + #:encoding "UTF-8")))))))))) |
