diff options
Diffstat (limited to 'transform/special-forms.scm')
| -rw-r--r-- | transform/special-forms.scm | 356 |
1 files changed, 195 insertions, 161 deletions
diff --git a/transform/special-forms.scm b/transform/special-forms.scm index 8de4966..0c07a0a 100644 --- a/transform/special-forms.scm +++ b/transform/special-forms.scm @@ -6,6 +6,7 @@ #:use-module (transform sql) #:use-module (transform table) #:use-module (transform triples) + #:use-module (transform strings) #:export (translate-forms collect-forms collect-keys @@ -22,36 +23,47 @@ emit-short-turtle define-transformer)) +(define (emittable-object? o) + (cond + ((null? o) #f) + ((not o) #f) + ((and (string? o) (string-blank? o)) #f) + (else #t))) + (define (emit-short-turtle subject po-alist) (let loop ((pairs po-alist) (first? #t)) (match pairs (((p . o) rest ...) - ;; subject only on first line - (when first? - (format #t "~a " subject)) - (when (not first?) - (format #t "\t")) ; indent following lines - - (match o - ((? symbol?) - (format #t "~a ~a" p (symbol->string o))) - ((or (? (lambda (el) (and (string? el) - (string-match "^\\(.*\\)$" el)))) - (? (lambda (el) (and (string? el) - (string-match "^\\[.*\\]$" el))))) - (format #t "~a ~s" p o)) - (_ - (format #t "~a \"~a\"" p o))) - - (if (null? rest) - (format #t " .~%") ; last triple - (format #t " ;~%")) ; continuation - - (loop rest #f)) - + (if (not (emittable-object? o)) + (loop rest first?) ; skip malformed or empty objects + (begin + ;; subject only once + (when first? + (format #t "~a " subject)) + (when (not first?) + (format #t "\t")) + + ;; emit predicate–object + (match o + ((? symbol?) + (format #t "~a ~a" p (symbol->string o))) + ((? string?) + (format #t "~a \"~a\"" p o)) + (_ + (format #t "~a ~s" p o))) + + ;; separator depends on *remaining emittable pairs* + (if (any (match-lambda + ((p . o) (emittable-object? o))) + rest) + (format #t " ;~%") + (format #t " .~%")) + + (loop rest #f)))) (() #f)))) + (define (key->assoc-ref alist x) "Recursively translate (key k) forms in source X to (assoc-ref ALIST k) forms." @@ -407,57 +419,68 @@ must be remedied." #`(define* (name db #:key (metadata? #f) (data? #t) - (documentation? #f)) - (when metadata? - #,@(let ((table (symbol->string (syntax->datum #'primary-table))) - (subject-type (any (lambda (predicate) - (syntax-case predicate (rdf:type) - ((_ rdf:type type) #'type) - (_ #f))) - #'(predicate-clauses ...)))) - (map (lambda (predicate-clause) - (syntax-case predicate-clause () - ((_ predicate _) - ;; Dump metadata about the transform itself. - #`(begin - (scm->triples - (map-alist '() - (set rdf:type 'gn-id:transform) - (set gn-term:createsPredicate 'predicate) - (filter-set gn-term:forSubjectType #,subject-type) - (multiset gn-term:dependsOn - '#,(map (lambda (field) - (match (syntax->datum field) - ((table-name column-name _ ...) - (datum->syntax - x (column-id (symbol->string table-name) - (symbol->string column-name)))) - (((query alias)) - (datum->syntax - x (column-id query (symbol->string alias)))))) - (collect-fields predicate-clause)))) - #,(id table (syntax->datum #'predicate))) - ;; Automatically create domain triples - ;; for predicates. - (when #,subject-type - (triple 'predicate 'rdfs:domain #,subject-type)))) - (_ (error "Invalid predicate clause:" predicate-clause)))) - #'(predicate-clauses ...)))) - (when documentation? - (format #t "~%## '~a'~%~%" (syntax->datum #'name)) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(begin - (when (not (list 'triple-subject ...)) - (format #t "## Schema Triples:~%~%```text~%") - (for-each (lambda (s p o) - (format #t "~a -> ~a -> ~a~%" s p o)) - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...)) - (format #t "```")))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (format #t "## Generated Triples: + (documentation? #f) + (limit #f) + (offset #f)) + (let* ((base-sql + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) + (sql + (if (and limit offset) + (format #f "~a LIMIT ~a OFFSET ~a" + base-sql limit offset) + base-sql))) + (when metadata? + #,@(let ((table (symbol->string (syntax->datum #'primary-table))) + (subject-type (any (lambda (predicate) + (syntax-case predicate (rdf:type) + ((_ rdf:type type) #'type) + (_ #f))) + #'(predicate-clauses ...)))) + (map (lambda (predicate-clause) + (syntax-case predicate-clause () + ((_ predicate _) + ;; Dump metadata about the transform itself. + #`(begin + (scm->triples + (map-alist '() + (set rdf:type 'gn-id:transform) + (set gn-term:createsPredicate 'predicate) + (filter-set gn-term:forSubjectType #,subject-type) + (multiset gn-term:dependsOn + '#,(map (lambda (field) + (match (syntax->datum field) + ((table-name column-name _ ...) + (datum->syntax + x (column-id (symbol->string table-name) + (symbol->string column-name)))) + (((query alias)) + (datum->syntax + x (column-id query (symbol->string alias)))))) + (collect-fields predicate-clause)))) + #,(id table (syntax->datum #'predicate))) + ;; Automatically create domain triples + ;; for predicates. + (when #,subject-type + (triple 'predicate 'rdfs:domain #,subject-type)))) + (_ (error "Invalid predicate clause:" predicate-clause)))) + #'(predicate-clauses ...)))) + (when documentation? + (format #t "~%## '~a'~%~%" (syntax->datum #'name)) + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(begin + (when (not (list 'triple-subject ...)) + (format #t "## Schema Triples:~%~%```text~%") + (for-each (lambda (s p o) + (format #t "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...)) + (format #t "```")))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (format #t "## Generated Triples: The following SQL query was executed: @@ -469,67 +492,64 @@ The above query results to triples that have the form: ```text " - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...)) - (for-each (match-lambda - ((predicate . object) - (format #t "~a -> ~a -> ~a ~%" - (if (symbol? #,(field->datum #'subject)) - (symbol->string #,(field->datum #'subject)) - #,(field->datum #'subject)) - predicate - (if (symbol? object) - (symbol->string object) - object)))) - (map-alist - '() - #,@(field->datum #'(predicate-clauses ...)))) - (format #t "```~%Here's an example query:~%~%```sparql~%") - (documentation?) - (newline) - (let* ((result - (map-alist (sql-find - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - #,@(field->key #'(predicate-clauses ...)))) - (first-n (list-head result - (let ((n - (min 4 (truncate - (+ (exact-integer-sqrt (length result)) 1))))) - (if (< n 3) - (length result) - n))))) - (format #t "SELECT * WHERE { ~%") + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) (for-each (match-lambda ((predicate . object) - (match object - ((or (? symbol? object) - (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) - (format #t " ?s ~a ~a .~%" predicate object)) - ((and (? string? object) - (? (lambda (el) (not (string-null? el))) object)) - (format #t " ?s ~a \"~a\" .~%" predicate object)) - (_ "")))) - first-n) - (format #t " ?s ?p ?o .~%}~%```~%")) - (format #t "~%Expected Result:~%~%```rdf~%") - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject) - (lambda (s p o) - (triple s p o)))) - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - (format #t "```~%~%")) - (when data? + (format #t "~a -> ~a -> ~a ~%" + (if (symbol? #,(field->datum #'subject)) + (symbol->string #,(field->datum #'subject)) + #,(field->datum #'subject)) + predicate + (if (symbol? object) + (symbol->string object) + object)))) + (map-alist + '() + #,@(field->datum #'(predicate-clauses ...)))) + (format #t "```~%Here's an example query:~%~%```sparql~%") + (documentation?) + (newline) + (let* ((result + (map-alist (sql-find + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + #,@(field->key #'(predicate-clauses ...)))) + (first-n (list-head result + (let ((n + (min 4 (truncate + (+ (exact-integer-sqrt (length result)) 1))))) + (if (< n 3) + (length result) + n))))) + (format #t "SELECT * WHERE { ~%") + (for-each (match-lambda + ((predicate . object) + (match object + ((or (? symbol? object) + (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + (format #t " ?s ~a ~a .~%" predicate object)) + ((and (? string? object) + (? (lambda (el) (not (string-null? el))) object)) + (format #t " ?s ~a \"~a\" .~%" predicate object)) + (_ "")))) + first-n) + (format #t " ?s ?p ?o .~%}~%```~%")) + (format #t "~%Expected Result:~%~%```rdf~%") + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject) + (lambda (s p o) + (triple s p o)))) + db + (format #f "~a LIMIT 1" base-sql)) + (format #t "```~%~%")) + (when data? #,(syntax-case #'schema-triples-clause (schema-triples) ((schema-triples (triple-subject triple-predicate triple-object) ...) #`(for-each triple @@ -537,16 +557,14 @@ The above query results to triples that have the form: (list 'triple-predicate ...) (list 'triple-object ...))) (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (sql-for-each (lambda (row) - (let* ((subject-val #,(field->assoc-ref #'row #'subject)) - (po-alist - (map-alist row #,@(field->key #'(predicate-clauses ...))))) - (emit-short-turtle subject-val po-alist))) - db - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - ))) + (sql-for-each + (lambda (row) + (let* ((subject-val #,(field->assoc-ref #'row #'subject)) + (po-alist + (map-alist row #,@(field->key #'(predicate-clauses ...))))) + (emit-short-turtle subject-val po-alist))) + db + sql)))))) (_ (error "Invalid define-transformer syntax:" (syntax->datum x)))))) (define (get-keyword-value args keyword default) @@ -565,8 +583,14 @@ The above query results to triples that have the form: (prefixes (assoc-ref alist 'prefixes)) (inputs (assoc-ref alist 'inputs)) (outputs (assoc-ref alist 'outputs)) - (rdf-path (get-keyword-value outputs #:rdf "")) - (doc-path (get-keyword-value outputs #:documentation ""))) + (total-rows (assoc-ref alist 'total-rows)) + (rows-per-chunk (assoc-ref alist 'rows-per-chunk)) + (chunking? (and total-rows rows-per-chunk)) + (chunks (if chunking? + (ceiling (/ total-rows rows-per-chunk)) + 1)) + (rdf-path (get-keyword-value outputs #:rdf #f)) + (doc-path (get-keyword-value outputs #:documentation #f))) (call-with-target-database connection (lambda (db) @@ -592,20 +616,30 @@ The above query results to triples that have the form: ;; Dumping the actual data (when rdf-path - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes) - (newline) - (for-each - (lambda (proc) - (proc db #:metadata? table-metadata?)) - inputs)) - #:encoding "UTF-8")))))))) + (do ((i 0 (+ i 1))) + ((>= i chunks)) + (let* ((offset (* i (or rows-per-chunk 0))) + (out-file + (if (= chunks 1) + rdf-path + (string-append (path-without-extension rdf-path) + "." (number->string (+ i 1)) ".ttl")))) + (with-output-to-file + out-file + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:metadata? table-metadata? + #:limit rows-per-chunk + #:offset offset)) + inputs)) + #:encoding "UTF-8")))))))))) |
