about summary refs log tree commit diff
path: root/transform
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-21 15:03:20 +0300
committerMunyoki Kilyungi2023-08-21 15:06:06 +0300
commit8e1e4cceab516afab46ccced63ca9edab663ca11 (patch)
treecad625c3ecf0a555d7b56b777cdade535cb30d07 /transform
parent51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 (diff)
downloadgn-transform-databases-8e1e4cceab516afab46ccced63ca9edab663ca11.tar.gz
Rename dump -> transform
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'transform')
-rw-r--r--transform/schema-dump.scm125
-rw-r--r--transform/special-forms.scm599
-rw-r--r--transform/sql.scm114
-rw-r--r--transform/string-similarity.scm37
-rw-r--r--transform/strings.scm101
-rw-r--r--transform/table.scm28
-rw-r--r--transform/triples.scm110
7 files changed, 1114 insertions, 0 deletions
diff --git a/transform/schema-dump.scm b/transform/schema-dump.scm
new file mode 100644
index 0000000..bf7f8cb
--- /dev/null
+++ b/transform/schema-dump.scm
@@ -0,0 +1,125 @@
+(define-module (transform schema)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 srfi-26)
+  #:use-module (transform sql)
+  #:use-module (transform triples)
+  #:use-module (transform strings)
+  #:use-module (transform table))
+
+
+(define (table-fields db table)
+  (format #t "* ~a~%" table)
+  (match (sql-find db
+                   (select-query ((TableComments Comment))
+                                 (TableComments)
+                                 (format #f "WHERE TableName = '~a'" table)))
+    ((("Comment" . comment))
+     (format #t "~a~%" comment)))
+  (sql-for-each (lambda (row)
+                  (match row
+                    ((("TableField" . table-field)
+                      ("Foreign_Key" . foreign-key)
+                      ("Annotation" . annotation))
+                     (format #t "** ~a~%" (substring table-field (1+ (string-length table))))
+                     (unless (string-null? foreign-key)
+                       (format #t "Foreign key to ~a~%" foreign-key))
+                     (unless (string-null? annotation)
+                       (display annotation)
+                       (newline)))))
+                db
+                (select-query ((TableFieldAnnotation TableField)
+                               (TableFieldAnnotation Foreign_Key)
+                               (TableFieldAnnotation Annotation))
+                              (TableFieldAnnotation)
+                              (format #f "WHERE TableField LIKE '~a.%'" table)))
+  (newline))
+
+(define (get-tables-from-comments db)
+  (sql-map (match-lambda
+             ((("TableName" . table)) table))
+           db
+           (select-query ((TableComments TableName))
+                         (TableComments))))
+
+(define (schema-annotations db)
+  (call-with-target-database
+   (lambda (db)
+     (for-each (cut table-fields db <>)
+               (get-tables-from-comments db)))))
+
+(define (tables db)
+  "Return list of all tables in DB. Each element of the returned list
+is a <table> object."
+  (map (lambda (table)
+         (set-table-columns table
+           (sql-map (lambda (row)
+                      (make-column (assoc-ref row "Field")
+                                   (assoc-ref row "Type")))
+                    db
+                    (format #f "SHOW COLUMNS FROM ~a" (table-name table)))))
+       (sql-map (lambda (row)
+                  (make-table (assoc-ref row "table_name")
+                              ;; FIXME: This is probably correct only for
+                              ;; MyISAM tables.
+                              (assoc-ref row "data_length")
+                              #f))
+                db
+                (select-query ((information_schema.tables table_name)
+                               (information_schema.tables data_length))
+                              (information_schema.tables)
+                              (format #f "WHERE table_schema = '~a'"
+                                      (assq-ref %connection-settings 'sql-database))))))
+
+(define (schema db)
+  (let ((tables (tables db)))
+    (for-each (lambda (table)
+                (let ((table-id (string->identifier
+                                 "table"
+                                 ;; We downcase table names in
+                                 ;; identifiers. So, we distinguish
+                                 ;; between the user and User tables.
+                                 (if (string=? (table-name table) "User")
+                                     "user2"
+                                     (table-name table)))))
+                  (triple table-id 'rdf:type 'gn:sqlTable)
+                  (triple table-id 'gn:name (table-name table))
+                  (triple table-id 'gn:hasSize (table-size table))
+                  (for-each (lambda (column)
+                              (let ((column-id (column-id (table-name table)
+                                                          (column-name column))))
+                                (triple column-id 'rdf:type 'gn:sqlTableField)
+                                (triple column-id 'gn:name (column-name column))
+                                (triple column-id 'gn:sqlFieldType (column-type column))
+                                (triple table-id 'gn:hasField column-id)))
+                            (table-columns table))))
+              tables)))
+
+(define* (data-table db table-name data-field
+                          #:optional (default-directory ""))
+  (let ((directory (string-append default-directory "/" table-name))
+        (port #f)
+        (current-strain-id #f))
+    (unless (file-exists? directory)
+      (mkdir directory))
+    (sql-for-each (match-lambda
+                    (((_ . strain-id)
+                      (_ . value))
+                     ;; Close file if new strain.
+                     (when (and port
+                                (not (= current-strain-id strain-id)))
+                       (close-port port)
+                       (set! port #f))
+                     ;; If no file is open, open new file.
+                     (unless port
+                       (set! current-strain-id strain-id)
+                       (let ((filename (string-append directory
+                                                      "/" (number->string strain-id))))
+                         (display filename (current-error-port))
+                         (newline (current-error-port))
+                         (set! port (open-output-file filename))))
+                     (display value port)
+                     (newline port)))
+                  db
+                  (format #f "SELECT StrainId, ~a FROM ~a ORDER BY StrainId"
+                          data-field table-name))
+    (close-port port)))
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
new file mode 100644
index 0000000..4b69337
--- /dev/null
+++ b/transform/special-forms.scm
@@ -0,0 +1,599 @@
+(define-module (transform special-forms)
+  #:use-module (srfi srfi-1)
+  #:use-module (ice-9 match)
+  #:use-module (srfi srfi-26)
+  #:use-module (transform sql)
+  #:use-module (transform table)
+  #:use-module (transform triples)
+  #:export (translate-forms
+            collect-forms
+            collect-keys
+            field->key
+            field->assoc-ref
+            collect-fields
+            find-clause
+            remove-namespace
+            column-id
+            id
+            syntax-let
+            blank-node
+            map-alist
+	    with-documentation
+            define-transformer))
+
+(define (key->assoc-ref alist x)
+  "Recursively translate (key k) forms in source X to (assoc-ref ALIST
+k) forms."
+  (translate-forms 'key
+                   ;; (syntax-rules (key)
+                   ;;   ((key k) (assoc-ref alist k)))
+                   (lambda (x)
+                     (syntax-case x (key)
+                       ((key k) #`(assoc-ref #,alist 'k))))
+                   x))
+
+(define (alist-delete* alist keys)
+  "Delete entries from ALIST whose key is equal (in the sense of
+equal?) to any in KEYS, a list."
+  (remove (match-lambda
+            ((key . value)
+             (member key keys))
+            (x (error "malformed alist element" x)))
+          alist))
+
+(define (translate-forms from translator x)
+  "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and
+replace them with the return value."
+  (syntax-case x ()
+    ;; Handle translation base case.
+    ((head tail ...) (eq? (syntax->datum #'head)
+                          from)
+     (translator x))
+    ;; Recurse.
+    ((head tail ...)
+     (cons (translate-forms from translator #'head)
+           (map (cut translate-forms from translator <>)
+                #'(tail ...))))
+    ;; Handle leaf base case.
+    (leaf #'leaf)))
+
+(define (collect-forms target x)
+  "Recursively collect (TARGET ...) forms in source X and return them
+as a list."
+  (syntax-case x ()
+    ;; Handle collection base case.
+    ((head tail ...) (eq? (syntax->datum #'head)
+                          target)
+     (list x))
+    ;; Recurse.
+    ((head tail ...)
+     (append (collect-forms target #'head)
+             (append-map (cut collect-forms target <>) #'(tail ...))))
+    ;; Handle leaf base case.
+    (leaf (list))))
+
+(define (collect-keys x)
+  "Recursively collect (key k) forms from source X and return as a
+list of all K."
+  (map (syntax-rules (key)
+         ((key k) 'k))
+       (collect-forms 'key x)))
+
+(define-syntax map-alist
+  (lambda (x)
+    "Transform (aka map) ALIST, an association list, into another
+association list. The returned association list may contain multiple
+associations for the same key. equal? is used in all association list
+key comparisons.
+
+Syntax:
+
+(map-alist alist
+  (verb key expression) ...
+  (else=> proc))
+
+VERB must be one of set, filter-set, multiset and remove.
+
+For the set VERB, KEY is set to the result of evaluating
+EXPRESSION. Multiple set verbs on the same key will result in multiple
+associations for that key.
+
+For the filter-set VERB, KEY is set to the result of evaluating
+EXPRESSION only if that result is not #f.
+
+For the multiset VERB, EXPRESSION must return a list and KEY is
+associated multiple times once with each element of the returned list.
+
+For the remove VERB, KEY is discarded.
+
+EXPRESSIONs must reference elements of ALIST using (key k) forms where
+K is the key to be referenced from ALIST. K must not be quoted. That
+is, if K is the symbol 'bar, it must be referenced as (key bar), not
+as (key 'bar).
+
+The else=> clause is optional.
+
+If the else=> clause is present, PROC is passed all pairs of ALIST
+that are not set by an earlier (verb key expression) action. PROC must
+return #f or a pair to replace its input pair. If PROC returns #f,
+that pair is discarded.
+
+If the else=> clause is absent, all unset pairs are discarded.
+
+Example:
+
+(map-alist '((\"foo\" . 1)
+             (bar . 2)
+             (foobar . 5)
+             (fubar . 3))
+  (set spam (1+ (key \"foo\")))
+  (set ham (* 2 (key bar)))
+  (set eggs (* 3 (key bar)))
+  (set aal (+ (key \"foo\")
+              (key bar)))
+  (multiset vel (iota (* 2 (key bar))))
+  (remove foobar)
+  (else=> (match-lambda
+            ((key . value)
+             (cons key (expt 2 value))))))
+=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3)
+    (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))"
+    (syntax-case x ()
+      ((_ alist actions ...)
+       ;; TODO: Check that all actions are valid.
+       #`(let ((evaluated-alist alist))
+           (append (remove (match-lambda
+                             ;; Filter out results of filter-set actions.
+                             ((key . #f)
+                              (member key '#,(filter-map (lambda (action)
+                                                           (syntax-case action (filter-set)
+                                                             ((filter-set key expression) #'key)
+                                                             (_ #f)))
+                                                         #'(actions ...))))
+                             (_ #f))
+                           ;; Do set and filter-set.
+                           `#,(filter-map (lambda (action)
+                                            (syntax-case action (set filter-set)
+                                              ((set key expression)
+                                               #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
+                                              ((filter-set key expression)
+                                               #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
+                                              (_ #f)))
+                                          #'(actions ...)))
+                   ;; Do multiset.
+                   #,@(filter-map (lambda (action)
+                                    (syntax-case action (multiset)
+                                      ((multiset key expression)
+                                       #`(map (cut cons 'key <>)
+                                              #,(key->assoc-ref #'evaluated-alist #'expression)))
+                                      (_ #f)))
+                                  #'(actions ...))
+                   ;; Apply else=> procedure on unspecified keys. If
+                   ;; no else=> procedure is specified, delete
+                   ;; unspecified keys.
+                   (filter-map #,(or (any (lambda (action)
+                                            (syntax-case action (else=>)
+                                              ((else=> proc) #'proc)
+                                              (_ #f)))
+                                          #'(actions ...))
+                                     #'(const #f))
+                               ;; The unspecified part of the input
+                               ;; alist
+                               (alist-delete* evaluated-alist
+                                              (list
+                                               ;; Keys that were referenced
+                                               #,@(append-map (lambda (action)
+                                                                (syntax-case action ()
+                                                                  ((_ key expression)
+                                                                   (collect-keys #'expression))
+                                                                  (_ '())))
+                                                              #'(actions ...))
+                                               ;; Keys that were deleted
+                                               #,@(filter-map (lambda (action)
+                                                                (syntax-case action (remove)
+                                                                  ((remove key) #''key)
+                                                                  (_ #f)))
+                                                              #'(actions ...))
+                                               ;; Keys that were set
+                                               #,@(filter-map (lambda (action)
+                                                                (syntax-case action ()
+                                                                  ((_ key expression) #''key)
+                                                                  (_ #f)))
+                                                              #'(actions ...)))))))))))
+
+
+
+(eval-when (expand load )
+  (define (field->datum x)
+    (translate-forms
+     'field
+     (lambda (x)
+       (syntax-case x (field)
+         ((field (query alias))
+          #`(format #f "~a" (syntax->datum #'alias)))
+         ((field table column)
+          #`(format #f "~a(~a)"
+                    (syntax->datum #'table)
+                    (syntax->datum #'column)))
+         ((field table column alias)
+          #`(format #f "~a(~a)"
+                    (syntax->datum #'table)
+                    (syntax->datum #'alias)))))
+     x))
+
+  (define (field->key x)
+    (translate-forms 'field
+                     (lambda (x)
+                       #`(key #,(symbol->string
+                                 (syntax->datum
+                                  ((syntax-rules (field)
+                                     ((field (query alias)) alias)
+                                     ((field table column) column)
+                                     ((field table column alias) alias))
+                                   x)))))
+                     x))
+
+  (define (field->assoc-ref alist x)
+    "Recursively translate field references in source X to (assoc-ref
+ALIST field-name) forms."
+    (translate-forms 'field
+                     (lambda (x)
+                       #`(assoc-ref #,alist
+                                    #,(symbol->string
+                                       (syntax->datum
+                                        ((syntax-rules (field)
+                                           ((field (query alias)) alias)
+                                           ((field table column) column)
+                                           ((field table column alias) alias))
+                                         x)))))
+                     x))
+
+  (define (collect-fields x)
+    (map (syntax-rules (field)
+           ((field reference ...)
+            (reference ...)))
+         (collect-forms 'field x)))
+
+  (define (find-clause clauses key)
+    "Find (KEY ...) clause among CLAUSES, a list of syntax forms."
+    (find (lambda (x)
+            (syntax-case x ()
+              ((clause-key _ ...)
+               (eq? (syntax->datum #'clause-key)
+                    key))))
+          clauses))
+
+  (define (remove-namespace str)
+    "Remove RDF namespace prefix from STR."
+    (substring str (1+ (string-index str #\:))))
+
+  (define (column-id table-name column-name)
+    (string->identifier
+     "field" (string-append
+              ;; We downcase table names in identifiers. So, we
+              ;; distinguish between the user and User tables.
+              (if (string=? table-name "User")
+                  "user2" table-name)
+              "__" column-name)))
+
+  (define (id table predicate)
+    (symbol->string
+     (string->identifier
+      "transform"
+      (string-append
+       table "_" (remove-namespace (symbol->string predicate)))))))
+
+(define-syntax blank-node
+  (syntax-rules ()
+    "Allow having set and multiset within the context of a blank-node"
+    [(_ (op predicate object) ...)
+     (let [(node (string-join
+                  (filter-map (match-lambda
+                                ((pred . obj)
+                                 (match obj
+                                   ((and (?  string? obj)
+                                         (?  string-null? obj))
+                                    #f)
+                                   ((?  symbol? obj)
+                                    (format #f "~a ~a" pred (symbol->string obj)))
+                                   (_
+                                    (format #f "~a ~s" pred obj)))))
+                              (map-alist '()
+                                (op predicate object) ...))
+                  " ; "))]
+       (if (string-null? node)
+           ""
+           (format #f "[ ~a ]" node)))]))
+
+(define-syntax syntax-let
+  (syntax-rules ()
+    "Like match-let, but for syntax.
+
+(syntax-let ((pattern literals expression))
+  body ...)
+≡
+(syntax-case expression ()
+  (pattern literals
+   body ...))
+
+literals is optional. So,
+
+(syntax-let ((pattern expression))
+  body ...)
+≡
+(syntax-case expression ()
+  (pattern
+   body ...))
+"
+    ((_ () body ...)
+     (begin body ...))
+    ((_ ((pattern expression)
+         bindings ...)
+        body ...)
+     (syntax-case expression ()
+       (pattern
+        (syntax-let (bindings ...)
+          body ...))))
+    ((_ ((pattern literals expression)
+         bindings ...)
+        body ...)
+     (syntax-case expression literals
+       (pattern
+        (syntax-let (bindings ...)
+          body ...))))))
+
+(define-syntax define-transformer
+  (lambda (x)
+    "Define FUNCTION-NAME, a function that transforms a view of the database.
+
+define-transformer consists of three order-agnostic clauses---tables,
+schema-triples and triples---in the form shown below.
+
+(define-transformer function-name
+  (tables (table ...) raw-forms ...)
+  (schema-triples
+   (subject predicate object) ...)
+  (triples subject
+    (verb predicate object) ...))
+
+The tables clause specifies the database tables to be joined to
+construct the view to be transformed. TABLE must be either of the form
+TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME
+RAW-CONDITION). TABLE-NAME must, obviously, be the name of the
+table. JOIN-OPERATOR must be one of join, left-join and
+inner-join. RAW-CONDITION should be the join condition as a raw
+string. This is usually something like
+\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to
+strings to be appended to the SQL query.
+
+The schema-triples clause specifies the list of triples to be written
+once when the transform starts.
+
+The triples clause specifies the triples to be transformed once for each
+row in the view. All triples have a common SUBJECT. The (verb
+predicate object) clauses are described below.
+
+VERB must either be set or multiset. For the set VERB, a single triple
+(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the
+result of evaluating OBJECT. For the multiset VERB, OBJECT must
+evaluate to a list, and a triple (SUBJECT PREDICATE
+OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT
+of that list.
+
+The SUBJECT and OBJECT expressions in the triples clause must
+reference database fields using a (field TABLE COLUMN) clause where
+TABLE and COLUMN refer to the table and column of the field being
+referenced. Database fields can also be referenced using (field TABLE
+COLUMN ALIAS) where ALIAS is an alias for that column in the SQL
+query. Specification of ALIAS is indeed a leak in the abstraction, and
+must be remedied."
+    (syntax-case x (tables schema-triples triples)
+      ((_ name clauses ...)
+       (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables)
+                     (find-clause #'(clauses ...) 'tables))
+                    (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples)
+                                               #'(schema-triples)))
+                    ((triples subject predicate-clauses ...) (triples)
+                     (find-clause #'(clauses ...) 'triples)))
+         #`(define* (name db #:key
+                          (metadata? #f)
+                          (data? #t)
+                          (documentation? #f))
+             (when metadata?
+               #,@(let ((table (symbol->string (syntax->datum #'primary-table)))
+                        (subject-type (any (lambda (predicate)
+                                             (syntax-case predicate (rdf:type)
+                                               ((_ rdf:type type) #'type)
+                                               (_ #f)))
+                                           #'(predicate-clauses ...))))
+                    (map (lambda (predicate-clause)
+                           (syntax-case predicate-clause ()
+                             ((_ predicate _)
+                              ;; Dump metadata about the transform itself.
+                              #`(begin
+                                  (scm->triples
+                                   (map-alist '()
+	        		     (set rdf:type 'gn-id:transform)
+	        		     (set gn-term:createsPredicate 'predicate)
+	        		     (filter-set gn-term:forSubjectType #,subject-type)
+	        		     (multiset gn-term:dependsOn
+	        			       '#,(map (lambda (field)
+	        					 (match (syntax->datum field)
+	        					   ((table-name column-name _ ...)
+	        					    (datum->syntax
+	        					     x (column-id (symbol->string table-name)
+	        							  (symbol->string column-name))))
+	        					   (((query alias))
+	        					    (datum->syntax
+	        					     x (column-id query (symbol->string alias))))))
+	        				       (collect-fields predicate-clause))))
+                                   #,(id table (syntax->datum #'predicate)))
+                                  ;; Automatically create domain triples
+                                  ;; for predicates.
+                                  (when #,subject-type
+                                    (triple 'predicate 'rdfs:domain #,subject-type))))
+                             (_ (error "Invalid predicate clause:" predicate-clause))))
+                         #'(predicate-clauses ...))))
+             (when documentation?
+               (format #t "~%## '~a'~%~%" (syntax->datum #'name))
+               #,(syntax-case #'schema-triples-clause (schema-triples)
+                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                    #`(begin
+                        (when (not (list 'triple-subject ...))
+                          (format #t "## Schema Triples:~%~%```text~%")
+                          (for-each (lambda (s p o)
+                                      (format #t "~a -> ~a -> ~a~%" s p o))
+                                    (list 'triple-subject ...)
+                                    (list 'triple-predicate ...)
+                                    (list 'triple-object ...))
+                          (format #t "```"))))
+                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+               (format #t "## Generated Triples:
+
+The following SQL query was executed:
+
+```sql
+~a
+```
+
+The above query results to triples that have the form:
+
+```text
+"
+                       (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                     (primary-table other-tables ...)
+                                     tables-raw ...))
+               (for-each (match-lambda
+                           ((predicate . object)
+                            (format #t "~a -> ~a -> ~a ~%"
+                                    (if (symbol? #,(field->datum #'subject))
+                                        (symbol->string #,(field->datum #'subject))
+                                        #,(field->datum #'subject))
+                                    predicate
+                                    (if (symbol? object)
+                                        (symbol->string object)
+                                        object))))
+                         (map-alist
+                             '()
+                           #,@(field->datum #'(predicate-clauses ...))))
+               (format #t "```~%Here's an example query:~%~%```sparql~%")
+               (documentation?)
+               (newline)
+               (let* ((result
+                       (map-alist (sql-find
+                                   db
+                                   (format #f "~a LIMIT 1"
+                                           (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                         (primary-table other-tables ...)
+                                                         tables-raw ...)))
+        		 #,@(field->key #'(predicate-clauses ...))))
+                      (first-n (list-head result
+                                          (let ((n
+                                                 (min 4 (truncate
+                                                         (+ (exact-integer-sqrt (length result)) 1)))))
+                                            (if (< n 3)
+                                                (length result)
+                                                n)))))
+                 (format #t "SELECT * WHERE { ~%")
+                 (for-each (match-lambda
+                             ((predicate . object)
+                              (match object
+                                        ((or (?  symbol? object)
+                                             (?  (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                                         (format #t "    ?s ~a ~a .~%" predicate object))
+                                        ((and (? string? object)
+                                              (? (lambda (el) (not (string-null? el))) object))
+                                         (format #t "    ?s ~a \"~a\" .~%" predicate object))
+                                        (_ ""))))
+                           first-n)
+                 (format #t "    ?s ?p ?o .~%}~%```~%"))
+               (format #t "~%Expected Result:~%~%```rdf~%")
+               (sql-for-each (lambda (row)
+                               (scm->triples
+                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                #,(field->assoc-ref #'row #'subject)
+                                (lambda (s p o)
+                                  (triple s p o))))
+                             db
+                             (format #f "~a LIMIT 1"
+                                     (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                                   (primary-table other-tables ...)
+                                                   tables-raw ...)))
+               (format #t "```~%~%"))
+             (when data?
+               #,(syntax-case #'schema-triples-clause (schema-triples)
+                   ((schema-triples (triple-subject triple-predicate triple-object) ...)
+                    #`(for-each triple
+                                (list 'triple-subject ...)
+                                (list 'triple-predicate ...)
+                                (list 'triple-object ...)))
+                   (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+	       (sql-for-each (lambda (row)
+                               (scm->triples
+                                (map-alist row #,@(field->key #'(predicate-clauses ...)))
+                                #,(field->assoc-ref #'row #'subject)))
+                             db
+                             (select-query #,(collect-fields #'(subject predicate-clauses ...))
+                                           (primary-table other-tables ...)
+                                           tables-raw ...)))
+             )))
+      (_ (error "Invalid define-transformer syntax:" (syntax->datum x))))))
+
+(define (get-keyword-value args keyword default)
+  (let ((kv (memq keyword args)))
+    (if (and kv (>= (length kv) 2))
+        (cadr kv)
+        default)))
+
+(define-syntax with-documentation
+  (syntax-rules ()
+    ((_ (key value) ...)
+     (let* ((alist `((key . ,value) ...))
+            (name (assoc-ref alist 'name))
+            (connection (assoc-ref alist 'connection))
+            (table-metadata? (assoc-ref alist 'table-metadata?))
+            (prefixes (assoc-ref alist 'prefixes))
+            (inputs (assoc-ref alist 'inputs))
+            (outputs (assoc-ref alist 'outputs))
+            (rdf-path (get-keyword-value outputs #:rdf ""))
+            (doc-path (get-keyword-value outputs #:documentation "")))
+       (call-with-target-database
+        connection
+        (lambda (db)
+          (with-output-to-file        ;
+              doc-path
+            (lambda ()
+              (format #t "# ~a" name)
+              (for-each
+               (lambda (proc)
+                 (proc db
+                       #:metadata? #f
+                       #:data? #f
+                       #:documentation?
+                       (lambda () (for-each
+                                   (match-lambda
+                                     ((k v)
+                                      (begin
+                                        (prefix k v #f))))
+                                   prefixes))))
+               inputs))
+            #:encoding "UTF-8")
+
+          ;; Dumping the actual data
+          (with-output-to-file
+              rdf-path
+            (lambda ()
+              ;; Add the prefixes
+              (for-each
+               (match-lambda
+                 ((k v)
+                  (begin
+                    (prefix k v))))
+               prefixes)
+              (newline)
+              (for-each
+               (lambda (proc)
+                 (proc db #:metadata? table-metadata?))
+               inputs))
+            #:encoding "UTF-8")))))))
+
diff --git a/transform/sql.scm b/transform/sql.scm
new file mode 100644
index 0000000..a8962c8
--- /dev/null
+++ b/transform/sql.scm
@@ -0,0 +1,114 @@
+;;; Database helpers
+;;;
+;;; Most of these functions should have been a part of
+;;; guile-dbi. Never too late to contribute upstream!
+
+(define-module (transform sql)
+  #:use-module (srfi srfi-1)
+  #:use-module (srfi srfi-26)
+  #:use-module (ice-9 match)
+  #:use-module (dbi dbi)
+  #:export (select-query
+            call-with-database
+            call-with-target-database
+            sql-exec
+            sql-fold
+            sql-map
+            sql-for-each
+            sql-find))
+
+;; A half-baked macro to make SQL SELECT queries a bit more
+;; S-expression friendly
+(define-syntax select-query
+  (lambda (x)
+    (syntax-case x ()
+      ((_ fields tables raw-forms ...)
+       #`(string-append "SELECT "
+                        #,(syntax-case #'fields (distinct)
+                            ((distinct _ ...)
+                             "DISTINCT ")
+                            (_ ""))
+                        #,(string-join (filter-map (match-lambda
+                                                     ('distinct #f)
+                                                     (((query alias))
+                                                      (format #f "~a AS ~a" query alias))
+                                                     ((table column)
+                                                      (format #f "~a.~a" table column))
+                                                     ((table column alias)
+                                                      (format #f "~a.~a AS ~a" table column alias))
+                                                     (field-spec
+                                                      (error "Invalid field specification" field-spec)))
+                                                   (syntax->datum #'fields))
+                                       ", ")
+                        " FROM "
+                        #,(string-join (map (match-lambda
+                                              ((join table condition)
+                                               (format #f "~a ~a ~a"
+                                                       (case join
+                                                         ((join) "JOIN")
+                                                         ((left-join) "LEFT JOIN")
+                                                         ((inner-join) "INNER JOIN")
+                                                         (else (error "Invalid join operator" join)))
+                                                       table condition))
+                                              ((? symbol? table)
+                                               (symbol->string table))
+                                              (table-spec
+                                               (error "Invalid table specification" table-spec)))
+                                            (syntax->datum #'tables))
+                                       " ")
+                        #,(syntax-case #'(raw-forms ...) ()
+                            (() "")
+                            (_ " "))
+                        raw-forms ...))
+      (_ (error "Invalid SQL select query" (syntax->datum x))))))
+
+(define (call-with-database backend connection-string proc)
+  (let ((db #f))
+    (dynamic-wind (lambda ()
+                    (set! db (dbi-open backend connection-string)))
+                  (cut proc db)
+                  (cut dbi-close db))))
+
+(define (database-check-status db)
+  (match (dbi-get_status db)
+    ((code . str)
+     (unless (zero? code)
+       (error str)))))
+
+(define (sql-exec db statement)
+  (dbi-query db statement)
+  (database-check-status db))
+
+(define (sql-fold proc init db statement)
+  (sql-exec db statement)
+  (let loop ((result init))
+    (let ((row (dbi-get_row db)))
+      (if row
+          (loop (proc row result))
+          result))))
+
+(define (sql-map proc db statement)
+  (sql-fold (lambda (row result)
+              (cons (proc row) result))
+            (list) db statement))
+
+(define (sql-for-each proc db statement)
+  (sql-fold (lambda (row _)
+              (proc row))
+            #f db statement))
+
+(define (sql-find db statement)
+  (sql-exec db statement)
+  (dbi-get_row db))
+
+(define (call-with-target-database connection-settings proc)
+  (call-with-database "mysql" (string-join
+                               (list (assq-ref connection-settings 'sql-username)
+                                     (assq-ref connection-settings 'sql-password)
+                                     (assq-ref connection-settings 'sql-database)
+                                     "tcp"
+                                     (assq-ref connection-settings 'sql-host)
+                                     (number->string
+                                      (assq-ref connection-settings 'sql-port)))
+                               ":")
+                      proc))
diff --git a/transform/string-similarity.scm b/transform/string-similarity.scm
new file mode 100644
index 0000000..c715856
--- /dev/null
+++ b/transform/string-similarity.scm
@@ -0,0 +1,37 @@
+(define-module (transform string-similarity)
+  #:use-module (srfi srfi-1)
+  #:export (jaccard-string-similar?))
+
+(define (trigrams str)
+  "Return all trigrams in STR."
+  (if (< (string-length str) 3)
+      '()
+      (map (lambda (start)
+             (substring str start (+ start 3)))
+           (iota (- (string-length str) 2)))))
+
+(define (jaccard-index set1 set2)
+  "Return the Jaccard similarity coefficient between lists SET1 and
+SET2. Similarity between null sets is defined to be 0."
+  (if (and (null? set1)
+           (null? set2))
+      0
+      (let ((length-of-intersection (length (lset-intersection equal? set1 set2))))
+        (exact->inexact
+         (/ length-of-intersection
+            (- (+ (length set1) (length set2))
+               length-of-intersection))))))
+
+(define (jaccard-string-similarity str1 str2)
+  "Return the trigram similarity between strings STR1 and STR2 as
+defined by the Jaccard index."
+  (jaccard-index (trigrams (string-downcase str1))
+                 (trigrams (string-downcase str2))))
+
+(define (jaccard-string-similar? str1 str2)
+  "Return #t if STR1 and STR2 have a trigram similarity greater than
+0.8. Else, return #f. The Jaccard index is used as the similarity
+metric."
+  (let ((similarity-threshold 0.8))
+    (> (jaccard-string-similarity str1 str2)
+       similarity-threshold)))
diff --git a/transform/strings.scm b/transform/strings.scm
new file mode 100644
index 0000000..98f828f
--- /dev/null
+++ b/transform/strings.scm
@@ -0,0 +1,101 @@
+(define-module (transform strings)
+  #:use-module (srfi srfi-1)
+  #:use-module (srfi srfi-19)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 string-fun)
+  #:use-module (ice-9 textual-ports)
+  #:export (string-blank?
+            time-unix->string
+            string-blank?
+            string-split-substring
+            delete-substrings
+            replace-substrings
+            sanitize-rdf-string
+            snake->lower-camel
+            string-capitalize-first))
+
+(define (time-unix->string seconds . maybe-format)
+  "Given an integer saying the number of seconds since the Unix
+epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable
+date and time-string, possible using the MAYBE-FORMAT."
+  (letrec ([time-unix->time-utc
+            (lambda (seconds)
+              (add-duration
+               (date->time-utc (make-date 0 0 0 0 1 1 1970 0))
+               (make-time time-duration 0 seconds)))])
+    (apply date->string
+           (time-utc->date (time-unix->time-utc seconds))
+           maybe-format)))
+
+(define (string-blank? str)
+  "Return non-#f if STR consists only of whitespace characters."
+  (string-every char-set:whitespace str))
+
+(define (string-split-substring str substr)
+  "Split the string @var{str} into a list of substrings delimited by the
+substring @var{substr}."
+
+  (define substrlen (string-length substr))
+  (define strlen (string-length str))
+
+  (define (loop index start)
+    (cond
+     ((>= start strlen) (list ""))
+     ((not index) (list (substring str start)))
+     (else
+      (cons (substring str start index)
+            (let ((new-start (+ index substrlen)))
+              (loop (string-contains str substr new-start)
+                    new-start))))))
+
+  (cond
+   ((string-contains str substr) => (lambda (idx) (loop idx 0)))
+   (else (list str))))
+
+(define (delete-substrings str . substrings)
+  "Delete SUBSTRINGS, a list of strings, from STR."
+  (fold (lambda (substring result)
+          (string-replace-substring result substring ""))
+        str
+        substrings))
+
+(define (replace-substrings str replacement-alist)
+  "Replace substrings in STR according to REPLACEMENT-ALIST, an
+association list mapping substrings to their replacements."
+  (fold (match-lambda*
+          (((substring . replacement) str)
+           (string-replace-substring str substring replacement)))
+        str
+        replacement-alist))
+
+(define (sanitize-rdf-string str)
+  (replace-substrings
+   (string-trim-both str)
+   '(("\r" . "\\r")
+     ("\n" . "\\n")
+     ("\"" . "'")
+     ("\v" . "")
+     ("\xad" . "")
+     ("\x28" . "")
+     ("\x29" . "")
+     ("\xa0" . " ")
+     ("\x02" . "")
+     ("\x01" . ""))))
+
+(define (snake->lower-camel str)
+  (let ((char-list (string->list str)))
+    (call-with-output-string
+      (lambda (port)
+        (put-char port (char-downcase (string-ref str 0)))
+        (map (lambda (char previous-char)
+               (unless (char=? char #\_)
+                 (put-char port (if (char=? previous-char #\_)
+                                    (char-upcase char)
+                                    char))))
+             (drop char-list 1)
+             char-list)))))
+
+(define (string-capitalize-first string)
+  (string-titlecase
+   (string-downcase string) 0 1))
+
diff --git a/transform/table.scm b/transform/table.scm
new file mode 100644
index 0000000..e6254d0
--- /dev/null
+++ b/transform/table.scm
@@ -0,0 +1,28 @@
+(define-module (transform table)
+  #:use-module (srfi srfi-9 gnu)
+  #:export (make-table
+            table-name
+            table-size
+            table-columns
+            set-table-columns
+            make-column
+            column-name
+            column-type
+            column-transformed?))
+
+(define-immutable-record-type <table>
+  (make-table name size columns)
+  table?
+  (name table-name)
+  (size table-size)
+  (columns table-columns set-table-columns))
+
+(define-immutable-record-type <column>
+  (column-constructor name type transformed?)
+  column?
+  (name column-name)
+  (type column-type)
+  (transformed? column-transformed?))
+
+(define* (make-column name type #:optional transformed?)
+  (column-constructor name type transformed?))
diff --git a/transform/triples.scm b/transform/triples.scm
new file mode 100644
index 0000000..8a64c5b
--- /dev/null
+++ b/transform/triples.scm
@@ -0,0 +1,110 @@
+(define-module (transform triples)
+  #:use-module (ice-9 regex)
+  #:use-module (ice-9 match)
+  #:use-module (transform strings)
+  #:export (ontology
+            string->identifier
+            prefix
+            triple
+            scm->triples
+            annotate-field
+            string->binomial-name))
+
+(define (annotate-field field schema)
+  (let ([schema (cond ((symbol? schema)
+                       (symbol->string schema))
+                      ((string? schema) schema)
+                      (else
+                       (error "Use a string/symbol")))]
+        [string-field (if (number? field) (number->string field) field)])
+    (if (or  (and (string? string-field) (string-null? string-field))
+             (eq? string-field #f))
+        ""
+        (string->symbol
+         (format #f "~s~a" string-field schema)))))
+
+(define* (string->identifier prefix str
+                             #:optional #:key
+                             (ontology "gn:")
+                             (separator "_")
+                             (proc string-downcase))
+  "Convert STR to a turtle identifier after replacing illegal
+characters with an underscore and prefixing with gn:PREFIX."
+  (if (or (and (string? str) (string-null? str))
+          (eq? str #f))
+      ""
+      (string->symbol
+       (string-append ontology prefix separator
+                      (string-delete
+                       (lambda (c)
+                         (eq? c #\)))
+                       (string-map (lambda (c)
+                                     (case c
+                                       ((#\/ #\< #\> #\+ #\( #\space #\@) #\_)
+                                       (else c)))
+                                   (proc
+                                    (string-trim-right str #\.))))))))
+
+
+(define* (prefix prefix iri #:optional (ttl? #t))
+  (format #t
+	  (if ttl?
+	      "@prefix ~a ~a .~%"
+	      "PREFIX ~a ~a ~%")
+	  prefix iri))
+
+(define (ontology prefix value)
+  (if (and (string? value) (string-null? value))
+      ""
+      (string->symbol
+       `,(format #f "~a~a" prefix value))))
+
+(define (triple subject predicate object)
+  (unless (or (string? subject)
+              (symbol? subject))
+    (error "Triple subject not a string or symbol:"
+           (list subject predicate object)))
+  (unless (or (string? predicate)
+              (symbol? predicate))
+    (error "Triple predicate not a string or symbol:"
+           (list subject predicate object)))
+  (unless (or (string? object)
+              (symbol? object)
+              (number? object))
+    (error "Triple object not a string, symbol or number:"
+           (list subject predicate object)))
+  (let ([pattern (match object
+                   ((or (?  symbol? object)
+                        (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object))
+                    "~a ~a ~a .~%")
+                   (_ "~a ~a \"~a\" .~%"))])
+    (format #t pattern subject predicate
+            (if (symbol? object) (symbol->string object) object))))
+
+(define* (scm->triples alist id
+                       #:optional
+                       (fn triple))
+  (for-each (match-lambda
+              ((predicate . object)
+               (when (cond
+                      ((string? object)
+                       (not (string-blank? object)))
+                      (else object))
+                 (fn id predicate object))))
+            alist))
+
+(define (string->binomial-name name)
+  (let ((binomial?
+         (string-match
+          "\\\(.+\\)"
+          name)))
+    (string->identifier
+     ""
+     (if binomial?
+         (regexp-substitute/global
+          #f "[^[:space:]A-Za-z0-9:]"
+          (match:substring binomial?)
+          'pre "" 'post)
+         name)
+     #:separator ""
+     #:proc string-capitalize-first)))