aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xdump.scm366
-rw-r--r--dump/schema-dump.scm120
-rw-r--r--dump/special-forms.scm430
-rw-r--r--dump/sql.scm13
-rw-r--r--dump/strings.scm88
-rw-r--r--dump/triples.scm35
-rw-r--r--dump/utils.scm208
7 files changed, 678 insertions, 582 deletions
diff --git a/dump.scm b/dump.scm
index 7ee0462..0b2318c 100755
--- a/dump.scm
+++ b/dump.scm
@@ -5,15 +5,12 @@
(rnrs io ports)
(srfi srfi-1)
(srfi srfi-26)
- (srfi srfi-171)
(ice-9 match)
(ice-9 regex)
- (ice-9 string-fun)
+ (dump strings)
(dump sql)
- (dump table)
(dump triples)
- (dump utils)
- (zlib))
+ (dump special-forms))
;;; GeneNetwork database connection parameters and dump path
@@ -22,371 +19,12 @@
(call-with-input-file (list-ref (command-line) 1)
read))
-(define (call-with-genenetwork-database proc)
- (call-with-database "mysql" (string-join
- (list (assq-ref %connection-settings 'sql-username)
- (assq-ref %connection-settings 'sql-password)
- (assq-ref %connection-settings 'sql-database)
- "tcp"
- (assq-ref %connection-settings 'sql-host)
- (number->string
- (assq-ref %connection-settings 'sql-port)))
- ":")
- proc))
-
(define %dump-directory
(list-ref (command-line) 2))
-(define (call-with-dump-file filename proc)
- (let ((absolute-path (string-append %dump-directory filename)))
- (display absolute-path)
- (newline)
- (call-with-output-file absolute-path proc)))
-
-
-;; Dump schema annotations to org
-
-(define (get-tables-from-comments db)
- (sql-map (match-lambda
- ((("TableName" . table)) table))
- db
- (select-query ((TableComments TableName))
- (TableComments))))
-
-(define (dump-table-fields db table)
- (format #t "* ~a~%" table)
- (match (sql-find db
- (select-query ((TableComments Comment))
- (TableComments)
- (format #f "WHERE TableName = '~a'" table)))
- ((("Comment" . comment))
- (format #t "~a~%" comment)))
- (sql-for-each (lambda (row)
- (match row
- ((("TableField" . table-field)
- ("Foreign_Key" . foreign-key)
- ("Annotation" . annotation))
- (format #t "** ~a~%" (substring table-field (1+ (string-length table))))
- (unless (string-null? foreign-key)
- (format #t "Foreign key to ~a~%" foreign-key))
- (unless (string-null? annotation)
- (display annotation)
- (newline)))))
- db
- (select-query ((TableFieldAnnotation TableField)
- (TableFieldAnnotation Foreign_Key)
- (TableFieldAnnotation Annotation))
- (TableFieldAnnotation)
- (format #f "WHERE TableField LIKE '~a.%'" table)))
- (newline))
-
-(define (dump-schema-annotations db)
- (call-with-genenetwork-database
- (lambda (db)
- (for-each (cut dump-table-fields db <>)
- (get-tables-from-comments db)))))
-
;;; Dump tables
-(define (annotate-field field schema)
- (let ([schema (cond ((symbol? schema)
- (symbol->string schema))
- ((string? schema) schema)
- (else
- (error "Use a string/symbol")))]
- [string-field (if (number? field) (number->string field) field)])
- (if (string-null? string-field)
- ""
- (string->symbol
- (format #f "~s~a" string-field schema)))))
-
-(define (string-split-substring str substr)
- "Split the string @var{str} into a list of substrings delimited by the
-substring @var{substr}."
-
- (define substrlen (string-length substr))
- (define strlen (string-length str))
-
- (define (loop index start)
- (cond
- ((>= start strlen) (list ""))
- ((not index) (list (substring str start)))
- (else
- (cons (substring str start index)
- (let ((new-start (+ index substrlen)))
- (loop (string-contains str substr new-start)
- new-start))))))
-
- (cond
- ((string-contains str substr) => (lambda (idx) (loop idx 0)))
- (else (list str))))
-
-(define (delete-substrings str . substrings)
- "Delete SUBSTRINGS, a list of strings, from STR."
- (fold (lambda (substring result)
- (string-replace-substring result substring ""))
- str
- substrings))
-
-(define (replace-substrings str replacement-alist)
- "Replace substrings in STR according to REPLACEMENT-ALIST, an
-association list mapping substrings to their replacements."
- (fold (match-lambda*
- (((substring . replacement) str)
- (string-replace-substring str substring replacement)))
- str
- replacement-alist))
-
-(define (sanitize-rdf-string str)
- (replace-substrings
- (string-trim-both str)
- '(("\r" . "\\r")
- ("\n" . "\\n")
- ("\"" . "'")
- ("\v" . ""))))
-
-(define (snake->lower-camel str)
- (let ((char-list (string->list str)))
- (call-with-output-string
- (lambda (port)
- (put-char port (char-downcase (string-ref str 0)))
- (map (lambda (char previous-char)
- (unless (char=? char #\_)
- (put-char port (if (char=? previous-char #\_)
- (char-upcase char)
- char))))
- (drop char-list 1)
- char-list)))))
-
-(eval-when (expand load eval)
- (define (field->key x)
- (translate-forms 'field
- (lambda (x)
- #`(key #,(symbol->string
- (syntax->datum
- ((syntax-rules (field)
- ((field (query alias)) alias)
- ((field table column) column)
- ((field table column alias) alias))
- x)))))
- x))
-
- (define (field->assoc-ref alist x)
- "Recursively translate field references in source X to (assoc-ref
-ALIST field-name) forms."
- (translate-forms 'field
- (lambda (x)
- #`(assoc-ref #,alist
- #,(symbol->string
- (syntax->datum
- ((syntax-rules (field)
- ((field (query alias)) alias)
- ((field table column) column)
- ((field table column alias) alias))
- x)))))
- x))
-
- (define (collect-fields x)
- (map (syntax-rules (field)
- ((field reference ...)
- (reference ...)))
- (collect-forms 'field x)))
-
- (define (find-clause clauses key)
- "Find (KEY ...) clause among CLAUSES, a list of syntax forms."
- (find (lambda (x)
- (syntax-case x ()
- ((clause-key _ ...)
- (eq? (syntax->datum #'clause-key)
- key))))
- clauses))
-
- (define (remove-namespace str)
- "Remove RDF namespace prefix from STR."
- (substring str (1+ (string-index str #\:))))
-
- (define (column-id table-name column-name)
- (string->identifier
- "field" (string-append
- ;; We downcase table names in identifiers. So, we
- ;; distinguish between the user and User tables.
- (if (string=? table-name "User")
- "user2" table-name)
- "__" column-name)))
-
- (define (dump-id dump-table predicate)
- (symbol->string
- (string->identifier
- "dump"
- (string-append
- dump-table "_" (remove-namespace (symbol->string predicate)))))))
-
-(define-syntax blank-node
- (syntax-rules ()
- "Allow having set and multiset within the context of a blank-node"
- [(_ (op predicate object) ...)
- (let [(node (string-join
- (filter-map (match-lambda
- ((pred . obj)
- (match obj
- ((and (? string? obj)
- (? string-null? obj))
- #f)
- ((? symbol? obj)
- (format #f "~a ~a" pred (symbol->string obj)))
- (_
- (format #f "~a ~s" pred obj)))))
- (map-alist '()
- (op predicate object) ...))
- " ; "))]
- (if (string-null? node)
- ""
- (format #f "[ ~a ]" node)))]))
-
-(define-syntax syntax-let
- (syntax-rules ()
- "Like match-let, but for syntax.
-
-(syntax-let ((pattern literals expression))
- body ...)
-≡
-(syntax-case expression ()
- (pattern literals
- body ...))
-
-literals is optional. So,
-
-(syntax-let ((pattern expression))
- body ...)
-≡
-(syntax-case expression ()
- (pattern
- body ...))
-"
- ((_ () body ...)
- (begin body ...))
- ((_ ((pattern expression)
- bindings ...)
- body ...)
- (syntax-case expression ()
- (pattern
- (syntax-let (bindings ...)
- body ...))))
- ((_ ((pattern literals expression)
- bindings ...)
- body ...)
- (syntax-case expression literals
- (pattern
- (syntax-let (bindings ...)
- body ...))))))
-
-(define-syntax define-dump
- (lambda (x)
- "Define FUNCTION-NAME, a function that dumps a view of the database.
-
-define-dump consists of three order-agnostic clauses---tables,
-schema-triples and triples---in the form shown below.
-
-(define-dump function-name
- (tables (table ...) raw-forms ...)
- (schema-triples
- (subject predicate object) ...)
- (triples subject
- (verb predicate object) ...))
-
-The tables clause specifies the database tables to be joined to
-construct the view to be dumped. TABLE must be either of the form
-TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME
-RAW-CONDITION). TABLE-NAME must, obviously, be the name of the
-table. JOIN-OPERATOR must be one of join, left-join and
-inner-join. RAW-CONDITION should be the join condition as a raw
-string. This is usually something like
-\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to
-strings to be appended to the SQL query.
-
-The schema-triples clause specifies the list of triples to be written
-once when the dump starts.
-
-The triples clause specifies the triples to be dumped once for each
-row in the view. All triples have a common SUBJECT. The (verb
-predicate object) clauses are described below.
-
-VERB must either be set or multiset. For the set VERB, a single triple
-(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the
-result of evaluating OBJECT. For the multiset VERB, OBJECT must
-evaluate to a list, and a triple (SUBJECT PREDICATE
-OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT
-of that list.
-
-The SUBJECT and OBJECT expressions in the triples clause must
-reference database fields using a (field TABLE COLUMN) clause where
-TABLE and COLUMN refer to the table and column of the field being
-referenced. Database fields can also be referenced using (field TABLE
-COLUMN ALIAS) where ALIAS is an alias for that column in the SQL
-query. Specification of ALIAS is indeed a leak in the abstraction, and
-must be remedied."
- (syntax-case x (tables schema-triples triples)
- ((_ name clauses ...)
- (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables)
- (find-clause #'(clauses ...) 'tables))
- (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples)
- #'(schema-triples)))
- ((triples subject predicate-clauses ...) (triples)
- (find-clause #'(clauses ...) 'triples)))
- #`(define* (name db #:optional (table-metadata? #f))
- #,(syntax-case #'schema-triples-clause (schema-triples)
- ((schema-triples (triple-subject triple-predicate triple-object) ...)
- #`(for-each triple
- (list 'triple-subject ...)
- (list 'triple-predicate ...)
- (list 'triple-object ...)))
- (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
- (when table-metadata?
- #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table)))
- (subject-type (any (lambda (predicate)
- (syntax-case predicate (rdf:type)
- ((_ rdf:type type) #'type)
- (_ #f)))
- #'(predicate-clauses ...))))
- (map (lambda (predicate-clause)
- (syntax-case predicate-clause ()
- ((_ predicate _)
- ;; Dump metadata about the dump itself.
- #`(begin
- (scm->triples
- (map-alist '()
- (set rdf:type 'gn:dump)
- (set gn:createsPredicate 'predicate)
- (filter-set gn:forSubjectType #,subject-type)
- (multiset gn:dependsOn
- '#,(map (lambda (field)
- (match (syntax->datum field)
- ((table-name column-name _ ...)
- (datum->syntax
- x (column-id (symbol->string table-name)
- (symbol->string column-name))))
- (((query alias))
- (datum->syntax
- x (column-id query (symbol->string alias))))))
- (collect-fields predicate-clause))))
- #,(dump-id dump-table (syntax->datum #'predicate)))
- ;; Automatically create domain triples
- ;; for predicates.
- (when #,subject-type
- (triple 'predicate 'rdfs:domain #,subject-type))))
- (_ (error "Invalid predicate clause:" predicate-clause))))
- #'(predicate-clauses ...))))
- (sql-for-each (lambda (row)
- (scm->triples
- (map-alist row #,@(field->key #'(predicate-clauses ...)))
- #,(field->assoc-ref #'row #'subject)))
- db
- (select-query #,(collect-fields #'(subject predicate-clauses ...))
- (primary-table other-tables ...)
- tables-raw ...)))))
- (_ (error "Invalid define-dump syntax:" (syntax->datum x))))))
-
(define binomial-name->species-id
(cut string->identifier "species" <>))
diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm
new file mode 100644
index 0000000..cda3fd0
--- /dev/null
+++ b/dump/schema-dump.scm
@@ -0,0 +1,120 @@
+(define-module (dump schema)
+ #:use-module (ice-9 match)
+ #:use-module (dump sql))
+
+
+(define (dump-table-fields db table)
+ (format #t "* ~a~%" table)
+ (match (sql-find db
+ (select-query ((TableComments Comment))
+ (TableComments)
+ (format #f "WHERE TableName = '~a'" table)))
+ ((("Comment" . comment))
+ (format #t "~a~%" comment)))
+ (sql-for-each (lambda (row)
+ (match row
+ ((("TableField" . table-field)
+ ("Foreign_Key" . foreign-key)
+ ("Annotation" . annotation))
+ (format #t "** ~a~%" (substring table-field (1+ (string-length table))))
+ (unless (string-null? foreign-key)
+ (format #t "Foreign key to ~a~%" foreign-key))
+ (unless (string-null? annotation)
+ (display annotation)
+ (newline)))))
+ db
+ (select-query ((TableFieldAnnotation TableField)
+ (TableFieldAnnotation Foreign_Key)
+ (TableFieldAnnotation Annotation))
+ (TableFieldAnnotation)
+ (format #f "WHERE TableField LIKE '~a.%'" table)))
+ (newline))
+
+(define (get-tables-from-comments db)
+ (sql-map (match-lambda
+ ((("TableName" . table)) table))
+ db
+ (select-query ((TableComments TableName))
+ (TableComments))))
+
+(define (dump-schema-annotations db)
+ (call-with-genenetwork-database
+ (lambda (db)
+ (for-each (cut dump-table-fields db <>)
+ (get-tables-from-comments db)))))
+
+(define (tables db)
+ "Return list of all tables in DB. Each element of the returned list
+is a <table> object."
+ (map (lambda (table)
+ (set-table-columns table
+ (sql-map (lambda (row)
+ (make-column (assoc-ref row "Field")
+ (assoc-ref row "Type")))
+ db
+ (format #f "SHOW COLUMNS FROM ~a" (table-name table)))))
+ (sql-map (lambda (row)
+ (make-table (assoc-ref row "table_name")
+ ;; FIXME: This is probably correct only for
+ ;; MyISAM tables.
+ (assoc-ref row "data_length")
+ #f))
+ db
+ (select-query ((information_schema.tables table_name)
+ (information_schema.tables data_length))
+ (information_schema.tables)
+ (format #f "WHERE table_schema = '~a'"
+ (assq-ref %connection-settings 'sql-database))))))
+
+(define (dump-schema db)
+ (let ((tables (tables db)))
+ (for-each (lambda (table)
+ (let ((table-id (string->identifier
+ "table"
+ ;; We downcase table names in
+ ;; identifiers. So, we distinguish
+ ;; between the user and User tables.
+ (if (string=? (table-name table) "User")
+ "user2"
+ (table-name table)))))
+ (triple table-id 'rdf:type 'gn:sqlTable)
+ (triple table-id 'gn:name (table-name table))
+ (triple table-id 'gn:hasSize (table-size table))
+ (for-each (lambda (column)
+ (let ((column-id (column-id (table-name table)
+ (column-name column))))
+ (triple column-id 'rdf:type 'gn:sqlTableField)
+ (triple column-id 'gn:name (column-name column))
+ (triple column-id 'gn:sqlFieldType (column-type column))
+ (triple table-id 'gn:hasField column-id)))
+ (table-columns table))))
+ tables)))
+
+(define (dump-data-table db table-name data-field)
+ (let ((dump-directory (string-append %dump-directory "/" table-name))
+ (port #f)
+ (current-strain-id #f))
+ (unless (file-exists? dump-directory)
+ (mkdir dump-directory))
+ (sql-for-each (match-lambda
+ (((_ . strain-id)
+ (_ . value))
+ ;; Close file if new strain.
+ (when (and port
+ (not (= current-strain-id strain-id)))
+ (close-port port)
+ (set! port #f))
+ ;; If no file is open, open new file.
+ (unless port
+ (set! current-strain-id strain-id)
+ (let ((filename (string-append dump-directory
+ "/" (number->string strain-id))))
+ (display filename (current-error-port))
+ (newline (current-error-port))
+ (set! port (open-output-file filename))))
+ (display value port)
+ (newline port)))
+ db
+ (format #f "SELECT StrainId, ~a FROM ~a ORDER BY StrainId"
+ data-field table-name))
+ (close-port port)))
diff --git a/dump/special-forms.scm b/dump/special-forms.scm
new file mode 100644
index 0000000..9b295ba
--- /dev/null
+++ b/dump/special-forms.scm
@@ -0,0 +1,430 @@
+(define-module (dump special-forms)
+ #:use-module (srfi srfi-1)
+ #:use-module (ice-9 match)
+ #:use-module (srfi srfi-26)
+ #:use-module (dump sql)
+ #:use-module (dump table)
+ #:use-module (dump triples)
+ #:export (translate-forms
+ collect-forms
+ collect-keys
+ field->key
+ field->assoc-ref
+ collect-fields
+ find-clause
+ remove-namespace
+ column-id
+ dump-id
+ syntax-let
+ blank-node
+ define-dump))
+
+(define (key->assoc-ref alist x)
+ "Recursively translate (key k) forms in source X to (assoc-ref ALIST
+k) forms."
+ (translate-forms 'key
+ ;; (syntax-rules (key)
+ ;; ((key k) (assoc-ref alist k)))
+ (lambda (x)
+ (syntax-case x (key)
+ ((key k) #`(assoc-ref #,alist 'k))))
+ x))
+
+(define (alist-delete* alist keys)
+ "Delete entries from ALIST whose key is equal (in the sense of
+equal?) to any in KEYS, a list."
+ (remove (match-lambda
+ ((key . value)
+ (member key keys))
+ (x (error "malformed alist element" x)))
+ alist))
+
+(define (translate-forms from translator x)
+ "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and
+replace them with the return value."
+ (syntax-case x ()
+ ;; Handle translation base case.
+ ((head tail ...) (eq? (syntax->datum #'head)
+ from)
+ (translator x))
+ ;; Recurse.
+ ((head tail ...)
+ (cons (translate-forms from translator #'head)
+ (map (cut translate-forms from translator <>)
+ #'(tail ...))))
+ ;; Handle leaf base case.
+ (leaf #'leaf)))
+
+(define (collect-forms target x)
+ "Recursively collect (TARGET ...) forms in source X and return them
+as a list."
+ (syntax-case x ()
+ ;; Handle collection base case.
+ ((head tail ...) (eq? (syntax->datum #'head)
+ target)
+ (list x))
+ ;; Recurse.
+ ((head tail ...)
+ (append (collect-forms target #'head)
+ (append-map (cut collect-forms target <>) #'(tail ...))))
+ ;; Handle leaf base case.
+ (leaf (list))))
+
+(define (collect-keys x)
+ "Recursively collect (key k) forms from source X and return as a
+list of all K."
+ (map (syntax-rules (key)
+ ((key k) 'k))
+ (collect-forms 'key x)))
+
+(define-syntax map-alist
+ (lambda (x)
+ "Transform (aka map) ALIST, an association list, into another
+association list. The returned association list may contain multiple
+associations for the same key. equal? is used in all association list
+key comparisons.
+
+Syntax:
+
+(map-alist alist
+ (verb key expression) ...
+ (else=> proc))
+
+VERB must be one of set, filter-set, multiset and remove.
+
+For the set VERB, KEY is set to the result of evaluating
+EXPRESSION. Multiple set verbs on the same key will result in multiple
+associations for that key.
+
+For the filter-set VERB, KEY is set to the result of evaluating
+EXPRESSION only if that result is not #f.
+
+For the multiset VERB, EXPRESSION must return a list and KEY is
+associated multiple times once with each element of the returned list.
+
+For the remove VERB, KEY is discarded.
+
+EXPRESSIONs must reference elements of ALIST using (key k) forms where
+K is the key to be referenced from ALIST. K must not be quoted. That
+is, if K is the symbol 'bar, it must be referenced as (key bar), not
+as (key 'bar).
+
+The else=> clause is optional.
+
+If the else=> clause is present, PROC is passed all pairs of ALIST
+that are not set by an earlier (verb key expression) action. PROC must
+return #f or a pair to replace its input pair. If PROC returns #f,
+that pair is discarded.
+
+If the else=> clause is absent, all unset pairs are discarded.
+
+Example:
+
+(map-alist '((\"foo\" . 1)
+ (bar . 2)
+ (foobar . 5)
+ (fubar . 3))
+ (set spam (1+ (key \"foo\")))
+ (set ham (* 2 (key bar)))
+ (set eggs (* 3 (key bar)))
+ (set aal (+ (key \"foo\")
+ (key bar)))
+ (multiset vel (iota (* 2 (key bar))))
+ (remove foobar)
+ (else=> (match-lambda
+ ((key . value)
+ (cons key (expt 2 value))))))
+=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3)
+ (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))"
+ (syntax-case x ()
+ ((_ alist actions ...)
+ ;; TODO: Check that all actions are valid.
+ #`(let ((evaluated-alist alist))
+ (append (remove (match-lambda
+ ;; Filter out results of filter-set actions.
+ ((key . #f)
+ (member key '#,(filter-map (lambda (action)
+ (syntax-case action (filter-set)
+ ((filter-set key expression) #'key)
+ (_ #f)))
+ #'(actions ...))))
+ (_ #f))
+ ;; Do set and filter-set.
+ `#,(filter-map (lambda (action)
+ (syntax-case action (set filter-set)
+ ((set key expression)
+ #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
+ ((filter-set key expression)
+ #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
+ (_ #f)))
+ #'(actions ...)))
+ ;; Do multiset.
+ #,@(filter-map (lambda (action)
+ (syntax-case action (multiset)
+ ((multiset key expression)
+ #`(map (cut cons 'key <>)
+ #,(key->assoc-ref #'evaluated-alist #'expression)))
+ (_ #f)))
+ #'(actions ...))
+ ;; Apply else=> procedure on unspecified keys. If
+ ;; no else=> procedure is specified, delete
+ ;; unspecified keys.
+ (filter-map #,(or (any (lambda (action)
+ (syntax-case action (else=>)
+ ((else=> proc) #'proc)
+ (_ #f)))
+ #'(actions ...))
+ #'(const #f))
+ ;; The unspecified part of the input
+ ;; alist
+ (alist-delete* evaluated-alist
+ (list
+ ;; Keys that were referenced
+ #,@(append-map (lambda (action)
+ (syntax-case action ()
+ ((_ key expression)
+ (collect-keys #'expression))
+ (_ '())))
+ #'(actions ...))
+ ;; Keys that were deleted
+ #,@(filter-map (lambda (action)
+ (syntax-case action (remove)
+ ((remove key) #''key)
+ (_ #f)))
+ #'(actions ...))
+ ;; Keys that were set
+ #,@(filter-map (lambda (action)
+ (syntax-case action ()
+ ((_ key expression) #''key)
+ (_ #f)))
+ #'(actions ...)))))))))))
+
+
+
+(eval-when (expand load eval)
+ (define (field->key x)
+ (translate-forms 'field
+ (lambda (x)
+ #`(key #,(symbol->string
+ (syntax->datum
+ ((syntax-rules (field)
+ ((field (query alias)) alias)
+ ((field table column) column)
+ ((field table column alias) alias))
+ x)))))
+ x))
+
+ (define (field->assoc-ref alist x)
+ "Recursively translate field references in source X to (assoc-ref
+ALIST field-name) forms."
+ (translate-forms 'field
+ (lambda (x)
+ #`(assoc-ref #,alist
+ #,(symbol->string
+ (syntax->datum
+ ((syntax-rules (field)
+ ((field (query alias)) alias)
+ ((field table column) column)
+ ((field table column alias) alias))
+ x)))))
+ x))
+
+ (define (collect-fields x)
+ (map (syntax-rules (field)
+ ((field reference ...)
+ (reference ...)))
+ (collect-forms 'field x)))
+
+ (define (find-clause clauses key)
+ "Find (KEY ...) clause among CLAUSES, a list of syntax forms."
+ (find (lambda (x)
+ (syntax-case x ()
+ ((clause-key _ ...)
+ (eq? (syntax->datum #'clause-key)
+ key))))
+ clauses))
+
+ (define (remove-namespace str)
+ "Remove RDF namespace prefix from STR."
+ (substring str (1+ (string-index str #\:))))
+
+ (define (column-id table-name column-name)
+ (string->identifier
+ "field" (string-append
+ ;; We downcase table names in identifiers. So, we
+ ;; distinguish between the user and User tables.
+ (if (string=? table-name "User")
+ "user2" table-name)
+ "__" column-name)))
+
+ (define (dump-id dump-table predicate)
+ (symbol->string
+ (string->identifier
+ "dump"
+ (string-append
+ dump-table "_" (remove-namespace (symbol->string predicate)))))))
+
+(define-syntax blank-node
+ (syntax-rules ()
+ "Allow having set and multiset within the context of a blank-node"
+ [(_ (op predicate object) ...)
+ (let [(node (string-join
+ (filter-map (match-lambda
+ ((pred . obj)
+ (match obj
+ ((and (? string? obj)
+ (? string-null? obj))
+ #f)
+ ((? symbol? obj)
+ (format #f "~a ~a" pred (symbol->string obj)))
+ (_
+ (format #f "~a ~s" pred obj)))))
+ (map-alist '()
+ (op predicate object) ...))
+ " ; "))]
+ (if (string-null? node)
+ ""
+ (format #f "[ ~a ]" node)))]))
+
+(define-syntax syntax-let
+ (syntax-rules ()
+ "Like match-let, but for syntax.
+
+(syntax-let ((pattern literals expression))
+ body ...)
+≡
+(syntax-case expression ()
+ (pattern literals
+ body ...))
+
+literals is optional. So,
+
+(syntax-let ((pattern expression))
+ body ...)
+≡
+(syntax-case expression ()
+ (pattern
+ body ...))
+"
+ ((_ () body ...)
+ (begin body ...))
+ ((_ ((pattern expression)
+ bindings ...)
+ body ...)
+ (syntax-case expression ()
+ (pattern
+ (syntax-let (bindings ...)
+ body ...))))
+ ((_ ((pattern literals expression)
+ bindings ...)
+ body ...)
+ (syntax-case expression literals
+ (pattern
+ (syntax-let (bindings ...)
+ body ...))))))
+
+(define-syntax define-dump
+ (lambda (x)
+ "Define FUNCTION-NAME, a function that dumps a view of the database.
+
+define-dump consists of three order-agnostic clauses---tables,
+schema-triples and triples---in the form shown below.
+
+(define-dump function-name
+ (tables (table ...) raw-forms ...)
+ (schema-triples
+ (subject predicate object) ...)
+ (triples subject
+ (verb predicate object) ...))
+
+The tables clause specifies the database tables to be joined to
+construct the view to be dumped. TABLE must be either of the form
+TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME
+RAW-CONDITION). TABLE-NAME must, obviously, be the name of the
+table. JOIN-OPERATOR must be one of join, left-join and
+inner-join. RAW-CONDITION should be the join condition as a raw
+string. This is usually something like
+\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to
+strings to be appended to the SQL query.
+
+The schema-triples clause specifies the list of triples to be written
+once when the dump starts.
+
+The triples clause specifies the triples to be dumped once for each
+row in the view. All triples have a common SUBJECT. The (verb
+predicate object) clauses are described below.
+
+VERB must either be set or multiset. For the set VERB, a single triple
+(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the
+result of evaluating OBJECT. For the multiset VERB, OBJECT must
+evaluate to a list, and a triple (SUBJECT PREDICATE
+OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT
+of that list.
+
+The SUBJECT and OBJECT expressions in the triples clause must
+reference database fields using a (field TABLE COLUMN) clause where
+TABLE and COLUMN refer to the table and column of the field being
+referenced. Database fields can also be referenced using (field TABLE
+COLUMN ALIAS) where ALIAS is an alias for that column in the SQL
+query. Specification of ALIAS is indeed a leak in the abstraction, and
+must be remedied."
+ (syntax-case x (tables schema-triples triples)
+ ((_ name clauses ...)
+ (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables)
+ (find-clause #'(clauses ...) 'tables))
+ (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples)
+ #'(schema-triples)))
+ ((triples subject predicate-clauses ...) (triples)
+ (find-clause #'(clauses ...) 'triples)))
+ #`(define* (name db #:optional (table-metadata? #f))
+ #,(syntax-case #'schema-triples-clause (schema-triples)
+ ((schema-triples (triple-subject triple-predicate triple-object) ...)
+ #`(for-each triple
+ (list 'triple-subject ...)
+ (list 'triple-predicate ...)
+ (list 'triple-object ...)))
+ (_ (error "Invalid schema triples clause:" #'schema-triples-clause)))
+ (when table-metadata?
+ #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table)))
+ (subject-type (any (lambda (predicate)
+ (syntax-case predicate (rdf:type)
+ ((_ rdf:type type) #'type)
+ (_ #f)))
+ #'(predicate-clauses ...))))
+ (map (lambda (predicate-clause)
+ (syntax-case predicate-clause ()
+ ((_ predicate _)
+ ;; Dump metadata about the dump itself.
+ #`(begin
+ (scm->triples
+ (map-alist '()
+ (set rdf:type 'gn:dump)
+ (set gn:createsPredicate 'predicate)
+ (filter-set gn:forSubjectType #,subject-type)
+ (multiset gn:dependsOn
+ '#,(map (lambda (field)
+ (match (syntax->datum field)
+ ((table-name column-name _ ...)
+ (datum->syntax
+ x (column-id (symbol->string table-name)
+ (symbol->string column-name))))
+ (((query alias))
+ (datum->syntax
+ x (column-id query (symbol->string alias))))))
+ (collect-fields predicate-clause))))
+ #,(dump-id dump-table (syntax->datum #'predicate)))
+ ;; Automatically create domain triples
+ ;; for predicates.
+ (when #,subject-type
+ (triple 'predicate 'rdfs:domain #,subject-type))))
+ (_ (error "Invalid predicate clause:" predicate-clause))))
+ #'(predicate-clauses ...))))
+ (sql-for-each (lambda (row)
+ (scm->triples
+ (map-alist row #,@(field->key #'(predicate-clauses ...)))
+ #,(field->assoc-ref #'row #'subject)))
+ db
+ (select-query #,(collect-fields #'(subject predicate-clauses ...))
+ (primary-table other-tables ...)
+ tables-raw ...)))))
+ (_ (error "Invalid define-dump syntax:" (syntax->datum x))))))
diff --git a/dump/sql.scm b/dump/sql.scm
index 76c9e0e..9e6b21a 100644
--- a/dump/sql.scm
+++ b/dump/sql.scm
@@ -10,6 +10,7 @@
#:use-module (dbi dbi)
#:export (select-query
call-with-database
+ call-with-target-database
sql-exec
sql-fold
sql-map
@@ -99,3 +100,15 @@
(define (sql-find db statement)
(sql-exec db statement)
(dbi-get_row db))
+
+(define (call-with-target-database connection-settings proc)
+ (call-with-database "mysql" (string-join
+ (list (assq-ref connection-settings 'sql-username)
+ (assq-ref connection-settings 'sql-password)
+ (assq-ref connection-settings 'sql-database)
+ "tcp"
+ (assq-ref connection-settings 'sql-host)
+ (number->string
+ (assq-ref connection-settings 'sql-port)))
+ ":")
+ proc))
diff --git a/dump/strings.scm b/dump/strings.scm
new file mode 100644
index 0000000..c67d7fc
--- /dev/null
+++ b/dump/strings.scm
@@ -0,0 +1,88 @@
+(define-module (dump strings)
+ #:use-module (srfi srfi-1)
+ #:use-module (srfi srfi-19)
+ #:use-module (ice-9 match)
+ #:use-module (ice-9 string-fun)
+ #:export (string-blank?
+ time-unix->string
+ string-blank?
+ string-split-substring
+ delete-substrings
+ replace-substrings
+ sanitize-rdf-string
+ snake->lower-camel))
+
+(define (time-unix->string seconds . maybe-format)
+ "Given an integer saying the number of seconds since the Unix
+epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable
+date and time-string, possible using the MAYBE-FORMAT."
+ (letrec ([time-unix->time-utc
+ (lambda (seconds)
+ (add-duration
+ (date->time-utc (make-date 0 0 0 0 1 1 1970 0))
+ (make-time time-duration 0 seconds)))])
+ (apply date->string
+ (time-utc->date (time-unix->time-utc seconds))
+ maybe-format)))
+
+(define (string-blank? str)
+ "Return non-#f if STR consists only of whitespace characters."
+ (string-every char-set:whitespace str))
+
+(define (string-split-substring str substr)
+ "Split the string @var{str} into a list of substrings delimited by the
+substring @var{substr}."
+
+ (define substrlen (string-length substr))
+ (define strlen (string-length str))
+
+ (define (loop index start)
+ (cond
+ ((>= start strlen) (list ""))
+ ((not index) (list (substring str start)))
+ (else
+ (cons (substring str start index)
+ (let ((new-start (+ index substrlen)))
+ (loop (string-contains str substr new-start)
+ new-start))))))
+
+ (cond
+ ((string-contains str substr) => (lambda (idx) (loop idx 0)))
+ (else (list str))))
+
+(define (delete-substrings str . substrings)
+ "Delete SUBSTRINGS, a list of strings, from STR."
+ (fold (lambda (substring result)
+ (string-replace-substring result substring ""))
+ str
+ substrings))
+
+(define (replace-substrings str replacement-alist)
+ "Replace substrings in STR according to REPLACEMENT-ALIST, an
+association list mapping substrings to their replacements."
+ (fold (match-lambda*
+ (((substring . replacement) str)
+ (string-replace-substring str substring replacement)))
+ str
+ replacement-alist))
+
+(define (sanitize-rdf-string str)
+ (replace-substrings
+ (string-trim-both str)
+ '(("\r" . "\\r")
+ ("\n" . "\\n")
+ ("\"" . "'")
+ ("\v" . ""))))
+
+(define (snake->lower-camel str)
+ (let ((char-list (string->list str)))
+ (call-with-output-string
+ (lambda (port)
+ (put-char port (char-downcase (string-ref str 0)))
+ (map (lambda (char previous-char)
+ (unless (char=? char #\_)
+ (put-char port (if (char=? previous-char #\_)
+ (char-upcase char)
+ char))))
+ (drop char-list 1)
+ char-list)))))
diff --git a/dump/triples.scm b/dump/triples.scm
index 32cd243..a0f8213 100644
--- a/dump/triples.scm
+++ b/dump/triples.scm
@@ -1,24 +1,39 @@
(define-module (dump triples)
#:use-module (ice-9 regex)
#:use-module (ice-9 match)
- #:use-module (dump utils)
+ #:use-module (dump strings)
#:export (ontology
string->identifier
prefix
triple
- scm->triples))
+ scm->triples
+ annotate-field))
+
+(define (annotate-field field schema)
+ (let ([schema (cond ((symbol? schema)
+ (symbol->string schema))
+ ((string? schema) schema)
+ (else
+ (error "Use a string/symbol")))]
+ [string-field (if (number? field) (number->string field) field)])
+ (if (string-null? string-field)
+ ""
+ (string->symbol
+ (format #f "~s~a" string-field schema)))))
(define (string->identifier prefix str)
"Convert STR to a turtle identifier after replacing illegal
characters with an underscore and prefixing with gn:PREFIX."
- (string->symbol
- (string-append "gn:" prefix "_"
- (string-map (lambda (c)
- (case c
- ((#\/ #\< #\> #\+ #\( #\) #\space #\@) #\_)
- (else c)))
- (string-downcase
- (string-trim-right str #\.))))))
+ (if (string-null? str)
+ ""
+ (string->symbol
+ (string-append "gn:" prefix "_"
+ (string-map (lambda (c)
+ (case c
+ ((#\/ #\< #\> #\+ #\( #\) #\space #\@) #\_)
+ (else c)))
+ (string-downcase
+ (string-trim-right str #\.)))))))
(define (prefix prefix iri)
(format #t "@prefix ~a ~a .~%" prefix iri))
diff --git a/dump/utils.scm b/dump/utils.scm
deleted file mode 100644
index 8544eec..0000000
--- a/dump/utils.scm
+++ /dev/null
@@ -1,208 +0,0 @@
-(define-module (dump utils)
- #:use-module (srfi srfi-1)
- #:use-module (srfi srfi-19)
- #:use-module (srfi srfi-26)
- #:use-module (ice-9 match)
- #:export (string-blank?
- translate-forms
- collect-forms
- map-alist
- time-unix->string))
-
-
-(define (time-unix->string seconds . maybe-format)
- "Given an integer saying the number of seconds since the Unix
-epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable
-date and time-string, possible using the MAYBE-FORMAT."
- (letrec ([time-unix->time-utc
- (lambda (seconds)
- (add-duration
- (date->time-utc (make-date 0 0 0 0 1 1 1970 0))
- (make-time time-duration 0 seconds)))])
- (apply date->string
- (time-utc->date (time-unix->time-utc seconds))
- maybe-format)))
-
-(define (string-blank? str)
- "Return non-#f if STR consists only of whitespace characters."
- (string-every char-set:whitespace str))
-
-(define (translate-forms from translator x)
- "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and
-replace them with the return value."
- (syntax-case x ()
- ;; Handle translation base case.
- ((head tail ...) (eq? (syntax->datum #'head)
- from)
- (translator x))
- ;; Recurse.
- ((head tail ...)
- (cons (translate-forms from translator #'head)
- (map (cut translate-forms from translator <>)
- #'(tail ...))))
- ;; Handle leaf base case.
- (leaf #'leaf)))
-
-(define (key->assoc-ref alist x)
- "Recursively translate (key k) forms in source X to (assoc-ref ALIST
-k) forms."
- (translate-forms 'key
- ;; (syntax-rules (key)
- ;; ((key k) (assoc-ref alist k)))
- (lambda (x)
- (syntax-case x (key)
- ((key k) #`(assoc-ref #,alist 'k))))
- x))
-
-(define (collect-forms target x)
- "Recursively collect (TARGET ...) forms in source X and return them
-as a list."
- (syntax-case x ()
- ;; Handle collection base case.
- ((head tail ...) (eq? (syntax->datum #'head)
- target)
- (list x))
- ;; Recurse.
- ((head tail ...)
- (append (collect-forms target #'head)
- (append-map (cut collect-forms target <>) #'(tail ...))))
- ;; Handle leaf base case.
- (leaf (list))))
-
-(define (collect-keys x)
- "Recursively collect (key k) forms from source X and return as a
-list of all K."
- (map (syntax-rules (key)
- ((key k) 'k))
- (collect-forms 'key x)))
-
-(define (alist-delete* alist keys)
- "Delete entries from ALIST whose key is equal (in the sense of
-equal?) to any in KEYS, a list."
- (remove (match-lambda
- ((key . value)
- (member key keys))
- (x (error "malformed alist element" x)))
- alist))
-
-(define-syntax map-alist
- (lambda (x)
- "Transform (aka map) ALIST, an association list, into another
-association list. The returned association list may contain multiple
-associations for the same key. equal? is used in all association list
-key comparisons.
-
-Syntax:
-
-(map-alist alist
- (verb key expression) ...
- (else=> proc))
-
-VERB must be one of set, filter-set, multiset and remove.
-
-For the set VERB, KEY is set to the result of evaluating
-EXPRESSION. Multiple set verbs on the same key will result in multiple
-associations for that key.
-
-For the filter-set VERB, KEY is set to the result of evaluating
-EXPRESSION only if that result is not #f.
-
-For the multiset VERB, EXPRESSION must return a list and KEY is
-associated multiple times once with each element of the returned list.
-
-For the remove VERB, KEY is discarded.
-
-EXPRESSIONs must reference elements of ALIST using (key k) forms where
-K is the key to be referenced from ALIST. K must not be quoted. That
-is, if K is the symbol 'bar, it must be referenced as (key bar), not
-as (key 'bar).
-
-The else=> clause is optional.
-
-If the else=> clause is present, PROC is passed all pairs of ALIST
-that are not set by an earlier (verb key expression) action. PROC must
-return #f or a pair to replace its input pair. If PROC returns #f,
-that pair is discarded.
-
-If the else=> clause is absent, all unset pairs are discarded.
-
-Example:
-
-(map-alist '((\"foo\" . 1)
- (bar . 2)
- (foobar . 5)
- (fubar . 3))
- (set spam (1+ (key \"foo\")))
- (set ham (* 2 (key bar)))
- (set eggs (* 3 (key bar)))
- (set aal (+ (key \"foo\")
- (key bar)))
- (multiset vel (iota (* 2 (key bar))))
- (remove foobar)
- (else=> (match-lambda
- ((key . value)
- (cons key (expt 2 value))))))
-=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3)
- (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))"
- (syntax-case x ()
- ((_ alist actions ...)
- ;; TODO: Check that all actions are valid.
- #`(let ((evaluated-alist alist))
- (append (remove (match-lambda
- ;; Filter out results of filter-set actions.
- ((key . #f)
- (member key '#,(filter-map (lambda (action)
- (syntax-case action (filter-set)
- ((filter-set key expression) #'key)
- (_ #f)))
- #'(actions ...))))
- (_ #f))
- ;; Do set and filter-set.
- `#,(filter-map (lambda (action)
- (syntax-case action (set filter-set)
- ((set key expression)
- #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
- ((filter-set key expression)
- #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression)))
- (_ #f)))
- #'(actions ...)))
- ;; Do multiset.
- #,@(filter-map (lambda (action)
- (syntax-case action (multiset)
- ((multiset key expression)
- #`(map (cut cons 'key <>)
- #,(key->assoc-ref #'evaluated-alist #'expression)))
- (_ #f)))
- #'(actions ...))
- ;; Apply else=> procedure on unspecified keys. If
- ;; no else=> procedure is specified, delete
- ;; unspecified keys.
- (filter-map #,(or (any (lambda (action)
- (syntax-case action (else=>)
- ((else=> proc) #'proc)
- (_ #f)))
- #'(actions ...))
- #'(const #f))
- ;; The unspecified part of the input
- ;; alist
- (alist-delete* evaluated-alist
- (list
- ;; Keys that were referenced
- #,@(append-map (lambda (action)
- (syntax-case action ()
- ((_ key expression)
- (collect-keys #'expression))
- (_ '())))
- #'(actions ...))
- ;; Keys that were deleted
- #,@(filter-map (lambda (action)
- (syntax-case action (remove)
- ((remove key) #''key)
- (_ #f)))
- #'(actions ...))
- ;; Keys that were set
- #,@(filter-map (lambda (action)
- (syntax-case action ()
- ((_ key expression) #''key)
- (_ #f)))
- #'(actions ...)))))))))))