diff options
Diffstat (limited to 'dump.scm')
-rwxr-xr-x | dump.scm | 366 |
1 files changed, 2 insertions, 364 deletions
diff --git a/dump.scm b/dump.scm index 7ee0462..0b2318c 100755 --- a/dump.scm +++ b/dump.scm @@ -5,15 +5,12 @@ (rnrs io ports) (srfi srfi-1) (srfi srfi-26) - (srfi srfi-171) (ice-9 match) (ice-9 regex) - (ice-9 string-fun) + (dump strings) (dump sql) - (dump table) (dump triples) - (dump utils) - (zlib)) + (dump special-forms)) ;;; GeneNetwork database connection parameters and dump path @@ -22,371 +19,12 @@ (call-with-input-file (list-ref (command-line) 1) read)) -(define (call-with-genenetwork-database proc) - (call-with-database "mysql" (string-join - (list (assq-ref %connection-settings 'sql-username) - (assq-ref %connection-settings 'sql-password) - (assq-ref %connection-settings 'sql-database) - "tcp" - (assq-ref %connection-settings 'sql-host) - (number->string - (assq-ref %connection-settings 'sql-port))) - ":") - proc)) - (define %dump-directory (list-ref (command-line) 2)) -(define (call-with-dump-file filename proc) - (let ((absolute-path (string-append %dump-directory filename))) - (display absolute-path) - (newline) - (call-with-output-file absolute-path proc))) - - -;; Dump schema annotations to org - -(define (get-tables-from-comments db) - (sql-map (match-lambda - ((("TableName" . table)) table)) - db - (select-query ((TableComments TableName)) - (TableComments)))) - -(define (dump-table-fields db table) - (format #t "* ~a~%" table) - (match (sql-find db - (select-query ((TableComments Comment)) - (TableComments) - (format #f "WHERE TableName = '~a'" table))) - ((("Comment" . comment)) - (format #t "~a~%" comment))) - (sql-for-each (lambda (row) - (match row - ((("TableField" . table-field) - ("Foreign_Key" . foreign-key) - ("Annotation" . annotation)) - (format #t "** ~a~%" (substring table-field (1+ (string-length table)))) - (unless (string-null? foreign-key) - (format #t "Foreign key to ~a~%" foreign-key)) - (unless (string-null? annotation) - (display annotation) - (newline))))) - db - (select-query ((TableFieldAnnotation TableField) - (TableFieldAnnotation Foreign_Key) - (TableFieldAnnotation Annotation)) - (TableFieldAnnotation) - (format #f "WHERE TableField LIKE '~a.%'" table))) - (newline)) - -(define (dump-schema-annotations db) - (call-with-genenetwork-database - (lambda (db) - (for-each (cut dump-table-fields db <>) - (get-tables-from-comments db))))) - ;;; Dump tables -(define (annotate-field field schema) - (let ([schema (cond ((symbol? schema) - (symbol->string schema)) - ((string? schema) schema) - (else - (error "Use a string/symbol")))] - [string-field (if (number? field) (number->string field) field)]) - (if (string-null? string-field) - "" - (string->symbol - (format #f "~s~a" string-field schema))))) - -(define (string-split-substring str substr) - "Split the string @var{str} into a list of substrings delimited by the -substring @var{substr}." - - (define substrlen (string-length substr)) - (define strlen (string-length str)) - - (define (loop index start) - (cond - ((>= start strlen) (list "")) - ((not index) (list (substring str start))) - (else - (cons (substring str start index) - (let ((new-start (+ index substrlen))) - (loop (string-contains str substr new-start) - new-start)))))) - - (cond - ((string-contains str substr) => (lambda (idx) (loop idx 0))) - (else (list str)))) - -(define (delete-substrings str . substrings) - "Delete SUBSTRINGS, a list of strings, from STR." - (fold (lambda (substring result) - (string-replace-substring result substring "")) - str - substrings)) - -(define (replace-substrings str replacement-alist) - "Replace substrings in STR according to REPLACEMENT-ALIST, an -association list mapping substrings to their replacements." - (fold (match-lambda* - (((substring . replacement) str) - (string-replace-substring str substring replacement))) - str - replacement-alist)) - -(define (sanitize-rdf-string str) - (replace-substrings - (string-trim-both str) - '(("\r" . "\\r") - ("\n" . "\\n") - ("\"" . "'") - ("\v" . "")))) - -(define (snake->lower-camel str) - (let ((char-list (string->list str))) - (call-with-output-string - (lambda (port) - (put-char port (char-downcase (string-ref str 0))) - (map (lambda (char previous-char) - (unless (char=? char #\_) - (put-char port (if (char=? previous-char #\_) - (char-upcase char) - char)))) - (drop char-list 1) - char-list))))) - -(eval-when (expand load eval) - (define (field->key x) - (translate-forms 'field - (lambda (x) - #`(key #,(symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x))))) - x)) - - (define (field->assoc-ref alist x) - "Recursively translate field references in source X to (assoc-ref -ALIST field-name) forms." - (translate-forms 'field - (lambda (x) - #`(assoc-ref #,alist - #,(symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x))))) - x)) - - (define (collect-fields x) - (map (syntax-rules (field) - ((field reference ...) - (reference ...))) - (collect-forms 'field x))) - - (define (find-clause clauses key) - "Find (KEY ...) clause among CLAUSES, a list of syntax forms." - (find (lambda (x) - (syntax-case x () - ((clause-key _ ...) - (eq? (syntax->datum #'clause-key) - key)))) - clauses)) - - (define (remove-namespace str) - "Remove RDF namespace prefix from STR." - (substring str (1+ (string-index str #\:)))) - - (define (column-id table-name column-name) - (string->identifier - "field" (string-append - ;; We downcase table names in identifiers. So, we - ;; distinguish between the user and User tables. - (if (string=? table-name "User") - "user2" table-name) - "__" column-name))) - - (define (dump-id dump-table predicate) - (symbol->string - (string->identifier - "dump" - (string-append - dump-table "_" (remove-namespace (symbol->string predicate))))))) - -(define-syntax blank-node - (syntax-rules () - "Allow having set and multiset within the context of a blank-node" - [(_ (op predicate object) ...) - (let [(node (string-join - (filter-map (match-lambda - ((pred . obj) - (match obj - ((and (? string? obj) - (? string-null? obj)) - #f) - ((? symbol? obj) - (format #f "~a ~a" pred (symbol->string obj))) - (_ - (format #f "~a ~s" pred obj))))) - (map-alist '() - (op predicate object) ...)) - " ; "))] - (if (string-null? node) - "" - (format #f "[ ~a ]" node)))])) - -(define-syntax syntax-let - (syntax-rules () - "Like match-let, but for syntax. - -(syntax-let ((pattern literals expression)) - body ...) -≡ -(syntax-case expression () - (pattern literals - body ...)) - -literals is optional. So, - -(syntax-let ((pattern expression)) - body ...) -≡ -(syntax-case expression () - (pattern - body ...)) -" - ((_ () body ...) - (begin body ...)) - ((_ ((pattern expression) - bindings ...) - body ...) - (syntax-case expression () - (pattern - (syntax-let (bindings ...) - body ...)))) - ((_ ((pattern literals expression) - bindings ...) - body ...) - (syntax-case expression literals - (pattern - (syntax-let (bindings ...) - body ...)))))) - -(define-syntax define-dump - (lambda (x) - "Define FUNCTION-NAME, a function that dumps a view of the database. - -define-dump consists of three order-agnostic clauses---tables, -schema-triples and triples---in the form shown below. - -(define-dump function-name - (tables (table ...) raw-forms ...) - (schema-triples - (subject predicate object) ...) - (triples subject - (verb predicate object) ...)) - -The tables clause specifies the database tables to be joined to -construct the view to be dumped. TABLE must be either of the form -TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME -RAW-CONDITION). TABLE-NAME must, obviously, be the name of the -table. JOIN-OPERATOR must be one of join, left-join and -inner-join. RAW-CONDITION should be the join condition as a raw -string. This is usually something like -\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to -strings to be appended to the SQL query. - -The schema-triples clause specifies the list of triples to be written -once when the dump starts. - -The triples clause specifies the triples to be dumped once for each -row in the view. All triples have a common SUBJECT. The (verb -predicate object) clauses are described below. - -VERB must either be set or multiset. For the set VERB, a single triple -(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the -result of evaluating OBJECT. For the multiset VERB, OBJECT must -evaluate to a list, and a triple (SUBJECT PREDICATE -OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT -of that list. - -The SUBJECT and OBJECT expressions in the triples clause must -reference database fields using a (field TABLE COLUMN) clause where -TABLE and COLUMN refer to the table and column of the field being -referenced. Database fields can also be referenced using (field TABLE -COLUMN ALIAS) where ALIAS is an alias for that column in the SQL -query. Specification of ALIAS is indeed a leak in the abstraction, and -must be remedied." - (syntax-case x (tables schema-triples triples) - ((_ name clauses ...) - (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables) - (find-clause #'(clauses ...) 'tables)) - (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples) - #'(schema-triples))) - ((triples subject predicate-clauses ...) (triples) - (find-clause #'(clauses ...) 'triples))) - #`(define* (name db #:optional (table-metadata? #f)) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(for-each triple - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (when table-metadata? - #,@(let ((dump-table (symbol->string (syntax->datum #'primary-table))) - (subject-type (any (lambda (predicate) - (syntax-case predicate (rdf:type) - ((_ rdf:type type) #'type) - (_ #f))) - #'(predicate-clauses ...)))) - (map (lambda (predicate-clause) - (syntax-case predicate-clause () - ((_ predicate _) - ;; Dump metadata about the dump itself. - #`(begin - (scm->triples - (map-alist '() - (set rdf:type 'gn:dump) - (set gn:createsPredicate 'predicate) - (filter-set gn:forSubjectType #,subject-type) - (multiset gn:dependsOn - '#,(map (lambda (field) - (match (syntax->datum field) - ((table-name column-name _ ...) - (datum->syntax - x (column-id (symbol->string table-name) - (symbol->string column-name)))) - (((query alias)) - (datum->syntax - x (column-id query (symbol->string alias)))))) - (collect-fields predicate-clause)))) - #,(dump-id dump-table (syntax->datum #'predicate))) - ;; Automatically create domain triples - ;; for predicates. - (when #,subject-type - (triple 'predicate 'rdfs:domain #,subject-type)))) - (_ (error "Invalid predicate clause:" predicate-clause)))) - #'(predicate-clauses ...)))) - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject))) - db - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))))) - (_ (error "Invalid define-dump syntax:" (syntax->datum x)))))) - (define binomial-name->species-id (cut string->identifier "species" <>)) |