From 8e1e4cceab516afab46ccced63ca9edab663ca11 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Mon, 21 Aug 2023 15:03:20 +0300 Subject: Rename dump -> transform Signed-off-by: Munyoki Kilyungi --- dump/schema-dump.scm | 125 --------- dump/special-forms.scm | 599 ---------------------------------------- dump/sql.scm | 114 -------- dump/string-similarity.scm | 37 --- dump/strings.scm | 101 ------- dump/table.scm | 28 -- dump/triples.scm | 110 -------- examples/dataset-metadata.scm | 8 +- examples/generif.scm | 10 +- examples/genotype.scm | 10 +- examples/phenotype.scm | 8 +- examples/probeset-data.scm | 8 +- examples/probeset.scm | 8 +- examples/publication.scm | 8 +- examples/species-metadata.scm | 8 +- examples/tissue.scm | 8 +- json-dump.scm | 2 +- tests.scm | 16 +- transform/schema-dump.scm | 125 +++++++++ transform/special-forms.scm | 599 ++++++++++++++++++++++++++++++++++++++++ transform/sql.scm | 114 ++++++++ transform/string-similarity.scm | 37 +++ transform/strings.scm | 101 +++++++ transform/table.scm | 28 ++ transform/triples.scm | 110 ++++++++ visualize-schema.scm | 16 +- 26 files changed, 1169 insertions(+), 1169 deletions(-) delete mode 100644 dump/schema-dump.scm delete mode 100644 dump/special-forms.scm delete mode 100644 dump/sql.scm delete mode 100644 dump/string-similarity.scm delete mode 100644 dump/strings.scm delete mode 100644 dump/table.scm delete mode 100644 dump/triples.scm create mode 100644 transform/schema-dump.scm create mode 100644 transform/special-forms.scm create mode 100644 transform/sql.scm create mode 100644 transform/string-similarity.scm create mode 100644 transform/strings.scm create mode 100644 transform/table.scm create mode 100644 transform/triples.scm diff --git a/dump/schema-dump.scm b/dump/schema-dump.scm deleted file mode 100644 index 525bf65..0000000 --- a/dump/schema-dump.scm +++ /dev/null @@ -1,125 +0,0 @@ -(define-module (dump schema) - #:use-module (ice-9 match) - #:use-module (ice-9 srfi-26) - #:use-module (dump sql) - #:use-module (dump triples) - #:use-module (dump strings) - #:use-module (dump table)) - - -(define (table-fields db table) - (format #t "* ~a~%" table) - (match (sql-find db - (select-query ((TableComments Comment)) - (TableComments) - (format #f "WHERE TableName = '~a'" table))) - ((("Comment" . comment)) - (format #t "~a~%" comment))) - (sql-for-each (lambda (row) - (match row - ((("TableField" . table-field) - ("Foreign_Key" . foreign-key) - ("Annotation" . annotation)) - (format #t "** ~a~%" (substring table-field (1+ (string-length table)))) - (unless (string-null? foreign-key) - (format #t "Foreign key to ~a~%" foreign-key)) - (unless (string-null? annotation) - (display annotation) - (newline))))) - db - (select-query ((TableFieldAnnotation TableField) - (TableFieldAnnotation Foreign_Key) - (TableFieldAnnotation Annotation)) - (TableFieldAnnotation) - (format #f "WHERE TableField LIKE '~a.%'" table))) - (newline)) - -(define (get-tables-from-comments db) - (sql-map (match-lambda - ((("TableName" . table)) table)) - db - (select-query ((TableComments TableName)) - (TableComments)))) - -(define (schema-annotations db) - (call-with-target-database - (lambda (db) - (for-each (cut table-fields db <>) - (get-tables-from-comments db))))) - -(define (tables db) - "Return list of all tables in DB. Each element of the returned list -is a object." - (map (lambda (table) - (set-table-columns table - (sql-map (lambda (row) - (make-column (assoc-ref row "Field") - (assoc-ref row "Type"))) - db - (format #f "SHOW COLUMNS FROM ~a" (table-name table))))) - (sql-map (lambda (row) - (make-table (assoc-ref row "table_name") - ;; FIXME: This is probably correct only for - ;; MyISAM tables. - (assoc-ref row "data_length") - #f)) - db - (select-query ((information_schema.tables table_name) - (information_schema.tables data_length)) - (information_schema.tables) - (format #f "WHERE table_schema = '~a'" - (assq-ref %connection-settings 'sql-database)))))) - -(define (schema db) - (let ((tables (tables db))) - (for-each (lambda (table) - (let ((table-id (string->identifier - "table" - ;; We downcase table names in - ;; identifiers. So, we distinguish - ;; between the user and User tables. - (if (string=? (table-name table) "User") - "user2" - (table-name table))))) - (triple table-id 'rdf:type 'gn:sqlTable) - (triple table-id 'gn:name (table-name table)) - (triple table-id 'gn:hasSize (table-size table)) - (for-each (lambda (column) - (let ((column-id (column-id (table-name table) - (column-name column)))) - (triple column-id 'rdf:type 'gn:sqlTableField) - (triple column-id 'gn:name (column-name column)) - (triple column-id 'gn:sqlFieldType (column-type column)) - (triple table-id 'gn:hasField column-id))) - (table-columns table)))) - tables))) - -(define* (data-table db table-name data-field - #:optional (default-directory "")) - (let ((directory (string-append default-directory "/" table-name)) - (port #f) - (current-strain-id #f)) - (unless (file-exists? directory) - (mkdir directory)) - (sql-for-each (match-lambda - (((_ . strain-id) - (_ . value)) - ;; Close file if new strain. - (when (and port - (not (= current-strain-id strain-id))) - (close-port port) - (set! port #f)) - ;; If no file is open, open new file. - (unless port - (set! current-strain-id strain-id) - (let ((filename (string-append directory - "/" (number->string strain-id)))) - (display filename (current-error-port)) - (newline (current-error-port)) - (set! port (open-output-file filename)))) - (display value port) - (newline port))) - db - (format #f "SELECT StrainId, ~a FROM ~a ORDER BY StrainId" - data-field table-name)) - (close-port port))) diff --git a/dump/special-forms.scm b/dump/special-forms.scm deleted file mode 100644 index 2650580..0000000 --- a/dump/special-forms.scm +++ /dev/null @@ -1,599 +0,0 @@ -(define-module (dump special-forms) - #:use-module (srfi srfi-1) - #:use-module (ice-9 match) - #:use-module (srfi srfi-26) - #:use-module (dump sql) - #:use-module (dump table) - #:use-module (dump triples) - #:export (translate-forms - collect-forms - collect-keys - field->key - field->assoc-ref - collect-fields - find-clause - remove-namespace - column-id - id - syntax-let - blank-node - map-alist - with-documentation - define-transformer)) - -(define (key->assoc-ref alist x) - "Recursively translate (key k) forms in source X to (assoc-ref ALIST -k) forms." - (translate-forms 'key - ;; (syntax-rules (key) - ;; ((key k) (assoc-ref alist k))) - (lambda (x) - (syntax-case x (key) - ((key k) #`(assoc-ref #,alist 'k)))) - x)) - -(define (alist-delete* alist keys) - "Delete entries from ALIST whose key is equal (in the sense of -equal?) to any in KEYS, a list." - (remove (match-lambda - ((key . value) - (member key keys)) - (x (error "malformed alist element" x))) - alist)) - -(define (translate-forms from translator x) - "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and -replace them with the return value." - (syntax-case x () - ;; Handle translation base case. - ((head tail ...) (eq? (syntax->datum #'head) - from) - (translator x)) - ;; Recurse. - ((head tail ...) - (cons (translate-forms from translator #'head) - (map (cut translate-forms from translator <>) - #'(tail ...)))) - ;; Handle leaf base case. - (leaf #'leaf))) - -(define (collect-forms target x) - "Recursively collect (TARGET ...) forms in source X and return them -as a list." - (syntax-case x () - ;; Handle collection base case. - ((head tail ...) (eq? (syntax->datum #'head) - target) - (list x)) - ;; Recurse. - ((head tail ...) - (append (collect-forms target #'head) - (append-map (cut collect-forms target <>) #'(tail ...)))) - ;; Handle leaf base case. - (leaf (list)))) - -(define (collect-keys x) - "Recursively collect (key k) forms from source X and return as a -list of all K." - (map (syntax-rules (key) - ((key k) 'k)) - (collect-forms 'key x))) - -(define-syntax map-alist - (lambda (x) - "Transform (aka map) ALIST, an association list, into another -association list. The returned association list may contain multiple -associations for the same key. equal? is used in all association list -key comparisons. - -Syntax: - -(map-alist alist - (verb key expression) ... - (else=> proc)) - -VERB must be one of set, filter-set, multiset and remove. - -For the set VERB, KEY is set to the result of evaluating -EXPRESSION. Multiple set verbs on the same key will result in multiple -associations for that key. - -For the filter-set VERB, KEY is set to the result of evaluating -EXPRESSION only if that result is not #f. - -For the multiset VERB, EXPRESSION must return a list and KEY is -associated multiple times once with each element of the returned list. - -For the remove VERB, KEY is discarded. - -EXPRESSIONs must reference elements of ALIST using (key k) forms where -K is the key to be referenced from ALIST. K must not be quoted. That -is, if K is the symbol 'bar, it must be referenced as (key bar), not -as (key 'bar). - -The else=> clause is optional. - -If the else=> clause is present, PROC is passed all pairs of ALIST -that are not set by an earlier (verb key expression) action. PROC must -return #f or a pair to replace its input pair. If PROC returns #f, -that pair is discarded. - -If the else=> clause is absent, all unset pairs are discarded. - -Example: - -(map-alist '((\"foo\" . 1) - (bar . 2) - (foobar . 5) - (fubar . 3)) - (set spam (1+ (key \"foo\"))) - (set ham (* 2 (key bar))) - (set eggs (* 3 (key bar))) - (set aal (+ (key \"foo\") - (key bar))) - (multiset vel (iota (* 2 (key bar)))) - (remove foobar) - (else=> (match-lambda - ((key . value) - (cons key (expt 2 value)))))) -=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3) - (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))" - (syntax-case x () - ((_ alist actions ...) - ;; TODO: Check that all actions are valid. - #`(let ((evaluated-alist alist)) - (append (remove (match-lambda - ;; Filter out results of filter-set actions. - ((key . #f) - (member key '#,(filter-map (lambda (action) - (syntax-case action (filter-set) - ((filter-set key expression) #'key) - (_ #f))) - #'(actions ...)))) - (_ #f)) - ;; Do set and filter-set. - `#,(filter-map (lambda (action) - (syntax-case action (set filter-set) - ((set key expression) - #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression))) - ((filter-set key expression) - #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression))) - (_ #f))) - #'(actions ...))) - ;; Do multiset. - #,@(filter-map (lambda (action) - (syntax-case action (multiset) - ((multiset key expression) - #`(map (cut cons 'key <>) - #,(key->assoc-ref #'evaluated-alist #'expression))) - (_ #f))) - #'(actions ...)) - ;; Apply else=> procedure on unspecified keys. If - ;; no else=> procedure is specified, delete - ;; unspecified keys. - (filter-map #,(or (any (lambda (action) - (syntax-case action (else=>) - ((else=> proc) #'proc) - (_ #f))) - #'(actions ...)) - #'(const #f)) - ;; The unspecified part of the input - ;; alist - (alist-delete* evaluated-alist - (list - ;; Keys that were referenced - #,@(append-map (lambda (action) - (syntax-case action () - ((_ key expression) - (collect-keys #'expression)) - (_ '()))) - #'(actions ...)) - ;; Keys that were deleted - #,@(filter-map (lambda (action) - (syntax-case action (remove) - ((remove key) #''key) - (_ #f))) - #'(actions ...)) - ;; Keys that were set - #,@(filter-map (lambda (action) - (syntax-case action () - ((_ key expression) #''key) - (_ #f))) - #'(actions ...))))))))))) - - - -(eval-when (expand load ) - (define (field->datum x) - (translate-forms - 'field - (lambda (x) - (syntax-case x (field) - ((field (query alias)) - #`(format #f "~a" (syntax->datum #'alias))) - ((field table column) - #`(format #f "~a(~a)" - (syntax->datum #'table) - (syntax->datum #'column))) - ((field table column alias) - #`(format #f "~a(~a)" - (syntax->datum #'table) - (syntax->datum #'alias))))) - x)) - - (define (field->key x) - (translate-forms 'field - (lambda (x) - #`(key #,(symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x))))) - x)) - - (define (field->assoc-ref alist x) - "Recursively translate field references in source X to (assoc-ref -ALIST field-name) forms." - (translate-forms 'field - (lambda (x) - #`(assoc-ref #,alist - #,(symbol->string - (syntax->datum - ((syntax-rules (field) - ((field (query alias)) alias) - ((field table column) column) - ((field table column alias) alias)) - x))))) - x)) - - (define (collect-fields x) - (map (syntax-rules (field) - ((field reference ...) - (reference ...))) - (collect-forms 'field x))) - - (define (find-clause clauses key) - "Find (KEY ...) clause among CLAUSES, a list of syntax forms." - (find (lambda (x) - (syntax-case x () - ((clause-key _ ...) - (eq? (syntax->datum #'clause-key) - key)))) - clauses)) - - (define (remove-namespace str) - "Remove RDF namespace prefix from STR." - (substring str (1+ (string-index str #\:)))) - - (define (column-id table-name column-name) - (string->identifier - "field" (string-append - ;; We downcase table names in identifiers. So, we - ;; distinguish between the user and User tables. - (if (string=? table-name "User") - "user2" table-name) - "__" column-name))) - - (define (id table predicate) - (symbol->string - (string->identifier - "dump" - (string-append - table "_" (remove-namespace (symbol->string predicate))))))) - -(define-syntax blank-node - (syntax-rules () - "Allow having set and multiset within the context of a blank-node" - [(_ (op predicate object) ...) - (let [(node (string-join - (filter-map (match-lambda - ((pred . obj) - (match obj - ((and (? string? obj) - (? string-null? obj)) - #f) - ((? symbol? obj) - (format #f "~a ~a" pred (symbol->string obj))) - (_ - (format #f "~a ~s" pred obj))))) - (map-alist '() - (op predicate object) ...)) - " ; "))] - (if (string-null? node) - "" - (format #f "[ ~a ]" node)))])) - -(define-syntax syntax-let - (syntax-rules () - "Like match-let, but for syntax. - -(syntax-let ((pattern literals expression)) - body ...) -≡ -(syntax-case expression () - (pattern literals - body ...)) - -literals is optional. So, - -(syntax-let ((pattern expression)) - body ...) -≡ -(syntax-case expression () - (pattern - body ...)) -" - ((_ () body ...) - (begin body ...)) - ((_ ((pattern expression) - bindings ...) - body ...) - (syntax-case expression () - (pattern - (syntax-let (bindings ...) - body ...)))) - ((_ ((pattern literals expression) - bindings ...) - body ...) - (syntax-case expression literals - (pattern - (syntax-let (bindings ...) - body ...)))))) - -(define-syntax define-transformer - (lambda (x) - "Define FUNCTION-NAME, a function that dumps a view of the database. - -define-transformer consists of three order-agnostic clauses---tables, -schema-triples and triples---in the form shown below. - -(define-transformer function-name - (tables (table ...) raw-forms ...) - (schema-triples - (subject predicate object) ...) - (triples subject - (verb predicate object) ...)) - -The tables clause specifies the database tables to be joined to -construct the view to be dumped. TABLE must be either of the form -TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME -RAW-CONDITION). TABLE-NAME must, obviously, be the name of the -table. JOIN-OPERATOR must be one of join, left-join and -inner-join. RAW-CONDITION should be the join condition as a raw -string. This is usually something like -\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to -strings to be appended to the SQL query. - -The schema-triples clause specifies the list of triples to be written -once when the dump starts. - -The triples clause specifies the triples to be dumped once for each -row in the view. All triples have a common SUBJECT. The (verb -predicate object) clauses are described below. - -VERB must either be set or multiset. For the set VERB, a single triple -(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the -result of evaluating OBJECT. For the multiset VERB, OBJECT must -evaluate to a list, and a triple (SUBJECT PREDICATE -OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT -of that list. - -The SUBJECT and OBJECT expressions in the triples clause must -reference database fields using a (field TABLE COLUMN) clause where -TABLE and COLUMN refer to the table and column of the field being -referenced. Database fields can also be referenced using (field TABLE -COLUMN ALIAS) where ALIAS is an alias for that column in the SQL -query. Specification of ALIAS is indeed a leak in the abstraction, and -must be remedied." - (syntax-case x (tables schema-triples triples) - ((_ name clauses ...) - (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables) - (find-clause #'(clauses ...) 'tables)) - (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples) - #'(schema-triples))) - ((triples subject predicate-clauses ...) (triples) - (find-clause #'(clauses ...) 'triples))) - #`(define* (name db #:key - (metadata? #f) - (data? #t) - (documentation? #f)) - (when metadata? - #,@(let ((table (symbol->string (syntax->datum #'primary-table))) - (subject-type (any (lambda (predicate) - (syntax-case predicate (rdf:type) - ((_ rdf:type type) #'type) - (_ #f))) - #'(predicate-clauses ...)))) - (map (lambda (predicate-clause) - (syntax-case predicate-clause () - ((_ predicate _) - ;; Dump metadata about the dump itself. - #`(begin - (scm->triples - (map-alist '() - (set rdf:type 'gn-id:dump) - (set gn-term:createsPredicate 'predicate) - (filter-set gn-term:forSubjectType #,subject-type) - (multiset gn-term:dependsOn - '#,(map (lambda (field) - (match (syntax->datum field) - ((table-name column-name _ ...) - (datum->syntax - x (column-id (symbol->string table-name) - (symbol->string column-name)))) - (((query alias)) - (datum->syntax - x (column-id query (symbol->string alias)))))) - (collect-fields predicate-clause)))) - #,(id table (syntax->datum #'predicate))) - ;; Automatically create domain triples - ;; for predicates. - (when #,subject-type - (triple 'predicate 'rdfs:domain #,subject-type)))) - (_ (error "Invalid predicate clause:" predicate-clause)))) - #'(predicate-clauses ...)))) - (when documentation? - (format #t "~%## '~a'~%~%" (syntax->datum #'name)) - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(begin - (when (not (list 'triple-subject ...)) - (format #t "## Schema Triples:~%~%```text~%") - (for-each (lambda (s p o) - (format #t "~a -> ~a -> ~a~%" s p o)) - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...)) - (format #t "```")))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (format #t "## Generated Triples: - -The following SQL query was executed: - -```sql -~a -``` - -The above query results to triples that have the form: - -```text -" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...)) - (for-each (match-lambda - ((predicate . object) - (format #t "~a -> ~a -> ~a ~%" - (if (symbol? #,(field->datum #'subject)) - (symbol->string #,(field->datum #'subject)) - #,(field->datum #'subject)) - predicate - (if (symbol? object) - (symbol->string object) - object)))) - (map-alist - '() - #,@(field->datum #'(predicate-clauses ...)))) - (format #t "```~%Here's an example query:~%~%```sparql~%") - (documentation?) - (newline) - (let* ((result - (map-alist (sql-find - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - #,@(field->key #'(predicate-clauses ...)))) - (first-n (list-head result - (let ((n - (min 4 (truncate - (+ (exact-integer-sqrt (length result)) 1))))) - (if (< n 3) - (length result) - n))))) - (format #t "SELECT * WHERE { ~%") - (for-each (match-lambda - ((predicate . object) - (match object - ((or (? symbol? object) - (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) - (format #t " ?s ~a ~a .~%" predicate object)) - ((and (? string? object) - (? (lambda (el) (not (string-null? el))) object)) - (format #t " ?s ~a \"~a\" .~%" predicate object)) - (_ "")))) - first-n) - (format #t " ?s ?p ?o .~%}~%```~%")) - (format #t "~%Expected Result:~%~%```rdf~%") - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject) - (lambda (s p o) - (triple s p o)))) - db - (format #f "~a LIMIT 1" - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - (format #t "```~%~%")) - (when data? - #,(syntax-case #'schema-triples-clause (schema-triples) - ((schema-triples (triple-subject triple-predicate triple-object) ...) - #`(for-each triple - (list 'triple-subject ...) - (list 'triple-predicate ...) - (list 'triple-object ...))) - (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) - (sql-for-each (lambda (row) - (scm->triples - (map-alist row #,@(field->key #'(predicate-clauses ...))) - #,(field->assoc-ref #'row #'subject))) - db - (select-query #,(collect-fields #'(subject predicate-clauses ...)) - (primary-table other-tables ...) - tables-raw ...))) - ))) - (_ (error "Invalid define-transformer syntax:" (syntax->datum x)))))) - -(define (get-keyword-value args keyword default) - (let ((kv (memq keyword args))) - (if (and kv (>= (length kv) 2)) - (cadr kv) - default))) - -(define-syntax with-documentation - (syntax-rules () - ((_ (key value) ...) - (let* ((alist `((key . ,value) ...)) - (name (assoc-ref alist 'name)) - (connection (assoc-ref alist 'connection)) - (table-metadata? (assoc-ref alist 'table-metadata?)) - (prefixes (assoc-ref alist 'prefixes)) - (inputs (assoc-ref alist 'inputs)) - (outputs (assoc-ref alist 'outputs)) - (rdf-path (get-keyword-value outputs #:rdf "")) - (doc-path (get-keyword-value outputs #:documentation ""))) - (call-with-target-database - connection - (lambda (db) - (with-output-to-file ; - doc-path - (lambda () - (format #t "# ~a" name) - (for-each - (lambda (proc) - (proc db - #:metadata? #f - #:data? #f - #:documentation? - (lambda () (for-each - (match-lambda - ((k v) - (begin - (prefix k v #f)))) - prefixes)))) - inputs)) - #:encoding "UTF-8") - - ;; Dumping the actual data - (with-output-to-file - rdf-path - (lambda () - ;; Add the prefixes - (for-each - (match-lambda - ((k v) - (begin - (prefix k v)))) - prefixes) - (newline) - (for-each - (lambda (proc) - (proc db #:metadata? table-metadata?)) - inputs)) - #:encoding "UTF-8"))))))) - diff --git a/dump/sql.scm b/dump/sql.scm deleted file mode 100644 index 9e6b21a..0000000 --- a/dump/sql.scm +++ /dev/null @@ -1,114 +0,0 @@ -;;; Database helpers -;;; -;;; Most of these functions should have been a part of -;;; guile-dbi. Never too late to contribute upstream! - -(define-module (dump sql) - #:use-module (srfi srfi-1) - #:use-module (srfi srfi-26) - #:use-module (ice-9 match) - #:use-module (dbi dbi) - #:export (select-query - call-with-database - call-with-target-database - sql-exec - sql-fold - sql-map - sql-for-each - sql-find)) - -;; A half-baked macro to make SQL SELECT queries a bit more -;; S-expression friendly -(define-syntax select-query - (lambda (x) - (syntax-case x () - ((_ fields tables raw-forms ...) - #`(string-append "SELECT " - #,(syntax-case #'fields (distinct) - ((distinct _ ...) - "DISTINCT ") - (_ "")) - #,(string-join (filter-map (match-lambda - ('distinct #f) - (((query alias)) - (format #f "~a AS ~a" query alias)) - ((table column) - (format #f "~a.~a" table column)) - ((table column alias) - (format #f "~a.~a AS ~a" table column alias)) - (field-spec - (error "Invalid field specification" field-spec))) - (syntax->datum #'fields)) - ", ") - " FROM " - #,(string-join (map (match-lambda - ((join table condition) - (format #f "~a ~a ~a" - (case join - ((join) "JOIN") - ((left-join) "LEFT JOIN") - ((inner-join) "INNER JOIN") - (else (error "Invalid join operator" join))) - table condition)) - ((? symbol? table) - (symbol->string table)) - (table-spec - (error "Invalid table specification" table-spec))) - (syntax->datum #'tables)) - " ") - #,(syntax-case #'(raw-forms ...) () - (() "") - (_ " ")) - raw-forms ...)) - (_ (error "Invalid SQL select query" (syntax->datum x)))))) - -(define (call-with-database backend connection-string proc) - (let ((db #f)) - (dynamic-wind (lambda () - (set! db (dbi-open backend connection-string))) - (cut proc db) - (cut dbi-close db)))) - -(define (database-check-status db) - (match (dbi-get_status db) - ((code . str) - (unless (zero? code) - (error str))))) - -(define (sql-exec db statement) - (dbi-query db statement) - (database-check-status db)) - -(define (sql-fold proc init db statement) - (sql-exec db statement) - (let loop ((result init)) - (let ((row (dbi-get_row db))) - (if row - (loop (proc row result)) - result)))) - -(define (sql-map proc db statement) - (sql-fold (lambda (row result) - (cons (proc row) result)) - (list) db statement)) - -(define (sql-for-each proc db statement) - (sql-fold (lambda (row _) - (proc row)) - #f db statement)) - -(define (sql-find db statement) - (sql-exec db statement) - (dbi-get_row db)) - -(define (call-with-target-database connection-settings proc) - (call-with-database "mysql" (string-join - (list (assq-ref connection-settings 'sql-username) - (assq-ref connection-settings 'sql-password) - (assq-ref connection-settings 'sql-database) - "tcp" - (assq-ref connection-settings 'sql-host) - (number->string - (assq-ref connection-settings 'sql-port))) - ":") - proc)) diff --git a/dump/string-similarity.scm b/dump/string-similarity.scm deleted file mode 100644 index 4bcdf7c..0000000 --- a/dump/string-similarity.scm +++ /dev/null @@ -1,37 +0,0 @@ -(define-module (dump string-similarity) - #:use-module (srfi srfi-1) - #:export (jaccard-string-similar?)) - -(define (trigrams str) - "Return all trigrams in STR." - (if (< (string-length str) 3) - '() - (map (lambda (start) - (substring str start (+ start 3))) - (iota (- (string-length str) 2))))) - -(define (jaccard-index set1 set2) - "Return the Jaccard similarity coefficient between lists SET1 and -SET2. Similarity between null sets is defined to be 0." - (if (and (null? set1) - (null? set2)) - 0 - (let ((length-of-intersection (length (lset-intersection equal? set1 set2)))) - (exact->inexact - (/ length-of-intersection - (- (+ (length set1) (length set2)) - length-of-intersection)))))) - -(define (jaccard-string-similarity str1 str2) - "Return the trigram similarity between strings STR1 and STR2 as -defined by the Jaccard index." - (jaccard-index (trigrams (string-downcase str1)) - (trigrams (string-downcase str2)))) - -(define (jaccard-string-similar? str1 str2) - "Return #t if STR1 and STR2 have a trigram similarity greater than -0.8. Else, return #f. The Jaccard index is used as the similarity -metric." - (let ((similarity-threshold 0.8)) - (> (jaccard-string-similarity str1 str2) - similarity-threshold))) diff --git a/dump/strings.scm b/dump/strings.scm deleted file mode 100644 index e965f03..0000000 --- a/dump/strings.scm +++ /dev/null @@ -1,101 +0,0 @@ -(define-module (dump strings) - #:use-module (srfi srfi-1) - #:use-module (srfi srfi-19) - #:use-module (ice-9 match) - #:use-module (ice-9 string-fun) - #:use-module (ice-9 textual-ports) - #:export (string-blank? - time-unix->string - string-blank? - string-split-substring - delete-substrings - replace-substrings - sanitize-rdf-string - snake->lower-camel - string-capitalize-first)) - -(define (time-unix->string seconds . maybe-format) - "Given an integer saying the number of seconds since the Unix -epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable -date and time-string, possible using the MAYBE-FORMAT." - (letrec ([time-unix->time-utc - (lambda (seconds) - (add-duration - (date->time-utc (make-date 0 0 0 0 1 1 1970 0)) - (make-time time-duration 0 seconds)))]) - (apply date->string - (time-utc->date (time-unix->time-utc seconds)) - maybe-format))) - -(define (string-blank? str) - "Return non-#f if STR consists only of whitespace characters." - (string-every char-set:whitespace str)) - -(define (string-split-substring str substr) - "Split the string @var{str} into a list of substrings delimited by the -substring @var{substr}." - - (define substrlen (string-length substr)) - (define strlen (string-length str)) - - (define (loop index start) - (cond - ((>= start strlen) (list "")) - ((not index) (list (substring str start))) - (else - (cons (substring str start index) - (let ((new-start (+ index substrlen))) - (loop (string-contains str substr new-start) - new-start)))))) - - (cond - ((string-contains str substr) => (lambda (idx) (loop idx 0))) - (else (list str)))) - -(define (delete-substrings str . substrings) - "Delete SUBSTRINGS, a list of strings, from STR." - (fold (lambda (substring result) - (string-replace-substring result substring "")) - str - substrings)) - -(define (replace-substrings str replacement-alist) - "Replace substrings in STR according to REPLACEMENT-ALIST, an -association list mapping substrings to their replacements." - (fold (match-lambda* - (((substring . replacement) str) - (string-replace-substring str substring replacement))) - str - replacement-alist)) - -(define (sanitize-rdf-string str) - (replace-substrings - (string-trim-both str) - '(("\r" . "\\r") - ("\n" . "\\n") - ("\"" . "'") - ("\v" . "") - ("\xad" . "") - ("\x28" . "") - ("\x29" . "") - ("\xa0" . " ") - ("\x02" . "") - ("\x01" . "")))) - -(define (snake->lower-camel str) - (let ((char-list (string->list str))) - (call-with-output-string - (lambda (port) - (put-char port (char-downcase (string-ref str 0))) - (map (lambda (char previous-char) - (unless (char=? char #\_) - (put-char port (if (char=? previous-char #\_) - (char-upcase char) - char)))) - (drop char-list 1) - char-list))))) - -(define (string-capitalize-first string) - (string-titlecase - (string-downcase string) 0 1)) - diff --git a/dump/table.scm b/dump/table.scm deleted file mode 100644 index 834c4fd..0000000 --- a/dump/table.scm +++ /dev/null @@ -1,28 +0,0 @@ -(define-module (dump table) - #:use-module (srfi srfi-9 gnu) - #:export (make-table - table-name - table-size - table-columns - set-table-columns - make-column - column-name - column-type - column-dumped?)) - -(define-immutable-record-type
- (make-table name size columns) - table? - (name table-name) - (size table-size) - (columns table-columns set-table-columns)) - -(define-immutable-record-type - (column-constructor name type dumped?) - column? - (name column-name) - (type column-type) - (dumped? column-dumped?)) - -(define* (make-column name type #:optional dumped?) - (column-constructor name type dumped?)) diff --git a/dump/triples.scm b/dump/triples.scm deleted file mode 100644 index b0eb1e4..0000000 --- a/dump/triples.scm +++ /dev/null @@ -1,110 +0,0 @@ -(define-module (dump triples) - #:use-module (ice-9 regex) - #:use-module (ice-9 match) - #:use-module (dump strings) - #:export (ontology - string->identifier - prefix - triple - scm->triples - annotate-field - string->binomial-name)) - -(define (annotate-field field schema) - (let ([schema (cond ((symbol? schema) - (symbol->string schema)) - ((string? schema) schema) - (else - (error "Use a string/symbol")))] - [string-field (if (number? field) (number->string field) field)]) - (if (or (and (string? string-field) (string-null? string-field)) - (eq? string-field #f)) - "" - (string->symbol - (format #f "~s~a" string-field schema))))) - -(define* (string->identifier prefix str - #:optional #:key - (ontology "gn:") - (separator "_") - (proc string-downcase)) - "Convert STR to a turtle identifier after replacing illegal -characters with an underscore and prefixing with gn:PREFIX." - (if (or (and (string? str) (string-null? str)) - (eq? str #f)) - "" - (string->symbol - (string-append ontology prefix separator - (string-delete - (lambda (c) - (eq? c #\))) - (string-map (lambda (c) - (case c - ((#\/ #\< #\> #\+ #\( #\space #\@) #\_) - (else c))) - (proc - (string-trim-right str #\.)))))))) - - -(define* (prefix prefix iri #:optional (ttl? #t)) - (format #t - (if ttl? - "@prefix ~a ~a .~%" - "PREFIX ~a ~a ~%") - prefix iri)) - -(define (ontology prefix value) - (if (and (string? value) (string-null? value)) - "" - (string->symbol - `,(format #f "~a~a" prefix value)))) - -(define (triple subject predicate object) - (unless (or (string? subject) - (symbol? subject)) - (error "Triple subject not a string or symbol:" - (list subject predicate object))) - (unless (or (string? predicate) - (symbol? predicate)) - (error "Triple predicate not a string or symbol:" - (list subject predicate object))) - (unless (or (string? object) - (symbol? object) - (number? object)) - (error "Triple object not a string, symbol or number:" - (list subject predicate object))) - (let ([pattern (match object - ((or (? symbol? object) - (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) - "~a ~a ~a .~%") - (_ "~a ~a \"~a\" .~%"))]) - (format #t pattern subject predicate - (if (symbol? object) (symbol->string object) object)))) - -(define* (scm->triples alist id - #:optional - (fn triple)) - (for-each (match-lambda - ((predicate . object) - (when (cond - ((string? object) - (not (string-blank? object))) - (else object)) - (fn id predicate object)))) - alist)) - -(define (string->binomial-name name) - (let ((binomial? - (string-match - "\\\(.+\\)" - name))) - (string->identifier - "" - (if binomial? - (regexp-substitute/global - #f "[^[:space:]A-Za-z0-9:]" - (match:substring binomial?) - 'pre "" 'post) - name) - #:separator "" - #:proc string-capitalize-first))) diff --git a/examples/dataset-metadata.scm b/examples/dataset-metadata.scm index 5680a2b..783b2b7 100755 --- a/examples/dataset-metadata.scm +++ b/examples/dataset-metadata.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/generif.scm b/examples/generif.scm index 0b3c8e4..76f55ee 100755 --- a/examples/generif.scm +++ b/examples/generif.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) @@ -64,7 +64,7 @@ 'gn:geneWikiEntry)) (set gnt:wikiEntryOfSpecies (string->binomial-name (field Species FullName))) - ;; This only dumps symbols not present in the GeneRIF_BASIC table + ;; This only transforms symbols not present in the GeneRIF_BASIC table (set gnt:symbol (let ([geneid (field GeneRIF_BASIC GeneId)]) (if (eq? geneid 0) (field GeneRIF symbol) diff --git a/examples/genotype.scm b/examples/genotype.scm index 63b85a7..6fe60c9 100755 --- a/examples/genotype.scm +++ b/examples/genotype.scm @@ -7,10 +7,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) @@ -86,7 +86,7 @@ '^^xsd:double)) (set gnt:hasSequence (field Geno Sequence)) (set gnt:hasSource (field Geno Source)) - ;; Only dump Source2 if it differs from Source + ;; Only transform Source2 if it differs from Source (set gnt:hasAltSourceName (field ("IF((Source2 = Source), NULL, Source2)" Source2))) diff --git a/examples/phenotype.scm b/examples/phenotype.scm index 1c68159..31d7972 100755 --- a/examples/phenotype.scm +++ b/examples/phenotype.scm @@ -7,10 +7,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/probeset-data.scm b/examples/probeset-data.scm index d46bcda..ebb6a92 100755 --- a/examples/probeset-data.scm +++ b/examples/probeset-data.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/probeset.scm b/examples/probeset.scm index 68ddb59..2dfdbc2 100755 --- a/examples/probeset.scm +++ b/examples/probeset.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/publication.scm b/examples/publication.scm index 313ee96..d9e2aa5 100755 --- a/examples/publication.scm +++ b/examples/publication.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/species-metadata.scm b/examples/species-metadata.scm index f3794b8..b67c0bc 100755 --- a/examples/species-metadata.scm +++ b/examples/species-metadata.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/examples/tissue.scm b/examples/tissue.scm index 8ce96c8..22b715e 100755 --- a/examples/tissue.scm +++ b/examples/tissue.scm @@ -5,10 +5,10 @@ (srfi srfi-26) (ice-9 match) (ice-9 regex) - (dump strings) - (dump sql) - (dump triples) - (dump special-forms)) + (transform strings) + (transform sql) + (transform triples) + (transform special-forms)) diff --git a/json-dump.scm b/json-dump.scm index 8625139..0a054c5 100755 --- a/json-dump.scm +++ b/json-dump.scm @@ -4,7 +4,7 @@ (use-modules (json) (ice-9 ftw) (ice-9 match) - (dump triples)) + (transform triples)) diff --git a/tests.scm b/tests.scm index 7ba4261..1dfa2ac 100644 --- a/tests.scm +++ b/tests.scm @@ -1,9 +1,9 @@ (use-modules (srfi srfi-64) (ice-9 match) - (dump sql) - (dump string-similarity) - (dump strings) - (dump special-forms)) + (transform sql) + (transform string-similarity) + (transform strings) + (transform special-forms)) (test-begin "sql") @@ -24,23 +24,23 @@ (test-equal "trigrams of a string" (list "coe" "oef" "eff" "ffi" "fic" "ici" "cie" "ien" "ent") - ((@@ (dump string-similarity) trigrams) "coefficient")) + ((@@ (transform string-similarity) trigrams) "coefficient")) (test-equal "Jaccard index" 0.4 - ((@@ (dump string-similarity) jaccard-index) + ((@@ (transform string-similarity) jaccard-index) (list 0 1 2 5 6 8 9) (list 0 2 3 4 5 7 9))) (test-equal "Jaccard index of equal sets" 1.0 - ((@@ (dump string-similarity) jaccard-index) + ((@@ (transform string-similarity) jaccard-index) (iota 10) (iota 10))) (test-equal "Jaccard index of disjoint sets" 0.0 - ((@@ (dump string-similarity) jaccard-index) + ((@@ (transform string-similarity) jaccard-index) (iota 10) (iota 10 10))) diff --git a/transform/schema-dump.scm b/transform/schema-dump.scm new file mode 100644 index 0000000..bf7f8cb --- /dev/null +++ b/transform/schema-dump.scm @@ -0,0 +1,125 @@ +(define-module (transform schema) + #:use-module (ice-9 match) + #:use-module (ice-9 srfi-26) + #:use-module (transform sql) + #:use-module (transform triples) + #:use-module (transform strings) + #:use-module (transform table)) + + +(define (table-fields db table) + (format #t "* ~a~%" table) + (match (sql-find db + (select-query ((TableComments Comment)) + (TableComments) + (format #f "WHERE TableName = '~a'" table))) + ((("Comment" . comment)) + (format #t "~a~%" comment))) + (sql-for-each (lambda (row) + (match row + ((("TableField" . table-field) + ("Foreign_Key" . foreign-key) + ("Annotation" . annotation)) + (format #t "** ~a~%" (substring table-field (1+ (string-length table)))) + (unless (string-null? foreign-key) + (format #t "Foreign key to ~a~%" foreign-key)) + (unless (string-null? annotation) + (display annotation) + (newline))))) + db + (select-query ((TableFieldAnnotation TableField) + (TableFieldAnnotation Foreign_Key) + (TableFieldAnnotation Annotation)) + (TableFieldAnnotation) + (format #f "WHERE TableField LIKE '~a.%'" table))) + (newline)) + +(define (get-tables-from-comments db) + (sql-map (match-lambda + ((("TableName" . table)) table)) + db + (select-query ((TableComments TableName)) + (TableComments)))) + +(define (schema-annotations db) + (call-with-target-database + (lambda (db) + (for-each (cut table-fields db <>) + (get-tables-from-comments db))))) + +(define (tables db) + "Return list of all tables in DB. Each element of the returned list +is a
object." + (map (lambda (table) + (set-table-columns table + (sql-map (lambda (row) + (make-column (assoc-ref row "Field") + (assoc-ref row "Type"))) + db + (format #f "SHOW COLUMNS FROM ~a" (table-name table))))) + (sql-map (lambda (row) + (make-table (assoc-ref row "table_name") + ;; FIXME: This is probably correct only for + ;; MyISAM tables. + (assoc-ref row "data_length") + #f)) + db + (select-query ((information_schema.tables table_name) + (information_schema.tables data_length)) + (information_schema.tables) + (format #f "WHERE table_schema = '~a'" + (assq-ref %connection-settings 'sql-database)))))) + +(define (schema db) + (let ((tables (tables db))) + (for-each (lambda (table) + (let ((table-id (string->identifier + "table" + ;; We downcase table names in + ;; identifiers. So, we distinguish + ;; between the user and User tables. + (if (string=? (table-name table) "User") + "user2" + (table-name table))))) + (triple table-id 'rdf:type 'gn:sqlTable) + (triple table-id 'gn:name (table-name table)) + (triple table-id 'gn:hasSize (table-size table)) + (for-each (lambda (column) + (let ((column-id (column-id (table-name table) + (column-name column)))) + (triple column-id 'rdf:type 'gn:sqlTableField) + (triple column-id 'gn:name (column-name column)) + (triple column-id 'gn:sqlFieldType (column-type column)) + (triple table-id 'gn:hasField column-id))) + (table-columns table)))) + tables))) + +(define* (data-table db table-name data-field + #:optional (default-directory "")) + (let ((directory (string-append default-directory "/" table-name)) + (port #f) + (current-strain-id #f)) + (unless (file-exists? directory) + (mkdir directory)) + (sql-for-each (match-lambda + (((_ . strain-id) + (_ . value)) + ;; Close file if new strain. + (when (and port + (not (= current-strain-id strain-id))) + (close-port port) + (set! port #f)) + ;; If no file is open, open new file. + (unless port + (set! current-strain-id strain-id) + (let ((filename (string-append directory + "/" (number->string strain-id)))) + (display filename (current-error-port)) + (newline (current-error-port)) + (set! port (open-output-file filename)))) + (display value port) + (newline port))) + db + (format #f "SELECT StrainId, ~a FROM ~a ORDER BY StrainId" + data-field table-name)) + (close-port port))) diff --git a/transform/special-forms.scm b/transform/special-forms.scm new file mode 100644 index 0000000..4b69337 --- /dev/null +++ b/transform/special-forms.scm @@ -0,0 +1,599 @@ +(define-module (transform special-forms) + #:use-module (srfi srfi-1) + #:use-module (ice-9 match) + #:use-module (srfi srfi-26) + #:use-module (transform sql) + #:use-module (transform table) + #:use-module (transform triples) + #:export (translate-forms + collect-forms + collect-keys + field->key + field->assoc-ref + collect-fields + find-clause + remove-namespace + column-id + id + syntax-let + blank-node + map-alist + with-documentation + define-transformer)) + +(define (key->assoc-ref alist x) + "Recursively translate (key k) forms in source X to (assoc-ref ALIST +k) forms." + (translate-forms 'key + ;; (syntax-rules (key) + ;; ((key k) (assoc-ref alist k))) + (lambda (x) + (syntax-case x (key) + ((key k) #`(assoc-ref #,alist 'k)))) + x)) + +(define (alist-delete* alist keys) + "Delete entries from ALIST whose key is equal (in the sense of +equal?) to any in KEYS, a list." + (remove (match-lambda + ((key . value) + (member key keys)) + (x (error "malformed alist element" x))) + alist)) + +(define (translate-forms from translator x) + "Recursively pass (FROM ...) forms in source X to TRANSLATOR, and +replace them with the return value." + (syntax-case x () + ;; Handle translation base case. + ((head tail ...) (eq? (syntax->datum #'head) + from) + (translator x)) + ;; Recurse. + ((head tail ...) + (cons (translate-forms from translator #'head) + (map (cut translate-forms from translator <>) + #'(tail ...)))) + ;; Handle leaf base case. + (leaf #'leaf))) + +(define (collect-forms target x) + "Recursively collect (TARGET ...) forms in source X and return them +as a list." + (syntax-case x () + ;; Handle collection base case. + ((head tail ...) (eq? (syntax->datum #'head) + target) + (list x)) + ;; Recurse. + ((head tail ...) + (append (collect-forms target #'head) + (append-map (cut collect-forms target <>) #'(tail ...)))) + ;; Handle leaf base case. + (leaf (list)))) + +(define (collect-keys x) + "Recursively collect (key k) forms from source X and return as a +list of all K." + (map (syntax-rules (key) + ((key k) 'k)) + (collect-forms 'key x))) + +(define-syntax map-alist + (lambda (x) + "Transform (aka map) ALIST, an association list, into another +association list. The returned association list may contain multiple +associations for the same key. equal? is used in all association list +key comparisons. + +Syntax: + +(map-alist alist + (verb key expression) ... + (else=> proc)) + +VERB must be one of set, filter-set, multiset and remove. + +For the set VERB, KEY is set to the result of evaluating +EXPRESSION. Multiple set verbs on the same key will result in multiple +associations for that key. + +For the filter-set VERB, KEY is set to the result of evaluating +EXPRESSION only if that result is not #f. + +For the multiset VERB, EXPRESSION must return a list and KEY is +associated multiple times once with each element of the returned list. + +For the remove VERB, KEY is discarded. + +EXPRESSIONs must reference elements of ALIST using (key k) forms where +K is the key to be referenced from ALIST. K must not be quoted. That +is, if K is the symbol 'bar, it must be referenced as (key bar), not +as (key 'bar). + +The else=> clause is optional. + +If the else=> clause is present, PROC is passed all pairs of ALIST +that are not set by an earlier (verb key expression) action. PROC must +return #f or a pair to replace its input pair. If PROC returns #f, +that pair is discarded. + +If the else=> clause is absent, all unset pairs are discarded. + +Example: + +(map-alist '((\"foo\" . 1) + (bar . 2) + (foobar . 5) + (fubar . 3)) + (set spam (1+ (key \"foo\"))) + (set ham (* 2 (key bar))) + (set eggs (* 3 (key bar))) + (set aal (+ (key \"foo\") + (key bar))) + (multiset vel (iota (* 2 (key bar)))) + (remove foobar) + (else=> (match-lambda + ((key . value) + (cons key (expt 2 value)))))) +=> ((spam . 2) (ham . 4) (eggs . 6) (aal . 3) + (vel . 0) (vel . 1) (vel . 2) (vel . 3) (fubar . 8))" + (syntax-case x () + ((_ alist actions ...) + ;; TODO: Check that all actions are valid. + #`(let ((evaluated-alist alist)) + (append (remove (match-lambda + ;; Filter out results of filter-set actions. + ((key . #f) + (member key '#,(filter-map (lambda (action) + (syntax-case action (filter-set) + ((filter-set key expression) #'key) + (_ #f))) + #'(actions ...)))) + (_ #f)) + ;; Do set and filter-set. + `#,(filter-map (lambda (action) + (syntax-case action (set filter-set) + ((set key expression) + #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression))) + ((filter-set key expression) + #`(key . ,#,(key->assoc-ref #'evaluated-alist #'expression))) + (_ #f))) + #'(actions ...))) + ;; Do multiset. + #,@(filter-map (lambda (action) + (syntax-case action (multiset) + ((multiset key expression) + #`(map (cut cons 'key <>) + #,(key->assoc-ref #'evaluated-alist #'expression))) + (_ #f))) + #'(actions ...)) + ;; Apply else=> procedure on unspecified keys. If + ;; no else=> procedure is specified, delete + ;; unspecified keys. + (filter-map #,(or (any (lambda (action) + (syntax-case action (else=>) + ((else=> proc) #'proc) + (_ #f))) + #'(actions ...)) + #'(const #f)) + ;; The unspecified part of the input + ;; alist + (alist-delete* evaluated-alist + (list + ;; Keys that were referenced + #,@(append-map (lambda (action) + (syntax-case action () + ((_ key expression) + (collect-keys #'expression)) + (_ '()))) + #'(actions ...)) + ;; Keys that were deleted + #,@(filter-map (lambda (action) + (syntax-case action (remove) + ((remove key) #''key) + (_ #f))) + #'(actions ...)) + ;; Keys that were set + #,@(filter-map (lambda (action) + (syntax-case action () + ((_ key expression) #''key) + (_ #f))) + #'(actions ...))))))))))) + + + +(eval-when (expand load ) + (define (field->datum x) + (translate-forms + 'field + (lambda (x) + (syntax-case x (field) + ((field (query alias)) + #`(format #f "~a" (syntax->datum #'alias))) + ((field table column) + #`(format #f "~a(~a)" + (syntax->datum #'table) + (syntax->datum #'column))) + ((field table column alias) + #`(format #f "~a(~a)" + (syntax->datum #'table) + (syntax->datum #'alias))))) + x)) + + (define (field->key x) + (translate-forms 'field + (lambda (x) + #`(key #,(symbol->string + (syntax->datum + ((syntax-rules (field) + ((field (query alias)) alias) + ((field table column) column) + ((field table column alias) alias)) + x))))) + x)) + + (define (field->assoc-ref alist x) + "Recursively translate field references in source X to (assoc-ref +ALIST field-name) forms." + (translate-forms 'field + (lambda (x) + #`(assoc-ref #,alist + #,(symbol->string + (syntax->datum + ((syntax-rules (field) + ((field (query alias)) alias) + ((field table column) column) + ((field table column alias) alias)) + x))))) + x)) + + (define (collect-fields x) + (map (syntax-rules (field) + ((field reference ...) + (reference ...))) + (collect-forms 'field x))) + + (define (find-clause clauses key) + "Find (KEY ...) clause among CLAUSES, a list of syntax forms." + (find (lambda (x) + (syntax-case x () + ((clause-key _ ...) + (eq? (syntax->datum #'clause-key) + key)))) + clauses)) + + (define (remove-namespace str) + "Remove RDF namespace prefix from STR." + (substring str (1+ (string-index str #\:)))) + + (define (column-id table-name column-name) + (string->identifier + "field" (string-append + ;; We downcase table names in identifiers. So, we + ;; distinguish between the user and User tables. + (if (string=? table-name "User") + "user2" table-name) + "__" column-name))) + + (define (id table predicate) + (symbol->string + (string->identifier + "transform" + (string-append + table "_" (remove-namespace (symbol->string predicate))))))) + +(define-syntax blank-node + (syntax-rules () + "Allow having set and multiset within the context of a blank-node" + [(_ (op predicate object) ...) + (let [(node (string-join + (filter-map (match-lambda + ((pred . obj) + (match obj + ((and (? string? obj) + (? string-null? obj)) + #f) + ((? symbol? obj) + (format #f "~a ~a" pred (symbol->string obj))) + (_ + (format #f "~a ~s" pred obj))))) + (map-alist '() + (op predicate object) ...)) + " ; "))] + (if (string-null? node) + "" + (format #f "[ ~a ]" node)))])) + +(define-syntax syntax-let + (syntax-rules () + "Like match-let, but for syntax. + +(syntax-let ((pattern literals expression)) + body ...) +≡ +(syntax-case expression () + (pattern literals + body ...)) + +literals is optional. So, + +(syntax-let ((pattern expression)) + body ...) +≡ +(syntax-case expression () + (pattern + body ...)) +" + ((_ () body ...) + (begin body ...)) + ((_ ((pattern expression) + bindings ...) + body ...) + (syntax-case expression () + (pattern + (syntax-let (bindings ...) + body ...)))) + ((_ ((pattern literals expression) + bindings ...) + body ...) + (syntax-case expression literals + (pattern + (syntax-let (bindings ...) + body ...)))))) + +(define-syntax define-transformer + (lambda (x) + "Define FUNCTION-NAME, a function that transforms a view of the database. + +define-transformer consists of three order-agnostic clauses---tables, +schema-triples and triples---in the form shown below. + +(define-transformer function-name + (tables (table ...) raw-forms ...) + (schema-triples + (subject predicate object) ...) + (triples subject + (verb predicate object) ...)) + +The tables clause specifies the database tables to be joined to +construct the view to be transformed. TABLE must be either of the form +TABLE-NAME or of the form (JOIN-OPERATOR TABLE-NAME +RAW-CONDITION). TABLE-NAME must, obviously, be the name of the +table. JOIN-OPERATOR must be one of join, left-join and +inner-join. RAW-CONDITION should be the join condition as a raw +string. This is usually something like +\"USING (SpeciesId)\". RAW-FORMS are expressions that must evaluate to +strings to be appended to the SQL query. + +The schema-triples clause specifies the list of triples to be written +once when the transform starts. + +The triples clause specifies the triples to be transformed once for each +row in the view. All triples have a common SUBJECT. The (verb +predicate object) clauses are described below. + +VERB must either be set or multiset. For the set VERB, a single triple +(SUBJECT PREDICATE OBJECT-VALUE) is written where OBJECT-VALUE is the +result of evaluating OBJECT. For the multiset VERB, OBJECT must +evaluate to a list, and a triple (SUBJECT PREDICATE +OBJECT-VALUE-ELEMENT) is created for each element OBJECT-VALUE-ELEMENT +of that list. + +The SUBJECT and OBJECT expressions in the triples clause must +reference database fields using a (field TABLE COLUMN) clause where +TABLE and COLUMN refer to the table and column of the field being +referenced. Database fields can also be referenced using (field TABLE +COLUMN ALIAS) where ALIAS is an alias for that column in the SQL +query. Specification of ALIAS is indeed a leak in the abstraction, and +must be remedied." + (syntax-case x (tables schema-triples triples) + ((_ name clauses ...) + (syntax-let (((tables (primary-table other-tables ...) tables-raw ...) (tables) + (find-clause #'(clauses ...) 'tables)) + (schema-triples-clause (or (find-clause #'(clauses ...) 'schema-triples) + #'(schema-triples))) + ((triples subject predicate-clauses ...) (triples) + (find-clause #'(clauses ...) 'triples))) + #`(define* (name db #:key + (metadata? #f) + (data? #t) + (documentation? #f)) + (when metadata? + #,@(let ((table (symbol->string (syntax->datum #'primary-table))) + (subject-type (any (lambda (predicate) + (syntax-case predicate (rdf:type) + ((_ rdf:type type) #'type) + (_ #f))) + #'(predicate-clauses ...)))) + (map (lambda (predicate-clause) + (syntax-case predicate-clause () + ((_ predicate _) + ;; Dump metadata about the transform itself. + #`(begin + (scm->triples + (map-alist '() + (set rdf:type 'gn-id:transform) + (set gn-term:createsPredicate 'predicate) + (filter-set gn-term:forSubjectType #,subject-type) + (multiset gn-term:dependsOn + '#,(map (lambda (field) + (match (syntax->datum field) + ((table-name column-name _ ...) + (datum->syntax + x (column-id (symbol->string table-name) + (symbol->string column-name)))) + (((query alias)) + (datum->syntax + x (column-id query (symbol->string alias)))))) + (collect-fields predicate-clause)))) + #,(id table (syntax->datum #'predicate))) + ;; Automatically create domain triples + ;; for predicates. + (when #,subject-type + (triple 'predicate 'rdfs:domain #,subject-type)))) + (_ (error "Invalid predicate clause:" predicate-clause)))) + #'(predicate-clauses ...)))) + (when documentation? + (format #t "~%## '~a'~%~%" (syntax->datum #'name)) + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(begin + (when (not (list 'triple-subject ...)) + (format #t "## Schema Triples:~%~%```text~%") + (for-each (lambda (s p o) + (format #t "~a -> ~a -> ~a~%" s p o)) + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...)) + (format #t "```")))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (format #t "## Generated Triples: + +The following SQL query was executed: + +```sql +~a +``` + +The above query results to triples that have the form: + +```text +" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...)) + (for-each (match-lambda + ((predicate . object) + (format #t "~a -> ~a -> ~a ~%" + (if (symbol? #,(field->datum #'subject)) + (symbol->string #,(field->datum #'subject)) + #,(field->datum #'subject)) + predicate + (if (symbol? object) + (symbol->string object) + object)))) + (map-alist + '() + #,@(field->datum #'(predicate-clauses ...)))) + (format #t "```~%Here's an example query:~%~%```sparql~%") + (documentation?) + (newline) + (let* ((result + (map-alist (sql-find + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + #,@(field->key #'(predicate-clauses ...)))) + (first-n (list-head result + (let ((n + (min 4 (truncate + (+ (exact-integer-sqrt (length result)) 1))))) + (if (< n 3) + (length result) + n))))) + (format #t "SELECT * WHERE { ~%") + (for-each (match-lambda + ((predicate . object) + (match object + ((or (? symbol? object) + (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + (format #t " ?s ~a ~a .~%" predicate object)) + ((and (? string? object) + (? (lambda (el) (not (string-null? el))) object)) + (format #t " ?s ~a \"~a\" .~%" predicate object)) + (_ "")))) + first-n) + (format #t " ?s ?p ?o .~%}~%```~%")) + (format #t "~%Expected Result:~%~%```rdf~%") + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject) + (lambda (s p o) + (triple s p o)))) + db + (format #f "~a LIMIT 1" + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + (format #t "```~%~%")) + (when data? + #,(syntax-case #'schema-triples-clause (schema-triples) + ((schema-triples (triple-subject triple-predicate triple-object) ...) + #`(for-each triple + (list 'triple-subject ...) + (list 'triple-predicate ...) + (list 'triple-object ...))) + (_ (error "Invalid schema triples clause:" #'schema-triples-clause))) + (sql-for-each (lambda (row) + (scm->triples + (map-alist row #,@(field->key #'(predicate-clauses ...))) + #,(field->assoc-ref #'row #'subject))) + db + (select-query #,(collect-fields #'(subject predicate-clauses ...)) + (primary-table other-tables ...) + tables-raw ...))) + ))) + (_ (error "Invalid define-transformer syntax:" (syntax->datum x)))))) + +(define (get-keyword-value args keyword default) + (let ((kv (memq keyword args))) + (if (and kv (>= (length kv) 2)) + (cadr kv) + default))) + +(define-syntax with-documentation + (syntax-rules () + ((_ (key value) ...) + (let* ((alist `((key . ,value) ...)) + (name (assoc-ref alist 'name)) + (connection (assoc-ref alist 'connection)) + (table-metadata? (assoc-ref alist 'table-metadata?)) + (prefixes (assoc-ref alist 'prefixes)) + (inputs (assoc-ref alist 'inputs)) + (outputs (assoc-ref alist 'outputs)) + (rdf-path (get-keyword-value outputs #:rdf "")) + (doc-path (get-keyword-value outputs #:documentation ""))) + (call-with-target-database + connection + (lambda (db) + (with-output-to-file ; + doc-path + (lambda () + (format #t "# ~a" name) + (for-each + (lambda (proc) + (proc db + #:metadata? #f + #:data? #f + #:documentation? + (lambda () (for-each + (match-lambda + ((k v) + (begin + (prefix k v #f)))) + prefixes)))) + inputs)) + #:encoding "UTF-8") + + ;; Dumping the actual data + (with-output-to-file + rdf-path + (lambda () + ;; Add the prefixes + (for-each + (match-lambda + ((k v) + (begin + (prefix k v)))) + prefixes) + (newline) + (for-each + (lambda (proc) + (proc db #:metadata? table-metadata?)) + inputs)) + #:encoding "UTF-8"))))))) + diff --git a/transform/sql.scm b/transform/sql.scm new file mode 100644 index 0000000..a8962c8 --- /dev/null +++ b/transform/sql.scm @@ -0,0 +1,114 @@ +;;; Database helpers +;;; +;;; Most of these functions should have been a part of +;;; guile-dbi. Never too late to contribute upstream! + +(define-module (transform sql) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-26) + #:use-module (ice-9 match) + #:use-module (dbi dbi) + #:export (select-query + call-with-database + call-with-target-database + sql-exec + sql-fold + sql-map + sql-for-each + sql-find)) + +;; A half-baked macro to make SQL SELECT queries a bit more +;; S-expression friendly +(define-syntax select-query + (lambda (x) + (syntax-case x () + ((_ fields tables raw-forms ...) + #`(string-append "SELECT " + #,(syntax-case #'fields (distinct) + ((distinct _ ...) + "DISTINCT ") + (_ "")) + #,(string-join (filter-map (match-lambda + ('distinct #f) + (((query alias)) + (format #f "~a AS ~a" query alias)) + ((table column) + (format #f "~a.~a" table column)) + ((table column alias) + (format #f "~a.~a AS ~a" table column alias)) + (field-spec + (error "Invalid field specification" field-spec))) + (syntax->datum #'fields)) + ", ") + " FROM " + #,(string-join (map (match-lambda + ((join table condition) + (format #f "~a ~a ~a" + (case join + ((join) "JOIN") + ((left-join) "LEFT JOIN") + ((inner-join) "INNER JOIN") + (else (error "Invalid join operator" join))) + table condition)) + ((? symbol? table) + (symbol->string table)) + (table-spec + (error "Invalid table specification" table-spec))) + (syntax->datum #'tables)) + " ") + #,(syntax-case #'(raw-forms ...) () + (() "") + (_ " ")) + raw-forms ...)) + (_ (error "Invalid SQL select query" (syntax->datum x)))))) + +(define (call-with-database backend connection-string proc) + (let ((db #f)) + (dynamic-wind (lambda () + (set! db (dbi-open backend connection-string))) + (cut proc db) + (cut dbi-close db)))) + +(define (database-check-status db) + (match (dbi-get_status db) + ((code . str) + (unless (zero? code) + (error str))))) + +(define (sql-exec db statement) + (dbi-query db statement) + (database-check-status db)) + +(define (sql-fold proc init db statement) + (sql-exec db statement) + (let loop ((result init)) + (let ((row (dbi-get_row db))) + (if row + (loop (proc row result)) + result)))) + +(define (sql-map proc db statement) + (sql-fold (lambda (row result) + (cons (proc row) result)) + (list) db statement)) + +(define (sql-for-each proc db statement) + (sql-fold (lambda (row _) + (proc row)) + #f db statement)) + +(define (sql-find db statement) + (sql-exec db statement) + (dbi-get_row db)) + +(define (call-with-target-database connection-settings proc) + (call-with-database "mysql" (string-join + (list (assq-ref connection-settings 'sql-username) + (assq-ref connection-settings 'sql-password) + (assq-ref connection-settings 'sql-database) + "tcp" + (assq-ref connection-settings 'sql-host) + (number->string + (assq-ref connection-settings 'sql-port))) + ":") + proc)) diff --git a/transform/string-similarity.scm b/transform/string-similarity.scm new file mode 100644 index 0000000..c715856 --- /dev/null +++ b/transform/string-similarity.scm @@ -0,0 +1,37 @@ +(define-module (transform string-similarity) + #:use-module (srfi srfi-1) + #:export (jaccard-string-similar?)) + +(define (trigrams str) + "Return all trigrams in STR." + (if (< (string-length str) 3) + '() + (map (lambda (start) + (substring str start (+ start 3))) + (iota (- (string-length str) 2))))) + +(define (jaccard-index set1 set2) + "Return the Jaccard similarity coefficient between lists SET1 and +SET2. Similarity between null sets is defined to be 0." + (if (and (null? set1) + (null? set2)) + 0 + (let ((length-of-intersection (length (lset-intersection equal? set1 set2)))) + (exact->inexact + (/ length-of-intersection + (- (+ (length set1) (length set2)) + length-of-intersection)))))) + +(define (jaccard-string-similarity str1 str2) + "Return the trigram similarity between strings STR1 and STR2 as +defined by the Jaccard index." + (jaccard-index (trigrams (string-downcase str1)) + (trigrams (string-downcase str2)))) + +(define (jaccard-string-similar? str1 str2) + "Return #t if STR1 and STR2 have a trigram similarity greater than +0.8. Else, return #f. The Jaccard index is used as the similarity +metric." + (let ((similarity-threshold 0.8)) + (> (jaccard-string-similarity str1 str2) + similarity-threshold))) diff --git a/transform/strings.scm b/transform/strings.scm new file mode 100644 index 0000000..98f828f --- /dev/null +++ b/transform/strings.scm @@ -0,0 +1,101 @@ +(define-module (transform strings) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-19) + #:use-module (ice-9 match) + #:use-module (ice-9 string-fun) + #:use-module (ice-9 textual-ports) + #:export (string-blank? + time-unix->string + string-blank? + string-split-substring + delete-substrings + replace-substrings + sanitize-rdf-string + snake->lower-camel + string-capitalize-first)) + +(define (time-unix->string seconds . maybe-format) + "Given an integer saying the number of seconds since the Unix +epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable +date and time-string, possible using the MAYBE-FORMAT." + (letrec ([time-unix->time-utc + (lambda (seconds) + (add-duration + (date->time-utc (make-date 0 0 0 0 1 1 1970 0)) + (make-time time-duration 0 seconds)))]) + (apply date->string + (time-utc->date (time-unix->time-utc seconds)) + maybe-format))) + +(define (string-blank? str) + "Return non-#f if STR consists only of whitespace characters." + (string-every char-set:whitespace str)) + +(define (string-split-substring str substr) + "Split the string @var{str} into a list of substrings delimited by the +substring @var{substr}." + + (define substrlen (string-length substr)) + (define strlen (string-length str)) + + (define (loop index start) + (cond + ((>= start strlen) (list "")) + ((not index) (list (substring str start))) + (else + (cons (substring str start index) + (let ((new-start (+ index substrlen))) + (loop (string-contains str substr new-start) + new-start)))))) + + (cond + ((string-contains str substr) => (lambda (idx) (loop idx 0))) + (else (list str)))) + +(define (delete-substrings str . substrings) + "Delete SUBSTRINGS, a list of strings, from STR." + (fold (lambda (substring result) + (string-replace-substring result substring "")) + str + substrings)) + +(define (replace-substrings str replacement-alist) + "Replace substrings in STR according to REPLACEMENT-ALIST, an +association list mapping substrings to their replacements." + (fold (match-lambda* + (((substring . replacement) str) + (string-replace-substring str substring replacement))) + str + replacement-alist)) + +(define (sanitize-rdf-string str) + (replace-substrings + (string-trim-both str) + '(("\r" . "\\r") + ("\n" . "\\n") + ("\"" . "'") + ("\v" . "") + ("\xad" . "") + ("\x28" . "") + ("\x29" . "") + ("\xa0" . " ") + ("\x02" . "") + ("\x01" . "")))) + +(define (snake->lower-camel str) + (let ((char-list (string->list str))) + (call-with-output-string + (lambda (port) + (put-char port (char-downcase (string-ref str 0))) + (map (lambda (char previous-char) + (unless (char=? char #\_) + (put-char port (if (char=? previous-char #\_) + (char-upcase char) + char)))) + (drop char-list 1) + char-list))))) + +(define (string-capitalize-first string) + (string-titlecase + (string-downcase string) 0 1)) + diff --git a/transform/table.scm b/transform/table.scm new file mode 100644 index 0000000..e6254d0 --- /dev/null +++ b/transform/table.scm @@ -0,0 +1,28 @@ +(define-module (transform table) + #:use-module (srfi srfi-9 gnu) + #:export (make-table + table-name + table-size + table-columns + set-table-columns + make-column + column-name + column-type + column-transformed?)) + +(define-immutable-record-type
+ (make-table name size columns) + table? + (name table-name) + (size table-size) + (columns table-columns set-table-columns)) + +(define-immutable-record-type + (column-constructor name type transformed?) + column? + (name column-name) + (type column-type) + (transformed? column-transformed?)) + +(define* (make-column name type #:optional transformed?) + (column-constructor name type transformed?)) diff --git a/transform/triples.scm b/transform/triples.scm new file mode 100644 index 0000000..8a64c5b --- /dev/null +++ b/transform/triples.scm @@ -0,0 +1,110 @@ +(define-module (transform triples) + #:use-module (ice-9 regex) + #:use-module (ice-9 match) + #:use-module (transform strings) + #:export (ontology + string->identifier + prefix + triple + scm->triples + annotate-field + string->binomial-name)) + +(define (annotate-field field schema) + (let ([schema (cond ((symbol? schema) + (symbol->string schema)) + ((string? schema) schema) + (else + (error "Use a string/symbol")))] + [string-field (if (number? field) (number->string field) field)]) + (if (or (and (string? string-field) (string-null? string-field)) + (eq? string-field #f)) + "" + (string->symbol + (format #f "~s~a" string-field schema))))) + +(define* (string->identifier prefix str + #:optional #:key + (ontology "gn:") + (separator "_") + (proc string-downcase)) + "Convert STR to a turtle identifier after replacing illegal +characters with an underscore and prefixing with gn:PREFIX." + (if (or (and (string? str) (string-null? str)) + (eq? str #f)) + "" + (string->symbol + (string-append ontology prefix separator + (string-delete + (lambda (c) + (eq? c #\))) + (string-map (lambda (c) + (case c + ((#\/ #\< #\> #\+ #\( #\space #\@) #\_) + (else c))) + (proc + (string-trim-right str #\.)))))))) + + +(define* (prefix prefix iri #:optional (ttl? #t)) + (format #t + (if ttl? + "@prefix ~a ~a .~%" + "PREFIX ~a ~a ~%") + prefix iri)) + +(define (ontology prefix value) + (if (and (string? value) (string-null? value)) + "" + (string->symbol + `,(format #f "~a~a" prefix value)))) + +(define (triple subject predicate object) + (unless (or (string? subject) + (symbol? subject)) + (error "Triple subject not a string or symbol:" + (list subject predicate object))) + (unless (or (string? predicate) + (symbol? predicate)) + (error "Triple predicate not a string or symbol:" + (list subject predicate object))) + (unless (or (string? object) + (symbol? object) + (number? object)) + (error "Triple object not a string, symbol or number:" + (list subject predicate object))) + (let ([pattern (match object + ((or (? symbol? object) + (? (lambda (el) (string-match "^\\[ .* \\]$" el)) object)) + "~a ~a ~a .~%") + (_ "~a ~a \"~a\" .~%"))]) + (format #t pattern subject predicate + (if (symbol? object) (symbol->string object) object)))) + +(define* (scm->triples alist id + #:optional + (fn triple)) + (for-each (match-lambda + ((predicate . object) + (when (cond + ((string? object) + (not (string-blank? object))) + (else object)) + (fn id predicate object)))) + alist)) + +(define (string->binomial-name name) + (let ((binomial? + (string-match + "\\\(.+\\)" + name))) + (string->identifier + "" + (if binomial? + (regexp-substitute/global + #f "[^[:space:]A-Za-z0-9:]" + (match:substring binomial?) + 'pre "" 'post) + name) + #:separator "" + #:proc string-capitalize-first))) diff --git a/visualize-schema.scm b/visualize-schema.scm index 6850cb8..92f9272 100755 --- a/visualize-schema.scm +++ b/visualize-schema.scm @@ -12,8 +12,8 @@ (sparql driver) (sparql lang) (sparql util) - (dump string-similarity) - (dump table)) + (transform string-similarity) + (transform table)) (define rdfs (prefix "http://www.w3.org/2000/01/rdf-schema#")) @@ -83,7 +83,7 @@ "Return HTML string label for TABLE." (sxml->graphviz-html `(table (@ (cellborder 0) - (bgcolor ,(if (any column-dumped? + (bgcolor ,(if (any column-transformed? (table-columns table)) "lightgrey" "white"))) @@ -94,7 +94,7 @@ (human-units (table-size table))))) ,@(map (lambda (column) `(tr (td (@ (port ,(column-name column)) - ,@(if (column-dumped? column) + ,@(if (column-transformed? column) `((bgcolor "green")) '())) ,(column-name column)))) @@ -140,17 +140,17 @@ return #f. ALL-TABLES is a list of all tables in the database." "Return list of all tables in DB. Each element of the returned list is a
object." (map (match-lambda - ((table size fields field-types field-dumped) + ((table size fields field-types field-transformed) (make-table table (string->number size) (map make-column (string-split fields #\,) (string-split field-types #\,) (map (cut string=? <> "1") - (string-split field-dumped #\,)))))) + (string-split field-transformed #\,)))))) (sparql-query-records "PREFIX gn: -SELECT SAMPLE(?tablename) SAMPLE(?size) GROUP_CONCAT(?fieldname ; separator=\",\") GROUP_CONCAT(?fieldtype ; separator=\",\") GROUP_CONCAT(EXISTS{ ?dump rdf:type gn:dump . ?dump gn:dependsOn ?field .} ; separator=\",\") +SELECT SAMPLE(?tablename) SAMPLE(?size) GROUP_CONCAT(?fieldname ; separator=\",\") GROUP_CONCAT(?fieldtype ; separator=\",\") GROUP_CONCAT(EXISTS{ ?transform rdf:type gn:transform . ?transform gn:dependsOn ?field .} ; separator=\",\") WHERE { ?table rdf:type gn:sqlTable ; @@ -239,7 +239,7 @@ WHERE { ?predicate rdfs:domain ?type ; rdfs:range rdfs:Literal . - ?dump rdf:type gn:dump ; + ?transform rdf:type gn:transform ; gn:createsPredicate ?predicate ; gn:forSubjectType ?type ; gn:dependsOn ?field . -- cgit v1.2.3