diff options
author | Arun Isaac | 2021-12-11 12:59:20 +0530 |
---|---|---|
committer | Arun Isaac | 2021-12-11 14:19:34 +0530 |
commit | bc4b04a3dbaf40a6365a0d7087ef9ca66d6dedb6 (patch) | |
tree | e62d7e12e47b279fc2014136ba2c7e2bd290b58f | |
parent | 1ea4d75e093a43394e32f8662ffc1a42b870b0ea (diff) | |
download | gn-transform-databases-bc4b04a3dbaf40a6365a0d7087ef9ca66d6dedb6.tar.gz |
Visualize schema.
* .dir-locals.el (scheme-mode): Indent set-table-columns correctly.
* dump.scm: Import (srfi srfi-9 gnu).
(%database-name): New variable.
(<table>): New type.
(tables, string-remove-suffix-ci, human-units, graph->dot,
dump-schema): New functions.
Invoke dump-schema.
* guix.scm: Import (gnu packages bioinformatics) and (gnu packages
graphviz). Add ccwl, graphviz and guile-libyaml to the manifest.
-rw-r--r-- | .dir-locals.el | 3 | ||||
-rwxr-xr-x | dump.scm | 121 | ||||
-rw-r--r-- | guix.scm | 9 |
3 files changed, 129 insertions, 4 deletions
diff --git a/.dir-locals.el b/.dir-locals.el index dfb0049..f0a8de9 100644 --- a/.dir-locals.el +++ b/.dir-locals.el @@ -4,4 +4,5 @@ ((nil (indent-tabs-mode)) (scheme-mode - (eval put 'map-alist 'scheme-indent-function 1))) + (eval put 'map-alist 'scheme-indent-function 1) + (eval put 'set-table-columns 'scheme-indent-function 1))) @@ -5,6 +5,7 @@ (use-modules (rnrs io ports) (srfi srfi-1) + (srfi srfi-9 gnu) (srfi srfi-26) (ice-9 match) (ice-9 string-fun) @@ -27,6 +28,10 @@ ":") proc))) +(define %database-name + (assq-ref (call-with-input-file "conn.scm" read) + 'database)) + (define %dump-directory (string-append (getenv "HOME") "/data/dump")) @@ -478,6 +483,118 @@ characters with an underscore and prefixing with gn:PREFIX." data-field table-name)) (close-port port))) + +;;; Visualize schema + +(define-immutable-record-type <table> + (make-table name size columns) + table? + (name table-name) + (size table-size) + (columns table-columns set-table-columns)) + +(define (tables db) + "Return list of all tables in DB. Each element of the returned list +is a <table> object." + (map (lambda (table) + (set-table-columns table + (sql-map (cut assoc-ref <> "Field") + db + (format #f "SHOW COLUMNS FROM ~a" (table-name table))))) + (sql-map (lambda (row) + (make-table (assoc-ref row "table_name") + ;; FIXME: This is probably correct only for + ;; MyISAM tables. + (assoc-ref row "data_length") + #f)) + db + (select-query ((information_schema.tables table_name) + (information_schema.tables data_length)) + (information_schema.tables) + (format #f "WHERE table_schema = '~a'" %database-name))))) + +(define (string-remove-suffix-ci suffix str) + "Remove SUFFIX from STR if present. Suffix check is +case-insensitive." + (if (string-suffix-ci? suffix str) + (substring str 0 (- (string-length str) + (string-length suffix))) + str)) + +(define (human-units bytes) + "Return number of BYTES as a string with human-readable units." 
+ (cond + ((< bytes 1024) + (format #f "~a B" bytes)) + ((< bytes (expt 1024 2)) + (format #f "~a KiB" (round-quotient bytes 1024))) + ((< bytes (expt 1024 3)) + (format #f "~a MiB" (round-quotient bytes (expt 1024 2)))) + (else + (format #f "~a GiB" (round-quotient bytes (expt 1024 3)))))) + +;; This wrapper function is necessary to work around a bug in (ccwl +;; graphviz) whereby backslashes in node labels are escaped and +;; printed as \\. +(define (graph->dot graph) + (put-string (current-output-port) + (string-replace-substring + (call-with-output-string + (cut (@@ (ccwl graphviz) graph->dot) graph <>)) + "\\\\" "\\"))) + +(define (dump-schema db) + (let ((tables (tables db))) + (graph->dot + ((@@ (ccwl graphviz) graph) 'schema + #:nodes (map (lambda (table) + ((@@ (ccwl graphviz) graph-node) + (table-name table) + `((shape . "record") + (label . ,(format #f "{~a (~a) | ~a}" + (table-name table) + (human-units (table-size table)) + (string-replace-substring + (string-replace-substring + (string-join (table-columns table) "\\l" 'suffix) + "<" "\\<") + ">" "\\>")))))) + tables) + #:edges (append-map (lambda (table) + (filter-map (lambda (column) + (and=> (cond + ((string-prefix-ci? "StrainId" column) + 'Strain) + ((let ((target-table (string-remove-suffix-ci "id" column))) + ;; Column has an "id" suffix. + (and (string-suffix-ci? "id" column) + ;; Column is not the original key. + (not (string=? (table-name table) target-table)) + ;; Prefix is a table name, at least approximately. + (or (find (lambda (table) + (string=? target-table (table-name table))) + tables) + ;; Try deleting underscores and ignoring case. + (find (lambda (table) + (string-ci=? (string-delete #\_ target-table) + (string-delete #\_ (table-name table)))) + tables) + ;; Try pluralization. + (let ((target-table (string-append target-table "s"))) + (and (not (string=? (table-name table) target-table)) + (find (lambda (table) + (string-ci=? 
target-table + (table-name table))) + tables)))))) + => table-name) + (else #f)) + (cut cons (table-name table) <>))) + (table-columns table))) + tables))))) + + +;; Main function + (define (prefix prefix iri) (format #t "@prefix ~a ~a .~%" prefix iri)) @@ -500,4 +617,6 @@ characters with an underscore and prefixing with gn:PREFIX." (dump-investigators db) (dump-avg-method db) (dump-gene-chip db) - (dump-info-files db))))) + (dump-info-files db))) + (with-output-to-file (string-append %dump-directory "/schema.dot") + (cut dump-schema db)))) @@ -4,8 +4,13 @@ ;; ;; Happy hacking! -(use-modules (gnu packages guile) +(use-modules (gnu packages bioinformatics) + (gnu packages graphviz) + (gnu packages guile) (gnu packages guile-xyz)) (packages->manifest - (list guile-3.0 guile-dbi guile-dbd-mysql)) + (list guile-3.0 guile-dbi guile-dbd-mysql + ;; We abuse (ccwl graphviz) as a library to visualize the database + ;; schema. Hence we need ccwl and guile-libyaml. + ccwl graphviz guile-libyaml)) |