aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2021-12-11 12:59:20 +0530
committerArun Isaac2021-12-11 14:19:34 +0530
commitbc4b04a3dbaf40a6365a0d7087ef9ca66d6dedb6 (patch)
treee62d7e12e47b279fc2014136ba2c7e2bd290b58f
parent1ea4d75e093a43394e32f8662ffc1a42b870b0ea (diff)
downloadgn-transform-databases-bc4b04a3dbaf40a6365a0d7087ef9ca66d6dedb6.tar.gz
Visualize schema.
* .dir-locals.el (scheme-mode): Indent set-table-columns correctly. * dump.scm: Import (srfi srfi-9 gnu). (%database-name): New variable. (<table>): New type. (tables, string-remove-suffix-ci, human-units, graph->dot, dump-schema): New functions. Invoke dump-schema. * guix.scm: Import (gnu packages bioinformatics). Add ccwl, graphviz and guile-libyaml to the manifest.
-rw-r--r--.dir-locals.el3
-rwxr-xr-xdump.scm121
-rw-r--r--guix.scm9
3 files changed, 129 insertions, 4 deletions
diff --git a/.dir-locals.el b/.dir-locals.el
index dfb0049..f0a8de9 100644
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -4,4 +4,5 @@
((nil
(indent-tabs-mode))
(scheme-mode
- (eval put 'map-alist 'scheme-indent-function 1)))
+ (eval put 'map-alist 'scheme-indent-function 1)
+ (eval put 'set-table-columns 'scheme-indent-function 1)))
diff --git a/dump.scm b/dump.scm
index 63b9229..8051426 100755
--- a/dump.scm
+++ b/dump.scm
@@ -5,6 +5,7 @@
(use-modules (rnrs io ports)
(srfi srfi-1)
+ (srfi srfi-9 gnu)
(srfi srfi-26)
(ice-9 match)
(ice-9 string-fun)
@@ -27,6 +28,10 @@
":")
proc)))
+(define %database-name
+ (assq-ref (call-with-input-file "conn.scm" read)
+ 'database))
+
(define %dump-directory
(string-append (getenv "HOME") "/data/dump"))
@@ -478,6 +483,118 @@ characters with an underscore and prefixing with gn:PREFIX."
data-field table-name))
(close-port port)))
+
+;;; Visualize schema
+
+(define-immutable-record-type <table>
+ (make-table name size columns)
+ table?
+ (name table-name)
+ (size table-size)
+ (columns table-columns set-table-columns))
+
+(define (tables db)
+ "Return list of all tables in DB. Each element of the returned list
+is a <table> object."
+ (map (lambda (table)
+ (set-table-columns table
+ (sql-map (cut assoc-ref <> "Field")
+ db
+ (format #f "SHOW COLUMNS FROM ~a" (table-name table)))))
+ (sql-map (lambda (row)
+ (make-table (assoc-ref row "table_name")
+ ;; FIXME: This is probably correct only for
+ ;; MyISAM tables.
+ (assoc-ref row "data_length")
+ #f))
+ db
+ (select-query ((information_schema.tables table_name)
+ (information_schema.tables data_length))
+ (information_schema.tables)
+ (format #f "WHERE table_schema = '~a'" %database-name)))))
+
+(define (string-remove-suffix-ci suffix str)
+ "Remove SUFFIX from STR if present. Suffix check is
+case-insensitive."
+ (if (string-suffix-ci? suffix str)
+ (substring str 0 (- (string-length str)
+ (string-length suffix)))
+ str))
+
+(define (human-units bytes)
+ "Return number of BYTES as a string with human-readable units."
+ (cond
+ ((< bytes 1024)
+ (format #f "~a B" bytes))
+ ((< bytes (expt 1024 2))
+ (format #f "~a KiB" (round-quotient bytes 1024)))
+ ((< bytes (expt 1024 3))
+ (format #f "~a MiB" (round-quotient bytes (expt 1024 2))))
+ (else
+ (format #f "~a GiB" (round-quotient bytes (expt 1024 3))))))
+
+;; This wrapper function is necessary to work around a bug in (ccwl
+;; graphviz) whereby backslashes in node labels are escaped and
+;; printed as \\.
+(define (graph->dot graph)
+ (put-string (current-output-port)
+ (string-replace-substring
+ (call-with-output-string
+ (cut (@@ (ccwl graphviz) graph->dot) graph <>))
+ "\\\\" "\\")))
+
+(define (dump-schema db)
+ (let ((tables (tables db)))
+ (graph->dot
+ ((@@ (ccwl graphviz) graph) 'schema
+ #:nodes (map (lambda (table)
+ ((@@ (ccwl graphviz) graph-node)
+ (table-name table)
+ `((shape . "record")
+ (label . ,(format #f "{~a (~a) | ~a}"
+ (table-name table)
+ (human-units (table-size table))
+ (string-replace-substring
+ (string-replace-substring
+ (string-join (table-columns table) "\\l" 'suffix)
+ "<" "\\<")
+ ">" "\\>"))))))
+ tables)
+ #:edges (append-map (lambda (table)
+ (filter-map (lambda (column)
+ (and=> (cond
+ ((string-prefix-ci? "StrainId" column)
+ 'Strain)
+ ((let ((target-table (string-remove-suffix-ci "id" column)))
+ ;; Column has an "id" suffix.
+ (and (string-suffix-ci? "id" column)
+ ;; Column is not the original key.
+ (not (string=? (table-name table) target-table))
+ ;; Prefix is a table name, at least approximately.
+ (or (find (lambda (table)
+ (string=? target-table (table-name table)))
+ tables)
+ ;; Try deleting underscores and ignoring case.
+ (find (lambda (table)
+ (string-ci=? (string-delete #\_ target-table)
+ (string-delete #\_ (table-name table))))
+ tables)
+ ;; Try pluralization.
+ (let ((target-table (string-append target-table "s")))
+ (and (not (string=? (table-name table) target-table))
+ (find (lambda (table)
+ (string-ci=? target-table
+ (table-name table)))
+ tables))))))
+ => table-name)
+ (else #f))
+ (cut cons (table-name table) <>)))
+ (table-columns table)))
+ tables)))))
+
+
+;; Main function
+
(define (prefix prefix iri)
(format #t "@prefix ~a ~a .~%" prefix iri))
@@ -500,4 +617,6 @@ characters with an underscore and prefixing with gn:PREFIX."
(dump-investigators db)
(dump-avg-method db)
(dump-gene-chip db)
- (dump-info-files db)))))
+ (dump-info-files db)))
+ (with-output-to-file (string-append %dump-directory "/schema.dot")
+ (cut dump-schema db))))
diff --git a/guix.scm b/guix.scm
index d328ac8..75b3a8d 100644
--- a/guix.scm
+++ b/guix.scm
@@ -4,8 +4,13 @@
;;
;; Happy hacking!
-(use-modules (gnu packages guile)
+(use-modules (gnu packages bioinformatics)
+ (gnu packages graphviz)
+ (gnu packages guile)
(gnu packages guile-xyz))
(packages->manifest
- (list guile-3.0 guile-dbi guile-dbd-mysql))
+ (list guile-3.0 guile-dbi guile-dbd-mysql
+ ;; We abuse (ccwl graphviz) as a library to visualize the database
+ ;; schema. Hence we need ccwl and guile-libyaml.
+ ccwl graphviz guile-libyaml))