diff options
author | Arun Isaac | 2021-12-20 16:30:53 +0530 |
---|---|---|
committer | Arun Isaac | 2021-12-20 16:37:33 +0530 |
commit | b4bc965ba4022b4dda1ee9e75cce5815ab0defa2 (patch) | |
tree | 6f2083b61d4181ae8725664a369806ab351f9cca | |
parent | f529d95759683e4c1cf2551fc27125c1308ee2dd (diff) | |
download | gn-transform-databases-b4bc965ba4022b4dda1ee9e75cce5815ab0defa2.tar.gz |
Move schema visualization to separate script.
* dump.scm: Do not import (sxml simple) and (dump string-similarity).
(string-remove-suffix-ci, floor-log1024, human-units,
human-units-color, sxml->xml-string, sxml->graphviz-html, table-label,
table->graphviz-node, column->foreign-table, tables->graphviz-edges):
Move to ...
(dump-schema): Dump schema to RDF.
(main): Call dump-schema without setting schema.dot as the output
file.
* visualize-schema.scm: ... here.
-rwxr-xr-x | dump.scm | 136 | ||||
-rw-r--r-- | visualize-schema.scm | 170 |
2 files changed, 188 insertions, 118 deletions
@@ -8,9 +8,7 @@ (srfi srfi-26) (ice-9 match) (ice-9 string-fun) - (sxml simple) (dump sql) - (dump string-similarity) (dump table) (dump utils)) @@ -501,7 +499,7 @@ ALIST field-name) forms." (close-port port))) -;;; Visualize schema +;;; Dump schema (define (tables db) "Return list of all tables in DB. Each element of the returned list @@ -525,47 +523,6 @@ is a <table> object." (information_schema.tables) (format #f "WHERE table_schema = '~a'" %database-name))))) -(define (string-remove-suffix-ci suffix str) - "Remove SUFFIX from STR if present. Suffix check is -case-insensitive." - (if (string-suffix-ci? suffix str) - (substring str 0 (- (string-length str) - (string-length suffix))) - str)) - -(define (floor-log1024 x) - "Return the floor of the base 1024 logarithm of X." - (if (< x 1024) - 0 - (1+ (floor-log1024 (/ x 1024))))) - -(define (human-units bytes) - "Return number of BYTES as a string with human-readable units." - (let* ((integer-log (floor-log1024 bytes))) - (format #f "~a ~a" - (round-quotient bytes (expt 1024 (min integer-log 3))) - (case integer-log - ((0) "B") - ((1) "KiB") - ((2) "MiB") - (else "GiB"))))) - -(define (human-units-color bytes) - "Return the table header color coding for a table of size BYTES." - (let* ((color-scheme "purd4")) - (format #f "/~a/~a" - color-scheme - (1+ (min (floor-log1024 bytes) 3))))) - -(define (sxml->xml-string tree) - "Serialize sxml TREE to a string. Return the serialized string." - (call-with-output-string - (cut sxml->xml tree <>))) - -(define (sxml->graphviz-html tree) - "Convert sxml TREE to a graphviz <html-string>, and return it." - ((@@ (ccwl graphviz) html-string) (sxml->xml-string tree))) - (define (dumped-table? table) "Return non-#f if TABLE has been dumped. Else, return #f." (any (match-lambda @@ -575,79 +532,23 @@ case-insensitive." (x (error "Malformed entry in %dumped:" x))) %dumped)) -(define (table-label table) - "Return HTML string label for TABLE." - (sxml->graphviz-html - `(table (@ (cellborder 0) - (bgcolor ,(if (dumped-table? table) "lightgrey" "white"))) - (tr (td (@ (border 1) - (bgcolor ,(human-units-color (table-size table)))) - ,(format #f "~a (~a)" - (table-name table) - (human-units (table-size table))))) - ,@(map (lambda (column) - `(tr (td (@ (port ,(column-name column)) - ,@(if (member (cons (string->symbol (table-name table)) - (string->symbol (column-name column))) - %dumped) - '((bgcolor "green")) - '())) - ,(column-name column)))) - (table-columns table))))) - -(define (table->graphviz-node table) - "Convert TABLE to graphviz node, and return it." - ((@@ (ccwl graphviz) graph-node) - (table-name table) - `((shape . "none") - (label . ,(table-label table))))) - -(define (column->foreign-table table column all-tables) - "If COLUMN in TABLE is a foreign key, return the table it refers to. Else, -return #f. ALL-TABLES is a list of all tables in the database." - (cond - ((and (string=? (column-name column) "UserId") - (string=? (table-name table) "UserPrivilege")) - 'User) - ((string-ci=? (column-name column) "GenbankID") - 'Genbank) - ((not (column-int? column)) #f) - ((let ((string-similarity-threshold 0.8) - (target-table - (or (and=> (find (lambda (suffix) - (string-suffix-ci? suffix (column-name column))) - (list "id1" "id2" "_id" "id")) - (cut string-remove-suffix-ci <> (column-name column))) - (column-name column)))) - (and (not (jaccard-string-similar? target-table - (table-name table))) - (find (lambda (table) - (jaccard-string-similar? - target-table (table-name table))) - all-tables))) - => table-name) - (else #f))) - -(define (tables->graphviz-edges tables) - "Return the list of graphviz edges representing foreign key -relations in TABLES." - (append-map (lambda (table) - (filter-map (lambda (column) - (and=> (column->foreign-table table column tables) - (cut cons - ((@@ (ccwl graphviz) graph-port) - (table-name table) - (column-name column)) - <>))) - (table-columns table))) - tables)) - (define (dump-schema db) (let ((tables (tables db))) - ((@@ (ccwl graphviz) graph->dot) - ((@@ (ccwl graphviz) graph) 'schema - #:nodes (map table->graphviz-node tables) - #:edges (tables->graphviz-edges tables))))) + (for-each (lambda (table) + (let ((table-id (string->identifier "table" (table-name table)))) + (triple table-id 'rdf:type 'gn:sqlTable) + (triple table-id 'gn:name (table-name table)) + (triple table-id 'gn:hasSize (table-size table)) + (for-each (lambda (column) + (let ((column-id (string->identifier + "field" (string-append (table-name table) + "__" (column-name column))))) + (triple column-id 'rdf:type 'gn:sqlTableField) + (triple column-id 'gn:name (column-name column)) + (triple column-id 'gn:sqlFieldType (column-type column)) + (triple table-id 'gn:hasField column-id))) + (table-columns table)))) + tables))) ;; Main function @@ -675,6 +576,5 @@ relations in TABLES." (dump-investigators db) (dump-avg-method db) (dump-gene-chip db) - (dump-info-files db))) - (with-output-to-file (string-append %dump-directory "/schema.dot") - (cut dump-schema db)))) + (dump-info-files db) + (dump-schema db))))) diff --git a/visualize-schema.scm b/visualize-schema.scm new file mode 100644 index 0000000..091a74a --- /dev/null +++ b/visualize-schema.scm @@ -0,0 +1,170 @@ +(use-modules (rnrs io ports) + (srfi srfi-1) + (srfi srfi-26) + (srfi srfi-28) + (srfi srfi-71) + (srfi srfi-171) + (ice-9 match) + (sxml simple) + (sparql driver) + (sparql lang) + (sparql util) + (dump string-similarity) + (dump table)) + +(define rdfs + (prefix "http://www.w3.org/2000/01/rdf-schema#")) + +(define rdf + (prefix "http://www.w3.org/1999/02/22-rdf-syntax-ns#")) + +(define gn + (prefix "http://genenetwork.org/")) + +(define (sparql-query-records . args) + (query-results->list (apply sparql-query + (append args + (list #:host "127.0.0.1" + #:port 8891))) + #t)) + +(define (floor-log1024 x) + "Return the floor of the base 1024 logarithm of X." + (if (< x 1024) + 0 + (1+ (floor-log1024 (/ x 1024))))) + +(define (human-units bytes) + "Return number of BYTES as a string with human-readable units." + (let* ((integer-log (floor-log1024 bytes))) + (format "~a ~a" + (round-quotient bytes (expt 1024 (min integer-log 3))) + (case integer-log + ((0) "B") + ((1) "KiB") + ((2) "MiB") + (else "GiB"))))) + +(define (human-units-color bytes) + "Return the table header color coding for a table of size BYTES." + (let* ((color-scheme "purd4")) + (format "/~a/~a" + color-scheme + (1+ (min (floor-log1024 bytes) 3))))) + +(define (sxml->xml-string tree) + "Serialize sxml TREE to a string. Return the serialized string." + (call-with-output-string + (cut sxml->xml tree <>))) + +(define (sxml->graphviz-html tree) + "Convert sxml TREE to a graphviz <html-string>, and return it." + ((@@ (ccwl graphviz) html-string) (sxml->xml-string tree))) + +(define (table-label table) + "Return HTML string label for TABLE." + (sxml->graphviz-html + `(table (@ (cellborder 0) + (bgcolor "white")) + (tr (td (@ (border 1) + (bgcolor ,(human-units-color (table-size table)))) + ,(format "~a (~a)" + (table-name table) + (human-units (table-size table))))) + ,@(map (lambda (column) + `(tr (td (@ (port ,(column-name column))) + ,(column-name column)))) + (table-columns table))))) + +(define (string-remove-suffix-ci suffix str) + "Remove SUFFIX from STR if present. Suffix check is +case-insensitive." + (if (string-suffix-ci? suffix str) + (substring str 0 (- (string-length str) + (string-length suffix))) + str)) + +(define (column->foreign-table table column all-tables) + "If COLUMN in TABLE is a foreign key, return the table it refers to. Else, +return #f. ALL-TABLES is a list of all tables in the database." + (cond + ((and (string=? (column-name column) "UserId") + (string=? (table-name table) "UserPrivilege")) + 'User) + ((string-ci=? (column-name column) "GenbankID") + 'Genbank) + ((not (or (string-prefix? "int" (column-type column)) + (string-prefix? "smallint" (column-type column)))) + #f) + ((let ((string-similarity-threshold 0.8) + (target-table + (or (and=> (find (lambda (suffix) + (string-suffix-ci? suffix (column-name column))) + (list "id1" "id2" "_id" "id")) + (cut string-remove-suffix-ci <> (column-name column))) + (column-name column)))) + (and (not (jaccard-string-similar? target-table + (table-name table))) + (find (lambda (table) + (jaccard-string-similar? + target-table (table-name table))) + all-tables))) + => table-name) + (else #f))) + +(define (tables) + "Return list of all tables in DB. Each element of the returned list +is a <table> object." + (map (match-lambda + ((table size) + (make-table table + ;; FIXME: Why is size coming out as a string? + (string->number size) + (map (match-lambda + ((field type) (make-column field type))) + (sparql-query-records + ;; We use format to construct the query instead of + ;; select due to an outstanding bug in + ;; guile-sparql. See + ;; https://github.com/roelj/guile-sparql/issues/5 + (format + "SELECT ?fieldname ?fieldtype +WHERE +{ + ?table <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://genenetwork.org/sqlTable> . + ?table <http://genenetwork.org/name> ~s . + ?field <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://genenetwork.org/sqlTableField> . + ?table <http://genenetwork.org/hasField> ?field . + ?field <http://genenetwork.org/name> ?fieldname . + ?field <http://genenetwork.org/sqlFieldType> ?fieldtype . +}" table)))))) + (sparql-query-records + (select '(tablename size) + `((table ,(rdf "type") ,(gn "sqlTable")) + (table ,(gn "name") tablename) + (table ,(gn "hasSize") size)))))) + +(define (foreign-key-graphviz-edges tables) + "Return the list of graphviz edges representing foreign key +relations in TABLES." + (append-map (lambda (table) + (filter-map (lambda (column) + (and=> (column->foreign-table table column tables) + (cut cons + ((@@ (ccwl graphviz) graph-port) + (table-name table) + (column-name column)) + <>))) + (table-columns table))) + tables)) + +(let ((all-tables (tables))) + ((@@ (ccwl graphviz) graph->dot) + ((@@ (ccwl graphviz) graph) 'schema + #:nodes (map (lambda (table) + ((@@ (ccwl graphviz) graph-node) + (table-name table) + `((shape . "none") + (label . ,(table-label table))))) + all-tables) + #:edges (foreign-key-graphviz-edges all-tables)))) |