aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Isaac2021-12-20 16:30:53 +0530
committerArun Isaac2021-12-20 16:37:33 +0530
commitb4bc965ba4022b4dda1ee9e75cce5815ab0defa2 (patch)
tree6f2083b61d4181ae8725664a369806ab351f9cca
parentf529d95759683e4c1cf2551fc27125c1308ee2dd (diff)
downloadgn-transform-databases-b4bc965ba4022b4dda1ee9e75cce5815ab0defa2.tar.gz
Move schema visualization to separate script.
* dump.scm: Do not import (sxml simple) and (dump string-similarity). (string-remove-suffix-ci, floor-log1024, human-units, human-units-color, sxml->xml-string, sxml->graphviz-html, table-label, table->graphviz-node, column->foreign-table, tables->graphviz-edges): Move to ... (dump-schema): Dump schema to RDF. (main): Call dump-schema without setting schema.dot as the output file. * visualize-schema.scm: ... here.
-rwxr-xr-xdump.scm136
-rw-r--r--visualize-schema.scm170
2 files changed, 188 insertions, 118 deletions
diff --git a/dump.scm b/dump.scm
index e35b377..2c0d55c 100755
--- a/dump.scm
+++ b/dump.scm
@@ -8,9 +8,7 @@
(srfi srfi-26)
(ice-9 match)
(ice-9 string-fun)
- (sxml simple)
(dump sql)
- (dump string-similarity)
(dump table)
(dump utils))
@@ -501,7 +499,7 @@ ALIST field-name) forms."
(close-port port)))
-;;; Visualize schema
+;;; Dump schema
(define (tables db)
"Return list of all tables in DB. Each element of the returned list
@@ -525,47 +523,6 @@ is a <table> object."
(information_schema.tables)
(format #f "WHERE table_schema = '~a'" %database-name)))))
-(define (string-remove-suffix-ci suffix str)
- "Remove SUFFIX from STR if present. Suffix check is
-case-insensitive."
- (if (string-suffix-ci? suffix str)
- (substring str 0 (- (string-length str)
- (string-length suffix)))
- str))
-
-(define (floor-log1024 x)
- "Return the floor of the base 1024 logarithm of X."
- (if (< x 1024)
- 0
- (1+ (floor-log1024 (/ x 1024)))))
-
-(define (human-units bytes)
- "Return number of BYTES as a string with human-readable units."
- (let* ((integer-log (floor-log1024 bytes)))
- (format #f "~a ~a"
- (round-quotient bytes (expt 1024 (min integer-log 3)))
- (case integer-log
- ((0) "B")
- ((1) "KiB")
- ((2) "MiB")
- (else "GiB")))))
-
-(define (human-units-color bytes)
- "Return the table header color coding for a table of size BYTES."
- (let* ((color-scheme "purd4"))
- (format #f "/~a/~a"
- color-scheme
- (1+ (min (floor-log1024 bytes) 3)))))
-
-(define (sxml->xml-string tree)
- "Serialize sxml TREE to a string. Return the serialized string."
- (call-with-output-string
- (cut sxml->xml tree <>)))
-
-(define (sxml->graphviz-html tree)
- "Convert sxml TREE to a graphviz <html-string>, and return it."
- ((@@ (ccwl graphviz) html-string) (sxml->xml-string tree)))
-
(define (dumped-table? table)
"Return non-#f if TABLE has been dumped. Else, return #f."
(any (match-lambda
@@ -575,79 +532,23 @@ case-insensitive."
(x (error "Malformed entry in %dumped:" x)))
%dumped))
-(define (table-label table)
- "Return HTML string label for TABLE."
- (sxml->graphviz-html
- `(table (@ (cellborder 0)
- (bgcolor ,(if (dumped-table? table) "lightgrey" "white")))
- (tr (td (@ (border 1)
- (bgcolor ,(human-units-color (table-size table))))
- ,(format #f "~a (~a)"
- (table-name table)
- (human-units (table-size table)))))
- ,@(map (lambda (column)
- `(tr (td (@ (port ,(column-name column))
- ,@(if (member (cons (string->symbol (table-name table))
- (string->symbol (column-name column)))
- %dumped)
- '((bgcolor "green"))
- '()))
- ,(column-name column))))
- (table-columns table)))))
-
-(define (table->graphviz-node table)
- "Convert TABLE to graphviz node, and return it."
- ((@@ (ccwl graphviz) graph-node)
- (table-name table)
- `((shape . "none")
- (label . ,(table-label table)))))
-
-(define (column->foreign-table table column all-tables)
- "If COLUMN in TABLE is a foreign key, return the table it refers to. Else,
-return #f. ALL-TABLES is a list of all tables in the database."
- (cond
- ((and (string=? (column-name column) "UserId")
- (string=? (table-name table) "UserPrivilege"))
- 'User)
- ((string-ci=? (column-name column) "GenbankID")
- 'Genbank)
- ((not (column-int? column)) #f)
- ((let ((string-similarity-threshold 0.8)
- (target-table
- (or (and=> (find (lambda (suffix)
- (string-suffix-ci? suffix (column-name column)))
- (list "id1" "id2" "_id" "id"))
- (cut string-remove-suffix-ci <> (column-name column)))
- (column-name column))))
- (and (not (jaccard-string-similar? target-table
- (table-name table)))
- (find (lambda (table)
- (jaccard-string-similar?
- target-table (table-name table)))
- all-tables)))
- => table-name)
- (else #f)))
-
-(define (tables->graphviz-edges tables)
- "Return the list of graphviz edges representing foreign key
-relations in TABLES."
- (append-map (lambda (table)
- (filter-map (lambda (column)
- (and=> (column->foreign-table table column tables)
- (cut cons
- ((@@ (ccwl graphviz) graph-port)
- (table-name table)
- (column-name column))
- <>)))
- (table-columns table)))
- tables))
-
(define (dump-schema db)
(let ((tables (tables db)))
- ((@@ (ccwl graphviz) graph->dot)
- ((@@ (ccwl graphviz) graph) 'schema
- #:nodes (map table->graphviz-node tables)
- #:edges (tables->graphviz-edges tables)))))
+ (for-each (lambda (table)
+ (let ((table-id (string->identifier "table" (table-name table))))
+ (triple table-id 'rdf:type 'gn:sqlTable)
+ (triple table-id 'gn:name (table-name table))
+ (triple table-id 'gn:hasSize (table-size table))
+ (for-each (lambda (column)
+ (let ((column-id (string->identifier
+ "field" (string-append (table-name table)
+ "__" (column-name column)))))
+ (triple column-id 'rdf:type 'gn:sqlTableField)
+ (triple column-id 'gn:name (column-name column))
+ (triple column-id 'gn:sqlFieldType (column-type column))
+ (triple table-id 'gn:hasField column-id)))
+ (table-columns table))))
+ tables)))
;; Main function
@@ -675,6 +576,5 @@ relations in TABLES."
(dump-investigators db)
(dump-avg-method db)
(dump-gene-chip db)
- (dump-info-files db)))
- (with-output-to-file (string-append %dump-directory "/schema.dot")
- (cut dump-schema db))))
+ (dump-info-files db)
+ (dump-schema db)))))
diff --git a/visualize-schema.scm b/visualize-schema.scm
new file mode 100644
index 0000000..091a74a
--- /dev/null
+++ b/visualize-schema.scm
@@ -0,0 +1,170 @@
+(use-modules (rnrs io ports)
+ (srfi srfi-1)
+ (srfi srfi-26)
+ (srfi srfi-28)
+ (srfi srfi-71)
+ (srfi srfi-171)
+ (ice-9 match)
+ (sxml simple)
+ (sparql driver)
+ (sparql lang)
+ (sparql util)
+ (dump string-similarity)
+ (dump table))
+
+(define rdfs
+ (prefix "http://www.w3.org/2000/01/rdf-schema#"))
+
+(define rdf
+ (prefix "http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
+
+(define gn
+ (prefix "http://genenetwork.org/"))
+
+(define (sparql-query-records . args)
+ (query-results->list (apply sparql-query
+ (append args
+ (list #:host "127.0.0.1"
+ #:port 8891)))
+ #t))
+
+(define (floor-log1024 x)
+ "Return the floor of the base 1024 logarithm of X."
+ (if (< x 1024)
+ 0
+ (1+ (floor-log1024 (/ x 1024)))))
+
+(define (human-units bytes)
+ "Return number of BYTES as a string with human-readable units."
+ (let* ((integer-log (floor-log1024 bytes)))
+ (format "~a ~a"
+ (round-quotient bytes (expt 1024 (min integer-log 3)))
+ (case integer-log
+ ((0) "B")
+ ((1) "KiB")
+ ((2) "MiB")
+ (else "GiB")))))
+
+(define (human-units-color bytes)
+ "Return the table header color coding for a table of size BYTES."
+ (let* ((color-scheme "purd4"))
+ (format "/~a/~a"
+ color-scheme
+ (1+ (min (floor-log1024 bytes) 3)))))
+
+(define (sxml->xml-string tree)
+ "Serialize sxml TREE to a string. Return the serialized string."
+ (call-with-output-string
+ (cut sxml->xml tree <>)))
+
+(define (sxml->graphviz-html tree)
+ "Convert sxml TREE to a graphviz <html-string>, and return it."
+ ((@@ (ccwl graphviz) html-string) (sxml->xml-string tree)))
+
+(define (table-label table)
+ "Return HTML string label for TABLE."
+ (sxml->graphviz-html
+ `(table (@ (cellborder 0)
+ (bgcolor "white"))
+ (tr (td (@ (border 1)
+ (bgcolor ,(human-units-color (table-size table))))
+ ,(format "~a (~a)"
+ (table-name table)
+ (human-units (table-size table)))))
+ ,@(map (lambda (column)
+ `(tr (td (@ (port ,(column-name column)))
+ ,(column-name column))))
+ (table-columns table)))))
+
+(define (string-remove-suffix-ci suffix str)
+ "Remove SUFFIX from STR if present. Suffix check is
+case-insensitive."
+ (if (string-suffix-ci? suffix str)
+ (substring str 0 (- (string-length str)
+ (string-length suffix)))
+ str))
+
+(define (column->foreign-table table column all-tables)
+ "If COLUMN in TABLE is a foreign key, return the table it refers to. Else,
+return #f. ALL-TABLES is a list of all tables in the database."
+ (cond
+ ((and (string=? (column-name column) "UserId")
+ (string=? (table-name table) "UserPrivilege"))
+ 'User)
+ ((string-ci=? (column-name column) "GenbankID")
+ 'Genbank)
+ ((not (or (string-prefix? "int" (column-type column))
+ (string-prefix? "smallint" (column-type column))))
+ #f)
+ ((let ((string-similarity-threshold 0.8)
+ (target-table
+ (or (and=> (find (lambda (suffix)
+ (string-suffix-ci? suffix (column-name column)))
+ (list "id1" "id2" "_id" "id"))
+ (cut string-remove-suffix-ci <> (column-name column)))
+ (column-name column))))
+ (and (not (jaccard-string-similar? target-table
+ (table-name table)))
+ (find (lambda (table)
+ (jaccard-string-similar?
+ target-table (table-name table)))
+ all-tables)))
+ => table-name)
+ (else #f)))
+
+(define (tables)
+ "Return list of all tables in DB. Each element of the returned list
+is a <table> object."
+ (map (match-lambda
+ ((table size)
+ (make-table table
+ ;; FIXME: Why is size coming out as a string?
+ (string->number size)
+ (map (match-lambda
+ ((field type) (make-column field type)))
+ (sparql-query-records
+ ;; We use format to construct the query instead of
+ ;; select due to an outstanding bug in
+ ;; guile-sparql. See
+ ;; https://github.com/roelj/guile-sparql/issues/5
+ (format
+ "SELECT ?fieldname ?fieldtype
+WHERE
+{
+ ?table <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://genenetwork.org/sqlTable> .
+ ?table <http://genenetwork.org/name> ~s .
+ ?field <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://genenetwork.org/sqlTableField> .
+ ?table <http://genenetwork.org/hasField> ?field .
+ ?field <http://genenetwork.org/name> ?fieldname .
+ ?field <http://genenetwork.org/sqlFieldType> ?fieldtype .
+}" table))))))
+ (sparql-query-records
+ (select '(tablename size)
+ `((table ,(rdf "type") ,(gn "sqlTable"))
+ (table ,(gn "name") tablename)
+ (table ,(gn "hasSize") size))))))
+
+(define (foreign-key-graphviz-edges tables)
+ "Return the list of graphviz edges representing foreign key
+relations in TABLES."
+ (append-map (lambda (table)
+ (filter-map (lambda (column)
+ (and=> (column->foreign-table table column tables)
+ (cut cons
+ ((@@ (ccwl graphviz) graph-port)
+ (table-name table)
+ (column-name column))
+ <>)))
+ (table-columns table)))
+ tables))
+
+(let ((all-tables (tables)))
+ ((@@ (ccwl graphviz) graph->dot)
+ ((@@ (ccwl graphviz) graph) 'schema
+ #:nodes (map (lambda (table)
+ ((@@ (ccwl graphviz) graph-node)
+ (table-name table)
+ `((shape . "none")
+ (label . ,(table-label table)))))
+ all-tables)
+ #:edges (foreign-key-graphviz-edges all-tables))))