aboutsummaryrefslogtreecommitdiff
#! /usr/bin/env guile
!#

(use-modules (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (srfi srfi-28)
             (srfi srfi-71)
             (srfi srfi-171)
             (ice-9 match)
             (sxml simple)
             (sparql driver)
             (sparql lang)
             (sparql util)
             (transform string-similarity)
             (transform table))

(define rdfs
  (prefix "http://www.w3.org/2000/01/rdf-schema#"))

(define rdf
  (prefix "http://www.w3.org/1999/02/22-rdf-syntax-ns#"))

(define gn
  (prefix "http://genenetwork.org/"))

(define graph (@@ (ccwl graphviz) graph))
(define graph-node (@@ (ccwl graphviz) graph-node))
(define graph-edge (@@ (ccwl graphviz) graph-edge))
(define graph-port (@@ (ccwl graphviz) graph-port))
(define html-string (@@ (ccwl graphviz) html-string))
(define graph->dot (@@ (ccwl graphviz) graph->dot))

(define %sparql-host
  (make-parameter #f))

(define %sparql-port
  (make-parameter #f))

(define (sparql-query-records . args)
  ;; TODO: Use the JSON query results so that types can be converted
  ;; correctly.
  (query-results->list (apply sparql-query
                              (append args
                                      (list #:host (%sparql-host)
                                            #:port (%sparql-port))))
                       #t))

(define (floor-log1024 x)
  "Return the floor of the base 1024 logarithm of X."
  (if (< x 1024)
      0
      (1+ (floor-log1024 (/ x 1024)))))

(define (human-units bytes)
  "Return number of BYTES as a string with human-readable units."
  (let* ((integer-log (floor-log1024 bytes)))
    (format "~a ~a"
            (round-quotient bytes (expt 1024 (min integer-log 3)))
            (case integer-log
              ((0) "B")
              ((1) "KiB")
              ((2) "MiB")
              (else "GiB")))))

(define (human-units-color bytes)
  "Return the table header color coding for a table of size BYTES."
  (let* ((color-scheme "purd4"))
    (format "/~a/~a"
            color-scheme
            (1+ (min (floor-log1024 bytes) 3)))))

(define (sxml->xml-string tree)
  "Serialize sxml TREE to a string. Return the serialized string."
  (call-with-output-string
    (cut sxml->xml tree <>)))

(define (sxml->graphviz-html tree)
  "Convert sxml TREE to a graphviz <html-string>, and return it."
  (html-string (sxml->xml-string tree)))

(define (table-label table)
  "Return HTML string label for TABLE."
  (sxml->graphviz-html
   `(table (@ (cellborder 0)
              (bgcolor ,(if (any column-transformed?
                                 (table-columns table))
                            "lightgrey"
                            "white")))
           (tr (td (@ (border 1)
                      (bgcolor ,(human-units-color (table-size table))))
                   ,(format "~a (~a)"
                            (table-name table)
                            (human-units (table-size table)))))
           ,@(map (lambda (column)
                    `(tr (td (@ (port ,(column-name column))
                                ,@(if (column-transformed? column)
                                      `((bgcolor "green"))
                                      '()))
                             ,(column-name column))))
                  (table-columns table)))))

(define (string-remove-suffix-ci suffix str)
  "Remove SUFFIX from STR if present. Suffix check is
case-insensitive."
  (if (string-suffix-ci? suffix str)
      (substring str 0 (- (string-length str)
                          (string-length suffix)))
      str))

(define (column->foreign-table table column all-tables)
  "If COLUMN in TABLE is a foreign key, return the table it refers to. Else,
return #f. ALL-TABLES is a list of all tables in the database."
  (cond
   ((and (string=? (column-name column) "UserId")
         (string=? (table-name table) "UserPrivilege"))
    'User)
   ((string-ci=? (column-name column) "GenbankID")
    'Genbank)
   ((not (or (string-prefix? "int" (column-type column))
             (string-prefix? "smallint" (column-type column))))
    #f)
   ((let ((string-similarity-threshold 0.8)
          (target-table
           (or (and=> (find (lambda (suffix)
                              (string-suffix-ci? suffix (column-name column)))
                            (list "id1" "id2" "_id" "id"))
                      (cut string-remove-suffix-ci <> (column-name column)))
               (column-name column))))
      (and (not (jaccard-string-similar? target-table
                                         (table-name table)))
           (find (lambda (table)
                   (jaccard-string-similar?
                    target-table (table-name table)))
                 all-tables)))
    => table-name)
   (else #f)))

(define (tables)
  "Return list of all tables in DB. Each element of the returned list
is a <table> object."
  (map (match-lambda
         ((table size fields field-types field-transformed)
          (make-table table
                      (string->number size)
                      (map make-column
                           (string-split fields #\,)
                           (string-split field-types #\,)
                           (map (cut string=? <> "1")
                                (string-split field-transformed #\,))))))
       (sparql-query-records
        "PREFIX gn: <http://genenetwork.org/>
SELECT SAMPLE(?tablename) SAMPLE(?size) GROUP_CONCAT(?fieldname ; separator=\",\") GROUP_CONCAT(?fieldtype ; separator=\",\") GROUP_CONCAT(EXISTS{ ?transform rdf:type gn:transform . ?transform gn:dependsOn ?field .} ; separator=\",\")
WHERE
{
  ?table rdf:type gn:sqlTable ;
         gn:name ?tablename ;
         gn:hasSize ?size ;
         gn:hasField ?field .
  ?field rdf:type gn:sqlTableField ;
         gn:name ?fieldname ;
         gn:sqlFieldType ?fieldtype .
} GROUP BY ?table")))

(define (foreign-key-graphviz-edges tables)
  "Return the list of graphviz edges representing foreign key
relations in TABLES."
  (append-map (lambda (table)
                (filter-map (lambda (column)
                              (and=> (column->foreign-table table column tables)
                                     (cut cons
                                          (graph-port (table-name table)
                                                      (column-name column))
                                          <>)))
                            (table-columns table)))
              tables))

(define (write-sql-visualization port)
  "Write a visualization of the SQL schema in graphviz dot syntax to
PORT."
  (let ((all-tables (tables)))
    (graph->dot
     (graph 'schema
            #:nodes (map (lambda (table)
                           (graph-node
                            (table-name table)
                            `((shape . "none")
                              (label . ,(table-label table)))))
                         all-tables)
            #:edges (foreign-key-graphviz-edges all-tables))
     port)))

(define (literal-node-id domain predicate)
  "Return the graphviz node identifier for an RDF literal node which
is the range of PREDICATE. The domain of PREDICATE is DOMAIN."
  (string-append "literal_node__"
                 (basename domain)
                 "_"
                 (basename predicate)))

(define (rdf-type-nodes)
  "Return the list of all graphviz nodes representing types."
  (map (match-lambda
         ((type)
          (graph-node (basename type)
                      `((fillcolor . lightgreen)
                        (style . filled)))))
       (sparql-query-records
        "SELECT DISTINCT ?type
WHERE {
  { ?predicate rdfs:domain ?type }
  UNION
  { ?predicate rdfs:range ?type }
  MINUS
  { ?predicate rdfs:range rdfs:Literal }
}")))

(define (rdf-literal-nodes)
  "Return the list of all graphviz nodes representing literal
properties."
  (map (match-lambda
         ((type predicate tables fields)
          (graph-node
           (literal-node-id type predicate)
           `((shape . box)
             (style . filled)
             (fillcolor . gold)
             (label . ,(basename predicate))
             (tooltip . ,(string-join
                          (map (lambda (table field)
                                 (string-append table "." field))
                               (string-split tables #\,)
                               (string-split fields #\,))
                          ", "))))))
       (sparql-query-records
        "PREFIX gn: <http://genenetwork.org/>
SELECT ?type ?predicate GROUP_CONCAT(?tablename ; separator=\",\") GROUP_CONCAT(?fieldname ; separator=\",\")
WHERE
{
  ?predicate rdfs:domain ?type ;
             rdfs:range rdfs:Literal .
  ?transform rdf:type gn:transform ;
        gn:createsPredicate ?predicate ;
        gn:forSubjectType ?type ;
        gn:dependsOn ?field .
  ?field rdf:type gn:sqlTableField ;
         gn:name ?fieldname .
  ?table rdf:type gn:sqlTable ;
         gn:hasField ?field ;
         gn:name ?tablename .
} GROUP BY ?type ?predicate
")))

(define (rdf-edges)
  "Return the list of all graphviz edges in the RDF visualization."
  (map (match-lambda
         ((type predicate range)
          (if (string=? range (string-trim-both (rdfs "Literal")
                                                (char-set #\< #\>)))
              ;; Literal properties
              (graph-edge (basename type)
                          (literal-node-id type predicate))
              ;; Relations between classes
              (graph-edge (basename type)
                          (basename range)))))
       (sparql-query-records
        (select '(type predicate range)
                `((predicate ,(rdfs "domain") type)
                  (predicate ,(rdfs "range") range))))))

(define (write-rdf-visualization port)
  "Write a visualization of the RDF schema in graphviz dot syntax to
PORT."
  (graph->dot
   (graph 'rdf
          #:nodes (append (rdf-type-nodes)
                      (rdf-literal-nodes))
          #:edges (rdf-edges))
   port))

(define main
  (match-lambda*
    ((_ connection-settings-file)
     (let ((connection-settings (call-with-input-file connection-settings-file
                                  read)))
       (parameterize ((%sparql-host (assq-ref connection-settings 'sparql-host))
                      (%sparql-port (assq-ref connection-settings 'sparql-port)))
         (call-with-output-file "sql.dot"
           write-sql-visualization)
         (call-with-output-file "rdf.dot"
           write-rdf-visualization))))
    ((arg0 _ ...)
     (display (format "Usage: ~a CONNECTION-SETTINGS-FILE~%" arg0)
              (current-error-port))
     (exit #f))))

(apply main (command-line))