about summary refs log tree commit diff
path: root/transform
diff options
context:
space:
mode:
Diffstat (limited to 'transform')
-rw-r--r--transform/schema.scm (renamed from transform/schema-dump.scm)20
-rw-r--r--transform/special-forms.scm70
-rw-r--r--transform/sql.scm19
-rw-r--r--transform/strings.scm23
-rw-r--r--transform/triples.scm24
-rw-r--r--transform/uuid.scm234
6 files changed, 88 insertions, 302 deletions
diff --git a/transform/schema-dump.scm b/transform/schema.scm
index 18df5da..f3896a7 100644
--- a/transform/schema-dump.scm
+++ b/transform/schema.scm
@@ -4,7 +4,13 @@
   #:use-module (transform sql)
   #:use-module (transform triples)
   #:use-module (transform strings)
-  #:use-module (transform table))
+  #:use-module (transform table)
+  #:export (table-fields
+            get-tables-from-comments
+            schema-annotations
+            tables
+            schema
+            data-table))
 
 
 (define (table-fields db table)
@@ -47,7 +53,7 @@
      (for-each (cut table-fields db <>)
                (get-tables-from-comments db)))))
 
-(define (tables db)
+(define (tables connection-settings db)
   "Return list of all tables in DB. Each element of the returned list
 is a <table> object."
   (map (lambda (table)
@@ -68,7 +74,7 @@ is a <table> object."
                                (information_schema.tables data_length))
                               (information_schema.tables)
                               (format #f "WHERE table_schema = '~a'"
-                                      (assq-ref %connection-settings 'sql-database))))))
+                                      (assq-ref connection-settings 'sql-database))))))
 
 (define (schema db)
   (let ((tables (tables db)))
@@ -83,14 +89,14 @@ is a <table> object."
                                      (table-name table)))))
                   (triple table-id 'rdf:type 'gn:sqlTable)
                   (triple table-id 'gn:name (table-name table))
-                  (triple table-id 'gn:hasSize (table-size table))
+                  (triple table-id 'gn:has_size (table-size table))
                   (for-each (lambda (column)
                               (let ((column-id (column-id (table-name table)
                                                           (column-name column))))
-                                (triple column-id 'rdf:type 'gn:sqlTableField)
+                                (triple column-id 'rdf:type 'gn:sql_table_field)
                                 (triple column-id 'gn:name (column-name column))
-                                (triple column-id 'gn:sqlFieldType (column-type column))
-                                (triple table-id 'gn:hasField column-id)))
+                                (triple column-id 'gn:sql_field_type (column-type column))
+                                (triple table-id 'gn:has_field column-id)))
                             (table-columns table))))
               tables)))
 
diff --git a/transform/special-forms.scm b/transform/special-forms.scm
index 99b30df..ddb3180 100644
--- a/transform/special-forms.scm
+++ b/transform/special-forms.scm
@@ -537,40 +537,42 @@ The above query results to triples that have the form:
        (call-with-target-database
         connection
         (lambda (db)
-          (with-output-to-file        ;
-              doc-path
-            (lambda ()
-              (format #t "# ~a" name)
-              (for-each
-               (lambda (proc)
-                 (proc db
-                       #:metadata? #f
-                       #:data? #f
-                       #:documentation?
-                       (lambda () (for-each
-                                   (match-lambda
-                                     ((k v)
-                                      (begin
-                                        (prefix k v #f))))
-                                   prefixes))))
-               inputs))
-            #:encoding "UTF-8")
+          (when doc-path
+            (with-output-to-file        ;
+                doc-path
+              (lambda ()
+                (format #t "# ~a" name)
+                (for-each
+                 (lambda (proc)
+                   (proc db
+                         #:metadata? #f
+                         #:data? #f
+                         #:documentation?
+                         (lambda () (for-each
+                                     (match-lambda
+                                       ((k v)
+                                        (begin
+                                          (prefix k v #f))))
+                                     prefixes))))
+                 inputs))
+              #:encoding "UTF-8"))
 
           ;; Dumping the actual data
-          (with-output-to-file
-              rdf-path
-            (lambda ()
-              ;; Add the prefixes
-              (for-each
-               (match-lambda
-                 ((k v)
-                  (begin
-                    (prefix k v))))
-               prefixes)
-              (newline)
-              (for-each
-               (lambda (proc)
-                 (proc db #:metadata? table-metadata?))
-               inputs))
-            #:encoding "UTF-8")))))))
+          (when rdf-path
+            (with-output-to-file
+                rdf-path
+              (lambda ()
+                ;; Add the prefixes
+                (for-each
+                 (match-lambda
+                   ((k v)
+                    (begin
+                      (prefix k v))))
+                 prefixes)
+                (newline)
+                (for-each
+                 (lambda (proc)
+                   (proc db #:metadata? table-metadata?))
+                 inputs))
+              #:encoding "UTF-8"))))))))
 
diff --git a/transform/sql.scm b/transform/sql.scm
index a8962c8..daedf97 100644
--- a/transform/sql.scm
+++ b/transform/sql.scm
@@ -102,13 +102,14 @@
   (dbi-get_row db))
 
 (define (call-with-target-database connection-settings proc)
-  (call-with-database "mysql" (string-join
-                               (list (assq-ref connection-settings 'sql-username)
-                                     (assq-ref connection-settings 'sql-password)
-                                     (assq-ref connection-settings 'sql-database)
-                                     "tcp"
-                                     (assq-ref connection-settings 'sql-host)
-                                     (number->string
-                                      (assq-ref connection-settings 'sql-port)))
-                               ":")
+  (call-with-database "mysql" (string-append (string-join
+                                              (list (assq-ref connection-settings 'sql-username)
+                                                    (assq-ref connection-settings 'sql-password)
+                                                    (assq-ref connection-settings 'sql-database)
+                                                    "tcp"
+                                                    (assq-ref connection-settings 'sql-host)
+                                                    (number->string
+                                                     (assq-ref connection-settings 'sql-port)))
+                                              ":")
+                                             "?charset=utf8")
                       proc))
diff --git a/transform/strings.scm b/transform/strings.scm
index 7545f62..aba554a 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -11,11 +11,11 @@
             delete-substrings
             replace-substrings
             remove-duplicates
-            remap-species-identifiers str
             sanitize-rdf-string
             snake->lower-camel
             lower-case-and-replace-spaces
-            string-capitalize-first))
+            string-capitalize-first
+            normalize-string-field))
 
 (define (lower-case-and-replace-spaces str)
   (string-map
@@ -121,13 +121,12 @@ association list mapping substrings to their replacements."
       ((memq (car lst) result) (loop (cdr lst) result))
       (else (loop (cdr lst) (cons (car lst) result))))))
 
-
-(define (remap-species-identifiers str)
-  "This procedure remaps identifiers to standard binominal. Obviously this should
-   be sorted by correcting the database!"
-  (match str
-    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
-    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
-    ["Macaca mulatta" "Macaca nemestrina"]
-    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
-    [str str]))
+(define (normalize-string-field field)
+  (let ((field (string-trim-both field)))
+    (match field
+      ((?  string? field)
+       (if (or (string-blank? field)
+               (string=? (string-downcase field) "none"))
+           ""
+           field))
+      (_ ""))))
diff --git a/transform/triples.scm b/transform/triples.scm
index 9775d36..13758e5 100644
--- a/transform/triples.scm
+++ b/transform/triples.scm
@@ -8,8 +8,19 @@
             triple
             scm->triples
             annotate-field
+            remap-species-identifiers
             string->binomial-name))
 
+(define (remap-species-identifiers str)
+  "This procedure remaps identifiers to standard binominal. Obviously this should
+   be sorted by correcting the database!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"]
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [str str]))
+
 (define (annotate-field field schema)
   (let ([schema (cond ((symbol? schema)
                        (symbol->string schema))
@@ -28,7 +39,7 @@
           #:optional #:key
           (ontology "gn:")
           (separator "")
-          (proc string-capitalize-first))
+          (proc (lambda (x) x)))
   "Convert STR to a turtle identifier after replacing illegal
 characters with an underscore and prefixing with gn:PREFIX."
   (if (or (and (string? str) (string-null? str))
@@ -40,11 +51,12 @@ characters with an underscore and prefixing with gn:PREFIX."
                        (lambda (c)
                          (eq? c #\)))
                        (string-map (lambda (c)
-                                     (case c
-                                       ((#\/ #\< #\> #\+ #\( #\space #\@) #\_)
-                                       (else c)))
-                                   (proc
-                                    (string-trim-right str #\.))))))))
+                                     (if (or (char-alphabetic? c)
+                                             (char-numeric? c)
+                                             (char=? c #\_))
+                                         c
+                                         #\_))
+                                   (proc str)))))))
 
 
 (define* (prefix prefix iri #:optional (ttl? #t))
diff --git a/transform/uuid.scm b/transform/uuid.scm
deleted file mode 100644
index be0e592..0000000
--- a/transform/uuid.scm
+++ /dev/null
@@ -1,234 +0,0 @@
-;; CREDIT: https://lists.gnu.org/archive/html/guile-user/2018-01/msg00019.html
-(define-module (transform uuid)
-  #:use-module (srfi srfi-1)
-  #:use-module (srfi srfi-11)
-  #:use-module (rnrs bytevectors)
-  #:use-module (ice-9 iconv)
-  #:export (bytevector->md5
-            make-version-3-uuid))
-
-(define (bytevector->md5 bytevector)
-  "Convert BYTEVECTOR to a bytevector containing the MD5 hash of
-BYTEVECTOR."
-  ;; Implemented along RFC 1321.  It should be easy to verify that
-  ;; this procedure performs the operations specified therein.
-  (define (append-padding-bits bytevector)
-    "Makes a list from BYTEVECTOR with padding as per RFC 1321 3.1."
-    (let* ((length-in-bits (* 8 (bytevector-length bytevector)))
-           (padding-bits (- 512 (modulo (- length-in-bits 448) 512))))
-      (append (bytevector->u8-list bytevector)
-              '(128) ; #*10000000
-               (iota
-		(- (/ padding-bits 8) 1)
-		0 0))))
-  (define (append-length msg-list message-length)
-    "Append MESSAGE-LENGTH as 8 byte values from a uint64 to MSG-LIST."
-    (append msg-list
-            ;; For numbers too large for an uint64, only the low-order
-            ;; bytes are returned.
-            (bytevector->u8-list (u64vector
-                                  (modulo
-                                   (* message-length 8) ; bits
-                                   (1+ #xffffffffffffffff))))))
-  (let hash ((AA #x67452301)
-             (BB #xefcdab89)
-             (CC #x98badcfe)
-             (DD #x10325476)
-             (to-digest
-              (append-length
-               (append-padding-bits
-                bytevector)
-               (bytevector-length bytevector))))
-    (define (F X Y Z)
-      (logior (logand X Y) (logand (lognot X) Z)))
-    (define (G X Y Z)
-      (logior (logand X Z) (logand Y (lognot Z))))
-    (define (H X Y Z)
-      (logxor X Y Z))
-    (define (I X Y Z)
-      (logxor Y (logior X (lognot Z))))
-    (define (T i)
-      (inexact->exact (floor (* 4294967296 (abs (sin i))))))
-    (define (number->u32 n)
-      "Cut off all bits that do not fit in a uint32."
-      (bit-extract n 0 32))
-    (define (lsh32 n count)
-      (number->u32 (logior (ash n count)
-                           (bit-extract n (- 32 count) 32))))
-    (if (not (null? to-digest))
-        (let* ((block (u8-list->bytevector
-                       (list-head to-digest (/ 512 8))))
-               (X (lambda (j) (bytevector-u32-ref
-                               block (* 4 j) (endianness little))))
-               (do-round1
-                (lambda (A B C D)
-                  (define (operation a b c d k s i)
-                    (number->u32
-                     (+ b (lsh32 (+ a (F b c d) (X k) (T i)) s))))
-                  (let* ((A (operation A B C D 0 7 1))
-                         (D (operation D A B C 1 12 2))
-                         (C (operation C D A B 2 17 3))
-                         (B (operation B C D A 3 22 4))
-                         (A (operation A B C D 4 7 5))
-                         (D (operation D A B C 5 12 6))
-                         (C (operation C D A B 6 17 7))
-                         (B (operation B C D A 7 22 8))
-                         (A (operation A B C D 8 7 9))
-                         (D (operation D A B C 9 12 10))
-                         (C (operation C D A B 10 17 11))
-                         (B (operation B C D A 11 22 12))
-                         (A (operation A B C D 12 7 13))
-                         (D (operation D A B C 13 12 14))
-                         (C (operation C D A B 14 17 15))
-                         (B (operation B C D A 15 22 16)))
-                    (values A B C D))))
-               (do-round2
-                (lambda (A B C D)
-                  (define (operation a b c d k s i)
-                    (number->u32
-                     (+ b (lsh32 (+ a (G b c d) (X k) (T i)) s))))
-                  (let* ((A (operation A B C D 1 5 17))
-                         (D (operation D A B C 6 9 18))
-                         (C (operation C D A B 11 14 19))
-                         (B (operation B C D A 0 20 20))
-                         (A (operation A B C D 5 5 21))
-                         (D (operation D A B C 10 9 22))
-                         (C (operation C D A B 15 14 23))
-                         (B (operation B C D A 4 20 24))
-                         (A (operation A B C D 9 5 25))
-                         (D (operation D A B C 14 9 26))
-                         (C (operation C D A B 3 14 27))
-                         (B (operation B C D A 8 20 28))
-                         (A (operation A B C D 13 5 29))
-                         (D (operation D A B C 2 9 30))
-                         (C (operation C D A B 7 14 31))
-                         (B (operation B C D A 12 20 32)))
-                    (values A B C D))))
-               (do-round3
-                (lambda (A B C D)
-                  (define (operation a b c d k s i)
-                    (number->u32
-                     (+ b (lsh32 (+ a (H b c d) (X k) (T i)) s))))
-                  (let* ((A (operation A B C D 5 4 33))
-                         (D (operation D A B C 8 11 34))
-                         (C (operation C D A B 11 16 35))
-                         (B (operation B C D A 14 23 36))
-                         (A (operation A B C D 1 4 37))
-                         (D (operation D A B C 4 11 38))
-                         (C (operation C D A B 7 16 39))
-                         (B (operation B C D A 10 23 40))
-                         (A (operation A B C D 13 4 41))
-                         (D (operation D A B C 0 11 42))
-                         (C (operation C D A B 3 16 43))
-                         (B (operation B C D A 6 23 44))
-                         (A (operation A B C D 9 4 45))
-                         (D (operation D A B C 12 11 46))
-                         (C (operation C D A B 15 16 47))
-                         (B (operation B C D A 2 23 48)))
-                    (values A B C D))))
-               (do-round4
-                (lambda (A B C D)
-                  (define (operation a b c d k s i)
-                    (number->u32
-                     (+ b (lsh32 (+ a (I b c d) (X k) (T i)) s))))
-                  (let* ((A (operation A B C D 0 6 49))
-                         (D (operation D A B C 7 10 50))
-                         (C (operation C D A B 14 15 51))
-                         (B (operation B C D A 5 21 52))
-                         (A (operation A B C D 12 6 53))
-                         (D (operation D A B C 3 10 54))
-                         (C (operation C D A B 10 15 55))
-                         (B (operation B C D A 1 21 56))
-                         (A (operation A B C D 8 6 57))
-                         (D (operation D A B C 15 10 58))
-                         (C (operation C D A B 6 15 59))
-                         (B (operation B C D A 13 21 60))
-                         (A (operation A B C D 4 6 61))
-                         (D (operation D A B C 11 10 62))
-                         (C (operation C D A B 2 15 63))
-                         (B (operation B C D A 9 21 64)))
-                    (values A B C D)))))
-          (let*-values (((A B C D) (values AA BB CC DD))
-                        ((A B C D) (do-round1 A B C D))
-                        ((A B C D) (do-round2 A B C D))
-                        ((A B C D) (do-round3 A B C D))
-                        ((A B C D) (do-round4 A B C D)))
-            (hash (number->u32 (+ A AA))
-                  (number->u32 (+ B BB))
-                  (number->u32 (+ C CC))
-                  (number->u32 (+ D DD))
-                  (list-tail to-digest (/ 512 8)))))
-        ;; we’re done:
-        (u8-list->bytevector
-         (append
-          (bytevector->u8-list (u32vector AA))
-          (bytevector->u8-list (u32vector BB))
-          (bytevector->u8-list (u32vector CC))
-          (bytevector->u8-list (u32vector DD)))))))
-
-(define* (make-version-3-uuid namespace-uuid str #:optional (prefix "urn:uuid:"))
-  "Generates a UUID string by computing the MD5 hash of NAMESPACE-UUID
-and STR.  NAMESPACE-UUID must be a bytevector consisting of the UUID’s
-bytes, *not* the UUID’s string representation."
-  (define (half-byte->hex-char number)
-    "Returns the corresponding hexadecimal digit for a number NUMBER
-between 0 and 15."
-    (case number
-      ((0) #\0)
-      ((1) #\1)
-      ((2) #\2)
-      ((3) #\3)
-      ((4) #\4)
-      ((5) #\5)
-      ((6) #\6)
-      ((7) #\7)
-      ((8) #\8)
-      ((9) #\9)
-      ((10) #\a)
-      ((11) #\b)
-      ((12) #\c)
-      ((13) #\d)
-      ((14) #\e)
-      ((15) #\f)))
-  (define (byte->hex-string bv index)
-    "Convert the byte at INDEX of bytevector BV to a hex string."
-    (let ((byte (bytevector-u8-ref bv index)))
-      (string (half-byte->hex-char (quotient byte 16))
-              (half-byte->hex-char (modulo byte 16)))))
-  (let ((md5 (bytevector->md5
-              (u8-list->bytevector
-               (append (bytevector->u8-list namespace-uuid)
-                       (bytevector->u8-list (string->utf8 str)))))))
-    (string-append prefix
-                   ;; time_low field:
-                   (byte->hex-string md5 0)
-                   (byte->hex-string md5 1)
-                   (byte->hex-string md5 2)
-                   (byte->hex-string md5 3)
-                   "-"
-                   ;; time_mid field:
-                   (byte->hex-string md5 4)
-                   (byte->hex-string md5 5)
-                   "-"
-                   ;; time_hi_and_version field:
-                   (let ((byte (bytevector-u8-ref md5 6)))
-                     (string (half-byte->hex-char 3) ; UUID version 3
-                             (half-byte->hex-char (modulo byte 16))))
-                   (byte->hex-string md5 7)
-                   "-"
-                   ;; clock_seq_hi_and_reserved field:
-                   (let ((byte (bytevector-u8-ref md5 8)))
-                     (string (half-byte->hex-char
-                              (logior #b1000 ; most significant bits are 10
-                                      (bit-extract (quotient byte 16) 0 2)))
-                             (half-byte->hex-char (modulo byte 16))))
-                   ;; clock_seq_low field:
-                   (byte->hex-string md5 9)
-                   "-"
-                   ;; node field:
-                   (byte->hex-string md5 10)
-                   (byte->hex-string md5 11)
-                   (byte->hex-string md5 12)
-                   (byte->hex-string md5 13)
-                   (byte->hex-string md5 14)
-                   (byte->hex-string md5 15))))