about summary refs log tree commit diff
path: root/transform
diff options
context:
space:
mode:
Diffstat (limited to 'transform')
-rw-r--r-- transform/schema.scm 8
-rw-r--r-- transform/sql.scm 19
-rw-r--r-- transform/strings.scm 39
-rw-r--r-- transform/triples.scm 24
4 files changed, 54 insertions, 36 deletions
diff --git a/transform/schema.scm b/transform/schema.scm
index cdfc834..f3896a7 100644
--- a/transform/schema.scm
+++ b/transform/schema.scm
@@ -89,14 +89,14 @@ is a <table> object."
                                      (table-name table)))))
                   (triple table-id 'rdf:type 'gn:sqlTable)
                   (triple table-id 'gn:name (table-name table))
-                  (triple table-id 'gn:hasSize (table-size table))
+                  (triple table-id 'gn:has_size (table-size table))
                   (for-each (lambda (column)
                               (let ((column-id (column-id (table-name table)
                                                           (column-name column))))
-                                (triple column-id 'rdf:type 'gn:sqlTableField)
+                                (triple column-id 'rdf:type 'gn:sql_table_field)
                                 (triple column-id 'gn:name (column-name column))
-                                (triple column-id 'gn:sqlFieldType (column-type column))
-                                (triple table-id 'gn:hasField column-id)))
+                                (triple column-id 'gn:sql_field_type (column-type column))
+                                (triple table-id 'gn:has_field column-id)))
                             (table-columns table))))
               tables)))
 
diff --git a/transform/sql.scm b/transform/sql.scm
index a8962c8..daedf97 100644
--- a/transform/sql.scm
+++ b/transform/sql.scm
@@ -102,13 +102,14 @@
   (dbi-get_row db))
 
 (define (call-with-target-database connection-settings proc)
-  (call-with-database "mysql" (string-join
-                               (list (assq-ref connection-settings 'sql-username)
-                                     (assq-ref connection-settings 'sql-password)
-                                     (assq-ref connection-settings 'sql-database)
-                                     "tcp"
-                                     (assq-ref connection-settings 'sql-host)
-                                     (number->string
-                                      (assq-ref connection-settings 'sql-port)))
-                               ":")
+  "Invoke call-with-database with the mysql backend, a connection
+string and PROC.  The connection string is built from the
+sql-username, sql-password, sql-database, sql-host and sql-port
+entries of the association list CONNECTION-SETTINGS, joined with
+colons (with a literal tcp field before the host), followed by the
+suffix ?charset=utf8."
+  (call-with-database "mysql" (string-append (string-join
+                                              (list (assq-ref connection-settings 'sql-username)
+                                                    (assq-ref connection-settings 'sql-password)
+                                                    (assq-ref connection-settings 'sql-database)
+                                                    "tcp"
+                                                    (assq-ref connection-settings 'sql-host)
+                                                    (number->string
+                                                     (assq-ref connection-settings 'sql-port)))
+                                              ":")
+                                             "?charset=utf8")
                       proc))
diff --git a/transform/strings.scm b/transform/strings.scm
index 7545f62..7b62349 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -11,19 +11,25 @@
             delete-substrings
             replace-substrings
             remove-duplicates
-            remap-species-identifiers str
             sanitize-rdf-string
             snake->lower-camel
             lower-case-and-replace-spaces
-            string-capitalize-first))
+            string-capitalize-first
+            normalize-string-field
+            fix-email-id
+            blank-p
+            investigator-attributes->id))
+
+(define (blank-p str)
+  "Return #f when string-blank? holds for STR, otherwise return STR
+itself."
+  (if (string-blank? str) #f str))
 
 (define (lower-case-and-replace-spaces str)
+  "Return STR lower-cased, with every space replaced by a hyphen."
   (string-map
-    (lambda (c)
-      (if (char=? c #\space)
-          #\- ; replace space with hyphen
-          c)) ; convert character to lower case
-    (string-downcase str)))
+   (lambda (c)
+     (if (char=? c #\space)
+         #\-                         ; replace space with hyphen
+         c))                         ; keep every other character unchanged
+   (string-downcase str)))           ; lower-casing happens here, before the map
 
 (define (time-unix->string seconds . maybe-format)
   "Given an integer saying the number of seconds since the Unix
@@ -121,13 +127,12 @@ association list mapping substrings to their replacements."
       ((memq (car lst) result) (loop (cdr lst) result))
       (else (loop (cdr lst) (cons (car lst) result))))))
 
-
-(define (remap-species-identifiers str)
-  "This procedure remaps identifiers to standard binominal. Obviously this should
-   be sorted by correcting the database!"
-  (match str
-    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
-    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
-    ["Macaca mulatta" "Macaca nemestrina"]
-    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
-    [str str]))
+(define (normalize-string-field field)
+  "Return FIELD with surrounding whitespace removed.  Return the empty
+string when FIELD is not a string, when the trimmed value satisfies
+string-blank?, or when it equals none, compared case-insensitively."
+  (match field
+    ;; Check the type before trimming: string-trim-both signals an error
+    ;; on a non-string, which would make the fallback clause unreachable.
+    ((? string? raw)
+     (let ((trimmed (string-trim-both raw)))
+       (if (or (string-blank? trimmed)
+               (string=? (string-downcase trimmed) "none"))
+           ""
+           trimmed)))
+    (_ "")))
diff --git a/transform/triples.scm b/transform/triples.scm
index 9775d36..13758e5 100644
--- a/transform/triples.scm
+++ b/transform/triples.scm
@@ -8,8 +8,19 @@
             triple
             scm->triples
             annotate-field
+            remap-species-identifiers
             string->binomial-name))
 
+(define (remap-species-identifiers str)
+  "Map the species label STR to a binomial name for the handful of
+labels listed below; any other value is returned unchanged.  Obviously
+this should be sorted by correcting the database!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ;; NOTE(review): unlike the other clauses, this one replaces one
+    ;; species name with a different one -- confirm this is intentional.
+    ["Macaca mulatta" "Macaca nemestrina"]
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    ;; Catch-all: bind the value and hand it back as-is.
+    [str str]))
+
 (define (annotate-field field schema)
   (let ([schema (cond ((symbol? schema)
                        (symbol->string schema))
@@ -28,7 +39,7 @@
           #:optional #:key
           (ontology "gn:")
           (separator "")
-          (proc string-capitalize-first))
+          (proc (lambda (x) x)))
   "Convert STR to a turtle identifier after replacing illegal
 characters with an underscore and prefixing with gn:PREFIX."
   (if (or (and (string? str) (string-null? str))
@@ -40,11 +51,12 @@ characters with an underscore and prefixing with gn:PREFIX."
                        (lambda (c)
                          (eq? c #\)))
                        (string-map (lambda (c)
-                                     (case c
-                                       ((#\/ #\< #\> #\+ #\( #\space #\@) #\_)
-                                       (else c)))
-                                   (proc
-                                    (string-trim-right str #\.))))))))
+                                     (if (or (char-alphabetic? c)
+                                             (char-numeric? c)
+                                             (char=? c #\_))
+                                         c
+                                         #\_))
+                                   (proc str)))))))
 
 
 (define* (prefix prefix iri #:optional (ttl? #t))