diff options
Diffstat (limited to 'transform')
| -rw-r--r-- | transform/schema.scm | 8 | ||||
| -rw-r--r-- | transform/sql.scm | 19 | ||||
| -rw-r--r-- | transform/strings.scm | 39 | ||||
| -rw-r--r-- | transform/triples.scm | 24 |
4 files changed, 54 insertions, 36 deletions
diff --git a/transform/schema.scm b/transform/schema.scm index cdfc834..f3896a7 100644 --- a/transform/schema.scm +++ b/transform/schema.scm @@ -89,14 +89,14 @@ is a <table> object." (table-name table))))) (triple table-id 'rdf:type 'gn:sqlTable) (triple table-id 'gn:name (table-name table)) - (triple table-id 'gn:hasSize (table-size table)) + (triple table-id 'gn:has_size (table-size table)) (for-each (lambda (column) (let ((column-id (column-id (table-name table) (column-name column)))) - (triple column-id 'rdf:type 'gn:sqlTableField) + (triple column-id 'rdf:type 'gn:sql_table_field) (triple column-id 'gn:name (column-name column)) - (triple column-id 'gn:sqlFieldType (column-type column)) - (triple table-id 'gn:hasField column-id))) + (triple column-id 'gn:sql_field_type (column-type column)) + (triple table-id 'gn:has_field column-id))) (table-columns table)))) tables))) diff --git a/transform/sql.scm b/transform/sql.scm index a8962c8..daedf97 100644 --- a/transform/sql.scm +++ b/transform/sql.scm @@ -102,13 +102,14 @@ (dbi-get_row db)) (define (call-with-target-database connection-settings proc) - (call-with-database "mysql" (string-join - (list (assq-ref connection-settings 'sql-username) - (assq-ref connection-settings 'sql-password) - (assq-ref connection-settings 'sql-database) - "tcp" - (assq-ref connection-settings 'sql-host) - (number->string - (assq-ref connection-settings 'sql-port))) - ":") + (call-with-database "mysql" (string-append (string-join + (list (assq-ref connection-settings 'sql-username) + (assq-ref connection-settings 'sql-password) + (assq-ref connection-settings 'sql-database) + "tcp" + (assq-ref connection-settings 'sql-host) + (number->string + (assq-ref connection-settings 'sql-port))) + ":") + "?charset=utf8") proc)) diff --git a/transform/strings.scm b/transform/strings.scm index 7545f62..7b62349 100644 --- a/transform/strings.scm +++ b/transform/strings.scm @@ -11,19 +11,25 @@ delete-substrings replace-substrings remove-duplicates - remap-species-identifiers str sanitize-rdf-string snake->lower-camel lower-case-and-replace-spaces - string-capitalize-first)) + string-capitalize-first + normalize-string-field + fix-email-id + blank-p + investigator-attributes->id)) + +(define (blank-p str) + (if (string-blank? str) #f str)) (define (lower-case-and-replace-spaces str) (string-map - (lambda (c) - (if (char=? c #\space) - #\- ; replace space with hyphen - c)) ; convert character to lower case - (string-downcase str))) + (lambda (c) + (if (char=? c #\space) + #\- ; replace space with hyphen + c)) ; convert character to lower case + (string-downcase str))) (define (time-unix->string seconds . maybe-format) "Given an integer saying the number of seconds since the Unix @@ -121,13 +127,12 @@ association list mapping substrings to their replacements." ((memq (car lst) result) (loop (cdr lst) result)) (else (loop (cdr lst) (cons (car lst) result)))))) - -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) +(define (normalize-string-field field) + (let ((field (string-trim-both field))) + (match field + ((? string? field) + (if (or (string-blank? field) + (string=? (string-downcase field) "none")) + "" + field)) + (_ "")))) diff --git a/transform/triples.scm b/transform/triples.scm index 9775d36..13758e5 100644 --- a/transform/triples.scm +++ b/transform/triples.scm @@ -8,8 +8,19 @@ triple scm->triples annotate-field + remap-species-identifiers string->binomial-name)) +(define (remap-species-identifiers str) + "This procedure remaps identifiers to standard binominal. Obviously this should + be sorted by correcting the database!" + (match str + ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] + ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] + ["Macaca mulatta" "Macaca nemestrina"] + ["Bat (Glossophaga soricina)" "Glossophaga soricina"] + [str str])) + (define (annotate-field field schema) (let ([schema (cond ((symbol? schema) (symbol->string schema)) @@ -28,7 +39,7 @@ #:optional #:key (ontology "gn:") (separator "") - (proc string-capitalize-first)) + (proc (lambda (x) x))) "Convert STR to a turtle identifier after replacing illegal characters with an underscore and prefixing with gn:PREFIX." (if (or (and (string? str) (string-null? str)) @@ -40,11 +51,12 @@ characters with an underscore and prefixing with gn:PREFIX." (lambda (c) (eq? c #\))) (string-map (lambda (c) - (case c - ((#\/ #\< #\> #\+ #\( #\space #\@) #\_) - (else c))) - (proc - (string-trim-right str #\.)))))))) + (if (or (char-alphabetic? c) + (char-numeric? c) + (char=? c #\_)) + c + #\_)) + (proc str))))))) (define* (prefix prefix iri #:optional (ttl? #t)) |
