about summary refs log tree commit diff
path: root/transform/triples.scm
diff options
context:
space:
mode:
Diffstat (limited to 'transform/triples.scm')
-rw-r--r-- transform/triples.scm 24
1 files changed, 18 insertions, 6 deletions
diff --git a/transform/triples.scm b/transform/triples.scm
index 9775d36..13758e5 100644
--- a/transform/triples.scm
+++ b/transform/triples.scm
@@ -8,8 +8,19 @@
             triple
             scm->triples
             annotate-field
+            remap-species-identifiers
             string->binomial-name))
 
+(define (remap-species-identifiers str)
+  "Remap the species identifier STR to its standard binomial name; any other
+   value is returned unchanged.  This should really be fixed in the database!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"] ; NOTE(review): maps to a different binomial -- confirm intended
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [str str])) ; fallback: no mapping known, return STR as-is
+
 (define (annotate-field field schema)
   (let ([schema (cond ((symbol? schema)
                        (symbol->string schema))
@@ -28,7 +39,7 @@
           #:optional #:key
           (ontology "gn:")
           (separator "")
-          (proc string-capitalize-first))
+          (proc (lambda (x) x)))
   "Convert STR to a turtle identifier after replacing illegal
 characters with an underscore and prefixing with gn:PREFIX."
   (if (or (and (string? str) (string-null? str))
@@ -40,11 +51,12 @@ characters with an underscore and prefixing with gn:PREFIX."
                        (lambda (c)
                          (eq? c #\)))
                        (string-map (lambda (c)
-                                     (case c
-                                       ((#\/ #\< #\> #\+ #\( #\space #\@) #\_)
-                                       (else c)))
-                                   (proc
-                                    (string-trim-right str #\.))))))))
+                                     (if (or (char-alphabetic? c)
+                                             (char-numeric? c)
+                                             (char=? c #\_))
+                                         c
+                                         #\_))
+                                   (proc str)))))))
 
 
 (define* (prefix prefix iri #:optional (ttl? #t))