about summary refs log tree commit diff
path: root/transform/strings.scm
diff options
context:
space:
mode:
Diffstat (limited to 'transform/strings.scm')
-rw-r--r-- transform/strings.scm 61
1 files changed, 44 insertions, 17 deletions
diff --git a/transform/strings.scm b/transform/strings.scm
index 7545f62..c0f02e5 100644
--- a/transform/strings.scm
+++ b/transform/strings.scm
@@ -1,7 +1,13 @@
 (define-module (transform strings)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-19)
+  #:use-module (rnrs bytevectors)
+  #:use-module (uuid generate)
+  #:use-module (uuid utils)
+  #:use-module (uuid well-known)
+  #:use-module (ice-9 iconv)
   #:use-module (ice-9 match)
+  #:use-module (ice-9 rdelim)
   #:use-module (ice-9 string-fun)
   #:use-module (ice-9 textual-ports)
   #:export (string-blank?
@@ -11,19 +17,41 @@
             delete-substrings
             replace-substrings
             remove-duplicates
-            remap-species-identifiers str
             sanitize-rdf-string
             snake->lower-camel
             lower-case-and-replace-spaces
-            string-capitalize-first))
+            string-capitalize-first
+            normalize-string-field
+            fix-email-id
+            blank-p
+            investigator-attributes->id
+            path-without-extension
+            gn-uuid))
+
+(define (gn-uuid string)
+  "Hand STRING, encoded as UTF-8 bytes, to generate-string-uuid as a 'uuidv5."
+  (let ((octets (string->bytevector string "UTF-8"))) ; (ice-9 iconv) encoder
+    (generate-string-uuid 'uuidv5 octets)))
+
+(define (blank-p str)                  ; => STR itself, or #f when STR is blank
+  (and (not (string-blank? str)) str))
+
+(define (path-without-extension path)
+  "Return PATH minus its final extension; a leading dot (dotfile) is kept."
+  (let* ((dir (dirname path))                ; directory part
+         (base (basename path))              ; filename part
+         (dot-pos (string-rindex base #\.))  ; last dot position, or #f
+         (stem (if (and dot-pos (> dot-pos 0)) (substring base 0 dot-pos) base)))
+    ;; Skip the separator when DIR already ends in one (root), avoiding "//".
+    (string-append dir (if (string-suffix? "/" dir) "" "/") stem)))
 
 (define (lower-case-and-replace-spaces str)
   (string-map
-    (lambda (c)
-      (if (char=? c #\space)
-          #\- ; replace space with hyphen
-          c)) ; convert character to lower case
-    (string-downcase str)))
+   (lambda (c)
+     (if (char=? c #\space)
+         #\-                         ; replace space with hyphen
+         c))                         ; any other character passes through unchanged
+   (string-downcase str)))           ; lower-casing is done here, on the input to the map
 
 (define (time-unix->string seconds . maybe-format)
   "Given an integer saying the number of seconds since the Unix
@@ -121,13 +149,12 @@ association list mapping substrings to their replacements."
       ((memq (car lst) result) (loop (cdr lst) result))
       (else (loop (cdr lst) (cons (car lst) result))))))
 
-
-(define (remap-species-identifiers str)
-  "This procedure remaps identifiers to standard binominal. Obviously this should
-   be sorted by correcting the database!"
-  (match str
-    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
-    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
-    ["Macaca mulatta" "Macaca nemestrina"]
-    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
-    [str str]))
+(define (normalize-string-field field)
+  "Return FIELD trimmed, or \"\" when it is blank, \"none\", or not a string."
+  (match field
+    ((? string? raw)                    ; test the type before any string op
+     (let ((text (string-trim-both raw)))
+       (if (or (string-blank? text) (string=? (string-downcase text) "none"))
+           ""
+           text)))
+    (_ "")))                            ; non-string input normalizes to ""