(define-module (transform strings) #:use-module (srfi srfi-1) #:use-module (srfi srfi-19) #:use-module (ice-9 match) #:use-module (ice-9 string-fun) #:use-module (ice-9 textual-ports) #:export (string-blank? time-unix->string string-blank? string-split-substring delete-substrings replace-substrings remove-duplicates remap-species-identifiers str sanitize-rdf-string snake->lower-camel lower-case-and-replace-spaces string-capitalize-first)) (define (lower-case-and-replace-spaces str) (string-map (lambda (c) (if (char=? c #\space) #\- ; replace space with hyphen c)) ; convert character to lower case (string-downcase str))) (define (time-unix->string seconds . maybe-format) "Given an integer saying the number of seconds since the Unix epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable date and time-string, possible using the MAYBE-FORMAT." (letrec ([time-unix->time-utc (lambda (seconds) (add-duration (date->time-utc (make-date 0 0 0 0 1 1 1970 0)) (make-time time-duration 0 seconds)))]) (apply date->string (time-utc->date (time-unix->time-utc seconds)) maybe-format))) (define (string-blank? str) "Return non-#f if STR consists only of whitespace characters." (string-every char-set:whitespace str)) (define (string-split-substring str substr) "Split the string @var{str} into a list of substrings delimited by the substring @var{substr}." (define substrlen (string-length substr)) (define strlen (string-length str)) (define (loop index start) (cond ((>= start strlen) (list "")) ((not index) (list (substring str start))) (else (cons (substring str start index) (let ((new-start (+ index substrlen))) (loop (string-contains str substr new-start) new-start)))))) (cond ((string-contains str substr) => (lambda (idx) (loop idx 0))) (else (list str)))) (define (delete-substrings str . substrings) "Delete SUBSTRINGS, a list of strings, from STR." (fold (lambda (substring result) (string-replace-substring result substring "")) str substrings)) (define (replace-substrings str replacement-alist) "Replace substrings in STR according to REPLACEMENT-ALIST, an association list mapping substrings to their replacements." (fold (match-lambda* (((substring . replacement) str) (string-replace-substring str substring replacement))) str replacement-alist)) (define (sanitize-rdf-string str) (replace-substrings (string-trim-both str) '(("\r" . "\\r") ("\n" . "\\n") ("\"" . "'") ("\v" . "") ("â\x81„" . "/") ("’" . "’") ("â€" . "‒") ("\x9d" . "") ("\xad" . "") ("\x28" . "") ("\x29" . "") ("\xa0" . " ") ("\x02" . "") ("\x01" . "")))) (define (snake->lower-camel str) (let ((char-list (string->list str))) (call-with-output-string (lambda (port) (put-char port (char-downcase (string-ref str 0))) (map (lambda (char previous-char) (unless (char=? char #\_) (put-char port (if (char=? previous-char #\_) (char-upcase char) char)))) (drop char-list 1) char-list))))) (define (string-capitalize-first string) (string-titlecase (string-downcase string) 0 1)) (define (remove-duplicates lst) (let loop ((lst lst) (result '())) (cond ((null? lst) (reverse result)) ((memq (car lst) result) (loop (cdr lst) result)) (else (loop (cdr lst) (cons (car lst) result)))))) (define (remap-species-identifiers str) "This procedure remaps identifiers to standard binominal. Obviously this should be sorted by correcting the database!" (match str ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] ["Macaca mulatta" "Macaca nemestrina"] ["Bat (Glossophaga soricina)" "Glossophaga soricina"] [str str]))