(define-module (transform strings) #:use-module (srfi srfi-1) #:use-module (srfi srfi-19) #:use-module (ice-9 match) #:use-module (ice-9 string-fun) #:use-module (ice-9 textual-ports) #:export (string-blank? time-unix->string string-blank? string-split-substring delete-substrings replace-substrings remove-duplicates sanitize-rdf-string snake->lower-camel lower-case-and-replace-spaces string-capitalize-first normalize-string-field fix-email-id investigator-attributes->id)) ;; One email ID in the Investigators table has spaces in it. This ;; function fixes that. (define (fix-email-id email) (string-delete #\space email)) (define (investigator-attributes->id first-name last-name email) ;; There is just one record corresponding to "Evan Williams" which ;; does not have an email ID. To accommodate that record, we ;; construct the investigator ID from not just the email ID, but ;; also the first and the last names. It would be preferable to just ;; find Evan Williams' email ID and insert it into the database. (string->identifier "investigator" (string-join (list first-name last-name (fix-email-id email)) "_") #:separator "_")) (define (lower-case-and-replace-spaces str) (string-map (lambda (c) (if (char=? c #\space) #\- ; replace space with hyphen c)) ; convert character to lower case (string-downcase str))) (define (time-unix->string seconds . maybe-format) "Given an integer saying the number of seconds since the Unix epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable date and time-string, possible using the MAYBE-FORMAT." (letrec ([time-unix->time-utc (lambda (seconds) (add-duration (date->time-utc (make-date 0 0 0 0 1 1 1970 0)) (make-time time-duration 0 seconds)))]) (apply date->string (time-utc->date (time-unix->time-utc seconds)) maybe-format))) (define (string-blank? str) "Return non-#f if STR consists only of whitespace characters." (string-every char-set:whitespace str)) (define (string-split-substring str substr) "Split the string @var{str} into a list of substrings delimited by the substring @var{substr}." (define substrlen (string-length substr)) (define strlen (string-length str)) (define (loop index start) (cond ((>= start strlen) (list "")) ((not index) (list (substring str start))) (else (cons (substring str start index) (let ((new-start (+ index substrlen))) (loop (string-contains str substr new-start) new-start)))))) (cond ((string-contains str substr) => (lambda (idx) (loop idx 0))) (else (list str)))) (define (delete-substrings str . substrings) "Delete SUBSTRINGS, a list of strings, from STR." (fold (lambda (substring result) (string-replace-substring result substring "")) str substrings)) (define (replace-substrings str replacement-alist) "Replace substrings in STR according to REPLACEMENT-ALIST, an association list mapping substrings to their replacements." (fold (match-lambda* (((substring . replacement) str) (string-replace-substring str substring replacement))) str replacement-alist)) (define (sanitize-rdf-string str) (replace-substrings (string-trim-both str) '(("\r" . "\\r") ("\n" . "\\n") ("\"" . "'") ("\v" . "") ("â\x81„" . "/") ("’" . "’") ("â€" . "‒") ("\x9d" . "") ("\xad" . "") ("\x28" . "") ("\x29" . "") ("\xa0" . " ") ("\x02" . "") ("\x01" . "")))) (define (snake->lower-camel str) (let ((char-list (string->list str))) (call-with-output-string (lambda (port) (put-char port (char-downcase (string-ref str 0))) (map (lambda (char previous-char) (unless (char=? char #\_) (put-char port (if (char=? previous-char #\_) (char-upcase char) char)))) (drop char-list 1) char-list))))) (define (string-capitalize-first string) (string-titlecase (string-downcase string) 0 1)) (define (remove-duplicates lst) (let loop ((lst lst) (result '())) (cond ((null? lst) (reverse result)) ((memq (car lst) result) (loop (cdr lst) result)) (else (loop (cdr lst) (cons (car lst) result)))))) (define (normalize-string-field field) (let ((field (string-trim-both field))) (match field ((? string? field) (if (or (string-blank? field) (string=? (string-downcase field) "none")) "" field)) (_ ""))))