aboutsummaryrefslogtreecommitdiff
path: root/transform/strings.scm
blob: 7545f62fbf294938ff02834f76c4e951b3cffc6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
(define-module (transform strings)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-19)
  #:use-module (ice-9 match)
  #:use-module (ice-9 string-fun)
  #:use-module (ice-9 textual-ports)
  #:export (string-blank?
            time-unix->string
            string-blank?
            string-split-substring
            delete-substrings
            replace-substrings
            remove-duplicates
            remap-species-identifiers str
            sanitize-rdf-string
            snake->lower-camel
            lower-case-and-replace-spaces
            string-capitalize-first))

(define (lower-case-and-replace-spaces str)
  (string-map
    (lambda (c)
      (if (char=? c #\space)
          #\- ; replace space with hyphen
          c)) ; convert character to lower case
    (string-downcase str)))

(define (time-unix->string seconds . maybe-format)
  "Given an integer saying the number of seconds since the Unix
epoch (1970-01-01 00:00:00), SECONDS, format it as a human readable
date and time-string, possible using the MAYBE-FORMAT."
  (letrec ([time-unix->time-utc
            (lambda (seconds)
              (add-duration
               (date->time-utc (make-date 0 0 0 0 1 1 1970 0))
               (make-time time-duration 0 seconds)))])
    (apply date->string
           (time-utc->date (time-unix->time-utc seconds))
           maybe-format)))

(define (string-blank? str)
  "Return non-#f if STR consists only of whitespace characters."
  (string-every char-set:whitespace str))

(define (string-split-substring str substr)
  "Split the string @var{str} into a list of substrings delimited by the
substring @var{substr}."

  (define substrlen (string-length substr))
  (define strlen (string-length str))

  (define (loop index start)
    (cond
     ((>= start strlen) (list ""))
     ((not index) (list (substring str start)))
     (else
      (cons (substring str start index)
            (let ((new-start (+ index substrlen)))
              (loop (string-contains str substr new-start)
                    new-start))))))

  (cond
   ((string-contains str substr) => (lambda (idx) (loop idx 0)))
   (else (list str))))

(define (delete-substrings str . substrings)
  "Delete SUBSTRINGS, a list of strings, from STR."
  (fold (lambda (substring result)
          (string-replace-substring result substring ""))
        str
        substrings))

(define (replace-substrings str replacement-alist)
  "Replace substrings in STR according to REPLACEMENT-ALIST, an
association list mapping substrings to their replacements."
  (fold (match-lambda*
          (((substring . replacement) str)
           (string-replace-substring str substring replacement)))
        str
        replacement-alist))

(define (sanitize-rdf-string str)
  (replace-substrings
   (string-trim-both str)
   '(("\r" . "\\r")
     ("\n" . "\\n")
     ("\"" . "'")
     ("\v" . "")
     (\x81„" . "/")
     ("’" . "’")
     ("â€" . "‒")
     ("\x9d" . "")
     ("\xad" . "")
     ("\x28" . "")
     ("\x29" . "")
     ("\xa0" . " ")
     ("\x02" . "")
     ("\x01" . ""))))

(define (snake->lower-camel str)
  (let ((char-list (string->list str)))
    (call-with-output-string
      (lambda (port)
        (put-char port (char-downcase (string-ref str 0)))
        (map (lambda (char previous-char)
               (unless (char=? char #\_)
                 (put-char port (if (char=? previous-char #\_)
                                    (char-upcase char)
                                    char))))
             (drop char-list 1)
             char-list)))))

(define (string-capitalize-first string)
  (string-titlecase
   (string-downcase string) 0 1))

(define (remove-duplicates lst)
  (let loop ((lst lst) (result '()))
    (cond
      ((null? lst) (reverse result))
      ((memq (car lst) result) (loop (cdr lst) result))
      (else (loop (cdr lst) (cons (car lst) result))))))


(define (remap-species-identifiers str)
  "This procedure remaps identifiers to standard binominal. Obviously this should
   be sorted by correcting the database!"
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    [str str]))