about summary refs log tree commit diff
path: root/transform/triples.scm
blob: 55edf8a69351fe4cbb6e0d50ee0dfc235f0ed07b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
(define-module (transform triples)
  #:use-module (ice-9 regex)
  #:use-module (ice-9 match)
  #:use-module (transform strings)
  #:export (ontology
            string->identifier
            prefix
            triple
            scm->triples
            annotate-field
            remap-species-identifiers
            string->binomial-name))

(define (remap-species-identifiers str)
  "Translate the species label STR into the name used downstream.

Labels listed in the table below are replaced by their mapped name; any
other value is returned unchanged.  This is a stop-gap: the labels should
really be corrected in the database itself."
  (let ((remappings
         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
           ;; NOTE(review): both sides of this entry are species names in
           ;; their own right -- confirm that this remap is intended.
           ("Macaca mulatta" . "Macaca nemestrina")
           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))))
    (cond ((assoc str remappings) => cdr)
          (else str))))

(define (annotate-field field schema)
  "Attach the annotation SCHEMA to the value FIELD.

SCHEMA must be a string or a symbol; anything else raises an error, even
when FIELD is empty.  A numeric FIELD is first converted with
`number->string'.  When FIELD is #f or the empty string the result is the
empty string.  Otherwise the result is a symbol whose name is FIELD in
`write' notation (so a string is wrapped in double quotes) immediately
followed by SCHEMA."
  (let* ((suffix (match schema
                   ((? symbol?) (symbol->string schema))
                   ((? string?) schema)
                   (_ (error "Use a string/symbol"))))
         (text (if (number? field)
                   (number->string field)
                   field)))
    (cond ((not text) "")
          ((and (string? text) (string-null? text)) "")
          (else
           ;; ~s keeps the surrounding double quotes of a string value.
           (string->symbol (format #f "~s~a" text suffix))))))

(define* (string->identifier
          prefix str
          #:key
          (ontology "gn:")
          (separator "")
          (proc string-capitalize-first))
  "Convert STR into an identifier symbol named ONTOLOGY PREFIX SEPARATOR
followed by a sanitised form of STR.

Sanitising is done in this order:
  1. every underscore in STR is replaced by a hyphen;
  2. trailing full stops are trimmed;
  3. PROC is applied to the resulting string;
  4. each slash, angle bracket, plus sign, opening parenthesis, space and
     at sign is replaced by a hyphen;
  5. every closing parenthesis is removed.

ONTOLOGY defaults to `gn:', SEPARATOR to the empty string and PROC to
`string-capitalize-first'.  When STR is #f or the empty string, the empty
string is returned instead of a symbol."
  (if (or (not str)
          (and (string? str) (string-null? str)))
      ""
      (let* ((hyphenated (regexp-substitute/global #f "_" str 'pre "-" 'post))
             (trimmed (string-trim-right hyphenated #\.))
             (cased (proc trimmed))
             (mapped (string-map
                      (lambda (c)
                        (case c
                          ((#\/ #\< #\> #\+ #\( #\space #\@) #\-)
                          (else c)))
                      cased))
             ;; Pass the character itself rather than comparing characters
             ;; with `eq?', whose result on characters is unspecified.
             (local-name (string-delete #\) mapped)))
        (string->symbol
         (string-append ontology prefix separator local-name)))))


(define* (prefix prefix iri #:optional (ttl? #t))
  "Print a namespace declaration binding PREFIX to IRI, followed by a
newline, to the current output port.

When TTL? is true (the default) the Turtle-style `@prefix' form ending in
a dot is printed; otherwise the `PREFIX' form without a trailing dot is
printed.  PREFIX and IRI are printed as given: nothing is added around
them."
  (let ((template (if ttl?
                      "@prefix ~a ~a .~%"
                      "PREFIX ~a ~a ~%")))
    (format #t template prefix iri)))

(define (ontology prefix value)
  "Build the symbol named by PREFIX immediately followed by VALUE, each
rendered as `display' would print it.

When VALUE is the empty string there is nothing to name, so the empty
string is returned instead of a symbol."
  (let ((blank? (and (string? value) (string-null? value))))
    (cond (blank? "")
          (else (string->symbol (format #f "~a~a" prefix value))))))

(define (triple subject predicate object)
  "Print the statement SUBJECT PREDICATE OBJECT, terminated by a dot and a
newline, to the current output port.

SUBJECT and PREDICATE must each be a string or a symbol; OBJECT must be a
string, a symbol or a number.  Any other type raises an error whose
irritant is the list of all three arguments.

OBJECT is printed verbatim when it is a symbol, or a string of the form
`( ... )' or `[ ... ]' (with a space just inside each delimiter).  Every
other object, including numbers, is printed wrapped in double quotes.  No
escaping is applied to the quoted text."
  (let ((statement (list subject predicate object))
        (term? (lambda (x) (or (string? x) (symbol? x)))))
    (unless (term? subject)
      (error "Triple subject not a string or symbol:" statement))
    (unless (term? predicate)
      (error "Triple predicate not a string or symbol:" statement))
    (unless (or (term? object) (number? object))
      (error "Triple object not a string, symbol or number:" statement))
    (let ((pattern
           (if (or (symbol? object)
                   ;; `string-match' only accepts strings, so the regex
                   ;; tests must not be reached for a numeric object.
                   (and (string? object)
                        (or (string-match "^\\( .* \\)$" object)
                            (string-match "^\\[ .* \\]$" object))))
               "~a ~a ~a .~%"
               "~a ~a \"~a\" .~%")))
      (format #t pattern subject predicate
              (if (symbol? object) (symbol->string object) object)))))

(define* (scm->triples alist id
                       #:optional
                       (fn triple))
  "Call FN with ID, predicate and object for each (PREDICATE . OBJECT) pair
of ALIST, in list order.

A pair is skipped when its object is #f, or when its object is a string
for which `string-blank?' returns true.  FN defaults to `triple'."
  (for-each
   (lambda (entry)
     (match entry
       ((predicate . object)
        ;; Skip entries that carry no usable value.
        (unless (if (string? object)
                    (string-blank? object)
                    (not object))
          (fn id predicate object)))))
   alist))

(define (string->binomial-name name)
  "Turn the species label NAME into an identifier via `string->identifier',
using an empty prefix, an empty separator and `string-capitalize-first'.

When NAME contains a parenthesised part, only the text from the first
opening parenthesis to the last closing parenthesis is used, after
deleting every character that is not whitespace, an ASCII letter, a digit
or a colon (which removes the parentheses themselves).  Otherwise NAME is
used as it is.

When NAME is #f or the empty string the result is the empty string, as
returned by `string->identifier' for such input."
  ;; Only strings can be matched; #f is handed on to `string->identifier',
  ;; which already maps it to the empty string.
  (let ((parenthesised (and (string? name)
                            (string-match "\\(.+\\)" name))))
    (string->identifier
     ""
     (if parenthesised
         (regexp-substitute/global
          #f "[^[:space:]A-Za-z0-9:]"
          (match:substring parenthesised)
          'pre "" 'post)
         name)
     #:separator ""
     #:proc string-capitalize-first)))