about summary refs log tree commit diff
path: root/examples/classification.scm
blob: 4af9a3cdcca1b1c4259933c820dd9cebd8a44f0c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



;; Classification Scheme
;; Declares the GeneNetwork resource classification scheme (a
;; skos:ConceptScheme with four xkos levels) plus the schema for two of its
;; levels, gnc:population_category and gnc:species.  The `triples' clause
;; attaches species identifiers, derived from Species.Fullname, to
;; gnc:species through skos:member.
;;
;; Level order implied by the xkos:depth values in this file:
;;   gnc:taxonomic_family (1) -> gnc:species (2) -> gnc:set (3)
;;   -> gnc:population_category (4)
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   (gnc:resource_classification_scheme a skos:ConceptScheme)
   (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Resource Classification Scheme")
   (gnc:resource_classification_scheme skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
   (gnc:resource_classification_scheme xkos:numberOfLevels "4")
   (gnc:resource_classification_scheme xkos:levels gnc:taxonomic_family)
   (gnc:resource_classification_scheme xkos:levels gnc:species)
   (gnc:resource_classification_scheme xkos:levels gnc:set)
   (gnc:resource_classification_scheme xkos:levels gnc:population_category)
   (gnc:population_category a xkos:ClassificationLevel)
   (gnc:population_category skos:inScheme gnc:resource_classification_scheme)
   ;; gnc:set (depth 3) names this level as its nextLevel, so from here
   ;; gnc:set is the *previous* level, not the next one.
   (gnc:population_category xkos:previousLevel gnc:set)
   ;; Was "Species", copy-pasted from the gnc:species level below.
   (gnc:population_category skos:prefLabel "Population Category")
   (gnc:population_category rdfs:label "Population Category")
   (gnc:population_category xkos:depth "4")
   ;; Subject uses the gnc: prefix like every other triple of this level;
   ;; the gnt: spelling attached the definition to an unrelated node.
   (gnc:population_category skos:definition "Classification of genetic populations by breeding design and data aggregation.")
   (gnc:species a xkos:ClassificationLevel)
   (gnc:species skos:inScheme gnc:resource_classification_scheme)
   (gnc:species xkos:previousLevel gnc:taxonomic_family)
   (gnc:species xkos:nextLevel gnc:set)
   (gnc:species skos:prefLabel "Species")
   (gnc:species skos:definition "A classification level that associates a given resource to a species in GeneNetwork.")
   (gnc:species xkos:depth "2"))
  (triples "gnc:species"
    (set skos:member
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))

;; Declares gnc:set as an xkos:ClassificationLevel (depth "3", placed
;; between gnc:species and gnc:population_category) and links it, through
;; skos:member, to identifiers built from InbredSet.Name with the "set"
;; prefix and "_" separator.  Only rows matching "WHERE public > 0" are read.
(define-transformer classification-scheme-set
  (tables (InbredSet)
          "WHERE public > 0")
  (schema-triples
   (gnc:set a xkos:ClassificationLevel)
   (gnc:set skos:inScheme gnc:resource_classification_scheme)
   (gnc:set xkos:nextLevel gnc:population_category)
   (gnc:set xkos:previousLevel gnc:species)
   (gnc:set skos:prefLabel "InbredSet Group")
   (gnc:set skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
   (gnc:set xkos:depth "3"))
  (triples "gnc:set"
    (set skos:member
         ;; Same identifier construction as the subject of `inbred-set', so
         ;; the members listed here are the nodes that transformer describes.
         (string->identifier
          "set" (field InbredSet Name InbredSetName) #:separator "_"))))

;; Describes species read from the Species table.  The subject is the
;; identifier derived from Species.Fullname (after
;; `remap-species-identifiers'); it is typed gnc:species and given labels, a
;; short name, a link to its taxonomic family and a UniProt taxon reference.
;; The schema section declares the gnt: properties used for those links,
;; including gnt:has_species, which other transformers in this file emit.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnt:has_uniprot_taxon_id a owl:ObjectProperty)
   (gnt:has_uniprot_taxon_id rdfs:label "has uniprot taxonomic id")
   (gnt:has_taxonomic_family a owl:ObjectProperty)
   (gnt:has_taxonomic_family rdfs:label "has family")
   (gnt:has_taxonomic_family skos:definition "Links a species to its taxonomic family")
   (gnt:has_taxonomic_family schema:domainIncludes gnc:species)
   (gnt:has_taxonomic_family schema:domainIncludes gnc:set)
   (gnt:short_name a owl:DatatypeProperty)
   (gnt:short_name rdfs:label "has short name")
   (gnt:short_name rdfs:domain gnc:species)
   (gnt:short_name skos:definition "The short name of a given resource")
   (gnt:has_species a owl:ObjectProperty)
   ;; `comment' lives in the RDFS vocabulary; the rdf: namespace defines no
   ;; such term.  Matches the rdfs:comment usage elsewhere in this file.
   (gnt:has_species rdfs:comment "This resource belongs to this species")
   (gnt:has_species rdfs:label "belongs to species")
   (gnt:has_species rdfs:range gnc:species))
  (triples
      (string->identifier "" (remap-species-identifiers (field Species Fullname)))
    (set rdf:type 'gnc:species)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:short_name (field Species Name))
    ;; Same "family"/"_" identifier construction as the subjects of the
    ;; species-families-* transformers.
    (set gnt:has_taxonomic_family (string->identifier "family" (field Species Family) #:separator "_"))
    (set gnt:has_uniprot_taxon_id (ontology
                                   'taxon:
                                   (field Species TaxonomyId)))))

;; Links each species node to the InbredSet groups that reference it:
;; subject is the species identifier (from Species.Fullname), object is the
;; "set_"-prefixed identifier built from InbredSet.Name, predicate is
;; gnt:has_strain.  Rows come from InbredSet left-joined to Species on
;; SpeciesId, filtered by "WHERE public > 0".
;;
;; NOTE(review): gnt:has_strain is also declared in `inbred-set' with a
;; different rdfs:label ("belongs-to-group"); confirm which label is intended.
(define-transformer species-fan-out
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id"))
          "WHERE public > 0")
  (schema-triples
   (gnt:has_strain a owl:ObjectProperty)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:domain gnc:species)
   (gnt:has_strain rdfs:label "this resource belongs to this strain.")
   (gnt:has_strain skos:definition "Lists all strains that belong to this resource."))
  (triples (string->identifier "" (remap-species-identifiers (field Species Fullname)))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))


;; Declares the gnc:taxonomic_family classification level (depth "1", next
;; level gnc:species) and describes family nodes.  The subject is the
;; "family_"-prefixed identifier built from Species.Family; each gets an
;; rdfs:label, an integer order id and a gnt:has_species link.
;;
;; NOTE(review): the query is grouped by family while also selecting
;; Fullname and OrderId, so presumably only one species per family is linked
;; here and `species-families-fanout' supplies the rest -- confirm against
;; the SQL mode in use.
(define-transformer species-families-list
  (tables (Species)
          "GROUP BY FAMILY")
  (schema-triples
   (gnc:taxonomic_family a xkos:ClassificationLevel)
   (gnc:taxonomic_family skos:inScheme gnc:resource_classification_scheme)
   (gnc:taxonomic_family skos:prefLabel "Family")
   (gnc:taxonomic_family skos:definition "An organizational classification level used in GeneNetwork to group resources into families.")
   (gnc:taxonomic_family xkos:depth "1")
   (gnc:taxonomic_family xkos:nextLevel gnc:species)
   (gnt:has_family_order_id a owl:DatatypeProperty)
   (gnt:has_family_order_id rdfs:range xsd:integer))
  (triples (string->identifier "family" (field Species Family) #:separator "_")
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    (set rdfs:label (field Species Family))
    ;; The order id is annotated with the ^^xsd:integer datatype suffix,
    ;; matching the rdfs:range declared above.
    (set gnt:has_family_order_id
         (annotate-field (field Species OrderId)
                         '^^xsd:integer))))

;; Links family nodes ("family_"-prefixed identifier from Species.Family) to
;; species identifiers via gnt:has_species, reading the Species table with
;; no grouping or filter clause.
;;
;; NOTE(review): the schema section re-declares gnt:has_family_order_id,
;; which this transformer does not emit; it looks carried over from
;; `species-families-list'.
(define-transformer species-families-fanout
  (tables (Species))
  (schema-triples
   (gnt:has_family_order_id a owl:DatatypeProperty))
  (triples (string->identifier "family" (field Species Family) #:separator "_")
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))


;; Describes InbredSet groups.  Rows come from InbredSet left-joined to
;; Species and MappingMethod, filtered by "WHERE public > 0".  The subject
;; is the "set_"-prefixed identifier built from InbredSet.Name; it is typed
;; gnc:set and given labels, genetic type, an HTML description, mapping
;; method, set code and a gnt:has_species link.  The schema section declares
;; the gnt: properties involved.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id"))
          "WHERE public > 0")
  (schema-triples
   (gnt:genetic_type a owl:DatatypeProperty)
   (gnt:genetic_type rdfs:label "has genetic type")
   (gnt:genetic_type skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
   (gnt:genetic_type rdfs:domain gnc:set)
   (gnt:genetic_type rdfs:range xsd:string)
   (gnt:has_set_code a owl:DatatypeProperty)
   (gnt:has_set_code rdfs:label "has set code")
   (gnt:has_set_code skos:definition "Provides a unique identifier code for a resource set.")
   (gnt:has_set_code rdfs:domain gnc:set)
   (gnt:has_set_code rdfs:range xsd:string)
   (gnt:uses_mapping_method a owl:ObjectProperty)
   (gnt:uses_mapping_method rdfs:label "mapping method")
   (gnt:uses_mapping_method rdfs:domain gnc:set)
   (gnt:uses_mapping_method rdfs:range gnc:mapping_method)
   (gnt:uses_mapping_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:has_strain a owl:ObjectProperty)
   ;; `comment' is an RDFS term (as used for gnt:uses_mapping_method just
   ;; above); rdf:comment does not exist in the RDF namespace.
   (gnt:has_strain rdfs:comment "Indicates the group the resources belongs to")
   (gnt:has_strain schema:domainIncludes dcat:Dataset)
   (gnt:has_strain schema:domainIncludes gnc:species)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:label "belongs-to-group"))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (set rdf:type 'gnc:set)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:genetic_type (field InbredSet GeneticType))
    ;; The description column is emitted with the ^^rdf:HTML datatype suffix.
    (set dct:description (annotate-field (field InbredSet description)
                                         '^^rdf:HTML))
    (set gnt:uses_mapping_method
         (string->identifier "mapping_method" (field MappingMethod Name) #:separator "_"))
    (set gnt:has_set_code (field InbredSet InbredSetCode))
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))

;; Links each InbredSet group ("set_"-prefixed identifier) to its population
;; category ("population_"-prefixed identifier built from InbredSet.Family)
;; via gnt:has_population_category, and declares that property.  Only rows
;; with a non-NULL Family are read.
;;
;; NOTE(review): unlike the other InbredSet transformers there is no
;; "public > 0" filter here -- confirm that is deliberate.
(define-transformer inbredset-population*
  (tables (InbredSet)
          "WHERE Family IS NOT NULL")
  (schema-triples
   (gnt:has_population_category rdfs:domain gnc:set)
   (gnt:has_population_category a owl:ObjectProperty)
   (gnt:has_population_category rdfs:comment "This group belongs to this population category.")
   (gnt:has_population_category rdfs:label "belongs to population category."))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (set gnt:has_population_category
         (string->identifier "population" (field InbredSet Family) #:separator "_"))))

;; Describes population-category nodes, one identifier per distinct
;; non-NULL InbredSet.Family ("population_" prefix).  Each node is typed
;; gnc:reference_population and given an rdfs:label, a skos:member link to
;; gnc:population_category and an integer order id taken from
;; InbredSet.FamilyOrder.
;;
;; NOTE(review): gnc:population_category_scheme and gnc:PopulationCategory
;; are referenced below but not declared anywhere in this file.
(define-transformer inbredset-population-list
  (tables (InbredSet)
          "WHERE Family IS NOT NULL GROUP BY Family")
  (schema-triples
   (gnc:reference_population a skos:Concept)
   (gnc:reference_population a gnc:PopulationCategory)
   (gnc:reference_population skos:inScheme gnc:population_category_scheme)
   (gnc:reference_population skos:prefLabel "Reference population")
   (gnc:reference_population skos:definition "A genetic population")
   ;; The triples clause emits gnt:has_population_order_id, so declare it
   ;; the same way `species-families-list' declares gnt:has_family_order_id.
   (gnt:has_population_order_id a owl:DatatypeProperty)
   (gnt:has_population_order_id rdfs:range xsd:integer))
  (triples (string->identifier "population" (field InbredSet Family) #:separator "_")
    (set rdf:type 'gnc:reference_population)
    (set rdfs:label (field InbredSet Family))
    (set skos:member 'gnc:population_category)
    (set gnt:has_population_order_id
         (annotate-field (field InbredSet FamilyOrder)
                         '^^xsd:integer))))

;; Inverse direction of `inbredset-population*': links each population
;; category ("population_"-prefixed identifier from InbredSet.Family) to the
;; InbredSet groups in it ("set_"-prefixed identifier) via gnt:has_strain.
;; Only rows with a non-NULL Family are read; no schema triples are declared
;; here (gnt:has_strain is declared by other transformers in this file).
(define-transformer inbredset-population-fanout
  (tables (InbredSet)
          "WHERE Family IS NOT NULL")
  (triples (string->identifier "population" (field InbredSet Family) #:separator "_")
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))

;; Attaches the population-category nodes to the gnc:population_category
;; classification level: the fixed subject "gnc:population_category" gets
;; one gnt:has_population_category link per distinct Family among public
;; InbredSet rows.
(define-transformer population-category-inbredset
  (tables (InbredSet)
          ;; Exclude NULL families, as every other Family-based transformer
          ;; in this file does; otherwise the NULL group would yield a link
          ;; to a category node that `inbredset-population-list' never
          ;; defines.
          "WHERE public > 0 AND Family IS NOT NULL GROUP BY Family")
  (triples "gnc:population_category"
    (set gnt:has_population_category
         (string->identifier "population" (field InbredSet Family) #:separator "_"))))

;; Attaches family nodes to the gnc:taxonomic_family classification level:
;; the fixed subject "gnc:taxonomic_family" gets one
;; gnt:has_taxonomic_family link per group of the Species table grouped by
;; Family ("family_"-prefixed identifier).
;;
;; NOTE(review): the schema section declares gnt:assigned_species, but the
;; triples clause emits gnt:has_taxonomic_family and never uses
;; gnt:assigned_species -- confirm which predicate is intended.
(define-transformer family-category-species
  (tables (Species)
          "GROUP BY Family")
  (schema-triples
   (gnt:assigned_species rdfs:domain gnc:set)
   (gnt:assigned_species a owl:ObjectProperty)
   (gnt:assigned_species rdfs:label "These families have been assigned to these species"))
  (triples "gnc:taxonomic_family"
    (set gnt:has_taxonomic_family
         (string->identifier "family" (field Species Family) #:separator "_"))))



;; Script entry point.  Parses the command line, reads the connection
;; settings, and runs every transformer defined above through
;; `with-documentation'.
;;
;; Options:
;;   -s, --settings FILE       file whose first datum (via `read') is the
;;                             connection settings; required
;;   -o, --output FILE         passed as the #:rdf output
;;   -d, --documentation FILE  passed as the #:documentation output
(let* ((option-spec
        ;; --settings is marked required because it is handed straight to
        ;; `call-with-input-file' below; without it the script used to die
        ;; with an opaque wrong-type error on #f instead of a usage error.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; NOTE(review): output and documentation default to #f when omitted;
       ;; how `with-documentation' treats #f is not visible from this file.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))

  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Prefix table covering every namespace used by the transformers above.
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("schema:" "<https://schema.org/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          species-fan-out
          inbred-set
          inbredset-population*
          species-families-list
          species-families-fanout
          inbredset-population-list
          inbredset-population-fanout
          population-category-inbredset
          family-category-species))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))