blob: 2f95ca3ed6cedba427466228193b61c48982e1d2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
|
#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms))
;; Transformer over the Tissue table.
;;
;; schema-triples: declares gnc:molecular_trait as an owl:Class and
;; skos:Concept, makes it a subclass of obo:UBERON_0000479 and gives it a
;; human-readable label.
;;
;; triples: for each Tissue row the subject is built by string->identifier
;; from the prefix "trait" and Tissue.Short_Name (separator "_"); it is typed
;; as gnc:molecular_trait and labelled with Tissue.Name (skos:prefLabel) and
;; Tissue.Short_Name (skos:altLabel).
;;
;; NOTE(review): the label text says species and tissue names are combined,
;; but the identifier built here only uses Tissue.Short_Name -- confirm which
;; one is intended.
(define-transformer gn:molecular-traits->gn:datasets
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   ;; Spelling of "molecular" corrected in the emitted label.
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait" (field Tissue Short_Name) #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))
;; Transformer over the Tissue table that uses the fixed subject
;; "gnc:molecular_trait" and, for every Tissue row, attaches the row's trait
;; identifier (prefix "trait", Tissue.Short_Name, separator "_") to it via
;; skos:member.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member
         (string->identifier "trait"
                             (field Tissue Short_Name)
                             #:separator "_"))))
;; Transformer linking each inbred set to the datasets measured on it.
;;
;; tables: Species joined to InbredSet, ProbeFreeze, ProbeSetFreeze and
;; Tissue, restricted to public ProbeSetFreeze rows of non-'monkey' species.
;;
;; triples: the subject is built from InbredSet.Name (prefix "set",
;; separator "_").  The comma-separated GROUP_CONCAT of ProbeSetFreeze.Name
;; is split on #\, and every name is turned into a "dataset" identifier; the
;; resulting list is emitted through multiset under gnt:has_probeset_data.
;;
;; The rows are grouped by InbredSet.Name because that is the column the
;; subject is built from.  Grouping by other columns (species / tissue)
;; leaves InbredSet.Name as a non-aggregated, non-grouped column, for which
;; the SQL server may return the value of an arbitrary row in the group, so
;; datasets of several inbred sets would be attributed to a single one.
;;
;; NOTE(review): GROUP_CONCAT output is capped by the server-side
;; group_concat_max_len setting; a truncated result would yield a truncated
;; last dataset name -- confirm the limit configured on the database.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY InbredSet.Name")
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_probeset_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))
;; Transformer relating every public, non-'monkey' dataset to its inbred
;; set, species and molecular trait.
;;
;; tables: Species joined to InbredSet, ProbeFreeze, ProbeSetFreeze and
;; Tissue.
;;
;; schema-triples: declares gnt:has_molecular_trait as an
;; owl:ObjectProperty with a domain, range and label.
;;
;; triples: the subject is the "dataset" identifier built from
;; ProbeSetFreeze.Name; it receives
;;   gnt:has_strain          -> "set" identifier from InbredSet.Name
;;   gnt:has_species         -> identifier from the remapped Species.Fullname
;;   gnt:has_molecular_trait -> "trait" identifier from Tissue.Short_Name
;;
;; NOTE(review): the declared rdfs:domain is gnc:set, yet the property is
;; set on dataset subjects below -- confirm the intended domain.
(define-transformer gn:dataset->set/species/molecular_trait
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait"))
  (triples (string->identifier "dataset"
                               (field ProbeSetFreeze Name)
                               #:separator "_")
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))
    (set gnt:has_species
         (string->identifier ""
                             (remap-species-identifiers
                              (field Species Fullname))))
    (set gnt:has_molecular_trait
         (string->identifier "trait"
                             (field Tissue Short_Name)
                             #:separator "_"))))
;; Helper for the free-text InfoFiles columns used by gn:dataset->metadata.
;; Returns "" when TEXT is the empty list or satisfies string-blank?;
;; otherwise returns TEXT passed through sanitize-rdf-string.
(define (sanitized-or-blank text)
  (if (or (null? text) (string-blank? text))
      ""
      (sanitize-rdf-string text)))

;; Transformer emitting the descriptive metadata of every public,
;; non-'monkey' dataset.
;;
;; tables: ProbeSetFreeze joined to ProbeFreeze, InbredSet, Species, Tissue,
;; AvgMethod and InfoFiles (inner joins) and to GeneChip (left join).
;;
;; schema-triples: declares the gnt:has_* / gnt:uses_* properties used below
;; (type, comment, domain, label / range where given).
;;
;; triples: the subject is the "dataset" identifier built from
;; ProbeSetFreeze.Name.  Identifier-valued objects are built with
;; string->identifier; the free-text InfoFiles columns go through
;; sanitized-or-blank.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_case_info a owl:ObjectProperty)
   (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
   (gnt:has_case_info rdfs:domain dcat:Dataset)
   (gnt:has_case_info rdfs:label "About Case")
   (gnt:has_citation a owl:ObjectProperty)
   (gnt:has_citation rdfs:comment "Citation for this dataset")
   (gnt:has_citation rdfs:domain dcat:Dataset)
   (gnt:has_citation rdfs:label "Citation")
   (gnt:has_contributors a owl:ObjectProperty)
   (gnt:has_contributors rdfs:comment "Contributors of this resource")
   (gnt:has_contributors rdfs:domain dcat:Dataset)
   (gnt:has_contributors rdfs:label "Contributors")
   (gnt:has_data_processing_info a owl:ObjectProperty)
   (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
   (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
   (gnt:has_data_processing_info rdfs:label "About Data Processing")
   (gnt:has_experiment_design a owl:ObjectProperty)
   (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
   (gnt:has_experiment_design rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design rdfs:label "Experiment Design")
   (gnt:has_experiment_design_info a owl:ObjectProperty)
   (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
   (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
   (gnt:has_experiment_design_info rdfs:label "Experiment Design")
   (gnt:has_experiment_type a owl:ObjectProperty)
   (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
   (gnt:has_experiment_type rdfs:domain dcat:Dataset)
   (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
   (gnt:has_platform_info a owl:ObjectProperty)
   (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
   (gnt:has_platform_info rdfs:domain dcat:Dataset)
   (gnt:has_platform_info rdfs:label "About Platform")
   (gnt:has_samples a owl:ObjectProperty)
   (gnt:has_samples rdfs:domain dcat:Dataset)
   (gnt:has_samples rdfs:label "Samples")
   (gnt:has_specifics a owl:ObjectProperty)
   (gnt:has_specifics rdfs:comment "Has specifics")
   (gnt:has_specifics rdfs:domain dcat:Dataset)
   (gnt:has_specifics rdfs:label "Specifics")
   (gnt:has_summary a owl:ObjectProperty)
   (gnt:has_summary rdfs:comment "Summary information about dataset")
   (gnt:has_summary rdfs:domain dcat:Dataset)
   (gnt:has_summary rdfs:label "Summary")
   (gnt:has_tissue_info a owl:ObjectProperty)
   (gnt:has_tissue_info rdfs:domain dcat:Dataset)
   (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource")
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   (gnt:uses_genechip skos:definition "The Platform this resource uses")
   ;; The type declaration lives inside schema-triples together with the
   ;; property's comment, domain, label and range.
   (gnt:uses_normalization_method a owl:ObjectProperty)
   (gnt:uses_normalization_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set skos:prefLabel (field ProbeSetFreeze Name))
    (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
    (set rdfs:label (normalize-string-field (field InfoFiles InfoPageName)))
    ;; NOTE(review): the XML Schema datatype is spelled xsd:dateTime
    ;; (capital T); confirm whether consumers rely on the lowercase form
    ;; and whether CreateTime values carry a time component.
    (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime))
    ;; Links to resources identified elsewhere.
    (set gnt:uses_normalization_method
         (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name) #:separator "_"))
    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    ;; Free-text columns of InfoFiles; each property is set exactly once.
    (set gnt:has_experiment_type
         (sanitized-or-blank (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (sanitized-or-blank (field InfoFiles About_Tissue)))
    (set gnt:has_summary
         (sanitized-or-blank (field InfoFiles Summary)))
    (set gnt:has_citation
         (sanitized-or-blank (field InfoFiles Citation)))
    (set gnt:has_samples
         (sanitized-or-blank (field InfoFiles samples)))
    (set gnt:has_specifics
         (sanitized-or-blank (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (sanitized-or-blank (field InfoFiles About_Cases)))
    (set gnt:has_platform_info
         (sanitized-or-blank (field InfoFiles About_Array_Platform)))
    (set gnt:has_data_processing_info
         (sanitized-or-blank (field InfoFiles About_Data_Values_Processing)))
    (set gnt:has_experiment_design
         (sanitized-or-blank (field InfoFiles Overall_Design)))
    (set gnt:has_contributors
         (sanitized-or-blank (field InfoFiles Contributor)))))
;; Script entry point.
;;
;; Command-line options (parsed with getopt-long):
;;   -s / --settings FILE       required; FILE is opened and a single datum
;;                              is read from it to obtain the connection
;;                              settings
;;   -o / --output VALUE        passed on as the #:rdf output
;;   -d / --documentation VALUE passed on as the #:documentation output
;;
;; --settings is marked required? so that a missing option is reported by
;; getopt-long as a usage error instead of reaching call-with-input-file
;; as #f.
(let* ((option-spec
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is expected to hold one readable Scheme datum.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Molecular Traits")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Prefix name -> namespace IRI pairs.
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("obo:" "<http://purl.obolibrary.org/obo/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
   ;; Transformers defined in this file, in the order they are handed over.
   (inputs
    (list
     gn:dataset->metadata
     gn:dataset->set/species/molecular_trait
     gn:molecular-traits->gn:datasets
     gn:set->gn:dataset
     gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))
|