about summary refs log tree commit diff
path: root/examples/datasets.scm
blob: 8abb84f33985cf0fd0dcc8e94d8f06ed732fac00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#! /usr/bin/env guile
!#

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Return TEXT passed through sanitize-rdf-string, or "" when TEXT is '()
;; or blank according to string-blank?.
;; NOTE(review): '() is presumably how the SQL layer reports a NULL
;; column -- confirm against (transform sql).
(define (sanitized-or-empty text)
  (if (or (null? text) (string-blank? text))
      ""
      (sanitize-rdf-string text)))

;; Transformer describing dataset metadata as RDF.
;;
;; tables:         Datasets inner-joined with InfoFiles (on DatasetId) and
;;                 InbredSet (on InfoFiles.InbredSetId), filtered and
;;                 grouped by Datasets.DatasetId.
;; schema-triples: declarations (type, comment, domain, label) for the
;;                 gnt:has_* predicates used below.
;; triples:        the subject is built from InfoFiles.InfoPageName with
;;                 the "dataset" prefix; each `set' clause attaches one
;;                 predicate/object pair to that subject.
(define-transformer gn:dataset->metadata
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId"))
          ;; Skip monkey datasets
          "WHERE InfoFiles.InfoPageName NOT LIKE 'INIA_MacFas_%'"
          "GROUP BY Datasets.DatasetId")
  ;; NOTE(review): gnt:has_experiment_design_info is declared here but no
  ;; `set' clause below emits it, while gnt:has_strain is emitted below
  ;; without any declaration here -- confirm whether that is intended.
  (schema-triples
    (gnt:has_case_info a owl:ObjectProperty)
    (gnt:has_case_info rdfs:comment "Information about the cases used in this platform")
    (gnt:has_case_info rdfs:domain dcat:Dataset)
    (gnt:has_case_info rdfs:label "About Case")
    (gnt:has_citation a owl:ObjectProperty)
    (gnt:has_citation rdfs:comment "Citation for this dataset")
    (gnt:has_citation rdfs:domain dcat:Dataset)
    (gnt:has_citation rdfs:label "Citation")
    (gnt:has_contributors a owl:ObjectProperty)
    (gnt:has_contributors rdfs:comment "Contributors of this resource")
    (gnt:has_contributors rdfs:domain dcat:Dataset)
    (gnt:has_contributors rdfs:label "Contributors")
    (gnt:has_data_processing_info a owl:ObjectProperty)
    (gnt:has_data_processing_info rdfs:comment "Information about how this dataset was processed")
    (gnt:has_data_processing_info rdfs:domain dcat:Dataset)
    (gnt:has_data_processing_info rdfs:label "About Data Processing")
    (gnt:has_experiment_design a owl:ObjectProperty)
    (gnt:has_experiment_design rdfs:comment "Experiment Design for this resource")
    (gnt:has_experiment_design rdfs:domain dcat:Dataset)
    (gnt:has_experiment_design rdfs:label "Experiment Design")
    (gnt:has_experiment_design_info a owl:ObjectProperty)
    (gnt:has_experiment_design_info rdfs:comment "Information about how the experiment was designed")
    (gnt:has_experiment_design_info rdfs:domain dcat:Dataset)
    (gnt:has_experiment_design_info rdfs:label "Experiment Design")
    (gnt:has_experiment_type a owl:ObjectProperty)
    (gnt:has_experiment_type rdfs:comment "Information about the experiment type")
    (gnt:has_experiment_type rdfs:domain dcat:Dataset)
    (gnt:has_experiment_type rdfs:label "Experiment Type Metadata")
    (gnt:has_platform_info a owl:ObjectProperty)
    (gnt:has_platform_info rdfs:comment "Information about the platform that was used with this dataset")
    (gnt:has_platform_info rdfs:domain dcat:Dataset)
    (gnt:has_platform_info rdfs:label "About Platform")
    (gnt:has_samples a owl:ObjectProperty)
    (gnt:has_samples rdfs:domain dcat:Dataset)
    (gnt:has_samples rdfs:label "Samples")
    (gnt:has_specifics a owl:ObjectProperty)
    (gnt:has_specifics rdfs:comment "Has specifics")
    (gnt:has_specifics rdfs:domain dcat:Dataset)
    (gnt:has_specifics rdfs:label "Specifics")
    (gnt:has_summary a owl:ObjectProperty)
    (gnt:has_summary rdfs:comment "Summary information about dataset")
    (gnt:has_summary rdfs:domain dcat:Dataset)
    (gnt:has_summary rdfs:label "Summary")
    (gnt:has_tissue_info a owl:ObjectProperty)
    (gnt:has_tissue_info rdfs:domain dcat:Dataset)
    (gnt:has_tissue_info rdfs:label "Metadata about Tissue for this resource"))
  (triples (string->identifier "dataset" (field InfoFiles InfoPageName) #:separator "_")
    (set rdf:type 'dcat:Dataset)
    (set dct:title (normalize-string-field (field InfoFiles InfoPageName)))
    ;; The identifier is the accession id with a literal "GN" prefix.
    (set dct:identifier (format #f "GN~a" (field InfoFiles GN_AccesionId)))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    ;; Every free-text column below goes through the same rule: '() or a
    ;; blank value becomes "", anything else is sanitized.
    (set gnt:has_experiment_type
         (sanitized-or-empty (field InfoFiles Experiment_Type)))
    (set gnt:has_tissue_info
         (sanitized-or-empty (field Datasets AboutTissue)))
    (set gnt:has_summary
         (sanitized-or-empty (field Datasets Summary)))
    (set gnt:has_citation
         (sanitized-or-empty (field Datasets Citation)))
    (set gnt:has_samples
         (sanitized-or-empty (field InfoFiles samples)))
    (set gnt:has_specifics
         (sanitized-or-empty (field InfoFiles Specifics)))
    (set gnt:has_case_info
         (sanitized-or-empty (field Datasets AboutCases)))
    (set gnt:has_platform_info
         (sanitized-or-empty (field Datasets AboutPlatform)))
    (set gnt:has_data_processing_info
         (sanitized-or-empty (field Datasets AboutDataProcessing)))
    (set gnt:has_experiment_design
         (sanitized-or-empty (field Datasets ExperimentDesign)))
    (set gnt:has_contributors
         (sanitized-or-empty (field Datasets Contributors)))))


;; Script entry point.
;;
;; Command-line options (each takes a value):
;;   -s / --settings       file whose first datum is read as the connection
;;                         settings (required)
;;   -o / --output         passed to with-documentation as #:rdf
;;   -d / --documentation  passed to with-documentation as #:documentation
(let* ((option-spec
        ;; --settings is mandatory: its value is opened as a file below, so
        ;; let getopt-long reject a missing option up front instead of
        ;; handing #f to call-with-input-file.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; output and documentation stay optional and default to #f.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed with `read', i.e. it must contain a
       ;; single Scheme datum.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Datasets Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Prefix -> namespace IRI pairs made available to the transformer.
   (prefixes
    '(("dct:" "<http://purl.org/dc/terms/>")
      ("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
   (inputs
    (list gn:dataset->metadata))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))