about summary refs log tree commit diff
path: root/examples/classification.scm
blob: b3c6acfd9e5df7bf010449a4b26425352acd0223 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



;; Classification Scheme
;;
;; Transformer over the `Species' table.  Its schema triples declare:
;;   - gnc:resource_classification_scheme, the skos:ConceptScheme itself,
;;     with three xkos levels (dataset type, set, species);
;;   - gnc:dataset_type, the level at depth "1", and its three member
;;     concepts gnc:probeset, gnc:genotype and gnc:phenotype;
;;   - gnc:species, the level at depth "3".
;; The intermediate gnc:set level (depth "2") is declared separately by
;; `classification-scheme-set'.
;; NOTE(review): the `triples' clause presumably attaches one skos:member
;; statement to gnc:species per Species row -- confirm against the
;; `define-transformer' implementation in (transform special-forms).
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   ;; The concept scheme and its levels.
   (gnc:resource_classification_scheme a skos:ConceptScheme)
   (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Resource Classification Scheme")
   (gnc:resource_classification_scheme skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
   (gnc:resource_classification_scheme xkos:numberOfLevels "3")
   (gnc:resource_classification_scheme xkos:levels gnc:dataset_type)
   (gnc:resource_classification_scheme xkos:levels gnc:set)
   (gnc:resource_classification_scheme xkos:levels gnc:species)
   ;; Level 1: dataset type.
   (gnc:dataset_type a xkos:ClassificationLevel)
   (gnc:dataset_type skos:prefLabel "Dataset Type")
   (gnc:dataset_type skos:definition "Classification level describing the biological or experimental nature of a dataset.   A dataset can either be a probeSet, a genotype or a phenotype.")
   (gnc:dataset_type xkos:depth "1")
   (gnc:dataset_type xkos:nextLevel gnc:set)
   (gnc:dataset_type skos:member gnc:probeset)
   (gnc:dataset_type skos:member gnc:genotype)
   (gnc:dataset_type skos:member gnc:phenotype)
   ;; Member concepts of the dataset-type level.
   (gnc:probeset a skos:Concept)
   (gnc:probeset skos:prefLabel "Transcriptomic Datasets")
   (gnc:probeset skos:altLabel "ProbeSet")
   (gnc:probeset skos:definition "A category representing microarray or sequencing probe sets that measure gene expression or other molecular traits.")
   (gnc:probeset skos:note "Individual probe sets are too numerous to list explicitly in this ontology but are available through the GeneNetwork API.")
   (gnc:genotype a skos:Concept)
   (gnc:genotype skos:prefLabel "Genotype Datasets")
   (gnc:genotype skos:altLabel "Genotype")
   (gnc:genotype skos:definition "A category representing genetic marker or variant datasets used for genetic mapping.")
   (gnc:phenotype a skos:Concept)
   (gnc:phenotype skos:prefLabel "Phenotype Datasets")
   (gnc:phenotype skos:altLabel "Phenotype")
   (gnc:phenotype skos:definition "A category representing measured traits or phenotypes for genetic analysis.")
   ;; Level 3: species.
   (gnc:species a xkos:ClassificationLevel)
   (gnc:species xkos:previousLevel gnc:set)
   (gnc:species skos:prefLabel "Species")
   ;; Definition text previously read "that that associates".
   (gnc:species skos:definition "A classification level that associates a given resource to a species in GeneNetwork.")
   (gnc:species xkos:depth "3"))
  (triples "gnc:species"
    ;; The member identifier is built by the same expression the `species'
    ;; transformer uses for its subject, so both refer to the same node.
    (set skos:member
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))

;; Transformer over the `InbredSet' table describing the second
;; classification level, gnc:set (xkos:depth "2"), which sits between
;; gnc:dataset_type (previous level) and gnc:species (next level).
;; NOTE(review): presumably each InbredSet row contributes one skos:member
;; statement on gnc:set -- confirm against the `define-transformer'
;; implementation in (transform special-forms).
(define-transformer classification-scheme-set
  (tables (InbredSet))
  (schema-triples
   (gnc:set a xkos:ClassificationLevel)
   (gnc:set xkos:nextLevel gnc:species)
   (gnc:set xkos:previousLevel gnc:dataset_type)
   (gnc:set skos:prefLabel "InbredSet Group")
   (gnc:set skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
   (gnc:set xkos:depth "2"))
  (triples "gnc:set"
    ;; Member identifiers use the same `string->identifier' call ("set"
    ;; prefix, "_" separator) that the `inbred-set' transformer uses for its
    ;; subjects.
    (set skos:member
         (string->identifier
          "set" (field InbredSet Name InbredSetName) #:separator "_"))))

;; Transformer over the `Species' table.
;;
;; Schema triples declare the properties gnt:has_uniprot_taxon_id,
;; gnt:has_family, gnt:short_name and gnt:belongs_to_species.  The `triples'
;; clause describes a species node, identified from the remapped
;; Species.Fullname, with its type, labels, short name, family and taxon
;; link.
;; NOTE(review): presumably one node is produced per Species row, and
;; `remap-species-identifiers' normalises the full name -- confirm against
;; the (transform ...) modules.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnt:has_uniprot_taxon_id a owl:ObjectProperty)
   (gnt:has_uniprot_taxon_id rdfs:label "has uniprot taxonomic id")
   (gnt:has_family a owl:DatatypeProperty)
   (gnt:has_family rdfs:label "has family")
   (gnt:has_family rdfs:range xsd:string)
   (gnt:has_family skos:definition "Links a species to its taxonomic family")
   (gnt:short_name a owl:DatatypeProperty)
   (gnt:short_name rdfs:label "has short name")
   (gnt:short_name rdfs:domain gnc:species)
   (gnt:short_name skos:definition "The short name of a given resource")
   (gnt:belongs_to_species a owl:ObjectProperty)
   ;; `comment' is an RDFS term; the RDF namespace does not define it.
   (gnt:belongs_to_species rdfs:comment "This resource belongs to this species")
   (gnt:belongs_to_species rdfs:label "belongs to species")
   (gnt:belongs_to_species rdfs:range gnc:species))
  (triples
      (string->identifier "" (remap-species-identifiers (field Species Fullname)))
    ;; `type' is an RDF term; the RDFS namespace does not define it.
    (set rdf:type 'gnc:species)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:short_name (field Species Name))
    (set gnt:has_family (field Species Family))
    ;; The object is built from the taxon: prefix and Species.TaxonomyId.
    (set gnt:has_uniprot_taxon_id (ontology
                                   'taxon:
                                   (field Species TaxonomyId)))))

;; Transformer over `InbredSet', left-joined with `Species' (on SpeciesId)
;; and `MappingMethod' (on MappingMethodId).
;;
;; Schema triples declare the properties gnt:genetic_type, gnt:has_set_code,
;; gnt:uses_mapping_method and gnt:belongs_to_group, and add gnc:set as a
;; domain of gnt:has_family.  The `triples' clause describes a set node with
;; its type, labels, genetic type, family, mapping method, set code and
;; owning species.
;; NOTE(review): presumably one node is produced per InbredSet row; how rows
;; with no matching Species/MappingMethod are rendered depends on
;; `string->identifier' and `set' -- confirm in the (transform ...) modules.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   (gnt:genetic_type a owl:DatatypeProperty)
   (gnt:genetic_type rdfs:label "has genetic type")
   (gnt:genetic_type skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
   (gnt:genetic_type rdfs:domain gnc:set)
   (gnt:genetic_type rdfs:range xsd:string)
   (gnt:has_set_code a owl:DatatypeProperty)
   (gnt:has_set_code rdfs:label "has set code")
   (gnt:has_set_code skos:definition "Provides a unique identifier code for a resource set.")
   (gnt:has_set_code rdfs:domain gnc:set)
   (gnt:has_set_code rdfs:range xsd:string)
   ;; Already defined as an owl prop in species
   (gnt:has_family rdfs:domain gnc:set)
   (gnt:uses_mapping_method a owl:ObjectProperty)
   (gnt:uses_mapping_method rdfs:label "mapping method")
   (gnt:uses_mapping_method rdfs:domain gnc:set)
   (gnt:uses_mapping_method rdfs:range gnc:mapping_method)
   (gnt:uses_mapping_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:belongs_to_group a owl:ObjectProperty)
   ;; `comment' is an RDFS term (as used for gnt:uses_mapping_method above);
   ;; the RDF namespace does not define it.
   (gnt:belongs_to_group rdfs:comment "Indicates the group the resource belongs to")
   (gnt:belongs_to_group schema:domainIncludes dcat:Dataset)
   (gnt:belongs_to_group schema:domainIncludes gnc:species)
   (gnt:belongs_to_group rdfs:range gnc:set)
   (gnt:belongs_to_group rdfs:label "belongs-to-group"))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    ;; `type' is an RDF term; the RDFS namespace does not define it.
    (set rdf:type 'gnc:set)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:genetic_type (field InbredSet GeneticType))
    (set gnt:has_family (field InbredSet Family))
    (set gnt:uses_mapping_method
         (string->identifier "mapping_method" (field MappingMethod Name) #:separator "_"))
    (set gnt:has_set_code (field InbredSet InbredSetCode))
    ;; Same identifier expression as the subject of the `species'
    ;; transformer, so this links to the species node.
    (set gnt:belongs_to_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))



;; Script entry point.
;;
;; Command-line options (each takes a value):
;;   -s, --settings       file containing the connection settings; its first
;;                        Scheme datum is read with `read' (required)
;;   -o, --output         passed to `with-documentation' as #:rdf
;;   -d, --documentation  passed to `with-documentation' as #:documentation
;;
;; The four transformers defined in this file are then handed to
;; `with-documentation' together with the prefix table.
(let* ((option-spec
        ;; --settings is marked required so that getopt-long reports the
        ;; missing option itself, instead of `call-with-input-file' below
        ;; failing on a #f file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; NOTE(review): these two are #f when omitted; whether
       ;; `with-documentation' accepts #f here is not visible in this file.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))

  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Every prefix used by the transformers in this file.
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("schema:" "<https://schema.org/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))