aboutsummaryrefslogtreecommitdiff
path: root/examples/classification.scm
blob: 3024af621f2ecd47b4e24983e9b5d62e210483ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))



(define (remap-species-identifiers str)
  "Return the name that the species label STR is remapped to.

Labels present in the lookup table below are replaced by their mapped
name; any other value is returned unchanged.  This is a stop-gap: the
labels should really be corrected in the database itself."
  (let ((known-remappings
         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
           ;; NOTE(review): this maps one species name onto a different
           ;; species name -- confirm that this is intentional.
           ("Macaca mulatta" . "Macaca nemestrina")
           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))))
    (cond
     ;; `assoc' compares keys with `equal?', so a non-string STR simply
     ;; misses every entry and is handed back as-is.
     ((assoc str known-remappings) => cdr)
     (else str))))

;; Classification Scheme
;; Transformer over the Species table.
;;
;; The schema-triples describe gnc:ResourceClassificationScheme as a
;; skos:ConceptScheme with three levels, and declare two of those levels
;; here: gnc:DatasetType (depth 1, with members gnc:Probeset,
;; gnc:Genotype and gnc:Phenotype) and gnc:Species (depth 3).
;;
;; NOTE(review): `define-transformer', `tables', `schema-triples',
;; `triples', `set' and `field' are project special forms whose expansion
;; is not visible in this file; the comments below describe only what the
;; forms literally contain.
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   ;; The classification scheme itself.
   (gnc:ResourceClassificationScheme a skos:ConceptScheme)
   (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources")
   (gnc:ResourceClassificationScheme xkos:numberOfLevels "3")
   (gnc:ResourceClassificationScheme xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )")
   ;; Level 1: dataset type and its three members.
   (gnc:DatasetType a xkos:ClassificationLevel)
   (gnc:DatasetType skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
   (gnc:DatasetType xkos:depth "1")
   (gnc:DatasetType skos:member gnc:Probeset)
   (gnc:DatasetType skos:member gnc:Genotype)
   (gnc:DatasetType skos:member gnc:Phenotype)
   (gnc:Probeset skos:prefLabel "mRNA Assay Datasets")
   (gnc:Probeset skos:altLabel "ProbeSet")
   (gnc:Genotype skos:prefLabel "Genotype")
   (gnc:Genotype skos:altLabel "DNA Markers and SNPs")
   (gnc:Phenotype skos:prefLabel "Phenotype")
   (gnc:Phenotype skos:altLabel "Traits and Cofactors")
   ;; Level 3: species.
   (gnc:Species a xkos:ClassificationLevel)
   (gnc:Species skos:prefLabel "The species in which this resource belongs")
   (gnc:Species xkos:depth "3")
   (gnc:Species xkos:specializes gnc:Set))
  ;; Subject is the fixed string "gnc:Species"; its skos:member object is
  ;; an identifier built from the remapped Species.Fullname, with an empty
  ;; prefix and separator and `string-capitalize-first' as #:proc.
  ;; Presumably one triple is emitted per Species row -- confirm against
  ;; the `triples' form's definition.
  (triples "gnc:Species"
    (set skos:member
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator ""
                             #:proc string-capitalize-first))))

;; Transformer over the InbredSet table.
;;
;; The schema-triples declare gnc:Set as an xkos:ClassificationLevel at
;; depth 2 that generalizes gnc:Species.
;;
;; NOTE(review): `define-transformer' and the forms inside it are project
;; special forms whose expansion is not visible in this file.
(define-transformer classification-scheme-set
  (tables (InbredSet))
  (schema-triples
   (gnc:Set a xkos:ClassificationLevel)
   (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
   (gnc:Set xkos:depth "2")
   (gnc:Set xkos:generalizes gnc:Species))
  ;; Subject is the fixed string "gnc:Set"; its skos:member object is an
  ;; identifier built from InbredSet.Name with the prefix "set", an empty
  ;; separator, and `string-capitalize-first' as #:proc.
  ;; The third argument to `field' (InbredSetName) is presumably a column
  ;; alias -- confirm against the `field' form's definition.
  (triples "gnc:Set"
    (set skos:member
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))

;; Transformer over the Species table that describes each species.
;;
;; The schema-triples declare the gnt:family, gnt:shortName and
;; gnt:belongsToSpecies properties.  The `triples' form then attaches
;; labels, short name, family and a taxonomy notation to the identifier
;; built from the remapped Species.Fullname.
;;
;; NOTE(review): `define-transformer' and the forms inside it are project
;; special forms whose expansion is not visible in this file.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnt:family a owl:ObjectProperty)
   (gnt:family rdfs:domain gnc:Species)
   (gnt:family skos:definition "This resource belongs to this family")
   (gnt:shortName a owl:ObjectProperty)
   (gnt:shortName rdfs:domain gnc:Species)
   (gnt:shortName skos:definition "The short name of a given resource")
   ;; The RDF vocabulary defines the class rdf:Property (capital P), and
   ;; `label'/`comment' live in the rdfs: namespace, not rdf:.
   (gnt:belongsToSpecies a rdf:Property)
   (gnt:belongsToSpecies rdfs:comment "This resource given to this species")
   (gnt:belongsToSpecies rdfs:label "belongsToSpecies"))
  (triples
      ;; Subject: same identifier construction as the skos:member objects
      ;; of gnc:Species (empty prefix and separator, first letter
      ;; capitalised via #:proc).
      (string->identifier "" (remap-species-identifiers (field Species Fullname))
                          #:separator ""
                          #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:shortName (field Species Name))
    (set gnt:family (field Species Family))
    ;; Notation is built by `ontology' from the taxon: prefix and
    ;; Species.TaxonomyId.
    (set skos:notation (ontology
                        'taxon:
                        (field Species TaxonomyId)))))

;; Transformer over InbredSet, left-joined with Species and MappingMethod,
;; that describes each set.
;;
;; The schema-triples declare the gnt:geneticType, gnt:code,
;; gnt:mappingMethod and gnt:belongsToGroup properties and add gnc:Set as
;; a domain of gnt:family.  The `triples' form then attaches labels,
;; genetic type, family, mapping method, code and the related species to
;; the identifier built from InbredSet.Name.
;;
;; NOTE(review): `define-transformer' and the forms inside it are project
;; special forms whose expansion is not visible in this file.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   ;; The domain is the gnc:Set class declared by the classification
   ;; scheme.  IRIs are case-sensitive, so it must not be spelled gnc:set.
   (gnt:geneticType a owl:ObjectProperty)
   (gnt:geneticType rdfs:domain gnc:Set)
   (gnt:code a owl:ObjectProperty)
   (gnt:code rdfs:domain gnc:Set)
   ;; Already defined as an owl prop in species
   (gnt:family rdfs:domain gnc:Set)
   (gnt:mappingMethod a owl:ObjectProperty)
   (gnt:mappingMethod rdfs:domain gnc:Set)
   ;; The RDF vocabulary defines the class rdf:Property (capital P), and
   ;; `label'/`comment' live in the rdfs: namespace, not rdf:.
   (gnt:belongsToGroup a rdf:Property)
   (gnt:belongsToGroup rdfs:comment "This resource given to this group")
   (gnt:belongsToGroup rdfs:label "belongsToGroup"))
  (triples
      ;; Subject: same identifier construction as the skos:member objects
      ;; of gnc:Set ("set" prefix, empty separator, first letter
      ;; capitalised via #:proc).
      (string->identifier
       "set" (field InbredSet Name InbredSetName)
       #:separator ""
       #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:geneticType (field InbredSet GeneticType))
    (set gnt:family (field InbredSet Family))
    (set gnt:mappingMethod (field MappingMethod Name))
    (set gnt:code (field InbredSet InbredSetCode))
    ;; Object: the species identifier, built the same way as the subject
    ;; of the `species' transformer.
    (set xkos:generalizes
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator ""
                             #:proc string-capitalize-first))))



;; Script entry point: parse the command line, read the connection
;; settings, and hand the four transformers defined in this file to
;; `with-documentation'.
;;
;; Options:
;;   -s, --settings FILE       file from which one datum is `read' and used
;;                             as the connection settings (required)
;;   -o, --output VALUE        passed on as the #:rdf output
;;   -d, --documentation VALUE passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is marked required so that `getopt-long' reports a
        ;; missing option itself, instead of `call-with-input-file'
        ;; failing later on #f with a wrong-type error.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))

  ;; NOTE(review): `with-documentation' is a project special form whose
  ;; expansion is not visible in this file; presumably it runs the listed
  ;; transformers against the database and writes the two outputs.
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))