about summary refs log tree commit diff
path: root/examples/molecular-traits.scm
blob: f338693cad629fab933a9e5d19bfd6e65d516e7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#! /usr/bin/env guile
!#

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Transformer over the Tissue table that describes tissues as
;; gnc:molecular_trait resources.
;;
;; schema-triples: declares gnc:molecular_trait as an owl:Class and a
;; skos:Concept, makes it a subclass of obo:UBERON_0000479 and attaches a
;; human-readable rdfs:label.
;; triples: the subject is built by string->identifier from the prefix
;; "trait" and Tissue.Short_Name joined with "_".  It is typed as
;; gnc:molecular_trait and labelled with Tissue.Name (skos:prefLabel) and
;; Tissue.Short_Name (skos:altLabel).
(define-transformer gn:molecular-traits->gn:datasets
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   ;; The label text is emitted verbatim into the RDF output.
   (gnc:molecular_trait rdfs:label "Molecular Trait.   This describes a molecular trait of a given species.  We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait" (field Tissue Short_Name) #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))

;; Transformer over the Tissue table that attaches trait identifiers to
;; gnc:molecular_trait via skos:member.
;;
;; The object is built exactly like the subject of
;; gn:molecular-traits->gn:datasets: prefix "trait" and Tissue.Short_Name
;; joined with "_".
;; NOTE(review): the subject is the plain string "gnc:molecular_trait"
;; rather than a symbol or a string->identifier call -- presumably the
;; triples form emits it verbatim as a prefixed name; confirm against the
;; define-transformer implementation.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))))

;; Transformer linking each inbred set to its public probeset datasets.
;;
;; tables: Species joined to InbredSet, ProbeFreeze, ProbeSetFreeze and
;; Tissue, restricted to rows with ProbeSetFreeze.public > 0 and a species
;; name other than 'monkey'.
;; schema-triples: declares gnt:has_probeset_data as an owl:ObjectProperty
;; with label, comment, domain, range and a dct:relation super-property.
;; NOTE(review): the declared rdfs:range is gnc:molecular_trait while the
;; objects emitted below are "dataset_..." identifiers -- confirm which is
;; intended.
;; triples: the subject is "set" + "_" + InbredSet.Name.  The
;; GROUP_CONCAT column is split on #\, and every dataset name becomes one
;; gnt:has_probeset_data object of the form "dataset" + "_" + name.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          ;; Group per inbred set (and tissue).  The selected
          ;; InbredSet.Name must be fixed within each group; grouping only
          ;; by species and tissue merged the datasets of every inbred set
          ;; sharing that species/tissue pair and attributed them to an
          ;; arbitrary one of those sets.
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY InbredSet.Id, InbredSet.Name, Tissue.Short_Name")
  (schema-triples
   (gnt:has_probeset_data rdf:type owl:ObjectProperty)
   (gnt:has_probeset_data rdfs:label "this resources has this probeset data.")
   (gnt:has_probeset_data rdfs:comment "Associates a resource with this probeset data.")
   (gnt:has_probeset_data rdfs:domain gnc:set)
   (gnt:has_probeset_data rdfs:range gnc:molecular_trait)
   (gnt:has_probeset_data rdfs:subPropertyOf dct:relation))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_probeset_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    ;; NOTE(review): GROUP_CONCAT results are capped by the
                    ;; server's group_concat_max_len; confirm it is large
                    ;; enough for the longest list of dataset names.
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Transformer relating each public probeset dataset to its inbred set,
;; species and molecular trait (tissue).
;;
;; tables: Species joined to InbredSet, ProbeFreeze, ProbeSetFreeze and
;; Tissue, restricted to rows with ProbeSetFreeze.public > 0 and a species
;; name other than 'monkey'.
;; schema-triples: declares gnt:has_molecular_trait as an
;; owl:ObjectProperty with domain, range and label.
;; NOTE(review): the declared rdfs:domain is gnc:set, yet the subject
;; below is a "dataset_..." identifier -- confirm the intended domain.
;; triples: the subject is "dataset" + "_" + ProbeSetFreeze.Name.
(define-transformer gn:dataset->set/species/molecular_trait
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait"))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
    ;; Inbred set the dataset belongs to: "set" + "_" + InbredSet.Name.
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    ;; Species identifier: Species.Fullname passed through
    ;; remap-species-identifiers (defined outside this file), with an
    ;; empty prefix.
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    ;; Tissue identifier, built the same way as the subjects of
    ;; gn:molecular-traits->gn:datasets.
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))))

;; Transformer emitting per-dataset metadata: creation time, normalization
;; method, inbred set, species, molecular trait (tissue) and platform.
;;
;; tables: ProbeSetFreeze inner-joined to ProbeFreeze, InbredSet, Species,
;; Tissue, AvgMethod, InfoFiles and Datasets, and left-joined to GeneChip;
;; restricted to rows with ProbeSetFreeze.public > 0 and a species name
;; other than 'monkey'.
;; schema-triples: describes gnt:uses_genechip and
;; gnt:uses_normalization_method.
;; triples: the subject is "dataset" + "_" + ProbeSetFreeze.Name.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (inner-join Datasets "ON InfoFiles.DatasetId = Datasets.DatasetId")
           ;; Left join: datasets without a matching GeneChip row are kept.
           (left-join GeneChip "ON GeneChip.Id =  InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   ;; Possessive "its" (was "it's"); the text is emitted verbatim.
   (gnt:uses_genechip skos:definition "The Platform this resource uses for its molecular traits.")
   (gnt:uses_normalization_method rdfs:comment "The normalization method used for the molecular traits in this dataset")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method used for the molecular traits in this dataset.")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
    ;; NOTE(review): XML Schema names this datatype xsd:dateTime (capital
    ;; T) and prefixed names are case-sensitive.  Whether xsd:dateTime or
    ;; xsd:date is right depends on the lexical form of CreateTime, which
    ;; is not visible here -- confirm before changing.
    (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime))
    (set gnt:uses_normalization_method
         (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    (set gnt:has_species
         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))
    ;; NOTE(review): GeneChip is left-joined, so GeneChip.Name may be NULL
    ;; for some rows; confirm how string->identifier / set treat that.
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name) #:separator "_"))))


;; Script entry point.
;;
;; Command-line options (parsed with getopt-long):
;;   -s, --settings FILE        file whose first datum is read and used as
;;                              the connection settings (required)
;;   -o, --output VALUE         passed to with-documentation as #:rdf
;;   -d, --documentation VALUE  passed to with-documentation as
;;                              #:documentation
;;
;; The transformers defined in this file are handed to with-documentation
;; together with the prefix table used for the emitted RDF.
(let* ((option-spec
        ;; --settings is marked required so that a missing option is
        ;; reported by getopt-long itself instead of surfacing later as a
        ;; type error when call-with-input-file receives #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; output and documentation default to #f when not supplied.
       ;; NOTE(review): confirm with-documentation accepts #f for these.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; Read a single datum from the settings file.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Molecular Traits")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("obo:" "<http://purl.obolibrary.org/obo/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
   (inputs
    (list
     gn:dataset->metadata
     gn:dataset->set/species/molecular_trait
     gn:molecular-traits->gn:datasets
     gn:set->gn:dataset
     gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))