#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms)
(web uri))
;; Transformer describing how ProbeSet rows are turned into RDF triples.
;;
;; Rows come from ProbeSet left-joined to GeneChip on
;; GeneChip.Id = ProbeSet.ChipId, keeping only rows whose Name is
;; non-NULL and not blank after TRIM.  The subject of every triple is
;; built by `string->identifier' from the "probeset" prefix and the
;; ProbeSet name.
;;
;; NOTE(review): `define-transformer', `tables', `triples', `set',
;; `multiset', `field', `annotate-field', `ontology',
;; `string->identifier' and `sanitize-rdf-string' are not defined in this
;; file; presumably they come from the (transform ...) modules -- confirm
;; their exact semantics there.
(define-transformer probeset->metadata
  (tables (ProbeSet
           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))
          "WHERE ProbeSet.Name IS NOT NULL AND TRIM(ProbeSet.Name) != ''")
  (triples
      (string->identifier "probeset" (field ProbeSet Name))
    (set rdf:type 'gnc:probeset)
    (set skos:prefLabel (field ProbeSet Name))
    ;; The alias column is split on ";" and each piece is trimmed, so
    ;; every alias becomes its own skos:altLabel value.
    (multiset skos:altLabel
              (map string-trim-both
                   (string-split (sanitize-rdf-string (field ProbeSet alias))
                                 #\;)))
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name)
                             #:separator "_"))
    (set gnt:has_target_id
         (string-trim-both (sanitize-rdf-string (field ProbeSet TargetId))))
    ;; NOTE(review): unlike the neighbouring text fields, Symbol is not
    ;; passed through `sanitize-rdf-string' -- confirm this is intended.
    (set gnt:symbol (string-trim-both (field ProbeSet Symbol)))
    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
    (set gnt:targets_region
         (string-trim-both
          (sanitize-rdf-string (field ProbeSet Probe_set_target_region))))
    (set gnt:chr (field ProbeSet Chr))
    ;; Positional values: NULL is replaced by '' in SQL, and the value is
    ;; annotated with ^^xsd:double.
    (set gnt:mb
         (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb))
                         '^^xsd:double))
    (set gnt:mb_mm8
         (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
                         '^^xsd:double))
    (set gnt:has_specificity
         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
                 Probe_set_specificity)))
    (set gnt:has_blat_score
         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
                 Probe_set_BLAT_score)))
    (set gnt:has_blat_mb_start
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
                  Probe_set_Blat_Mb_start))
          '^^xsd:double))
    (set gnt:has_blat_mb_end
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
                  Probe_set_Blat_Mb_end))
          '^^xsd:double))
    (set gnt:has_blat_seq (sanitize-rdf-string (field ProbeSet BlatSeq)))
    (set gnt:has_target_seq (sanitize-rdf-string (field ProbeSet TargetSeq)))
    ;; External identifiers: each value is NULL-coalesced to '' in SQL,
    ;; percent-encoded with `uri-encode', and handed to `ontology'
    ;; together with the corresponding prefix symbol.
    (set gnt:has_homologene_id
         (ontology 'homologene:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
                            HomoloGeneID)))))
    (set gnt:has_uniprot_id
         (ontology 'uniprot:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.UniProtID, '')"
                            UniProtID)))))
    (set gnt:has_pub_chem_id
         (ontology 'pubchem:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.PubChem_ID, '')"
                            PubChem_ID)))))
    (set gnt:has_kegg_id
         (ontology 'kegg:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.KEGG_ID, '')"
                            KEGG_ID)))))
    (set gnt:has_omim_id
         (ontology 'omim:
                   (uri-encode
                    (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
                                        OMIM))))
                      (if (number? omim)
                          ;; `uri-encode' only accepts strings, so a
                          ;; numeric value is converted instead of being
                          ;; passed through unchanged.
                          (number->string omim)
                          ;; Drop every character that is not a digit.
                          (regexp-substitute/global
                           #f "[^0-9]"
                           omim
                           'pre "" 'post))))))
    (set gnt:has_chebi_id
         (ontology 'chebi:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
                            ChEBI_ID)))))))
;; Script entry point.
;;
;; Command-line options (parsed with `getopt-long'):
;;   -s / --settings FILE        file whose first datum is read with `read'
;;                               and used as the connection settings
;;                               (required)
;;   -o / --output FILE          passed on as the #:rdf output
;;   -d / --documentation FILE   passed on as the #:documentation output
;;
;; NOTE(review): `call-with-target-database', `with-documentation' and
;; `sql-find' are not defined in this file; presumably they come from the
;; (transform ...) modules -- confirm their exact semantics there.
(let* ((option-spec
        ;; --settings is marked required so that leaving it out is
        ;; reported by `getopt-long' itself, rather than surfacing as a
        ;; wrong-type error when `call-with-input-file' receives #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; Only the first datum of the settings file is read.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (call-with-target-database
   %connection-settings
   (lambda (db)
     (with-documentation
      (name "ProbeSet Metadata")
      (connection %connection-settings)
      (table-metadata? #f)
      ;; NOTE(review): this counts every ProbeSet row, whereas the
      ;; transformer's own query filters out NULL/blank names -- confirm
      ;; the over-count is acceptable for whatever consumes total-rows.
      (total-rows (assoc-ref
                   (sql-find db "SELECT count(*) AS count from ProbeSet")
                   "count"))
      (rows-per-chunk 1000000)
      (prefixes
       '(("gn:" "<http://genenetwork.org/id/>")
         ("probeset:" "<http://genenetwork.org/probeset/>")
         ("gnc:" "<http://genenetwork.org/category/>")
         ("gnt:" "<http://genenetwork.org/term/>")
         ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
         ("kegg:" "<http://bio2rdf.org/ns/kegg#>")
         ("pubchem:" "<https://pubchem.ncbi.nlm.nih.gov/>")
         ("omim:" "<https://www.omim.org/entry/>")
         ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
         ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
         ("chebi:" "<http://purl.obolibrary.org/obo/CHEBI_>")
         ("dct:" "<http://purl.org/dc/terms/>")
         ("owl:" "<http://www.w3.org/2002/07/owl#>")
         ("homologene:" "<https://bio2rdf.org/homologene:>")
         ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
         ("qb:" "<http://purl.org/linked-data/cube#>")
         ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
         ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
      (inputs
       (list probeset->metadata))
      (outputs
       `(#:documentation ,documentation
         #:rdf ,output))))))
;; End of script.