aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2023-08-15 14:06:45 +0300
committerMunyoki Kilyungi2023-08-15 19:32:48 +0300
commit8764d0d964e9ef89a41bbc8d1b7ea96646733c83 (patch)
treef25dfea2eb262cc6585725d7dc9946cdf63dccab
parent7b77057639891565ef249471b590fa35ad98c9d0 (diff)
downloadgn-transform-databases-8764d0d964e9ef89a41bbc8d1b7ea96646733c83.tar.gz
Update how probesetfreeze metadata is dumped
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-xexamples/dump-probesetfreeze.scm76
1 files changed, 60 insertions, 16 deletions
diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm
index 30ea9f4..86e614d 100755
--- a/examples/dump-probesetfreeze.scm
+++ b/examples/dump-probesetfreeze.scm
@@ -17,14 +17,45 @@
read))
+(define (remap-species-identifiers str)
+  "Look STR up in a fixed table of species identifiers and return the
+replacement name recorded for it; any STR not in the table is returned
+unchanged.  This is a stop-gap: the identifiers ought to be corrected in
+the database itself."
+  (let ((remapped
+         (assoc str
+                '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
+                  ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
+                  ("Macaca mulatta" . "Macaca nemestrina")
+                  ("Bat (Glossophaga soricina)" . "Glossophaga soricina")))))
+    ;; `assoc' compares with `equal?', so only exact string matches are remapped.
+    (if remapped
+        (cdr remapped)
+        str)))
+
+
+;; Dump every GeneChip row, left-joined to Species on SpeciesId, as a
+;; gnc:geneChip resource.  The subject is built from GeneChip.Name with the
+;; "platform" prefix.
+;; NOTE(review): define-dump, field, set, ontology and string->identifier
+;; are defined outside this file; confirm their exact output there.
(define-dump dump-gene-chip
- (tables (GeneChip))
+ (tables (GeneChip
+ (left-join Species "USING (SpeciesId)")))
(schema-triples
- (gnt:name rdfs:range rdfs:Literal))
+ (gnc:geneChip a skos:Concept)
+ (gnc:geneChip
+ skos:description
+ "This is a set of controlled terms that are used to describe a given gene chip/platform")
+ (gnt:belongsToSpecies a owl:ObjectProperty)
+ (gnt:belongsToSpecies skos:definition "This resource belongs to this given species")
+ (gnt:belongsToSpecies rdfs:domain gnc:geneChip)
+ ;; Only gnc:geneChip is declared as the domain: the resources below are
+ ;; typed gnc:geneChip, and nothing is typed gnc:platform any more.
+ (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
+ ;; NOTE(review): the value set below comes from a plain field; confirm
+ ;; whether owl:ObjectProperty is the intended property kind.
+ (gnt:hasGOTreeValue a owl:ObjectProperty)
+ (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value")
+ (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
(triples (string->identifier "platform" (field GeneChip Name))
- (set rdf:type 'gnc:platform)
- (set gnt:name (field GeneChip GeneChipName))
- (set gnt:geoPlatform
+ (set rdf:type 'gnc:geneChip)
+ (set rdfs:label (field GeneChip GeneChipName))
+ (set skos:prefLabel (field GeneChip Name))
+ ;; The SQL expression yields Title only when it differs from GeneChipName,
+ ;; otherwise NULL.
+ (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
+ Title)))
+ (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
+ ;; Species.Fullname is passed through remap-species-identifiers before it
+ ;; is turned into an identifier.
+ (set gnt:belongsToSpecies
+ (string->identifier "" (remap-species-identifiers (field Species Fullname))
+ #:separator ""
+ #:proc string-capitalize-first))
+ (set gnt:hasGeoSeriesId
(ontology 'geoSeries:
(string-trim-both (field GeneChip GeoPlatform))))))
@@ -38,9 +69,11 @@
(left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
"WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
(schema-triples
- (gnt:avgMethod rdfs:range rdfs:Literal)
- (gnt:dataScale rdfs:range rdfs:Literal)
- (gn:probesetDataset rdf:subClassOf gn:dataset))
+ (gnc:probeset rdfs:subClassOf gnc:dataset)
+ (gnt:usesNormalization rdfs:domain gnc:probeset)
+ (gnt:usesDataScale rdfs:domain gnc:probeset)
+ (gnt:usesDataScale a owl:ObjectProperty)
+ (gnt:usesDataScale skos:definition "Thi data scale this resource uses"))
(triples
(string->identifier
""
@@ -50,16 +83,25 @@
'pre "_" 'post)
#:separator ""
#:proc string-capitalize-first)
- (set rdf:type 'gnc:probesetDataset)
- (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name)))
- (set gnt:fullName (field ProbeSetFreeze FullName))
- (set gnt:shortName (field ProbeSetFreeze ShortName))
+ (set rdf:type 'gnc:probeset)
+ (set gnt:usesNormalization
+ (string->identifier "avgmethod"
+ ;; If AvgMethodName is NULL, assume N/A.
+ (if (string-blank? (field AvgMethod Name AvgMethodName))
+ "N/A" (field AvgMethod Name AvgMethodName))))
+ (set dct:title (field ProbeSetFreeze FullName))
+ (set rdfs:label (field ProbeSetFreeze Name))
+ (set skos:altLabel (field ProbeSetFreeze Name2))
+ (set skos:prefLabel (field ProbeSetFreeze ShortName))
(set dct:created (annotate-field
(field ProbeSetFreeze CreateTime)
'^^xsd:datetime))
- (set gnt:dataScale (field ProbeSetFreeze DataScale))
- (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name)))
- (set gnt:datasetOfInbredSet
+ (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
+ (set gnt:hasTissue
+ (string->identifier
+ "tissue"
+ (field Tissue Short_Name)))
+ (set gnt:belongsToInbredSet
(string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
@@ -72,7 +114,9 @@
'(("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
("gn:" "<http://genenetwork.org/id/>")
("gnc:" "<http://genenetwork.org/category/>")
- ("dct:" "<>")
+ ("dct:" "<http://purl.org/dc/terms/>")
+ ("owl:" "<http://www.w3.org/2002/07/owl#>")
+ ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
("gnt:" "<http://genenetwork.org/term/>")
("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")