From 8764d0d964e9ef89a41bbc8d1b7ea96646733c83 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Tue, 15 Aug 2023 14:06:45 +0300 Subject: Update how probesetfreeze metadata is dumped Signed-off-by: Munyoki Kilyungi --- examples/dump-probesetfreeze.scm | 76 +++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 16 deletions(-) (limited to 'examples/dump-probesetfreeze.scm') diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm index 30ea9f4..86e614d 100755 --- a/examples/dump-probesetfreeze.scm +++ b/examples/dump-probesetfreeze.scm @@ -17,14 +17,45 @@ read)) +(define (remap-species-identifiers str) + "This procedure remaps identifiers to standard binominal. Obviously this should + be sorted by correcting the database!" + (match str + ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] + ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] + ["Macaca mulatta" "Macaca nemestrina"] + ["Bat (Glossophaga soricina)" "Glossophaga soricina"] + [str str])) + + (define-dump dump-gene-chip - (tables (GeneChip)) + (tables (GeneChip + (left-join Species "USING (SpeciesId)"))) (schema-triples - (gnt:name rdfs:range rdfs:Literal)) + (gnc:geneChip a skos:Concept) + (gnc:geneChip + skos:description + "This is a set of controlled terms that are used to describe a given gene chip/platform") + (gnt:hasGeoSeriesId rdfs:domain gnc:platform) + (gnt:belongsToSpecies a owl:ObjectProperty) + (gnt:belongsToSpecies skos:definition "This resource belongs to this given species") + (gnt:belongsToSpecies rdfs:domain gnc:geneChip) + (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip) + (gnt:hasGOTreeValue a owl:ObjectProperty) + (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value") + (gnt:hasGOTreeValue rdfs:domain gnc:geneChip)) (triples (string->identifier "platform" (field GeneChip Name)) - (set rdf:type 'gnc:platform) - (set gnt:name (field GeneChip GeneChipName)) - (set gnt:geoPlatform + (set rdf:type 
'gnc:geneChip) + (set rdfs:label (field GeneChip GeneChipName)) + (set skos:prefLabel (field GeneChip Name)) + (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)" + Title))) + (set gnt:hasGOTreeValue (field GeneChip Go_tree_value)) + (set gnt:belongsToSpecies + (string->identifier "" (remap-species-identifiers (field Species Fullname)) + #:separator "" + #:proc string-capitalize-first)) + (set gnt:hasGeoSeriesId (ontology 'geoSeries: (string-trim-both (field GeneChip GeoPlatform)))))) @@ -38,9 +69,11 @@ (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId")) "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id") (schema-triples - (gnt:avgMethod rdfs:range rdfs:Literal) - (gnt:dataScale rdfs:range rdfs:Literal) - (gn:probesetDataset rdf:subClassOf gn:dataset)) + (gnc:probeset rdfs:subClassOf gnc:dataset) + (gnt:usesNormalization rdfs:domain gnc:probeset) + (gnt:usesDataScale rdfs:domain gnc:probeset) + (gnt:usesDataScale a owl:ObjectProperty) + (gnt:usesDataScale skos:definition "The data scale this resource uses")) (triples (string->identifier "" @@ -50,16 +83,25 @@ 'pre "_" 'post) #:separator "" #:proc string-capitalize-first) - (set rdf:type 'gnc:probesetDataset) - (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name))) - (set gnt:fullName (field ProbeSetFreeze FullName)) - (set gnt:shortName (field ProbeSetFreeze ShortName)) + (set rdf:type 'gnc:probeset) + (set gnt:usesNormalization + (string->identifier "avgmethod" + ;; If AvgMethodName is NULL, assume N/A. + (if (string-blank? 
(field AvgMethod Name AvgMethodName)) + "N/A" (field AvgMethod Name AvgMethodName)))) + (set dct:title (field ProbeSetFreeze FullName)) + (set rdfs:label (field ProbeSetFreeze Name)) + (set skos:altLabel (field ProbeSetFreeze Name2)) + (set skos:prefLabel (field ProbeSetFreeze ShortName)) (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime)) - (set gnt:dataScale (field ProbeSetFreeze DataScale)) - (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name))) - (set gnt:datasetOfInbredSet + (set gnt:usesDataScale (field ProbeSetFreeze DataScale)) + (set gnt:hasTissue + (string->identifier + "tissue" + (field Tissue Short_Name))) + (set gnt:belongsToInbredSet (string->identifier "inbredSet" (field InbredSet Name InbredSetName))))) @@ -72,7 +114,9 @@ '(("geoSeries:" "") ("gn:" "") ("gnc:" "") - ("dct:" "<>") + ("dct:" "") + ("owl:" "") + ("skos:" "") ("gnt:" "") ("rdf:" "") ("rdfs:" "") -- cgit v1.2.3