about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x examples/dump-probesetfreeze.scm 76
1 files changed, 60 insertions, 16 deletions
diff --git a/examples/dump-probesetfreeze.scm b/examples/dump-probesetfreeze.scm
index 30ea9f4..86e614d 100755
--- a/examples/dump-probesetfreeze.scm
+++ b/examples/dump-probesetfreeze.scm
@@ -17,14 +17,45 @@
     read))
 
 
+(define (remap-species-identifiers str)
+  "Remap the species name STR to a standard binomial name using a fixed lookup
+   table; unlisted names pass through unchanged.  The database should be fixed!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"] ; NOTE(review): a different species name, not a cleanup — confirm
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [str str])) ; fallback: return STR as-is
+
+
 (define-dump dump-gene-chip
-  (tables (GeneChip))
+  (tables (GeneChip ; GeneChip rows, left-joined to Species
+           (left-join Species "USING (SpeciesId)"))) ; Species.Fullname is read below
   (schema-triples
-   (gnt:name rdfs:range rdfs:Literal))
+   (gnc:geneChip a skos:Concept)
+   (gnc:geneChip
+    skos:definition ; SKOS defines no `description' term; `definition' matches the terms below
+    "This is a set of controlled terms that are used to describe a given gene chip/platform")
+   (gnt:hasGeoSeriesId rdfs:domain gnc:platform) ; NOTE(review): likely stale, rdf:type below is gnc:geneChip — confirm
+   (gnt:belongsToSpecies a owl:ObjectProperty)
+   (gnt:belongsToSpecies skos:definition "This resource belongs to this given species")
+   (gnt:belongsToSpecies rdfs:domain gnc:geneChip)
+   (gnt:hasGeoSeriesId rdfs:domain gnc:geneChip)
+   (gnt:hasGOTreeValue a owl:ObjectProperty)
+   (gnt:hasGOTreeValue skos:definition "This resource has the following GO tree value")
+   (gnt:hasGOTreeValue rdfs:domain gnc:geneChip))
   (triples (string->identifier "platform" (field GeneChip Name))
-    (set rdf:type 'gnc:platform)
-    (set gnt:name (field GeneChip GeneChipName))
-    (set gnt:geoPlatform
+    (set rdf:type 'gnc:geneChip)
+    (set rdfs:label (field GeneChip GeneChipName))
+    (set skos:prefLabel (field GeneChip Name))
+    (set skos:altLabel (field ("IF(GeneChip.GeneChipName != GeneChip.Title, Title, NULL)"
+                               Title))) ; Title only when it differs from GeneChipName, else NULL
+    (set gnt:hasGOTreeValue (field GeneChip Go_tree_value))
+    (set gnt:belongsToSpecies
+         (string->identifier "" (remap-species-identifiers (field Species Fullname)) ; species name remapped first
+                             #:separator ""
+                             #:proc string-capitalize-first))
+    (set gnt:hasGeoSeriesId
          (ontology 'geoSeries:
                    (string-trim-both (field GeneChip GeoPlatform))))))
 
@@ -38,9 +69,11 @@
            (left-join Tissue "ON ProbeFreeze.TissueId = Tissue.TissueId"))
           "WHERE ProbeSetFreeze.public > 0 AND InfoFiles.InfoPageName IS NULL GROUP BY ProbeFreeze.Id")
   (schema-triples
-   (gnt:avgMethod rdfs:range rdfs:Literal)
-   (gnt:dataScale rdfs:range rdfs:Literal)
-   (gn:probesetDataset rdf:subClassOf gn:dataset))
+   (gnc:probeset rdfs:subClassOf gnc:dataset)
+   (gnt:usesNormalization rdfs:domain gnc:probeset)
+   (gnt:usesDataScale rdfs:domain gnc:probeset)
+   (gnt:usesDataScale a owl:ObjectProperty)
+   (gnt:usesDataScale skos:definition "Thi data scale this resource uses"))
   (triples
       (string->identifier
        ""
@@ -50,16 +83,25 @@
         'pre "_" 'post)
        #:separator ""
        #:proc string-capitalize-first)
-    (set rdf:type 'gnc:probesetDataset)
-    (set gnt:avgMethod (string->identifier "avgmethod" (field AvgMethod Name)))
-    (set gnt:fullName (field ProbeSetFreeze FullName))
-    (set gnt:shortName (field ProbeSetFreeze ShortName))
+    (set rdf:type 'gnc:probeset)
+    (set gnt:usesNormalization
+         (string->identifier "avgmethod"
+                             ;; If AvgMethodName is NULL, assume N/A.
+                             (if (string-blank? (field AvgMethod Name AvgMethodName))
+                                 "N/A" (field AvgMethod Name AvgMethodName))))
+    (set dct:title (field ProbeSetFreeze FullName))
+    (set rdfs:label (field ProbeSetFreeze Name))
+    (set skos:altLabel (field ProbeSetFreeze Name2))
+    (set skos:prefLabel (field ProbeSetFreeze ShortName))
     (set dct:created (annotate-field
                       (field ProbeSetFreeze CreateTime)
                       '^^xsd:datetime))
-    (set gnt:dataScale (field ProbeSetFreeze DataScale))
-    (set gnt:tissueName (string->identifier "tissue" (field Tissue Short_Name)))
-    (set gnt:datasetOfInbredSet
+    (set gnt:usesDataScale (field ProbeSetFreeze DataScale))
+    (set gnt:hasTissue
+         (string->identifier
+          "tissue"
+          (field Tissue Short_Name)))
+    (set gnt:belongsToInbredSet
          (string->identifier "inbredSet" (field InbredSet Name InbredSetName)))))
 
 
@@ -72,7 +114,9 @@
   '(("geoSeries:" "<http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=>")
     ("gn:" "<http://genenetwork.org/id/>")
     ("gnc:" "<http://genenetwork.org/category/>")
-    ("dct:" "<>")
+    ("dct:" "<http://purl.org/dc/terms/>")
+    ("owl:" "<http://www.w3.org/2002/07/owl#>")
+    ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
     ("gnt:" "<http://genenetwork.org/term/>")
     ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
     ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")