about summary refs log tree commit diff
path: root/examples/classification.scm
diff options
context:
space:
mode:
Diffstat (limited to 'examples/classification.scm')
-rwxr-xr-x examples/classification.scm 228
1 files changed, 124 insertions, 104 deletions
diff --git a/examples/classification.scm b/examples/classification.scm
index 3024af6..130bec8 100755
--- a/examples/classification.scm
+++ b/examples/classification.scm
@@ -13,118 +13,126 @@
 
 
 
-(define (remap-species-identifiers str)
-  "This procedure remaps identifiers to standard binominal. Obviously this should
-   be sorted by correcting the database!"
-  (match str
-    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
-    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
-    ["Macaca mulatta" "Macaca nemestrina"]
-    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
-    [str str]))
-
 ;; Classification Scheme
-(define-transformer classification-scheme-species
-  (tables (Species))
-  (schema-triples
-   (gnc:ResourceClassificationScheme a skos:ConceptScheme)
-   (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources")
-   (gnc:ResourceClassificationScheme xkos:numberOfLevels "3")
-   (gnc:ResourceClassificationScheme xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )")
-   (gnc:DatasetType a xkos:ClassificationLevel)
-   (gnc:DatasetType skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
-   (gnc:DatasetType xkos:depth "1")
-   (gnc:DatasetType skos:member gnc:Probeset)
-   (gnc:DatasetType skos:member gnc:Genotype)
-   (gnc:DatasetType skos:member gnc:Phenotype)
-   (gnc:Probeset skos:prefLabel "mRNA Assay Datasets")
-   (gnc:Probeset skos:altLabel "ProbeSet")
-   (gnc:Genotype skos:prefLabel "Genotype")
-   (gnc:Genotype skos:altLabel "DNA Markers and SNPs")
-   (gnc:Phenotype skos:prefLabel "Phenotype")
-   (gnc:Phenotype skos:altLabel "Traits and Cofactors")
-   (gnc:Species a xkos:ClassificationLevel)
-   (gnc:Species skos:prefLabel "The species in which this resource belongs")
-   (gnc:Species xkos:depth "3")
-   (gnc:Species xkos:specializes gnc:Set))
-  (triples "gnc:Species"
+(define-transformer gnc:species->gn:species
+  (tables (Species)
+          "WHERE Name != 'monkey'")
+  (triples "gnc:species"
     (set skos:member
-         (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                             #:separator ""
-                             #:proc string-capitalize-first))))
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
 
-(define-transformer classification-scheme-set
-  (tables (InbredSet))
-  (schema-triples
-   (gnc:Set a xkos:ClassificationLevel)
-   (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
-   (gnc:Set xkos:depth "2")
-   (gnc:Set xkos:generalizes gnc:Species))
-  (triples "gnc:Set"
+(define-transformer gnc:set->gn:set
+  (tables (InbredSet)
+          "WHERE public > 0 AND FullName NOT LIKE '%monkey%'")
+  (triples "gnc:set"
     (set skos:member
          (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first))))
+          "set" (field InbredSet Name InbredSetName) #:separator "_"))))
 
-(define-transformer species
-  (tables (Species))
-  (schema-triples
-   (gnt:family a owl:ObjectProperty)
-   (gnt:family rdfs:domain gnc:Species)
-   (gnt:family skos:definition "This resource belongs to this family")
-   (gnt:shortName a owl:ObjectProperty)
-   (gnt:shortName rdfs:domain gnc:Species)
-   (gnt:shortName skos:definition "The short name of a given resource")
-   (gnt:belongsToSpecies a rdf:property)
-   (gnt:belongsToSpecies rdf:comment "This resource given to this species")
-   (gnt:belongsToSpecies rdf:label "belongsToSpecies"))
+(define-transformer gnc:species->metadata
+  (tables (Species)
+          "WHERE Name != 'monkey'")
   (triples
-      (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                          #:separator ""
-                          #:proc string-capitalize-first)
-    (set skos:inScheme 'gnc:ResourceClassificationScheme)
+      (string->identifier "" (remap-species-identifiers (field Species Fullname)))
+    (set rdf:type 'gnc:species)
     (set rdfs:label (remap-species-identifiers (field Species Fullname)))
     (set skos:prefLabel (field Species MenuName))
     (set skos:altLabel (field Species SpeciesName))
-    (set gnt:shortName (field Species Name))
-    (set gnt:family (field Species Family))
-    (set skos:notation (ontology
-                        'taxon:
-                        (field Species TaxonomyId)))))
+    (set gnt:short_name (field Species Name))
+    (set gnt:has_taxonomic_family (string->identifier "family" (field Species Family) #:separator "_"))
+    (set gnt:has_uniprot_taxon_id (ontology
+                                   'taxon:
+                                   (field Species TaxonomyId)))))
+
+(define-transformer gnc:species->gn:set
+  (tables (InbredSet
+           (left-join Species "ON InbredSet.SpeciesId=Species.Id"))
+          "WHERE public > 0 AND Species.Name != 'monkey'")
+  (triples (string->identifier "" (remap-species-identifiers (field Species Fullname)))
+    (set gnt:has_strain
+         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
+
+(define-transformer gn:family->gn:species/metadata
+  (tables (Species)
+          "WHERE Name != 'monkey' GROUP BY FAMILY")
+  (triples (string->identifier "family" (field Species Family) #:separator "_")
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))
+    (set rdfs:label (field Species Family))
+    (set gnt:has_family_order_id
+         (annotate-field (field Species OrderId)
+                         '^^xsd:integer))))
+
+(define-transformer gn:family->gn:species
+  (tables (Species)
+          "WHERE Name != 'monkey'")
+  (triples (string->identifier "family" (field Species Family) #:separator "_")
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
 
-(define-transformer inbred-set
+
+(define-transformer gn:set->metadata
   (tables (InbredSet
            (left-join Species "ON InbredSet.SpeciesId=Species.Id")
            (left-join MappingMethod
-                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
-  (schema-triples
-   (gnt:geneticType a owl:ObjectProperty)
-   (gnt:geneticType rdfs:domain gnc:set)
-   (gnt:code a owl:ObjectProperty)
-   (gnt:code rdfs:domain gnc:set)
-   ;; Already defined as an owl prop in species
-   (gnt:family rdfs:domain gnc:Set)
-   (gnt:mappingMethod a owl:ObjectProperty)
-   (gnt:mappingMethod rdfs:domain gnc:set)
-   (gnt:belongsToGroup a rdf:property)
-   (gnt:belongsToGroup rdf:comment "This resource given to this group")
-   (gnt:belongsToGroup rdf:label "belongsToGroup"))
-  (triples (string->identifier
-          "set" (field InbredSet Name InbredSetName)
-          #:separator ""
-          #:proc string-capitalize-first)
-    (set skos:inScheme 'gnc:ResourceClassificationScheme)
+                      "ON InbredSet.MappingMethodId=MappingMethod.Id"))
+          "WHERE public > 0 AND Species.Name != 'monkey'")
+  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
+    (set rdf:type 'gnc:set)
     (set rdfs:label (field InbredSet FullName))
     (set skos:prefLabel (field InbredSet Name InbredSetName))
-    (set gnt:geneticType (field InbredSet GeneticType))
-    (set gnt:family (field InbredSet Family))
-    (set gnt:mappingMethod (field MappingMethod Name))
-    (set gnt:code (field InbredSet InbredSetCode))
-    (set xkos:generalizes
-         (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                             #:separator ""
-                             #:proc string-capitalize-first))))
+    (set gnt:genetic_type (field InbredSet GeneticType))
+    (set dct:description (annotate-field (sanitize-rdf-string (field InbredSet description))
+                                         '^^rdf:HTML))
+    (set gnt:uses_mapping_method
+         (string->identifier "mapping_method" (field MappingMethod Name) #:separator "_"))
+    (set gnt:has_set_code (field InbredSet InbredSetCode))
+    (set gnt:has_species
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))))))
+
+(define-transformer gn:set->gn:population
+  (tables (InbredSet)
+          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
+  (schema-triples
+   (gnt:has_reference_population rdfs:domain gnc:set)
+   (gnt:has_reference_population a owl:ObjectProperty)
+   (gnt:has_reference_population rdfs:comment "This group belongs to this population category.")
+   (gnt:has_reference_population rdfs:label "belongs to population category."))
+  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
+    (set gnt:has_reference_population
+         (string->identifier "population" (field InbredSet Family) #:separator "_"))))
+
+(define-transformer gn:population->metadata
+  (tables (InbredSet)
+          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family")
+  (triples (string->identifier "population" (field InbredSet Family) #:separator "_")
+    (set rdf:type 'gnc:reference_population)
+    (set rdfs:label (field InbredSet Family))
+    (set skos:member 'gnc:population_category)
+    (set gnt:has_population_order_id
+         (annotate-field (field InbredSet FamilyOrder)
+                         '^^xsd:integer))))
+
+(define-transformer gn:population->gn:set
+  (tables (InbredSet)
+          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
+  (triples (string->identifier "population" (field InbredSet Family) #:separator "_")
+    (set gnt:has_strain
+         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
+
+(define-transformer gnc:population_category->gn:population
+  (tables (InbredSet)
+          "WHERE public > 0 AND FullName NOT LIKE '%monkey%' GROUP BY Family")
+  (triples "gnc:population_category"
+    (set gnt:has_reference_population
+         (string->identifier "population" (field InbredSet Family) #:separator "_"))))
+
+(define-transformer gnc:taxonomic_family->gn:family
+  (tables (Species)
+          "WHERE Name != 'monkey' GROUP BY Family")
+  (triples "gnc:taxonomic_family"
+    (set gnt:has_taxonomic_family
+         (string->identifier "family" (field Species Family) #:separator "_"))))
 
 
 
@@ -141,24 +149,36 @@
           read)))
 
   (with-documentation
-   (name "Species Metadata")
+   (name "GN Classification Hierarchy")
    (connection %connection-settings)
    (table-metadata? #f)
    (prefixes
-    '(("gn:" "<http://genenetwork.org/id/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
+    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
+      ("dct:" "<http://purl.org/dc/terms/>")
+      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
+      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
       ("owl:" "<http://www.w3.org/2002/07/owl#>")
-      ("gnt:" "<http://genenetwork.org/term/>")
+      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
+      ("schema:" "<https://schema.org/>")
       ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
       ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
       ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
       ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
       ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
    (inputs
-    (list classification-scheme-species
-          classification-scheme-set
-          species
-          inbred-set))
+    (list gnc:species->gn:species
+          gnc:set->gn:set
+          gnc:species->metadata
+          gnc:species->gn:set
+          gn:family->gn:species/metadata
+          gn:family->gn:species
+          gn:set->metadata
+          gn:set->gn:population
+          gn:population->metadata
+          gn:population->gn:set
+          gnc:population_category->gn:population
+          gnc:taxonomic_family->gn:family))
    (outputs
     `(#:documentation ,documentation
       #:rdf ,output))))