From c9a84ecb21c7fcfdb9e5d277bd2a1c43b73f4f9f Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Fri, 25 Aug 2023 18:07:34 +0300
Subject: Add new file with classification hierarchy ontology

Signed-off-by: Munyoki Kilyungi
---
 examples/classification.scm   | 171 ++++++++++++++++++++++++++++++++++++++++++
 examples/species-metadata.scm |  77 ---------------------
 2 files changed, 171 insertions(+), 77 deletions(-)
 create mode 100755 examples/classification.scm

(limited to 'examples')

diff --git a/examples/classification.scm b/examples/classification.scm
new file mode 100755
index 0000000..64aeef3
--- /dev/null
+++ b/examples/classification.scm
@@ -0,0 +1,171 @@
+#! /usr/bin/env guile
+!#
+
+;;; Dump the GeneNetwork resource classification scheme (the gnc:Type,
+;;; gnc:Set and gnc:Species levels) and the Species/InbredSet metadata
+;;; attached to it as RDF.
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 regex)
+             (transform strings)
+             (transform sql)
+             (transform triples)
+             (transform special-forms))
+
+
+
+(define (remap-species-identifiers str)
+  "Return the name to use in place of STR when building a species
+identifier.  Only the names listed below are remapped; any other STR is
+returned unchanged.  Obviously this should be sorted by correcting the database!"
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"]
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [str str]))
+
+;; Classification Scheme
+;;
+;; Schema triples for the gnc:ResourceClassificationScheme concept scheme
+;; and its gnc:Type and gnc:Species levels; every row of Species is added
+;; as a skos:member of gnc:Species.
+(define-transformer classification-scheme-species
+  (tables (Species))
+  (schema-triples
+   (gnc:ResourceClassificationScheme a skos:ConceptScheme)
+   (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources")
+   (gnc:ResourceClassificationScheme xkos:numberOfLevels "3")
+   (gnc:ResourceClassificationScheme xkos:levels "( gnc:Type gnc:Set gnc:Species )")
+   (gnc:Type a xkos:ClassificationLevel)
+   (gnc:Type skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
+   (gnc:Type xkos:depth "1")
+   (gnc:Type skos:member gn:ProbeSet)
+   (gnc:Type skos:member gn:Genotype)
+   (gn:Genotype skos:altLabel "DNA Markers and SNPs")
+   (gn:Genotype skos:prefLabel "Genotype")
+   (gnc:Type skos:member gn:Phenotype)
+   (gn:Phenotype skos:prefLabel "Phenotype")
+   (gn:Phenotype skos:altLabel "Traits and Cofactors")
+   (gnc:Species a xkos:ClassificationLevel)
+   (gnc:Species skos:prefLabel "The species in which this resource belongs")
+   (gnc:Species xkos:depth "3")
+   (gnc:Species xkos:specializes gnc:Set))
+  (triples "gnc:Species"
+    (set skos:member (string->identifier "" (remap-species-identifiers (field Species Fullname))
+                                         #:separator ""
+                                         #:proc string-capitalize-first))))
+
+;; Schema triples for the gnc:Set level; every row of InbredSet is added as
+;; a skos:member of gnc:Set.
+(define-transformer classification-scheme-set
+  (tables (InbredSet))
+  (schema-triples
+   (gnc:Set a xkos:ClassificationLevel)
+   (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
+   (gnc:Set xkos:depth "2")
+   (gnc:Set xkos:generalizes gnc:Species))
+  (triples "gnc:Set"
+    (set skos:member
+         (string->identifier
+          "set" (field InbredSet Name)
+          #:separator ""
+          #:proc string-capitalize-first))))
+
+;; One subject per row of Species, identified by its (remapped) full name,
+;; placed in the classification scheme and labelled from the Species columns.
+(define-transformer species
+  (tables (Species))
+  (schema-triples
+   (gnt:family a owl:ObjectProperty)
+   (gnt:family rdfs:domain gnc:Species)
+   (gnt:family skos:definition "This resource belongs to this family"))
+  (triples
+      (string->identifier "" (remap-species-identifiers (field Species Fullname))
+                          #:separator ""
+                          #:proc string-capitalize-first)
+    (set skos:inScheme 'gnc:ResourceClassificationScheme)
+    (set rdfs:label (field Species FullName))
+    (set skos:prefLabel (field Species MenuName))
+    (set skos:altLabel (field Species SpeciesName))
+    (set skos:altLabel (field Species Name))
+    (set gnt:family (field Species Family))
+    (set skos:notation (ontology
+                        'taxon:
+                        (field Species TaxonomyId)))))
+
+;; One subject per row of InbredSet (left-joined with Species and
+;; MappingMethod), linked to its species through xkos:generalizes.
+(define-transformer inbred-set
+  (tables (InbredSet
+           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
+           (left-join MappingMethod
+                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
+  (schema-triples
+   (gnt:geneticType a owl:ObjectProperty)
+   (gnt:geneticType rdfs:domain gnc:Set)
+   (gnt:code a owl:ObjectProperty)
+   (gnt:code rdfs:domain gnc:Set)
+   ;; Already defined as an owl prop in species
+   (gnt:family rdfs:domain gnc:Set)
+   (gnt:mappingMethod a owl:ObjectProperty)
+   (gnt:mappingMethod rdfs:domain gnc:Set))
+  (triples (string->identifier
+            "set" (field InbredSet Name)
+            #:separator ""
+            #:proc string-capitalize-first)
+    (set skos:inScheme 'gnc:ResourceClassificationScheme)
+    (set rdfs:label (field InbredSet FullName))
+    (set skos:prefLabel (field InbredSet Name))
+    (set gnt:geneticType (field InbredSet GeneticType))
+    (set gnt:family (field InbredSet Family))
+    (set gnt:mappingMethod (field MappingMethod Name))
+    (set gnt:code (field InbredSet InbredSetCode))
+    (set xkos:generalizes
+         (string->identifier "" (remap-species-identifiers (field Species Fullname))
+                             #:separator ""
+                             #:proc string-capitalize-first))))
+
+
+
+;; Entry point: read the connection settings from the file named by
+;; --settings (mandatory) and hand the transformers to with-documentation.
+(let* ((option-spec
+        '((settings (single-char #\s) (value #t) (required? #t))
+          (output (single-char #\o) (value #t))
+          (documentation (single-char #\d) (value #t))))
+       (options (getopt-long (command-line) option-spec))
+       (settings (option-ref options 'settings #f))
+       (output (option-ref options 'output #f))
+       (documentation (option-ref options 'documentation #f))
+       (%connection-settings
+        (call-with-input-file settings
+          read)))
+
+  (with-documentation
+   (name "Species Metadata")
+   (connection %connection-settings)
+   (table-metadata? #f)
+   ;; NOTE(review): the namespace IRIs had been lost (empty strings);
+   ;; confirm the gn:/gnc:/gnt: values against the deployed namespaces.
+   (prefixes
+    '(("gn:" "<http://genenetwork.org/id/>")
+      ("gnc:" "<http://genenetwork.org/category/>")
+      ("owl:" "<http://www.w3.org/2002/07/owl#>")
+      ("gnt:" "<http://genenetwork.org/term/>")
+      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
+      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
+      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
+   (inputs
+    (list classification-scheme-species
+          classification-scheme-set
+          species
+          inbred-set))
+   (outputs
+    `(#:documentation ,documentation
+      #:rdf ,output))))
diff --git a/examples/species-metadata.scm b/examples/species-metadata.scm
index b330b12..89f2e27 100755
--- a/examples/species-metadata.scm
+++ b/examples/species-metadata.scm
@@ -21,33 +21,6 @@
     ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
     [str str]))
 
-(define-transformer species
-  (tables (Species))
-  (schema-triples
-   (gnc:species a skos:Concept)
-   (gnc:species skos:description "This is a set of controlled terms that are used to describe a given species")
-   (gnc:species skos:broader gnc:family)
-   (gnt:binomialName a owl:ObjectProperty)
-   (gnt:binomialName rdfs:domain gnc:species)
-   (gnt:family a owl:ObjectProperty)
-   (gnt:family rdfs:domain gnc:species)
-   (gnt:family skos:definition "This resource belongs to this family")
-   (gnt:organism a owl:ObjectProperty)
-   (gnt:organism rdfs:domain gnc:species)
-   (gnt:shortName a owl:ObjectProperty)
-   (gnt:shortName rdfs:domain gnc:species))
-  (triples
-      (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                          #:separator ""
-                          #:proc string-capitalize-first)
-    (set rdf:type 'gnc:species)
-    (set skos:label (field Species SpeciesName))
-    (set skos:altLabel (field Species Name))
-    (set rdfs:label (field Species MenuName))
-    (set gnt:binomialName (field Species FullName))
-    (set gnt:family (field Species Family))
-    (set gnt:organism (ontology 'taxon: (field Species TaxonomyId)))))
-
 #!
 
 The ProbeData table contains StrainID.
@@ -132,56 +105,6 @@ At this point it is not very clear how Name, Name2, Symbol and Alias are used.
     (set rdf:type 'gnc:mappingMethod)
     (set rdfs:label (field MappingMethod Name))))
 
-
-(define-transformer inbred-set
-  (tables (InbredSet
-           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
-           (left-join MappingMethod
-                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
-  (schema-triples
-   (gnc:set skos:broader gnc:species)
-   (gnc:set skos:definition "A set of terms used to describe an set, which can be inbredSet, outbredSet etc etc.")
-   (gnt:geneticType a owl:ObjectProperty)
-   (gnt:geneticType rdfs:domain gnc:set)
-   (gnt:code a owl:ObjectProperty)
-   (gnt:code rdfs:domain gnc:set)
-   ;; Already defined as an owl prop in species
-   (gnt:family rdfs:domain gnc:set)
-   (gnt:phenotype a owl:ObjectProperty)
-   (gnt:phenotype rdfs:domain gnc:set)
-   (gnt:genotype a owl:ObjectProperty)
-   (gnt:genotype rdfs:domain gnt:inbredSet)
-   (gnt:mappingMethod a owl:ObjectProperty)
-   (gnt:mappingMethod rdfs:domain gnc:set))
-  (triples (string->identifier
-            "set" (field InbredSet Name)
-            #:separator ""
-            #:proc string-capitalize-first)
-    (set rdf:type 'gnc:set)
-    (set rdfs:label (field InbredSet FullName))
-    (set skos:altLabel (field InbredSet Name))
-    (set gnt:geneticType (field InbredSet GeneticType))
-    (set gnt:family (field InbredSet Family))
-    (set gnt:mappingMethod (field MappingMethod Name))
-    (set gnt:code (field InbredSet InbredSetCode))
-    (set gnt:belongsToSpecies
-         (string->identifier "" (remap-species-identifiers (field Species Fullname))
-                             #:separator ""
-                             #:proc string-capitalize-first))
-    (set gnt:genotype
-         (field ("IF ((SELECT PublishFreeze.Name FROM PublishFreeze WHERE PublishFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'Traits and Cofactors', '')" genotypeP)))
-    (set gnt:phenotype
-         (field ("IF ((SELECT GenoFreeze.Name FROM GenoFreeze WHERE GenoFreeze.InbredSetId = InbredSet.Id LIMIT 1) IS NOT NULL, 'DNA Markers and SNPs', '')" phenotypeP)))
-    (multiset gnt:hasTissue
-              (map
-               (lambda (x)
-                 (string->identifier "tissue"
-                                     x))
-               (string-split-substring
-                (field ("(SELECT GROUP_CONCAT(DISTINCT Tissue.Short_Name SEPARATOR'||') AS MolecularTraits FROM ProbeFreeze, ProbeSetFreeze, InbredSet, Tissue, Species WHERE ProbeFreeze.TissueId = Tissue.Id AND ProbeFreeze.InbredSetId = InbredSet.Id AND ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id ORDER BY Tissue.Name)"
-                        molecularTrait))
-                "||")))))
-
 (define-transformer avg-method
   ;; The Name and Normalization fields seem to be the same. Dump only
   ;; the Name field.
-- 
cgit v1.2.3