diff options
Diffstat (limited to 'examples/classification.scm')
| -rwxr-xr-x | examples/classification.scm | 228 |
1 files changed, 124 insertions, 104 deletions
diff --git a/examples/classification.scm b/examples/classification.scm index 3024af6..130bec8 100755 --- a/examples/classification.scm +++ b/examples/classification.scm @@ -13,118 +13,126 @@ -(define (remap-species-identifiers str) - "This procedure remaps identifiers to standard binominal. Obviously this should - be sorted by correcting the database!" - (match str - ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"] - ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"] - ["Macaca mulatta" "Macaca nemestrina"] - ["Bat (Glossophaga soricina)" "Glossophaga soricina"] - [str str])) - ;; Classification Scheme -(define-transformer classification-scheme-species - (tables (Species)) - (schema-triples - (gnc:ResourceClassificationScheme a skos:ConceptScheme) - (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources") - (gnc:ResourceClassificationScheme xkos:numberOfLevels "3") - (gnc:ResourceClassificationScheme xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )") - (gnc:DatasetType a xkos:ClassificationLevel) - (gnc:DatasetType skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype") - (gnc:DatasetType xkos:depth "1") - (gnc:DatasetType skos:member gnc:Probeset) - (gnc:DatasetType skos:member gnc:Genotype) - (gnc:DatasetType skos:member gnc:Phenotype) - (gnc:Probeset skos:prefLabel "mRNA Assay Datasets") - (gnc:Probeset skos:altLabel "ProbeSet") - (gnc:Genotype skos:prefLabel "Genotype") - (gnc:Genotype skos:altLabel "DNA Markers and SNPs") - (gnc:Phenotype skos:prefLabel "Phenotype") - (gnc:Phenotype skos:altLabel "Traits and Cofactors") - (gnc:Species a xkos:ClassificationLevel) - (gnc:Species skos:prefLabel "The species in which this resource belongs") - (gnc:Species xkos:depth "3") - (gnc:Species xkos:specializes gnc:Set)) - (triples "gnc:Species" +(define-transformer gnc:species->gn:species + (tables (Species) + "WHERE Name != 'monkey'") + (triples "gnc:species" (set skos:member - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + (string->identifier "" (remap-species-identifiers (field Species Fullname)))))) -(define-transformer classification-scheme-set - (tables (InbredSet)) - (schema-triples - (gnc:Set a xkos:ClassificationLevel) - (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to") - (gnc:Set xkos:depth "2") - (gnc:Set xkos:generalizes gnc:Species)) - (triples "gnc:Set" +(define-transformer gnc:set->gn:set + (tables (InbredSet) + "WHERE public > 0 AND FullName NOT LIKE '%monkey%'") + (triples "gnc:set" (set skos:member (string->identifier - "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first)))) + "set" (field InbredSet Name InbredSetName) #:separator "_")))) -(define-transformer species - (tables (Species)) - (schema-triples - (gnt:family a owl:ObjectProperty) - (gnt:family rdfs:domain gnc:Species) - (gnt:family skos:definition "This resource belongs to this family") - (gnt:shortName a owl:ObjectProperty) - (gnt:shortName rdfs:domain gnc:Species) - (gnt:shortName skos:definition "The short name of a given resource") - (gnt:belongsToSpecies a rdf:property) - (gnt:belongsToSpecies rdf:comment "This resource given to this species") - (gnt:belongsToSpecies rdf:label "belongsToSpecies")) +(define-transformer gnc:species->metadata + (tables (Species) + "WHERE Name != 'monkey'") (triples - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first) - (set skos:inScheme 'gnc:ResourceClassificationScheme) + (string->identifier "" (remap-species-identifiers (field Species Fullname))) + (set rdf:type 'gnc:species) (set rdfs:label (remap-species-identifiers (field Species Fullname))) (set skos:prefLabel (field Species MenuName)) (set skos:altLabel (field Species SpeciesName)) - (set gnt:shortName (field Species Name)) - (set gnt:family (field Species Family)) - (set skos:notation (ontology - 'taxon: - (field Species TaxonomyId))))) + (set gnt:short_name (field Species Name)) + (set gnt:has_taxonomic_family (string->identifier "family" (field Species Family) #:separator "_")) + (set gnt:has_uniprot_taxon_id (ontology + 'taxon: + (field Species TaxonomyId))))) + +(define-transformer gnc:species->gn:set + (tables (InbredSet + (left-join Species "ON InbredSet.SpeciesId=Species.Id")) + "WHERE public > 0 AND Species.Name != 'monkey'") + (triples (string->identifier "" (remap-species-identifiers (field Species Fullname))) + (set gnt:has_strain + (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + +(define-transformer gn:family->gn:species/metadata + (tables (Species) + "WHERE Name != 'monkey' GROUP BY FAMILY") + (triples (string->identifier "family" (field Species Family) #:separator "_") + (set gnt:has_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)))) + (set rdfs:label (field Species Family)) + (set gnt:has_family_order_id + (annotate-field (field Species OrderId) + '^^xsd:integer)))) + +(define-transformer gn:family->gn:species + (tables (Species) + "WHERE Name != 'monkey'") + (triples (string->identifier "family" (field Species Family) #:separator "_") + (set gnt:has_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)))))) -(define-transformer inbred-set + +(define-transformer gn:set->metadata (tables (InbredSet (left-join Species "ON InbredSet.SpeciesId=Species.Id") (left-join MappingMethod - "ON InbredSet.MappingMethodId=MappingMethod.Id"))) - (schema-triples - (gnt:geneticType a owl:ObjectProperty) - (gnt:geneticType rdfs:domain gnc:set) - (gnt:code a owl:ObjectProperty) - (gnt:code rdfs:domain gnc:set) - ;; Already defined as an owl prop in species - (gnt:family rdfs:domain gnc:Set) - (gnt:mappingMethod a owl:ObjectProperty) - (gnt:mappingMethod rdfs:domain gnc:set) - (gnt:belongsToGroup a rdf:property) - (gnt:belongsToGroup rdf:comment "This resource given to this group") - (gnt:belongsToGroup rdf:label "belongsToGroup")) - (triples (string->identifier - "set" (field InbredSet Name InbredSetName) - #:separator "" - #:proc string-capitalize-first) - (set skos:inScheme 'gnc:ResourceClassificationScheme) + "ON InbredSet.MappingMethodId=MappingMethod.Id")) + "WHERE public > 0 AND Species.Name != 'monkey'") + (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") + (set rdf:type 'gnc:set) (set rdfs:label (field InbredSet FullName)) (set skos:prefLabel (field InbredSet Name InbredSetName)) - (set gnt:geneticType (field InbredSet GeneticType)) - (set gnt:family (field InbredSet Family)) - (set gnt:mappingMethod (field MappingMethod Name)) - (set gnt:code (field InbredSet InbredSetCode)) - (set xkos:generalizes - (string->identifier "" (remap-species-identifiers (field Species Fullname)) - #:separator "" - #:proc string-capitalize-first)))) + (set gnt:genetic_type (field InbredSet GeneticType)) + (set dct:description (annotate-field (sanitize-rdf-string (field InbredSet description)) + '^^rdf:HTML)) + (set gnt:uses_mapping_method + (string->identifier "mapping_method" (field MappingMethod Name) #:separator "_")) + (set gnt:has_set_code (field InbredSet InbredSetCode)) + (set gnt:has_species + (string->identifier "" (remap-species-identifiers (field Species Fullname)))))) + +(define-transformer gn:set->gn:population + (tables (InbredSet) + "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'") + (schema-triples + (gnt:has_reference_population rdfs:domain gnc:set) + (gnt:has_reference_population a owl:ObjectProperty) + (gnt:has_reference_population rdfs:comment "This group belongs to this population category.") + (gnt:has_reference_population rdfs:label "belongs to population category.")) + (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_") + (set gnt:has_reference_population + (string->identifier "population" (field InbredSet Family) #:separator "_")))) + +(define-transformer gn:population->metadata + (tables (InbredSet) + "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family") + (triples (string->identifier "population" (field InbredSet Family) #:separator "_") + (set rdf:type 'gnc:reference_population) + (set rdfs:label (field InbredSet Family)) + (set skos:member 'gnc:population_category) + (set gnt:has_population_order_id + (annotate-field (field InbredSet FamilyOrder) + '^^xsd:integer)))) + +(define-transformer gn:population->gn:set + (tables (InbredSet) + "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'") + (triples (string->identifier "population" (field InbredSet Family) #:separator "_") + (set gnt:has_strain + (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")))) + +(define-transformer gnc:population_category->gn:population + (tables (InbredSet) + "WHERE public > 0 AND FullName NOT LIKE '%monkey%' GROUP BY Family") + (triples "gnc:population_category" + (set gnt:has_reference_population + (string->identifier "population" (field InbredSet Family) #:separator "_")))) + +(define-transformer gnc:taxonomic_family->gn:family + (tables (Species) + "WHERE Name != 'monkey' GROUP BY Family") + (triples "gnc:taxonomic_family" + (set gnt:has_taxonomic_family + (string->identifier "family" (field Species Family) #:separator "_")))) @@ -141,24 +149,36 @@ read))) (with-documentation - (name "Species Metadata") + (name "GN Classification Hierarchy") (connection %connection-settings) (table-metadata? #f) (prefixes - '(("gn:" "<http://genenetwork.org/id/>") - ("gnc:" "<http://genenetwork.org/category/>") + '(("dcat:" "<http://www.w3.org/ns/dcat#>") + ("dct:" "<http://purl.org/dc/terms/>") + ("gn:" "<http://rdf.genenetwork.org/v1/id/>") + ("gnc:" "<http://rdf.genenetwork.org/v1/category/>") ("owl:" "<http://www.w3.org/2002/07/owl#>") - ("gnt:" "<http://genenetwork.org/term/>") + ("gnt:" "<http://rdf.genenetwork.org/v1/term/>") + ("schema:" "<https://schema.org/>") ("skos:" "<http://www.w3.org/2004/02/skos/core#>") ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>") + ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") ("taxon:" "<http://purl.uniprot.org/taxonomy/>"))) (inputs - (list classification-scheme-species - classification-scheme-set - species - inbred-set)) + (list gnc:species->gn:species + gnc:set->gn:set + gnc:species->metadata + gnc:species->gn:set + gn:family->gn:species/metadata + gn:family->gn:species + gn:set->metadata + gn:set->gn:population + gn:population->metadata + gn:population->gn:set + gnc:population_category->gn:population + gnc:taxonomic_family->gn:family)) (outputs `(#:documentation ,documentation #:rdf ,output)))) |
