#! /usr/bin/env guile
!#

;;; Transformers describing species and inbred sets as RDF, together
;;; with the classification scheme (DatasetType / Set / Species) they
;;; are placed in.  The final form of the file parses the command line
;;; and hands the transformers to `with-documentation'.

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define (remap-species-identifiers str)
  "Return the standard binomial name for the species full name STR.
Names that have no entry in the table below are returned unchanged.
This is a stop-gap: the names should be corrected in the database."
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ;; NOTE(review): this maps one macaque species name onto a different
    ;; one; kept as-is, but confirm it is intentional.
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    ;; Fall-through: any other name is passed along untouched.
    [str str]))

;; Classification Scheme

;; Declares the overall classification scheme and its DatasetType and
;; Species levels, and lists every species as a member of gnc:Species.
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   (gnc:ResourceClassificationScheme a skos:ConceptScheme)
   (gnc:ResourceClassificationScheme skos:prefLabel "GeneNetwork Classification Scheme For Resources")
   (gnc:ResourceClassificationScheme xkos:numberOfLevels "3")
   (gnc:ResourceClassificationScheme xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )")
   (gnc:DatasetType a xkos:ClassificationLevel)
   (gnc:DatasetType skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
   (gnc:DatasetType xkos:depth "1")
   (gnc:DatasetType skos:member gnc:Probeset)
   (gnc:DatasetType skos:member gnc:Genotype)
   (gnc:DatasetType skos:member gnc:Phenotype)
   (gnc:Probeset skos:prefLabel "mRNA Assay Datasets")
   (gnc:Probeset skos:altLabel "ProbeSet")
   (gnc:Genotype skos:prefLabel "Genotype")
   (gnc:Genotype skos:altLabel "DNA Markers and SNPs")
   (gnc:Phenotype skos:prefLabel "Phenotype")
   (gnc:Phenotype skos:altLabel "Traits and Cofactors")
   (gnc:Species a xkos:ClassificationLevel)
   (gnc:Species skos:prefLabel "The species in which this resource belongs")
   (gnc:Species xkos:depth "3")
   (gnc:Species xkos:specializes gnc:Set))
  (triples "gnc:Species"
    ;; The member identifier is built exactly as the subject of the
    ;; `species' transformer, so both refer to the same resource.
    (set skos:member
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))))

;; Declares the Set level of the classification scheme and lists every
;; inbred set as a member of gnc:Set.
(define-transformer classification-scheme-set
  (tables (InbredSet))
  (schema-triples
   (gnc:Set a xkos:ClassificationLevel)
   (gnc:Set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
   (gnc:Set xkos:depth "2")
   (gnc:Set xkos:generalizes gnc:Species))
  (triples "gnc:Set"
    ;; Built the same way as the subject of the `inbred-set' transformer.
    (set skos:member
         (string->identifier
          "set"
          (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))

;; One resource per row of the Species table, identified by its
;; remapped full name.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnt:family a owl:ObjectProperty)
   (gnt:family rdfs:domain gnc:Species)
   (gnt:family skos:definition "This resource belongs to this family")
   (gnt:shortName a owl:ObjectProperty)
   (gnt:shortName rdfs:domain gnc:Species)
   (gnt:shortName skos:definition "The short name of a given resource")
   ;; The RDF vocabulary defines rdf:Property (capital P); comment and
   ;; label live in the rdfs: namespace, not rdf:.
   (gnt:belongsToSpecies a rdf:Property)
   (gnt:belongsToSpecies rdfs:comment "This resource given to this species")
   (gnt:belongsToSpecies rdfs:label "belongsToSpecies"))
  (triples
      (string->identifier
       ""
       (remap-species-identifiers (field Species Fullname))
       #:separator ""
       #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:shortName (field Species Name))
    (set gnt:family (field Species Family))
    (set skos:notation (ontology 'taxon: (field Species TaxonomyId)))))

;; One resource per row of the InbredSet table, left-joined with its
;; species and mapping method.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   ;; IRIs are case-sensitive: the domain is gnc:Set, the level declared
   ;; by `classification-scheme-set'.
   (gnt:geneticType a owl:ObjectProperty)
   (gnt:geneticType rdfs:domain gnc:Set)
   (gnt:code a owl:ObjectProperty)
   (gnt:code rdfs:domain gnc:Set)
   ;; Already defined as an owl prop in species
   (gnt:family rdfs:domain gnc:Set)
   (gnt:mappingMethod a owl:ObjectProperty)
   (gnt:mappingMethod rdfs:domain gnc:Set)
   ;; rdf:Property / rdfs:comment / rdfs:label are the terms the RDF and
   ;; RDFS vocabularies actually define.
   (gnt:belongsToGroup a rdf:Property)
   (gnt:belongsToGroup rdfs:comment "This resource given to this group")
   (gnt:belongsToGroup rdfs:label "belongsToGroup"))
  (triples
      (string->identifier
       "set"
       (field InbredSet Name InbredSetName)
       #:separator ""
       #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:geneticType (field InbredSet GeneticType))
    (set gnt:family (field InbredSet Family))
    (set gnt:mappingMethod (field MappingMethod Name))
    (set gnt:code (field InbredSet InbredSetCode))
    ;; Links the set to its species, using the same identifier the
    ;; `species' transformer uses as subject.
    (set xkos:generalizes
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))))

;; Entry point.
;;   -s, --settings FILE       file whose first datum is `read' and used
;;                             as the connection settings (required)
;;   -o, --output VALUE        passed on as the #:rdf output
;;   -d, --documentation VALUE passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is marked required so that omitting it is reported
        ;; by getopt-long instead of failing later inside
        ;; `call-with-input-file' with a wrong-type error on #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix is mapped to an empty string here;
   ;; confirm whether the namespace IRIs are meant to be filled in.
   (prefixes
    '(("gn:" "")
      ("gnc:" "")
      ("owl:" "")
      ("gnt:" "")
      ("skos:" "")
      ("xkos:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("taxon:" "")))
   (inputs
    (list
     classification-scheme-species
     classification-scheme-set
     species
     inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))