#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms))
(define (remap-species-identifiers str)
  "Return the standard binomial name for the species full name STR.
Full names that are known to be stored in a non-standard form are looked up in
a fixed table; every other value is handed back untouched.  This is a stop-gap:
the real fix is to correct the names in the database."
  (let ((known-remappings
         '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
           ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
           ;; NOTE(review): this maps one valid binomial onto a different
           ;; species; presumably it compensates for a wrong database entry --
           ;; confirm before changing.
           ("Macaca mulatta" . "Macaca nemestrina")
           ("Bat (Glossophaga soricina)" . "Glossophaga soricina"))))
    ;; assoc-ref compares keys with equal?, so unknown names (and non-string
    ;; values) simply miss the table and fall through to STR itself.
    (or (assoc-ref known-remappings str)
        str)))
;; Classification Scheme
;; Transformer over the Species table.  It declares the GeneNetwork resource
;; classification scheme (a SKOS concept scheme with three XKOS levels:
;; dataset type, set, species) and lists species identifiers as members of
;; the gnc:species level.
;; NOTE(review): the exact semantics of define-transformer, schema-triples,
;; triples, set and field come from the (transform ...) modules, which are not
;; visible here -- confirm against those modules.
(define-transformer classification-scheme-species
(tables (Species))
;; Scheme-level statements: the concept scheme itself, the dataset-type level
;; with its three members and their labels, and the species level.
(schema-triples
(gnc:resource_classification_scheme a skos:ConceptScheme)
(gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Classification Scheme For Resources which are either defines as a dataset, an inbred group, or a species.")
(gnc:resource_classification_scheme xkos:numberOfLevels "3")
(gnc:resource_classification_scheme xkos:levels "( gnc:dataset_type gnc:set gnc:species )")
(gnc:dataset_type a xkos:ClassificationLevel)
(gnc:dataset_type skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
(gnc:dataset_type xkos:depth "1")
(gnc:dataset_type skos:member gnc:probeset)
(gnc:dataset_type skos:member gnc:genotype)
(gnc:dataset_type skos:member gnc:phenotype)
(gnc:probeset skos:prefLabel "mRNA Assay Datasets")
(gnc:probeset skos:altLabel "ProbeSet")
(gnc:genotype skos:prefLabel "Genotype")
(gnc:genotype skos:altLabel "DNA Markers and SNPs")
(gnc:phenotype skos:prefLabel "Phenotype")
(gnc:phenotype skos:altLabel "Traits and Cofactors")
(gnc:species a xkos:ClassificationLevel)
(gnc:species skos:prefLabel "The species in which this resource belongs")
(gnc:species xkos:depth "3")
(gnc:species xkos:specializes gnc:set))
;; Subject is the fixed string "gnc:species"; the skos:member object is an
;; identifier built from the remapped Species.Fullname with an empty prefix,
;; "_" as separator and string-downcase as the #:proc argument.
;; NOTE(review): presumably one statement is produced per Species row, and the
;; final identifier format is decided by string->identifier -- confirm.
(triples "gnc:species"
(set skos:member
(string->identifier "" (remap-species-identifiers (field Species Fullname))
#:separator "_"
#:proc string-downcase))))
;; Transformer over the InbredSet table.  It declares the gnc:set
;; classification level (depth "2") and lists set identifiers as members of
;; that level.
;; NOTE(review): the exact semantics of define-transformer, schema-triples,
;; triples, set and field come from the (transform ...) modules, which are not
;; visible here -- confirm against those modules.
(define-transformer classification-scheme-set
(tables (InbredSet))
(schema-triples
(gnc:set a xkos:ClassificationLevel)
(gnc:set skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
(gnc:set xkos:depth "2")
(gnc:set xkos:generalizes gnc:species))
;; Subject is the fixed string "gnc:set"; the skos:member object is an
;; identifier built with the "set" prefix and "_" separator from the
;; (field InbredSet Name InbredSetName) value.
;; NOTE(review): the third element of the field form looks like a column
;; alias -- confirm against the field macro.
(triples "gnc:set"
(set skos:member
(string->identifier
"set" (field InbredSet Name InbredSetName)
#:separator "_"
;; The identity procedure is passed as #:proc, so the name is handed over
;; as-is rather than e.g. downcased as in the species transformers.
#:proc (lambda (x) x)))))
;; Transformer over the Species table.  It declares the gnt:family,
;; gnt:short_name and gnt:belongs_to_species properties and describes each
;; species (labels, short name, family, taxonomy notation) under an identifier
;; derived from its remapped full name.
;; NOTE(review): the exact semantics of define-transformer, schema-triples,
;; triples, set, field and ontology come from the (transform ...) modules,
;; which are not visible here -- confirm against those modules.
(define-transformer species
  (tables (Species))
  (schema-triples
   ;; NOTE(review): gnt:family and gnt:short_name are declared as
   ;; owl:ObjectProperty although the values set below come straight from
   ;; table fields; if those are emitted as literals, owl:DatatypeProperty
   ;; would be the matching declaration -- confirm before changing.
   (gnt:family a owl:ObjectProperty)
   (gnt:family rdfs:domain gnc:species)
   (gnt:family skos:definition "This resource belongs to this family")
   (gnt:short_name a owl:ObjectProperty)
   (gnt:short_name rdfs:domain gnc:species)
   (gnt:short_name skos:definition "The short name of a given resource")
   ;; The RDF vocabulary defines the class rdf:Property (capital P), while
   ;; label and comment live in the RDFS namespace; the rdf: namespace has no
   ;; `property', `comment' or `label' terms.
   (gnt:belongs_to_species a rdf:Property)
   (gnt:belongs_to_species rdfs:comment "This resource given to this species")
   (gnt:belongs_to_species rdfs:label "belongsToSpecies"))
  (triples
      ;; Subject: identifier built from the remapped Species.Fullname with an
      ;; empty prefix, "_" as separator and string-downcase as #:proc.
      (string->identifier "" (remap-species-identifiers (field Species Fullname))
                          #:separator "_"
                          #:proc string-downcase)
    (set skos:inScheme 'gnc:resource_classification_scheme)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:short_name (field Species Name))
    (set gnt:family (field Species Family))
    ;; The taxonomy id is combined with the taxon: prefix via `ontology'.
    (set skos:notation (ontology
                        'taxon:
                        (field Species TaxonomyId)))))
;; Transformer over the InbredSet table, left-joined with Species and
;; MappingMethod.  It declares the set-level properties and describes each
;; inbred set (labels, genetic type, family, mapping method, code) together
;; with a link to the identifier of its species.
;; NOTE(review): the exact semantics of define-transformer, schema-triples,
;; triples, set, field and left-join come from the (transform ...) modules,
;; which are not visible here -- confirm against those modules.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   (gnt:genetic_type a owl:ObjectProperty)
   (gnt:genetic_type rdfs:domain gnc:set)
   (gnt:code a owl:ObjectProperty)
   (gnt:code rdfs:domain gnc:set)
   ;; Already defined as an owl prop in species
   (gnt:family rdfs:domain gnc:set)
   (gnt:mapping_method a owl:ObjectProperty)
   (gnt:mapping_method rdfs:domain gnc:set)
   ;; The RDF vocabulary defines the class rdf:Property (capital P), while
   ;; label and comment live in the RDFS namespace; the rdf: namespace has no
   ;; `property', `comment' or `label' terms.
   (gnt:belongs_to_group a rdf:Property)
   (gnt:belongs_to_group rdfs:comment "This resource given to this group")
   (gnt:belongs_to_group rdfs:label "belongs_to_group"))
  (triples
      ;; Subject: identifier built with the "set" prefix and "_" separator;
      ;; the identity procedure is passed as #:proc so the name is not
      ;; altered by it.
      (string->identifier
       "set" (field InbredSet Name InbredSetName)
       #:separator "_"
       #:proc (lambda (x) x))
    (set skos:inScheme 'gnc:resource_classification_scheme)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:genetic_type (field InbredSet GeneticType))
    (set gnt:family (field InbredSet Family))
    (set gnt:mapping_method (field MappingMethod Name))
    (set gnt:code (field InbredSet InbredSetCode))
    ;; Object: the species identifier, built from the remapped
    ;; Species.Fullname with an empty prefix, "_" separator and
    ;; string-downcase as #:proc.
    (set xkos:generalizes
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator "_"
                             #:proc string-downcase))))
;; Script entry point.  Parses the command line, reads the connection
;; settings and runs the four transformers through with-documentation.
;;
;; Options (each takes a value):
;;   -s, --settings       file whose first datum is read as the connection
;;                        settings (required)
;;   -o, --output         passed to with-documentation as the #:rdf output
;;   -d, --documentation  passed to with-documentation as the #:documentation
;;                        output
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (begin
          ;; Without this guard a missing --settings would hand #f to
          ;; call-with-input-file and fail with an opaque type error.
          (unless settings
            (error "Missing required option: --settings FILE"))
          (call-with-input-file settings
            read))))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Every prefix needs a namespace IRI; an empty string here cannot yield a
   ;; valid prefix declaration.
   ;; NOTE(review): the owl/skos/xkos/rdf/rdfs IRIs are the published
   ;; namespaces of those vocabularies; the gn:, gnc:, gnt: and taxon: IRIs
   ;; should be confirmed against the project's other transform scripts.
   (prefixes
    '(("gn:" "<http://genenetwork.org/id/>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))