#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms))
(define (remap-species-identifiers str)
  "Return the standard binomial name for the species full name STR.
Names that have no entry in the remapping table are returned unchanged.
The table is a stop-gap: the lasting fix is to correct the names in the
database itself."
  (let* ((remappings
          '(("Fly (Drosophila melanogaster dm6)" . "Drosophila melanogaster")
            ("Oryzias latipes (Japanese medaka)" . "Oryzias latipes")
            ;; NOTE(review): unlike the other entries, which only strip
            ;; decoration from the name, this one maps to a different
            ;; species -- confirm it is intentional.
            ("Macaca mulatta" . "Macaca nemestrina")
            ("Bat (Glossophaga soricina)" . "Glossophaga soricina")))
         ;; `assoc' compares keys with `equal?'.
         (entry (assoc str remappings)))
    (if entry
        (cdr entry)
        str)))
;; Classification Scheme
;; Transformer over the Species table.  Its schema triples declare the
;; gnc:ResourceClassificationScheme concept scheme together with the
;; gnc:DatasetType and gnc:Species classification levels; its data triples
;; attach species identifiers to gnc:Species through skos:member.
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   ;; The scheme itself and the list of its three levels.
   (gnc:ResourceClassificationScheme a skos:ConceptScheme)
   (gnc:ResourceClassificationScheme
    skos:prefLabel "GeneNetwork Classification Scheme For Resources")
   (gnc:ResourceClassificationScheme xkos:numberOfLevels "3")
   (gnc:ResourceClassificationScheme
    xkos:levels "( gnc:DatasetType gnc:Set gnc:Species )")
   ;; Depth 1: the dataset type and its three members.
   (gnc:DatasetType a xkos:ClassificationLevel)
   (gnc:DatasetType
    skos:prefLabel "The Type of a Dataset which can be a ProbeSet, Genotype, or Phenotype")
   (gnc:DatasetType xkos:depth "1")
   (gnc:DatasetType skos:member gnc:Probeset)
   (gnc:DatasetType skos:member gnc:Genotype)
   (gnc:DatasetType skos:member gnc:Phenotype)
   ;; Preferred and alternative labels for each dataset type.
   (gnc:Probeset skos:prefLabel "mRNA Assay Datasets")
   (gnc:Probeset skos:altLabel "ProbeSet")
   (gnc:Genotype skos:prefLabel "Genotype")
   (gnc:Genotype skos:altLabel "DNA Markers and SNPs")
   (gnc:Phenotype skos:prefLabel "Phenotype")
   (gnc:Phenotype skos:altLabel "Traits and Cofactors")
   ;; Depth 3: the species level, linked to gnc:Set.
   (gnc:Species a xkos:ClassificationLevel)
   (gnc:Species skos:prefLabel "The species in which this resource belongs")
   (gnc:Species xkos:depth "3")
   (gnc:Species xkos:specializes gnc:Set))
  ;; The subject is the fixed string "gnc:Species"; the member identifier is
  ;; built from the remapped Species.Fullname with an empty prefix and an
  ;; empty separator (presumably one triple per Species row -- confirm
  ;; against the define-transformer implementation).
  (triples "gnc:Species"
    (set skos:member
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))))
;; Transformer over the InbredSet table.  Its schema triples declare the
;; gnc:Set classification level (depth 2); its data triples attach set
;; identifiers to gnc:Set through skos:member.
(define-transformer classification-scheme-set
  (tables (InbredSet))
  (schema-triples
   (gnc:Set a xkos:ClassificationLevel)
   (gnc:Set
    skos:prefLabel "The Type of Set, Ie InbredSet/OutbredSet that a resource can belong to")
   (gnc:Set xkos:depth "2")
   (gnc:Set xkos:generalizes gnc:Species))
  ;; The subject is the fixed string "gnc:Set"; the member identifier is
  ;; built with the "set" prefix and an empty separator from the
  ;; InbredSet Name field (referenced here as InbredSetName).
  (triples "gnc:Set"
    (set skos:member
         (string->identifier
          "set"
          (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))))
;; Transformer over the Species table.  Its schema triples declare the
;; gnt:family, gnt:shortName and gnt:belongsToSpecies properties; its data
;; triples describe each species, identified by its remapped full name,
;; with labels, short name, family and a taxon: notation.
(define-transformer species
  (tables (Species))
  (schema-triples
   ;; NOTE(review): gnt:family and gnt:shortName are declared as
   ;; owl:ObjectProperty, yet below they are set from plain table fields --
   ;; confirm whether owl:DatatypeProperty was intended.
   (gnt:family a owl:ObjectProperty)
   (gnt:family rdfs:domain gnc:Species)
   (gnt:family skos:definition "This resource belongs to this family")
   (gnt:shortName a owl:ObjectProperty)
   (gnt:shortName rdfs:domain gnc:Species)
   (gnt:shortName skos:definition "The short name of a given resource")
   ;; The class of properties is rdf:Property (capitalised), and `label'
   ;; and `comment' live in the RDFS namespace, not the RDF one.
   (gnt:belongsToSpecies a rdf:Property)
   (gnt:belongsToSpecies rdfs:comment "This resource given to this species")
   (gnt:belongsToSpecies rdfs:label "belongsToSpecies"))
  (triples
      ;; Subject: same identifier construction as the skos:member objects
      ;; emitted by classification-scheme-species.
      (string->identifier "" (remap-species-identifiers (field Species Fullname))
                          #:separator ""
                          #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:shortName (field Species Name))
    (set gnt:family (field Species Family))
    ;; The notation is built from the taxon: prefix and the TaxonomyId field.
    (set skos:notation (ontology
                        'taxon:
                        (field Species TaxonomyId)))))
;; Transformer over InbredSet, left-joined with Species and MappingMethod.
;; Its schema triples declare the properties used to describe a set; its
;; data triples describe each set and link it to its species.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id")))
  (schema-triples
   ;; The domain is gnc:Set, spelled exactly as classification-scheme-set
   ;; declares it; prefixed names are case-sensitive, so a lowercase
   ;; gnc:set would name a different, undeclared resource.
   (gnt:geneticType a owl:ObjectProperty)
   (gnt:geneticType rdfs:domain gnc:Set)
   (gnt:code a owl:ObjectProperty)
   (gnt:code rdfs:domain gnc:Set)
   ;; Already defined as an owl prop in species
   (gnt:family rdfs:domain gnc:Set)
   (gnt:mappingMethod a owl:ObjectProperty)
   (gnt:mappingMethod rdfs:domain gnc:Set)
   ;; The class of properties is rdf:Property (capitalised), and `label'
   ;; and `comment' live in the RDFS namespace, not the RDF one.
   (gnt:belongsToGroup a rdf:Property)
   (gnt:belongsToGroup rdfs:comment "This resource given to this group")
   (gnt:belongsToGroup rdfs:label "belongsToGroup"))
  (triples
      ;; Subject: same identifier construction as the skos:member objects
      ;; emitted by classification-scheme-set.
      (string->identifier
       "set" (field InbredSet Name InbredSetName)
       #:separator ""
       #:proc string-capitalize-first)
    (set skos:inScheme 'gnc:ResourceClassificationScheme)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:geneticType (field InbredSet GeneticType))
    (set gnt:family (field InbredSet Family))
    (set gnt:mappingMethod (field MappingMethod Name))
    (set gnt:code (field InbredSet InbredSetCode))
    ;; Object: the species identifier, built exactly as the subject of the
    ;; `species' transformer, from the joined Species row.
    (set xkos:generalizes
         (string->identifier "" (remap-species-identifiers (field Species Fullname))
                             #:separator ""
                             #:proc string-capitalize-first))))
;; Script entry point: parse the command line, read the connection settings
;; from the file given with --settings, and run the four transformers
;; defined above through `with-documentation'.
;;
;; Options:
;;   -s, --settings FILE        file whose first datum is `read' as the
;;                              connection settings (mandatory)
;;   -o, --output FILE          passed on as the #:rdf output
;;   -d, --documentation FILE   passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is mandatory: its value is handed directly to
        ;; `call-with-input-file' below, so let getopt-long report a
        ;; missing option instead of passing #f along.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix below is mapped to an empty string --
   ;; confirm the namespace IRIs have not been lost from this file.
   (prefixes
    '(("gn:" "")
      ("gnc:" "")
      ("owl:" "")
      ("gnt:" "")
      ("skos:" "")
      ("xkos:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("taxon:" "")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))