#! /usr/bin/env guile
!#
(use-modules (srfi srfi-1)
(srfi srfi-26)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 regex)
(transform strings)
(transform sql)
(transform triples)
(transform special-forms))
;; Classification Scheme
;; Transformer gnc:species->gn:species.
;; Reads the Species table, skipping the row whose Name is 'monkey', and under
;; the fixed subject "gnc:species" sets skos:member to an identifier built
;; from the remapped Species.Fullname (an empty prefix is passed to
;; string->identifier).
;; NOTE(review): presumably one triple is produced per selected row; the exact
;; identifier text depends on string->identifier and
;; remap-species-identifiers, which are defined outside this file -- confirm.
(define-transformer gnc:species->gn:species
(tables (Species)
"WHERE Name != 'monkey'")
(triples "gnc:species"
(set skos:member
(string->identifier "" (remap-species-identifiers (field Species Fullname))))))
;; Transformer gnc:set->gn:set.
;; Reads InbredSet rows with public > 0 whose FullName does not contain
;; 'monkey', and under the fixed subject "gnc:set" sets skos:member to an
;; identifier built with prefix "set" and separator "_" from the InbredSet
;; Name field.
;; NOTE(review): the third argument in (field InbredSet Name InbredSetName)
;; looks like a column alias -- confirm against the `field` macro.
(define-transformer gnc:set->gn:set
(tables (InbredSet)
"WHERE public > 0 AND FullName NOT LIKE '%monkey%'")
(triples "gnc:set"
(set skos:member
(string->identifier
"set" (field InbredSet Name InbredSetName) #:separator "_"))))
;; Transformer gnc:species->metadata.
;; Reads the Species table (excluding Name = 'monkey') and describes each
;; species.  The subject is the identifier built from the remapped
;; Species.Fullname; the predicates set on it are listed below.
(define-transformer gnc:species->metadata
(tables (Species)
"WHERE Name != 'monkey'")
(triples
(string->identifier "" (remap-species-identifiers (field Species Fullname)))
;; Every subject is typed as gnc:species.
(set rdf:type 'gnc:species)
;; Labels: remapped Fullname, MenuName and SpeciesName respectively.
(set rdfs:label (remap-species-identifiers (field Species Fullname)))
(set skos:prefLabel (field Species MenuName))
(set skos:altLabel (field Species SpeciesName))
(set gnt:short_name (field Species Name))
;; Link to the family node; built with prefix "family" and separator "_",
;; the same construction the family transformers in this file use.
(set gnt:has_taxonomic_family (string->identifier "family" (field Species Family) #:separator "_"))
;; TaxonomyId is passed to `ontology` together with the 'taxon: prefix.
;; NOTE(review): presumably this yields a taxon:<id> term -- confirm
;; against the `ontology` helper.
(set gnt:has_uniprot_taxon_id (ontology
'taxon:
(field Species TaxonomyId)))))
;; Transformer gnc:species->gn:set.
;; Reads InbredSet left-joined to Species on InbredSet.SpeciesId = Species.Id,
;; keeping rows with public > 0 and Species.Name != 'monkey'.  The subject is
;; the species identifier (from the remapped Species.Fullname) and
;; gnt:has_strain is set to the "set"-prefixed identifier of the InbredSet.
;; Because the WHERE clause compares Species.Name, InbredSet rows without a
;; matching Species row (NULL name) do not satisfy it despite the left join.
(define-transformer gnc:species->gn:set
(tables (InbredSet
(left-join Species "ON InbredSet.SpeciesId=Species.Id"))
"WHERE public > 0 AND Species.Name != 'monkey'")
(triples (string->identifier "" (remap-species-identifiers (field Species Fullname)))
(set gnt:has_strain
(string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
;; Transformer gn:family->gn:species/metadata.
;; Reads the Species table (excluding Name = 'monkey') grouped by Family, and
;; describes each family node (identifier with prefix "family" and separator
;; "_"): one gnt:has_species link, an rdfs:label taken from Family, and
;; gnt:has_family_order_id taken from OrderId annotated as ^^xsd:integer.
;; NOTE(review): Fullname and OrderId are not aggregated, so with GROUP BY
;; Family the database picks one row per family; this relies on the server
;; not enforcing ONLY_FULL_GROUP_BY -- confirm.  The complete family->species
;; links come from gn:family->gn:species, which does not group.
(define-transformer gn:family->gn:species/metadata
(tables (Species)
"WHERE Name != 'monkey' GROUP BY FAMILY")
(triples (string->identifier "family" (field Species Family) #:separator "_")
(set gnt:has_species
(string->identifier "" (remap-species-identifiers (field Species Fullname))))
(set rdfs:label (field Species Family))
(set gnt:has_family_order_id
(annotate-field (field Species OrderId)
'^^xsd:integer))))
;; Transformer gn:family->gn:species.
;; Reads every Species row except Name = 'monkey' (no grouping) and, with the
;; family identifier (prefix "family", separator "_") as subject, sets
;; gnt:has_species to the identifier built from the remapped Species.Fullname.
(define-transformer gn:family->gn:species
(tables (Species)
"WHERE Name != 'monkey'")
(triples (string->identifier "family" (field Species Family) #:separator "_")
(set gnt:has_species
(string->identifier "" (remap-species-identifiers (field Species Fullname))))))
;; Transformer gn:set->metadata.
;; Reads InbredSet left-joined to Species (on SpeciesId) and to MappingMethod
;; (on MappingMethodId), keeping rows with public > 0 and
;; Species.Name != 'monkey'.  The subject is the "set"-prefixed identifier of
;; the InbredSet; the predicates set on it are listed below.
(define-transformer gn:set->metadata
(tables (InbredSet
(left-join Species "ON InbredSet.SpeciesId=Species.Id")
(left-join MappingMethod
"ON InbredSet.MappingMethodId=MappingMethod.Id"))
"WHERE public > 0 AND Species.Name != 'monkey'")
(triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
;; Every subject is typed as gnc:set.
(set rdf:type 'gnc:set)
(set rdfs:label (field InbredSet FullName))
(set skos:prefLabel (field InbredSet Name InbredSetName))
(set gnt:genetic_type (field InbredSet GeneticType))
;; The description is passed through sanitize-rdf-string and annotated as
;; ^^rdf:HTML.
;; NOTE(review): presumably the column holds HTML markup -- confirm.
(set dct:description (annotate-field (sanitize-rdf-string (field InbredSet description))
'^^rdf:HTML))
;; Mapping method node: prefix "mapping_method", separator "_".
(set gnt:uses_mapping_method
(string->identifier "mapping_method" (field MappingMethod Name) #:separator "_"))
(set gnt:has_set_code (field InbredSet InbredSetCode))
;; Link to the species node, built the same way as in the species
;; transformers of this file.
(set gnt:has_species
(string->identifier "" (remap-species-identifiers (field Species Fullname))))))
;; Transformer gn:set->gn:population.
;; Reads InbredSet rows that have a non-NULL Family and whose FullName does
;; not contain 'monkey'.  With the "set"-prefixed identifier as subject it
;; sets gnt:has_reference_population to the "population"-prefixed identifier
;; built from Family.
(define-transformer gn:set->gn:population
(tables (InbredSet)
"WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
;; Statements describing the gnt:has_reference_population property itself:
;; its domain (gnc:set), its kind (owl:ObjectProperty), a comment and a label.
;; NOTE(review): presumably emitted once rather than per row -- confirm
;; against the `schema-triples` form.
(schema-triples
(gnt:has_reference_population rdfs:domain gnc:set)
(gnt:has_reference_population a owl:ObjectProperty)
(gnt:has_reference_population rdfs:comment "This group belongs to this population category.")
(gnt:has_reference_population rdfs:label "belongs to population category."))
(triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
(set gnt:has_reference_population
(string->identifier "population" (field InbredSet Family) #:separator "_"))))
;; Transformer gn:population->metadata.
;; Reads InbredSet rows with a non-NULL Family whose FullName does not contain
;; 'monkey', grouped by Family.  Each population node (prefix "population",
;; separator "_") is typed gnc:reference_population, labelled with Family,
;; made a skos:member of gnc:population_category, and given
;; gnt:has_population_order_id from FamilyOrder annotated as ^^xsd:integer.
;; NOTE(review): FamilyOrder is not aggregated, so its value comes from
;; whichever row the database picks per Family; this relies on the server not
;; enforcing ONLY_FULL_GROUP_BY -- confirm.
(define-transformer gn:population->metadata
(tables (InbredSet)
"WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family")
(triples (string->identifier "population" (field InbredSet Family) #:separator "_")
(set rdf:type 'gnc:reference_population)
(set rdfs:label (field InbredSet Family))
(set skos:member 'gnc:population_category)
(set gnt:has_population_order_id
(annotate-field (field InbredSet FamilyOrder)
'^^xsd:integer))))
;; Transformer gn:population->gn:set.
;; Reads InbredSet rows with a non-NULL Family whose FullName does not contain
;; 'monkey' (no grouping).  With the "population"-prefixed identifier built
;; from Family as subject, it sets gnt:has_strain to the "set"-prefixed
;; identifier of the InbredSet.
(define-transformer gn:population->gn:set
(tables (InbredSet)
"WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
(triples (string->identifier "population" (field InbredSet Family) #:separator "_")
(set gnt:has_strain
(string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))
;; Transformer gnc:population_category->gn:population.
;; Reads public InbredSet rows (public > 0) whose FullName does not contain
;; 'monkey', grouped by Family, and under the fixed subject
;; "gnc:population_category" sets gnt:has_reference_population to the
;; "population"-prefixed identifier built from Family.
;;
;; Rows with a NULL Family are filtered out.  GROUP BY would otherwise put
;; them in one NULL group, producing a link to a population built from a
;; missing Family.  The other population transformers in this file
;; (gn:set->gn:population, gn:population->metadata, gn:population->gn:set)
;; all require Family IS NOT NULL, so such a population would never be
;; described by them.
(define-transformer gnc:population_category->gn:population
  (tables (InbredSet)
          "WHERE public > 0 AND Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family")
  (triples "gnc:population_category"
    (set gnt:has_reference_population
         (string->identifier "population" (field InbredSet Family) #:separator "_"))))
;; Transformer gnc:taxonomic_family->gn:family.
;; Reads the Species table (excluding Name = 'monkey') grouped by Family and,
;; under the fixed subject "gnc:taxonomic_family", sets
;; gnt:has_taxonomic_family to the family identifier (prefix "family",
;; separator "_").  Only the grouping column is selected here.
(define-transformer gnc:taxonomic_family->gn:family
(tables (Species)
"WHERE Name != 'monkey' GROUP BY Family")
(triples "gnc:taxonomic_family"
(set gnt:has_taxonomic_family
(string->identifier "family" (field Species Family) #:separator "_"))))
;; Script entry point.
;; Parses the command line, reads the connection settings as a single Scheme
;; datum from the --settings file, and hands every transformer defined above
;; to `with-documentation` together with the RDF prefix table and the
;; output targets.
;;
;; Options:
;;   -s, --settings FILE        file whose first datum is the connection
;;                              settings (required)
;;   -o, --output FILE          passed on as the #:rdf output
;;   -d, --documentation FILE   passed on as the #:documentation output
(let* ((option-spec
        ;; --settings is marked required: its value is opened with
        ;; call-with-input-file below, which fails with an opaque wrong-type
        ;; error on #f.  With (required? #t) getopt-long itself reports the
        ;; missing option.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; Only the first datum of the settings file is read.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "GN Classification Hierarchy")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Prefix -> namespace IRI pairs made available to the transformers.
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("schema:" "<https://schema.org/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("taxon:" "<http://purl.uniprot.org/taxonomy/>")))
   ;; All transformers defined in this file, in definition order.
   (inputs
    (list gnc:species->gn:species
          gnc:set->gn:set
          gnc:species->metadata
          gnc:species->gn:set
          gn:family->gn:species/metadata
          gn:family->gn:species
          gn:set->metadata
          gn:set->gn:population
          gn:population->metadata
          gn:population->gn:set
          gnc:population_category->gn:population
          gnc:taxonomic_family->gn:family))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))