#! /usr/bin/env guile
!#

;;; Species / set / population / family metadata transform.
;;;
;;; Declares a series of transformers over the `Species', `InbredSet' and
;;; `MappingMethod' tables and hands them to `with-documentation' together
;;; with the connection settings and output locations given on the command
;;; line (-s/--settings, -o/--output, -d/--documentation).
;;;
;;; Every query in this file excludes the 'monkey' species, either by
;;; `Name != 'monkey'' or by `FullName NOT LIKE '%monkey%''.

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Classification Scheme

;; Declares the classification scheme and two of its levels
;; (gnc:population_category, gnc:species), then lists each Species row as a
;; skos:member of "gnc:species".
(define-transformer gnc:species->gn:species
  (tables (Species)
          "WHERE Name != 'monkey'")
  (schema-triples
   (gnc:resource_classification_scheme a skos:ConceptScheme)
   (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Resource Classification Scheme")
   (gnc:resource_classification_scheme skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
   (gnc:resource_classification_scheme xkos:numberOfLevels "4")
   (gnc:resource_classification_scheme xkos:levels gnc:taxonomic_family)
   (gnc:resource_classification_scheme xkos:levels gnc:species)
   (gnc:resource_classification_scheme xkos:levels gnc:population_category)
   (gnc:resource_classification_scheme xkos:levels gnc:set)
   (gnc:population_category a xkos:ClassificationLevel)
   (gnc:population_category skos:inScheme gnc:resource_classification_scheme)
   (gnc:population_category xkos:previousLevel gnc:species)
   (gnc:population_category xkos:nextLevel gnc:set)
   ;; The preferred label agrees with the rdfs:label on the next line; the
   ;; "Species" label belongs to gnc:species only.
   (gnc:population_category skos:prefLabel "Population Category")
   (gnc:population_category rdfs:label "Population Category")
   (gnc:population_category xkos:depth "3")
   ;; Same gnc: subject as the statements above, so the definition is
   ;; attached to the classification level itself.
   (gnc:population_category skos:definition "Classification of genetic populations by breeding design and data aggregation.")
   (gnc:species a xkos:ClassificationLevel)
   (gnc:species skos:inScheme gnc:resource_classification_scheme)
   (gnc:species xkos:previousLevel gnc:taxonomic_family)
   (gnc:species xkos:nextLevel gnc:population_category)
   (gnc:species skos:prefLabel "Species")
   (gnc:species skos:definition "A classification level that associates a given resource to a species in GeneNetwork.")
   (gnc:species xkos:depth "2"))
  (triples "gnc:species"
    (set skos:member
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))))

;; Declares the gnc:set level and lists each public InbredSet row as a
;; skos:member of "gnc:set".
(define-transformer gnc:set->gn:set
  (tables (InbredSet)
          "WHERE public > 0 AND FullName NOT LIKE '%monkey%'")
  (schema-triples
   (gnc:set a xkos:ClassificationLevel)
   (gnc:set skos:inScheme gnc:resource_classification_scheme)
   (gnc:set xkos:previousLevel gnc:population_category)
   (gnc:set skos:prefLabel "InbredSet Group")
   (gnc:set skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
   (gnc:set xkos:depth "4"))
  (triples "gnc:set"
    (set skos:member
         (string->identifier
          "set"
          (field InbredSet Name InbredSetName)
          #:separator "_"))))

;; Per-species metadata: type, labels, short name, taxonomic family and
;; taxon id, keyed on the remapped Species.Fullname identifier.
(define-transformer gnc:species->metadata
  (tables (Species)
          "WHERE Name != 'monkey'")
  (schema-triples
   (gnt:has_uniprot_taxon_id a owl:ObjectProperty)
   (gnt:has_uniprot_taxon_id rdfs:label "has uniprot taxonomic id")
   (gnt:has_taxonomic_family a owl:ObjectProperty)
   (gnt:has_taxonomic_family rdfs:label "has family")
   (gnt:has_taxonomic_family skos:definition "Links a species to its taxonomic family")
   (gnt:has_taxonomic_family schema:domainIncludes gnc:species)
   (gnt:has_taxonomic_family schema:domainIncludes gnc:set)
   (gnt:short_name a owl:DatatypeProperty)
   (gnt:short_name rdfs:label "has short name")
   (gnt:short_name rdfs:domain gnc:species)
   (gnt:short_name skos:definition "The short name of a given resource")
   (gnt:has_species a owl:ObjectProperty)
   ;; `comment' is an RDF Schema term (rdfs:), matching the rdfs:comment
   ;; used for gnt:uses_mapping_method in this file.
   (gnt:has_species rdfs:comment "This resource belongs to this species")
   (gnt:has_species rdfs:label "belongs to species")
   (gnt:has_species rdfs:range gnc:species))
  (triples (string->identifier
            ""
            (remap-species-identifiers (field Species Fullname)))
    (set rdf:type 'gnc:species)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:short_name (field Species Name))
    (set gnt:has_taxonomic_family
         (string->identifier
          "family"
          (field Species Family)
          #:separator "_"))
    (set gnt:has_uniprot_taxon_id
         (ontology 'taxon: (field Species TaxonomyId)))))

;; Links each species to the public InbredSet rows joined to it through
;; InbredSet.SpeciesId.
(define-transformer gnc:species->gn:set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id"))
          "WHERE public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_strain a owl:ObjectProperty)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:domain gnc:species)
   (gnt:has_strain rdfs:label "this resource belongs to this strain.")
   (gnt:has_strain skos:definition "Lists all strains that belong to this resource."))
  (triples (string->identifier
            ""
            (remap-species-identifiers (field Species Fullname)))
    (set gnt:has_strain
         (string->identifier
          "set"
          (field InbredSet Name InbredSetName)
          #:separator "_"))))

;; Declares the gnc:taxonomic_family level and emits one row per family
;; (GROUP BY) carrying its label and order id.
(define-transformer gn:family->gn:species/metadata
  (tables (Species)
          "WHERE Name != 'monkey' GROUP BY FAMILY")
  (schema-triples
   (gnc:taxonomic_family a xkos:ClassificationLevel)
   (gnc:taxonomic_family skos:inScheme gnc:resource_classification_scheme)
   (gnc:taxonomic_family skos:prefLabel "Family")
   (gnc:taxonomic_family skos:definition "An organizational classification level used in GeneNetwork to group resources into families.")
   (gnc:taxonomic_family xkos:depth "1")
   (gnc:taxonomic_family xkos:nextLevel gnc:species)
   (gnt:has_family_order_id a owl:DatatypeProperty)
   (gnt:has_family_order_id rdfs:range xsd:integer))
  (triples (string->identifier
            "family"
            (field Species Family)
            #:separator "_")
    (set gnt:has_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))
    (set rdfs:label (field Species Family))
    (set gnt:has_family_order_id
         (annotate-field (field Species OrderId) '^^xsd:integer))))

;; Same family subject as above but without GROUP BY, so every Species row
;; contributes a gnt:has_species link to its family.
(define-transformer gn:family->gn:species
  (tables (Species)
          "WHERE Name != 'monkey'")
  (schema-triples
   (gnt:has_family_order_id a owl:DatatypeProperty))
  (triples (string->identifier
            "family"
            (field Species Family)
            #:separator "_")
    (set gnt:has_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))))

;; Per-set metadata for public InbredSet rows: type, labels, genetic type,
;; description, mapping method, set code and owning species.
(define-transformer gn:set->metadata
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod
                      "ON InbredSet.MappingMethodId=MappingMethod.Id"))
          "WHERE public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:genetic_type a owl:DatatypeProperty)
   (gnt:genetic_type rdfs:label "has genetic type")
   (gnt:genetic_type skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
   (gnt:genetic_type rdfs:domain gnc:set)
   (gnt:genetic_type rdfs:range xsd:string)
   (gnt:has_set_code a owl:DatatypeProperty)
   (gnt:has_set_code rdfs:label "has set code")
   (gnt:has_set_code skos:definition "Provides a unique identifier code for a resource set.")
   (gnt:has_set_code rdfs:domain gnc:set)
   (gnt:has_set_code rdfs:range xsd:string)
   (gnt:uses_mapping_method a owl:ObjectProperty)
   (gnt:uses_mapping_method rdfs:label "mapping method")
   (gnt:uses_mapping_method rdfs:domain gnc:set)
   (gnt:uses_mapping_method rdfs:range gnc:mapping_method)
   (gnt:uses_mapping_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:has_strain a owl:ObjectProperty)
   ;; rdfs:comment, consistent with gnt:uses_mapping_method just above.
   (gnt:has_strain rdfs:comment "Indicates the group the resources belongs to")
   (gnt:has_strain schema:domainIncludes dcat:Dataset)
   (gnt:has_strain schema:domainIncludes gnc:species)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:label "belongs-to-group"))
  (triples (string->identifier
            "set"
            (field InbredSet Name InbredSetName)
            #:separator "_")
    (set rdf:type 'gnc:set)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:genetic_type (field InbredSet GeneticType))
    (set dct:description
         (annotate-field
          (sanitize-rdf-string (field InbredSet description))
          '^^rdf:HTML))
    (set gnt:uses_mapping_method
         (string->identifier
          "mapping_method"
          (field MappingMethod Name)
          #:separator "_"))
    (set gnt:has_set_code (field InbredSet InbredSetCode))
    (set gnt:has_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))))

;; Links each set that has a Family value to the population derived from
;; that value.
(define-transformer gn:set->gn:population
  (tables (InbredSet)
          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
  (schema-triples
   (gnt:has_reference_population rdfs:domain gnc:set)
   (gnt:has_reference_population a owl:ObjectProperty)
   (gnt:has_reference_population rdfs:comment "This group belongs to this population category.")
   (gnt:has_reference_population rdfs:label "belongs to population category."))
  (triples (string->identifier
            "set"
            (field InbredSet Name InbredSetName)
            #:separator "_")
    (set gnt:has_reference_population
         (string->identifier
          "population"
          (field InbredSet Family)
          #:separator "_"))))

;; One row per InbredSet.Family (GROUP BY): type, label, membership and
;; order id of each population.
(define-transformer gn:population->metadata
  (tables (InbredSet)
          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%' GROUP BY Family")
  (schema-triples
   (gnc:reference_population a skos:Concept)
   (gnc:reference_population skos:inScheme gnc:population_category)
   (gnc:reference_population skos:prefLabel "Reference population")
   (gnc:reference_population skos:definition "A genetic population"))
  (triples (string->identifier
            "population"
            (field InbredSet Family)
            #:separator "_")
    (set rdf:type 'gnc:reference_population)
    (set rdfs:label (field InbredSet Family))
    (set skos:member 'gnc:population_category)
    (set gnt:has_population_order_id
         (annotate-field (field InbredSet FamilyOrder) '^^xsd:integer))))

;; Inverse direction of gn:set->gn:population: each population points at
;; the sets whose Family value produced it.
(define-transformer gn:population->gn:set
  (tables (InbredSet)
          "WHERE Family IS NOT NULL AND FullName NOT LIKE '%monkey%'")
  (triples (string->identifier
            "population"
            (field InbredSet Family)
            #:separator "_")
    (set gnt:has_strain
         (string->identifier
          "set"
          (field InbredSet Name InbredSetName)
          #:separator "_"))))

;; Attaches every population (one row per Family) to the
;; "gnc:population_category" level.
;; NOTE(review): unlike the other population transformers this query filters
;; on `public > 0' and does not exclude NULL Family values -- confirm that
;; is intended.
(define-transformer gnc:population_category->gn:population
  (tables (InbredSet)
          "WHERE public > 0 AND FullName NOT LIKE '%monkey%' GROUP BY Family")
  (triples "gnc:population_category"
    (set gnt:has_reference_population
         (string->identifier
          "population"
          (field InbredSet Family)
          #:separator "_"))))

;; Attaches every family (one row per Family) to the
;; "gnc:taxonomic_family" level.
(define-transformer gnc:taxonomic_family->gn:family
  (tables (Species)
          "WHERE Name != 'monkey' GROUP BY Family")
  (schema-triples
   (gnt:assigned_species rdfs:domain gnc:set)
   (gnt:assigned_species a owl:ObjectProperty)
   (gnt:assigned_species rdfs:label "These families have been assigned to these species"))
  (triples "gnc:taxonomic_family"
    (set gnt:has_taxonomic_family
         (string->identifier
          "family"
          (field Species Family)
          #:separator "_"))))


;; Entry point: parse the command line, read the connection settings with
;; `read' from the file given by --settings, and run all transformers.
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       ;; Each option falls back to #f when it is not supplied.
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix is mapped to an empty string here --
   ;; confirm the namespace IRIs were not lost from this table.
   (prefixes
    '(("dcat:" "")
      ("dct:" "")
      ("gn:" "")
      ("gnc:" "")
      ("owl:" "")
      ("gnt:" "")
      ("schema:" "")
      ("skos:" "")
      ("xkos:" "")
      ("xsd:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("taxon:" "")))
   (inputs
    (list
     gnc:species->gn:species
     gnc:set->gn:set
     gnc:species->metadata
     gnc:species->gn:set
     gn:family->gn:species/metadata
     gn:family->gn:species
     gn:set->metadata
     gn:set->gn:population
     gn:population->metadata
     gn:population->gn:set
     gnc:population_category->gn:population
     gnc:taxonomic_family->gn:family))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))