#! /usr/bin/env guile
!#

;;; Transforms species and inbred-set (resource group) metadata into
;;; RDF.  The file declares five transformers and then, in the final
;;; LET* form, parses the command line and hands the transformers to
;;; WITH-DOCUMENTATION.
;;;
;;; Command-line options (see OPTION-SPEC at the bottom):
;;;   -s, --settings FILE        file whose first datum is READ and used
;;;                              as the connection settings (required)
;;;   -o, --output VALUE         passed on as the #:rdf output
;;;   -d, --documentation VALUE  passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Classification Scheme
;;
;; Declares the three-level resource classification scheme
;; (dataset type -> set -> species), the three dataset-type concepts,
;; and the species level itself.  The data triples attach, under the
;; subject "gnc:species", a skos:member built from Species.Fullname.
(define-transformer classification-scheme-species
  (tables (Species))
  (schema-triples
   ;; The scheme and its levels.
   (gnc:resource_classification_scheme a skos:ConceptScheme)
   (gnc:resource_classification_scheme skos:prefLabel "GeneNetwork Resource Classification Scheme")
   (gnc:resource_classification_scheme skos:definition "A hierarchical classification scheme for organizing GeneNetwork resources by dataset type, resource set (inbredset group), or species.")
   (gnc:resource_classification_scheme xkos:numberOfLevels "3")
   (gnc:resource_classification_scheme xkos:levels gnc:dataset_type)
   (gnc:resource_classification_scheme xkos:levels gnc:set)
   (gnc:resource_classification_scheme xkos:levels gnc:species)
   ;; Level 1: dataset type.
   (gnc:dataset_type a xkos:ClassificationLevel)
   (gnc:dataset_type skos:inScheme gnc:resource_classification_scheme)
   (gnc:dataset_type skos:prefLabel "Dataset Type")
   (gnc:dataset_type skos:definition "Classification level describing the biological or experimental nature of a dataset. A dataset can either be a probeSet, a genotype or a phenotype.")
   (gnc:dataset_type xkos:depth "1")
   (gnc:dataset_type xkos:nextLevel gnc:set)
   (gnc:dataset_type skos:member gnc:probeset)
   (gnc:dataset_type skos:member gnc:genotype)
   (gnc:dataset_type skos:member gnc:phenotype)
   ;; Members of the dataset-type level.
   (gnc:probeset a skos:Concept)
   (gnc:probeset skos:inScheme gnc:dataset_type)
   (gnc:probeset skos:prefLabel "Transcriptomic Datasets")
   (gnc:probeset skos:altLabel "ProbeSet")
   (gnc:probeset skos:definition "A category representing microarray or sequencing probe sets that measure gene expression or other molecular traits.")
   (gnc:probeset skos:note "Individual probe sets are too numerous to list explicitly in this ontology but are available through the GeneNetwork API.")
   (gnc:genotype a skos:Concept)
   (gnc:genotype skos:inScheme gnc:dataset_type)
   (gnc:genotype skos:prefLabel "Genotype Datasets")
   (gnc:genotype skos:altLabel "Genotype")
   (gnc:genotype skos:definition "A category representing genetic marker or variant datasets used for genetic mapping.")
   (gnc:phenotype a skos:Concept)
   (gnc:phenotype skos:inScheme gnc:dataset_type)
   (gnc:phenotype skos:prefLabel "Phenotype Datasets")
   (gnc:phenotype skos:altLabel "Phenotype")
   (gnc:phenotype skos:definition "A category representing measured traits or phenotypes for genetic analysis.")
   ;; Level 3: species.  (Level 2, gnc:set, is declared by
   ;; classification-scheme-set.)
   (gnc:species a xkos:ClassificationLevel)
   (gnc:species skos:inScheme gnc:resource_classification_scheme)
   (gnc:species xkos:previousLevel gnc:set)
   (gnc:species skos:prefLabel "Species")
   (gnc:species skos:definition "A classification level that associates a given resource to a species in GeneNetwork.")
   (gnc:species xkos:depth "3"))
  (triples "gnc:species"
    (set skos:member
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))))

;; Declares the middle classification level (gnc:set) and attaches,
;; under the subject "gnc:set", a skos:member built from the
;; InbredSet name.  Only rows matching "public > 0" are selected.
(define-transformer classification-scheme-set
  (tables (InbredSet)
          "WHERE public > 0")
  (schema-triples
   (gnc:set a xkos:ClassificationLevel)
   (gnc:set skos:inScheme gnc:resource_classification_scheme)
   (gnc:set xkos:nextLevel gnc:species)
   (gnc:set xkos:previousLevel gnc:dataset_type)
   (gnc:set skos:prefLabel "InbredSet Group")
   (gnc:set skos:definition "A category representing groups of genetically related strains or individuals (inbred sets, recombinant inbred lines, etc.).")
   (gnc:set xkos:depth "2"))
  (triples "gnc:set"
    (set skos:member
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator "_"))))

;; Describes each species: type, labels, short name, family and
;; taxonomy id.  The subject uses the same identifier expression as the
;; skos:member values emitted by classification-scheme-species.
(define-transformer species
  (tables (Species))
  (schema-triples
   (gnt:has_uniprot_taxon_id a owl:ObjectProperty)
   (gnt:has_uniprot_taxon_id rdfs:label "has uniprot taxonomic id")
   (gnt:has_family a owl:ObjectProperty)
   (gnt:has_family rdfs:label "has family")
   (gnt:has_family skos:definition "Links a species to its taxonomic family")
   (gnt:has_family schema:domainIncludes gnc:species)
   (gnt:has_family schema:domainIncludes gnc:set)
   (gnt:short_name a owl:DatatypeProperty)
   (gnt:short_name rdfs:label "has short name")
   (gnt:short_name rdfs:domain gnc:species)
   (gnt:short_name skos:definition "The short name of a given resource")
   (gnt:belongs_to_species a owl:ObjectProperty)
   ;; rdfs:comment, not rdf:comment: the RDF namespace defines no
   ;; "comment" term, and the rest of this file already uses
   ;; rdfs:comment.
   (gnt:belongs_to_species rdfs:comment "This resource belongs to this species")
   (gnt:belongs_to_species rdfs:label "belongs to species")
   (gnt:belongs_to_species rdfs:range gnc:species))
  (triples (string->identifier
            ""
            (remap-species-identifiers (field Species Fullname)))
    (set rdf:type 'gnc:species)
    (set rdfs:label (remap-species-identifiers (field Species Fullname)))
    (set skos:prefLabel (field Species MenuName))
    (set skos:altLabel (field Species SpeciesName))
    (set gnt:short_name (field Species Name))
    (set gnt:has_family (field Species Family))
    (set gnt:has_uniprot_taxon_id
         (ontology 'taxon: (field Species TaxonomyId)))))

;; Links each species to the public inbred sets joined to it through
;; InbredSet.SpeciesId, using gnt:has_strain.
;;
;; NOTE(review): gnt:has_strain gets a different rdfs:label here than in
;; inbred-set below ("belongs-to-group"); confirm which one is intended.
(define-transformer species-fan-out
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id"))
          "WHERE public > 0")
  (schema-triples
   (gnt:has_strain a owl:ObjectProperty)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:domain gnc:species)
   (gnt:has_strain owl:inverseOf gnt:belongs_to_species)
   (gnt:has_strain rdfs:label "this resource belongs to this strain.")
   (gnt:has_strain skos:definition "Lists all strains that belong to this resource."))
  (triples (string->identifier
            ""
            (remap-species-identifiers (field Species Fullname)))
    (set gnt:has_strain
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator "_"))))

;; Describes each public inbred set: type, labels, genetic type,
;; mapping method, set code and owning species.  Species and
;; MappingMethod are left-joined onto InbredSet.
(define-transformer inbred-set
  (tables (InbredSet
           (left-join Species "ON InbredSet.SpeciesId=Species.Id")
           (left-join MappingMethod "ON InbredSet.MappingMethodId=MappingMethod.Id"))
          "WHERE public > 0")
  (schema-triples
   (gnt:genetic_type a owl:DatatypeProperty)
   (gnt:genetic_type rdfs:label "has genetic type")
   (gnt:genetic_type skos:definition "Describes the genetic architecture of a resource set (e.g., intercross, riset).")
   (gnt:genetic_type rdfs:domain gnc:set)
   (gnt:genetic_type rdfs:range xsd:string)
   (gnt:has_set_code a owl:DatatypeProperty)
   (gnt:has_set_code rdfs:label "has set code")
   (gnt:has_set_code skos:definition "Provides a unique identifier code for a resource set.")
   (gnt:has_set_code rdfs:domain gnc:set)
   (gnt:has_set_code rdfs:range xsd:string)
   ;; Already defined as an owl prop in species
   ;; NOTE(review): the species transformer sets gnt:has_family on
   ;; species subjects, while this declares rdfs:domain gnc:set;
   ;; confirm the intended domain.
   (gnt:has_family rdfs:domain gnc:set)
   (gnt:uses_mapping_method a owl:ObjectProperty)
   (gnt:uses_mapping_method rdfs:label "mapping method")
   (gnt:uses_mapping_method rdfs:domain gnc:set)
   (gnt:uses_mapping_method rdfs:range gnc:mapping_method)
   (gnt:uses_mapping_method rdfs:comment "The method used to map genetic or experimental data for this resource.")
   (gnt:has_strain a owl:ObjectProperty)
   ;; rdfs:comment, matching gnt:uses_mapping_method above (rdf:comment
   ;; is not a term of the RDF vocabulary).
   (gnt:has_strain rdfs:comment "Indicates the group the resources belongs to")
   (gnt:has_strain schema:domainIncludes dcat:Dataset)
   (gnt:has_strain schema:domainIncludes gnc:species)
   (gnt:has_strain rdfs:range gnc:set)
   (gnt:has_strain rdfs:label "belongs-to-group"))
  (triples (string->identifier
            "set" (field InbredSet Name InbredSetName)
            #:separator "_")
    (set rdf:type 'gnc:set)
    (set rdfs:label (field InbredSet FullName))
    (set skos:prefLabel (field InbredSet Name InbredSetName))
    (set gnt:genetic_type (field InbredSet GeneticType))
    (set gnt:uses_mapping_method
         (string->identifier
          "mapping_method" (field MappingMethod Name)
          #:separator "_"))
    (set gnt:has_set_code (field InbredSet InbredSetCode))
    (set gnt:belongs_to_species
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))))))


;; Entry point: parse the command line, read the connection settings
;; and run every transformer defined above.
(let* ((option-spec
        ;; --settings is marked required because its value is opened as
        ;; a file just below; without it the script would otherwise
        ;; fail inside call-with-input-file with a wrong-type error
        ;; instead of a usage error.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is expected to hold a single readable datum.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Species Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix maps to an empty string in this copy
   ;; of the file; confirm the namespace IRIs were not lost.
   (prefixes
    '(("dcat:" "")
      ("gn:" "")
      ("gnc:" "")
      ("owl:" "")
      ("gnt:" "")
      ("schema:" "")
      ("skos:" "")
      ("xkos:" "")
      ("xsd:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("taxon:" "")))
   (inputs
    (list classification-scheme-species
          classification-scheme-set
          species
          species-fan-out
          inbred-set))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))