#! /usr/bin/env guile
!#

;;; Tissue metadata transformer script.
;;;
;;; Declares three transformers over the Tissue, Species, InbredSet,
;;; ProbeFreeze and ProbeSetFreeze tables and passes them to
;;; `with-documentation' together with the connection settings read from
;;; the --settings file and the --output / --documentation targets.
;;;
;;; Command-line options:
;;;   -s, --settings FILE       file whose first datum is `read' and used as
;;;                             the connection settings (required)
;;;   -o, --output FILE         passed on as the #:rdf output
;;;   -d, --documentation FILE  passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

;; One subject per Tissue row.  The subject identifier is built from the
;; prefix "tissue" and Tissue.Short_Name with "_" as separator; each
;; subject is typed gnc:molecular_trait and labelled with the tissue's
;; Name (prefLabel) and Short_Name (altLabel).
(define-transformer molecular-entities
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "tissue" (field Tissue Short_Name) #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))

;; One subject per (Species.Name, Tissue.Short_Name) group among public
;; ProbeSetFreeze rows.  The subject identifier is built from
;; "trait_<Species.Name>" and Tissue.Short_Name with "_" as separator.
(define-transformer molecular-traits
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 GROUP BY Species.Name, Tissue.Short_Name")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   ;; The range names the class declared in `molecular-entities'
   ;; (gnc:molecular_trait), which is the type of the "tissue_*" objects
   ;; this property points at below.
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait"))
  (triples (string->identifier
            (format #f "trait_~a" (field Species Name))
            (field Tissue Short_Name)
            #:separator "_")
    (set rdf:type 'gnc:molecular_entity)
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))
    (set gnt:has_species
         (string->identifier ""
                             (remap-species-identifiers
                              (field Species Fullname))))
    ;; The dataset names of a group arrive as one comma-joined string
    ;; (GROUP_CONCAT); split it and emit one object per dataset name.
    (multiset gnt:has_probeset_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))
    ;; Same identifier construction as the subjects of `molecular-entities'.
    (set gnt:has_molecular_trait
         (string->identifier "tissue"
                             (field Tissue Short_Name)
                             #:separator "_"))))

;; Links each "set_<InbredSet.Name>" subject to a "trait_*" identifier
;; built with the same expression as the subjects of `molecular-traits'.
;; Uses the same joins, filter and grouping as `molecular-traits'.
(define-transformer list-molecular-traits
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 GROUP BY Species.Name, Tissue.Short_Name")
  (schema-triples
   (gnc:molecular_entity a owl:Class)
   (gnc:molecular_entity a skos:Concept)
   (gnc:molecular_entity rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_entity rdfs:label "This points this to resource which has molecular trait."))
  (triples (string->identifier "set"
                               (field InbredSet Name InbredSetName)
                               #:separator "_")
    (set gnt:has_molecular_entity
         (string->identifier
          (format #f "trait_~a" (field Species Name))
          (field Tissue Short_Name)
          #:separator "_"))))

;; Script entry point: parse the command line, read the connection
;; settings and run the three transformers.
(let* ((option-spec
        ;; --settings is required: its value is opened unconditionally
        ;; below, so let getopt-long report a missing option up front
        ;; instead of failing later on a #f file name.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file holds a single readable datum.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Tissue Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix maps to an empty IRI string here --
   ;; confirm this is intended and not a placeholder.
   (prefixes
    '(("gn:" "")
      ("obo:" "")
      ("owl:" "")
      ("gnt:" "")
      ("skos:" "")
      ("gnc:" "")
      ("rdf:" "")
      ("rdfs:" "")))
   (inputs
    (list molecular-entities
          molecular-traits
          list-molecular-traits))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))