#! /usr/bin/env guile
!#

;;; Transform molecular-trait related tables into RDF triples.
;;;
;;; The script defines four transformers with `define-transformer' and then
;;; hands them to `with-documentation' together with the connection settings
;;; and output locations taken from the command line:
;;;
;;;   -s, --settings FILE        file whose first datum (read with `read')
;;;                              is used as the connection settings; required
;;;   -o, --output FILE          passed on as the #:rdf output
;;;   -d, --documentation FILE   passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;;; Declares the gnc:molecular_trait class and, for every row of Tissue,
;;; a subject built by `string->identifier' from "trait" and
;;; Tissue.Short_Name (separator "_"), typed as gnc:molecular_trait and
;;; labelled with Tissue.Name / Tissue.Short_Name.
(define-transformer gn:molecular-trait->gn:dataset
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait"
                               (field Tissue Short_Name)
                               #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))


;;; Uses the fixed subject "gnc:molecular_trait" and sets skos:member to
;;; the "trait" identifier derived from each Tissue.Short_Name.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member
         (string->identifier "trait"
                             (field Tissue Short_Name)
                             #:separator "_"))))


;;; Links a "set" identifier (built from InbredSet.Name) to "dataset"
;;; identifiers via gnt:has_probeset_data.  The dataset names come from a
;;; GROUP_CONCAT over ProbeSetFreeze.Name that is split on commas.
;;;
;;; NOTE(review): rows are grouped by Species.Name and Tissue.Short_Name,
;;; while the subject is built from InbredSet.Name, which is neither
;;; grouped nor aggregated -- confirm this is the intended grouping.
;;; NOTE(review): the declared rdfs:range is gnc:molecular_trait, but the
;;; objects emitted below are "dataset" identifiers -- confirm.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, Tissue.Short_Name")
  (schema-triples
   (gnt:has_probeset_data rdf:type owl:ObjectProperty)
   (gnt:has_probeset_data rdfs:label "this resources has this probeset data.")
   (gnt:has_probeset_data rdfs:comment "Associates a resource with this probeset data.")
   (gnt:has_probeset_data rdfs:domain gnc:set)
   (gnt:has_probeset_data rdfs:range gnc:molecular_trait)
   (gnt:has_probeset_data rdfs:subPropertyOf dct:relation))
  (triples (string->identifier "set"
                               (field InbredSet Name InbredSetName)
                               #:separator "_")
    (multiset gnt:has_probeset_data
              ;; One "dataset" identifier per comma-separated name in the
              ;; GROUP_CONCAT result.
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))


;;; For every public, non-monkey ProbeSetFreeze row (per the WHERE clause),
;;; emits a "dataset" subject with its creation time, normalization
;;; method, molecular trait (tissue) and gene chip platform.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (inner-join Datasets "ON InfoFiles.DatasetId = Datasets.DatasetId")
           ;; GeneChip is the only LEFT JOIN, so its fields may be NULL.
           (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait")
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   (gnt:uses_genechip skos:definition "The Platform this resource uses..")
   ;; NOTE(review): unlike gnt:uses_genechip and gnt:has_molecular_trait,
   ;; no owl:ObjectProperty declaration is emitted here for
   ;; gnt:uses_normalization_method -- confirm it is declared elsewhere.
   (gnt:uses_normalization_method rdfs:comment "The normalization method used for the molecular traits in this dataset")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method used for the molecular traits in this dataset.")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset"
                               (field ProbeSetFreeze Name)
                               #:separator "_")
    ;; NOTE(review): the XSD datatype is spelled xsd:dateTime (capital T);
    ;; left unchanged because the lexical form of CreateTime is not visible
    ;; here and may not be a valid xsd:dateTime -- confirm and fix.
    (set dct:created
         (annotate-field (field ProbeSetFreeze CreateTime)
                         '^^xsd:datetime))
    (set gnt:uses_normalization_method
         (string->identifier "avg_method"
                             (field AvgMethod Name AvgMethodName)
                             #:separator "_"))
    (set gnt:has_molecular_trait
         (string->identifier "trait"
                             (field Tissue Short_Name)
                             #:separator "_"))
    (set gnt:uses_genechip
         (string->identifier "platform"
                             (field GeneChip Name)
                             #:separator "_"))))


;;; Entry point: parse the command line, read the connection settings and
;;; run all transformers through `with-documentation'.
(let* ((option-spec
        ;; --settings is marked required so that getopt-long reports a
        ;; missing option itself; otherwise #f would reach
        ;; `call-with-input-file' below and fail with an opaque type error.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed with `read': only its first datum
       ;; is used.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Molecular Traits")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix maps to an empty string here -- confirm
   ;; whether the namespace IRIs are meant to be filled in.
   (prefixes
    '(("dcat:" "")
      ("gn:" "")
      ("obo:" "")
      ("owl:" "")
      ("xsd:" "")
      ("dct:" "")
      ("xkos:" "")
      ("gnt:" "")
      ("skos:" "")
      ("gnc:" "")
      ("rdf:" "")
      ("rdfs:" "")))
   (inputs
    (list gn:dataset->metadata
          gn:molecular-trait->gn:dataset
          gn:set->gn:dataset
          gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))