#! /usr/bin/env guile
!#

;;; examples/molecular-traits-datasets.scm
;;;
;;; Provenance: added by commit 5b1c62fd359f6f12db7042d61b8af69ca3e2343a
;;; ("Rename {genotype,molecular-traits}.scm."), Munyoki Kilyungi,
;;; 2026-01-30.
;;;
;;; Declares four transformers over the Tissue / ProbeFreeze /
;;; ProbeSetFreeze tables and hands them to `with-documentation',
;;; together with the connection settings read from the file named by
;;; --settings and the destinations given by --output and
;;; --documentation.

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; One subject per Tissue row, identified from "trait" and
;; Tissue.Short_Name joined with "_".  Each subject is typed
;; gnc:molecular_trait and labelled with the tissue's Name and
;; Short_Name.  The schema triples declare the gnc:molecular_trait
;; class itself.
;; NOTE(review): the rdfs:label text says species and tissue names are
;; combined, but the identifier built here uses only Tissue.Short_Name
;; -- confirm which is intended.
(define-transformer gn:molecular-trait->gn:dataset
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait" (field Tissue Short_Name) #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))

;; Attaches every per-tissue trait identifier to the literal subject
;; "gnc:molecular_trait" through skos:member.  The identifier is built
;; exactly as in `gn:molecular-trait->gn:dataset'.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))))

;; Links a "set" identifier (from InbredSet.Name) to the "dataset"
;; identifiers of the public ProbeSetFreeze rows in its group.  The
;; dataset names arrive as one comma-separated GROUP_CONCAT column,
;; which is split on #\, and mapped to identifiers one by one.
;; Rows for the species named 'monkey' are excluded by the WHERE clause.
;; NOTE(review): the query groups by Species.Name and Tissue.Short_Name
;; while selecting InbredSet.Name, which is not in the GROUP BY --
;; confirm this grouping is intended.
;; NOTE(review): rdfs:range is declared as gnc:molecular_trait, yet the
;; objects emitted are "dataset" identifiers -- confirm.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, Tissue.Short_Name")
  (schema-triples
   (gnt:has_probeset_data rdf:type owl:ObjectProperty)
   (gnt:has_probeset_data rdfs:label "this resources has this probeset data.")
   (gnt:has_probeset_data rdfs:comment "Associates a resource with this probeset data.")
   (gnt:has_probeset_data rdfs:domain gnc:set)
   (gnt:has_probeset_data rdfs:range gnc:molecular_trait)
   (gnt:has_probeset_data rdfs:subPropertyOf dct:relation))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_probeset_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Per-dataset metadata for public ProbeSetFreeze rows (species
;; 'monkey' excluded): creation time, normalization (averaging) method,
;; the tissue-derived molecular trait, and the gene chip platform.
;; GeneChip is LEFT JOINed, so its columns may be NULL for some rows.
;; NOTE(review): the annotation symbol is spelled ^^xsd:datetime; the
;; XML Schema datatype is named dateTime (capital T).  Left unchanged
;; here -- confirm what downstream consumers expect before renaming.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (inner-join Datasets "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait")
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   (gnt:uses_genechip skos:definition "The Platform this resource uses..")
   (gnt:uses_normalization_method rdfs:comment "The normalization method used for the molecular traits in this dataset")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method used for the molecular traits in this dataset.")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
    (set dct:created (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:datetime))
    (set gnt:uses_normalization_method
         (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name) #:separator "_"))))


;; Script entry point.
;;
;; Command-line options (each takes a value):
;;   -s, --settings        file whose first datum, obtained with `read',
;;                         is used as the connection settings
;;   -o, --output          passed to `with-documentation' as #:rdf
;;   -d, --documentation   passed to `with-documentation' as
;;                         #:documentation
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (begin
          ;; `settings' defaults to #f; handing that to
          ;; `call-with-input-file' fails with an opaque wrong-type
          ;; error, so report the missing option explicitly instead.
          (unless settings
            (format (current-error-port)
                    "~a: error: --settings FILE is required~%"
                    (car (command-line)))
            (exit 1))
          (call-with-input-file settings
            read))))
  (with-documentation
   (name "Molecular Trait Datasets")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every namespace IRI below is the empty string in
   ;; the copy of the file this was restored from.  The values look
   ;; like they were lost in rendering -- confirm against the
   ;; repository and restore the real IRIs before relying on the
   ;; generated prefixes.
   (prefixes
    '(("dcat:" "")
      ("gn:" "")
      ("obo:" "")
      ("owl:" "")
      ("xsd:" "")
      ("dct:" "")
      ("xkos:" "")
      ("gnt:" "")
      ("skos:" "")
      ("gnc:" "")
      ("rdf:" "")
      ("rdfs:" "")))
   (inputs
    (list
     gn:dataset->metadata
     gn:molecular-trait->gn:dataset
     gn:set->gn:dataset
     gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))