diff options
| author | Munyoki Kilyungi | 2026-01-30 00:47:26 +0300 |
|---|---|---|
| committer | Munyoki Kilyungi | 2026-01-30 00:47:26 +0300 |
| commit | 5b1c62fd359f6f12db7042d61b8af69ca3e2343a (patch) | |
| tree | faa240bd5a5b8c9c12ff596cb563cd890b753d5f /examples/molecular-traits-datasets.scm | |
| parent | ab1e5b734d3ec6b80e571ee193251c5c96ef69e3 (diff) | |
| download | gn-transform-databases-5b1c62fd359f6f12db7042d61b8af69ca3e2343a.tar.gz | |
Rename {genotype,molecular-traits}.scm.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
Diffstat (limited to 'examples/molecular-traits-datasets.scm')
| -rwxr-xr-x | examples/molecular-traits-datasets.scm | 123 |
1 files changed, 123 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; Transform GeneNetwork molecular-trait (probeset) dataset metadata
;;; into RDF.
;;;
;;; Usage:
;;;   molecular-traits-datasets.scm -s SETTINGS -o OUTPUT -d DOCUMENTATION
;;;
;;; SETTINGS is a file holding a single readable Scheme datum with the
;;; database connection settings.  OUTPUT and DOCUMENTATION are handed
;;; to `with-documentation' as the #:rdf and #:documentation outputs.

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; One resource per row of the Tissue table.  The subject is built from
;; the "trait" prefix and Tissue.Short_Name; each resource is typed as
;; gnc:molecular_trait and labelled with the tissue's Name and
;; Short_Name.
(define-transformer gn:molecular-trait->gn:dataset
  (tables (Tissue))
  (schema-triples
   (gnc:molecular_trait a owl:Class)
   (gnc:molecular_trait a skos:Concept)
   (gnc:molecular_trait rdfs:subClassOf obo:UBERON_0000479)
   (gnc:molecular_trait rdfs:label "Molecular Trait. This describes a molecular trait of a given species. We combine the species name and the tissue name in order to differentiate the traits across different inbredset groups."))
  (triples (string->identifier "trait" (field Tissue Short_Name) #:separator "_")
    (set rdf:type 'gnc:molecular_trait)
    (set skos:prefLabel (field Tissue Name))
    (set skos:altLabel (field Tissue Short_Name))))

;; Reverse link: lists every Tissue-derived trait resource as a
;; skos:member of gnc:molecular_trait.  The subject is the literal
;; string "gnc:molecular_trait", not a per-row identifier.
(define-transformer gnc:molecular_trait->gn:molecular_trait
  (tables (Tissue))
  (triples "gnc:molecular_trait"
    (set skos:member
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))))

;; Links a set (built from InbredSet.Name) to its public probeset
;; datasets.  Dataset names are concatenated by the SQL server with
;; GROUP_CONCAT and split on #\, here, so each name becomes its own
;; gnt:has_probeset_data object.
;;
;; NOTE(review): the query groups by Species.Name and Tissue.Short_Name,
;; but the subject uses InbredSet.Name, which is neither grouped nor
;; aggregated.  Which set a grouped row reports is therefore up to the
;; SQL server -- confirm this grouping is intended.
;;
;; NOTE(review): rdfs:range is declared as gnc:molecular_trait while the
;; objects are built with the "dataset" prefix -- confirm.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join ProbeFreeze "ON ProbeFreeze.InbredSetId = InbredSet.Id")
           (inner-join ProbeSetFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, Tissue.Short_Name")
  (schema-triples
   (gnt:has_probeset_data rdf:type owl:ObjectProperty)
   (gnt:has_probeset_data rdfs:label "this resources has this probeset data.")
   (gnt:has_probeset_data rdfs:comment "Associates a resource with this probeset data.")
   (gnt:has_probeset_data rdfs:domain gnc:set)
   (gnt:has_probeset_data rdfs:range gnc:molecular_trait)
   (gnt:has_probeset_data rdfs:subPropertyOf dct:relation))
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_probeset_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(ProbeSetFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Per-dataset metadata for every public ProbeSetFreeze row (monkey
;; excluded): creation time, normalization (averaging) method, the
;; tissue-derived molecular trait, and the platform (GeneChip).
;;
;; GeneChip is LEFT-joined, so GeneChip.Name may be NULL for some rows.
;; NOTE(review): what `string->identifier' yields in that case is not
;; visible from this file -- confirm.
(define-transformer gn:dataset->metadata
  (tables (ProbeSetFreeze
           (inner-join ProbeFreeze "ON ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id")
           (inner-join InbredSet "ON InbredSet.Id = ProbeFreeze.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join Tissue "ON ProbeFreeze.TissueId = Tissue.Id")
           (inner-join AvgMethod "ON AvgMethod.AvgMethodId = ProbeSetFreeze.AvgID")
           (inner-join InfoFiles "ON InfoFiles.InfoPageName = ProbeSetFreeze.Name")
           (inner-join Datasets "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (left-join GeneChip "ON GeneChip.Id = InfoFiles.GeneChipId"))
          "WHERE ProbeSetFreeze.public > 0 AND Species.Name != 'monkey'")
  (schema-triples
   (gnt:has_molecular_trait rdf:type owl:ObjectProperty)
   (gnt:has_molecular_trait rdfs:domain gnc:set)
   (gnt:has_molecular_trait rdfs:range gnc:molecular_trait)
   (gnt:has_molecular_trait rdfs:label "has molecular trait")
   (gnt:uses_genechip a owl:ObjectProperty)
   (gnt:uses_genechip rdfs:domain dcat:Dataset)
   (gnt:uses_genechip skos:definition "The Platform this resource uses..")
   (gnt:uses_normalization_method rdfs:comment "The normalization method used for the molecular traits in this dataset")
   (gnt:uses_normalization_method rdfs:domain dcat:Dataset)
   (gnt:uses_normalization_method rdfs:label "Averaging method used for the molecular traits in this dataset.")
   (gnt:uses_normalization_method rdfs:range gnc:avg_method))
  (triples (string->identifier "dataset" (field ProbeSetFreeze Name) #:separator "_")
    ;; XML Schema datatype names are case-sensitive: the datatype is
    ;; xsd:dateTime; xsd:datetime is not defined by the XSD namespace.
    (set dct:created
         (annotate-field (field ProbeSetFreeze CreateTime) '^^xsd:dateTime))
    (set gnt:uses_normalization_method
         (string->identifier "avg_method" (field AvgMethod Name AvgMethodName) #:separator "_"))
    (set gnt:has_molecular_trait
         (string->identifier "trait" (field Tissue Short_Name) #:separator "_"))
    (set gnt:uses_genechip
         (string->identifier "platform" (field GeneChip Name) #:separator "_"))))


;; Entry point: parse the command line, read the connection settings
;; and run all four transformers.
(let* ((option-spec
        ;; --settings is read unconditionally below, so let getopt-long
        ;; reject a command line without it instead of passing #f to
        ;; `call-with-input-file'.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; NOTE(review): OUTPUT and DOCUMENTATION default to #f; whether
       ;; `with-documentation' accepts #f for them is not visible here.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is consumed with a single `read'.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Molecular Trait Datasets")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("obo:" "<http://purl.obolibrary.org/obo/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")))
   (inputs
    (list
     gn:dataset->metadata
     gn:molecular-trait->gn:dataset
     gn:set->gn:dataset
     gnc:molecular_trait->gn:molecular_trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))
