diff options
Diffstat (limited to 'examples/phenotype-datasets.scm')
| -rwxr-xr-x | examples/phenotype-datasets.scm | 109 |
1 files changed, 109 insertions, 0 deletions
#! /usr/bin/env guile
!#

;;; examples/phenotype-datasets.scm
;;;
;;; Declares four transformers (SQL tables -> triples) that relate
;;; sets, phenotype datasets and phenotype traits, then passes them to
;;; `with-documentation' together with the connection settings and the
;;; output locations taken from the command line.
;;;
;;; Command-line options:
;;;   -s, --settings FILE       file from which one datum is `read' and
;;;                             used as the connection settings (required)
;;;   -o, --output FILE         forwarded as the #:rdf output
;;;   -d, --documentation FILE  forwarded as the #:documentation output

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Subject: a "set_<InbredSet.Name>" identifier.
;; Predicate gnt:has_phenotype_data: one "dataset_<name>" identifier per
;; PublishFreeze.Name, obtained by splitting the GROUP_CONCAT result on
;; commas.  Only rows with PublishFreeze.public > 0 are selected, and the
;; species named 'monkey' is excluded.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
          "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_phenotype_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    ;; NOTE(review): a dataset name that itself contains a
                    ;; comma would be split in two here -- confirm names
                    ;; never contain ','.
                    (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Subject: a "dataset_<PublishFreeze.Name>" identifier.
;; Predicate gnt:has_strain: the "set_<InbredSet.Name>" identifier the
;; dataset is joined to through InfoFiles.
(define-transformer gn:dataset->gn:set
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
          "WHERE PublishFreeze.public > 0 GROUP BY Datasets.DatasetId")
  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))

;; Subject: a "dataset_<PublishFreeze.Name>" identifier.
;; Emits dct:created (PublishFreeze.CreateTime annotated as
;; ^^xsd:datetime) and gnt:has_strain, with rows grouped by species name
;; and PublishFreeze name.
(define-transformer gn:dataset->metadata
  (tables (PublishXRef
           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0 GROUP BY Species.Name, PublishFreeze.Name")
  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
    (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))

;; Subject: a "dataset_<PublishFreeze.Name>" identifier.
;; Predicate gnt:has_phenotype_trait: a "trait_<dataset>_<label>"
;; identifier, where <label> is the first usable value among the
;; post-publication abbreviation, pre-publication abbreviation,
;; post-publication description and pre-publication description.
;; Unlike `gn:dataset->metadata' the query is not grouped.
(define-transformer gn:dataset->gn:trait
  (tables (PublishXRef
           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0")
  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
    (set gnt:has_phenotype_trait
         ;; `blank-p' comes from the project modules; the `or' chain below
         ;; presumably relies on it yielding #f for a blank field --
         ;; TODO confirm against its definition.
         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc (blank-p (field Phenotype Post_publication_description)))
               ;; Previously read Post_publication_description a second
               ;; time, so this last fallback could never differ from
               ;; `post-desc'.
               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier
            "trait"
            (format #f "~a_~a" (field PublishFreeze Name)
                    (or post-abbrev pre-abbrev post-desc pre-desc))
            #:separator "_")))
    (set dct:created (annotate-field (field PublishFreeze CreateTime) '^^xsd:datetime))
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))))


;; Entry point: parse the command line, read the connection settings and
;; run every transformer defined above.
(let* ((option-spec
        ;; --settings is required: its value is handed directly to
        ;; `call-with-input-file', which cannot accept the #f default.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed with `read', i.e. it must hold a
       ;; single Scheme datum; only the first datum is consumed.
       (%connection-settings
        (call-with-input-file settings
          read)))
  (with-documentation
   (name "Phenotype Datasets")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("dcat:" "<http://www.w3.org/ns/dcat#>")
      ("dct:" "<http://purl.org/dc/terms/>")
      ("gn:" "<http://rdf.genenetwork.org/v1/id/>")
      ("gnc:" "<http://rdf.genenetwork.org/v1/category/>")
      ("gnt:" "<http://rdf.genenetwork.org/v1/term/>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
   (inputs
    (list
     gn:set->gn:dataset
     gn:dataset->gn:set
     gn:dataset->metadata
     gn:dataset->gn:trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))
