#! /usr/bin/env guile
!#

;;; Phenotype dataset transformers.
;;;
;;; Defines four transformers over the Species / InbredSet / PublishFreeze /
;;; PublishXRef / Publication / Phenotype tables and hands them to
;;; `with-documentation' together with the connection settings and the
;;; output locations given on the command line.
;;;
;;; Command-line options (all take a value):
;;;   -s, --settings       file whose first datum is `read' and used as the
;;;                        connection settings
;;;   -o, --output         passed on as the #:rdf output
;;;   -d, --documentation  passed on as the #:documentation output

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Subject: a "set" identifier built from InbredSet.Name.
;; Object(s) of gnt:has_phenotype_data: one "dataset" identifier per
;; comma-separated entry of the GROUP_CONCAT'ed PublishFreeze.Name column.
;; Only rows with PublishFreeze.public > 0 are selected and the species
;; named 'monkey' is excluded.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
          "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
  (triples (string->identifier "set"
                               (field InbredSet Name InbredSetName)
                               #:separator "_")
    (multiset gnt:has_phenotype_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split
                    (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')"
                            dataset_name))
                    #\,)))))

;; Subject: a "dataset" identifier built from PublishFreeze.Name.
;; gnt:has_strain points at the "set" identifier built from InbredSet.Name.
;; Only rows with PublishFreeze.public > 0 are selected.
(define-transformer gn:dataset->gn:set
  (tables (Datasets
           (inner-join InfoFiles "ON InfoFiles.DatasetId = Datasets.DatasetId")
           (inner-join InbredSet "ON InbredSet.Id = InfoFiles.InbredSetId")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
          "WHERE PublishFreeze.public > 0 GROUP BY Datasets.DatasetId")
  (triples (string->identifier "dataset"
                               (field PublishFreeze Name)
                               #:separator "_")
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))))

;; Subject: a "dataset" identifier built from PublishFreeze.Name.
;; Emits dct:created (PublishFreeze.CreateTime annotated with
;; ^^xsd:datetime) and gnt:has_strain (the matching "set" identifier).
;; Only rows with InbredSet.public > 0 are selected, grouped by species
;; name and PublishFreeze name.
(define-transformer gn:dataset->metadata
  (tables (PublishXRef
           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0 GROUP BY Species.Name, PublishFreeze.Name")
  (triples (string->identifier "dataset"
                               (field PublishFreeze Name)
                               #:separator "_")
    (set dct:created
         (annotate-field (field PublishFreeze CreateTime)
                         '^^xsd:datetime))
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))))

;; Subject: a "dataset" identifier built from PublishFreeze.Name.
;; gnt:has_phenotype_trait points at a "trait" identifier built from
;; "<PublishFreeze.Name>_<label>", where <label> is the first true value
;; among: post-publication abbreviation, pre-publication abbreviation,
;; post-publication description, pre-publication description.
;; Also emits dct:created and gnt:has_strain as in gn:dataset->metadata.
(define-transformer gn:dataset->gn:trait
  (tables (PublishXRef
           (inner-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join Species "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (inner-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (inner-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0")
  (triples (string->identifier "dataset"
                               (field PublishFreeze Name)
                               #:separator "_")
    (set gnt:has_phenotype_trait
         ;; NOTE(review): `blank-p' comes from the project's transform
         ;; modules; the `or' below assumes it yields #f for blank
         ;; values -- confirm against its definition.
         (let ((post-abbrev
                (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev
                (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc
                (blank-p (field Phenotype Post_publication_description)))
               ;; Reads the *pre*-publication description.  This binding
               ;; previously re-read Post_publication_description, which
               ;; made the final fallback a duplicate of `post-desc'.
               (pre-desc
                (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier
            "trait"
            (format #f "~a_~a"
                    (field PublishFreeze Name)
                    (or post-abbrev pre-abbrev post-desc pre-desc))
            #:separator "_")))
    (set dct:created
         (annotate-field (field PublishFreeze CreateTime)
                         '^^xsd:datetime))
    (set gnt:has_strain
         (string->identifier "set"
                             (field InbredSet Name InbredSetName)
                             #:separator "_"))))


;; Script entry point: parse the command line, read the connection
;; settings from the --settings file and run all four transformers.
(let* ((option-spec
        '((settings (single-char #\s) (value #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       ;; Each option defaults to #f when it is not supplied.
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is expected to hold a single readable datum.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Phenotype Datasets")
   (connection %connection-settings)
   (table-metadata? #f)
   (prefixes
    '(("dcat:" "")
      ("dct:" "")
      ("gn:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("owl:" "")
      ("skos:" "")
      ("xkos:" "")
      ("xsd:" "")))
   (inputs
    (list gn:set->gn:dataset
          gn:dataset->gn:set
          gn:dataset->metadata
          gn:dataset->gn:trait))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))