#! /usr/bin/env guile
!#

;;; Transform GeneNetwork phenotype tables into RDF triples.
;;;
;;; Each `define-transformer' form below names the SQL tables it reads
;;; (`tables') and the triples it emits per row (`triples'): the first
;;; form inside `triples' is the subject, the following `set'/`multiset'
;;; forms are predicate/object pairs.  The driver at the bottom parses
;;; the command line and hands all transformers to `with-documentation'.

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))


;; Return STR unchanged, or #f when `string-blank?' says it is blank.
;; Returning #f (rather than the blank string) lets callers chain several
;; candidates with `or' and pick the first non-blank one.
(define (blank-p str)
  (if (string-blank? str) #f str))

;; NOTE: the "first non-blank of post-abbrev, pre-abbrev, post-desc,
;; pre-desc" binding block is repeated verbatim in several transformers
;; below.  It is deliberately left inline: `field' is used inside
;; `define-transformer' and presumably must appear literally in its body
;; -- TODO confirm before factoring it into a helper.


;; Subject: a "set" identifier built from InbredSet.Name.
;; Object(s): one "dataset" identifier per comma-separated name returned
;; by the GROUP_CONCAT over PublishFreeze.Name.
(define-transformer gn:set->gn:dataset
  (tables (Species
           (inner-join InbredSet "ON InbredSet.SpeciesId = Species.Id")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id"))
          "WHERE PublishFreeze.public > 0 AND Species.Name != 'monkey' GROUP BY Species.Name, PublishFreeze.ShortName")
  (triples (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_")
    (multiset gnt:has_phenotype_data
              (map (cut string->identifier "dataset" <> #:separator "_")
                   (string-split (field ("GROUP_CONCAT(PublishFreeze.Name SEPARATOR ',')" dataset_name))
                                 #\,)))))

;; Subject: a "dataset" identifier built from PublishFreeze.Name.
;; Object: a "trait" identifier built from "<PublishFreeze.Name>_<label>",
;; where <label> is the first non-blank phenotype abbreviation/description.
(define-transformer gn:dataset->gn:trait
  (tables (PublishXRef
           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0")
  (triples (string->identifier "dataset" (field PublishFreeze Name) #:separator "_")
    (set gnt:has_phenotype_trait
         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc (blank-p (field Phenotype Post_publication_description)))
               ;; Last-resort fallback: the *pre*-publication description.
               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier "trait"
                               (format #f "~a_~a"
                                       (field PublishFreeze Name)
                                       (or post-abbrev pre-abbrev post-desc pre-desc))
                               #:separator "_")))))

;; Subject: the literal "gnc:phenotype".
;; Object: a "phenotype" identifier built from the first non-blank
;; phenotype abbreviation/description.
(define-transformer gnc:phenotype->gn:phenotype
  (tables (Phenotype))
  (triples "gnc:phenotype"
    (set skos:member
         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc (blank-p (field Phenotype Post_publication_description)))
               ;; Last-resort fallback: the *pre*-publication description.
               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier "phenotype"
                               (or post-abbrev pre-abbrev post-desc pre-desc)
                               #:separator "_")))))

;; Subject: a "phenotype" identifier (same construction as above).
;; Emits the descriptive metadata columns of the Phenotype table.
(define-transformer gn:phenotype->metadata
  (tables (Phenotype))
  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
                 (post-desc (blank-p (field Phenotype Post_publication_description)))
                 ;; Last-resort fallback: the *pre*-publication description.
                 (pre-desc (blank-p (field Phenotype Pre_publication_description))))
             (string->identifier "phenotype"
                                 (or post-abbrev pre-abbrev post-desc pre-desc)
                                 #:separator "_"))
    (set rdf:type 'gnc:phenotype)
    ;; All phenotypes have a post-publication description
    (set dct:description
         (sanitize-rdf-string
          (field Phenotype Post_publication_description)))
    ;; All phenotypes have a post-publication abbreviation
    (set gnt:abbreviation
         (sanitize-rdf-string
          (field Phenotype Post_publication_abbreviation)))
    (set gnt:has_lab_code (field Phenotype Lab_code))
    (set gnt:submitter
         (sanitize-rdf-string (field Phenotype Submitter)))
    (set dct:contributor
         (sanitize-rdf-string (field Phenotype Owner)))
    ;; NOTE(review): the object here is built exactly like the subject,
    ;; so this states the phenotype is a skos:member of itself -- confirm
    ;; that this is intended.
    (set skos:member
         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc (blank-p (field Phenotype Post_publication_description)))
               ;; Last-resort fallback: the *pre*-publication description.
               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier "phenotype"
                               (or post-abbrev pre-abbrev post-desc pre-desc)
                               #:separator "_")))))

;; Subject: a "trait" identifier built from "<PublishFreeze.Name>_<label>"
;; (same construction as the object of gn:dataset->gn:trait).
;; Emits the per-trait columns of PublishXRef plus links to the strain
;; set, the publication and the phenotype.
(define-transformer gn:trait->gn:phenotype
  (tables (PublishXRef
           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (inner-join PublishFreeze "ON PublishFreeze.InbredSetId = InbredSet.Id")
           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId"))
          "WHERE InbredSet.public > 0")
  (triples (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
                 (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
                 (post-desc (blank-p (field Phenotype Post_publication_description)))
                 ;; Last-resort fallback: the *pre*-publication description.
                 (pre-desc (blank-p (field Phenotype Pre_publication_description))))
             (string->identifier "trait"
                                 (format #f "~a_~a"
                                         (field PublishFreeze Name)
                                         (or post-abbrev pre-abbrev post-desc pre-desc))
                                 #:separator "_"))
    (set rdf:type 'gnc:phenotype_trait)
    (set gnt:has_strain
         (string->identifier "set" (field InbredSet Name InbredSetName) #:separator "_"))
    (set owl:equivalentClass
         (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)" Phenotype)))
    ;; The SQL yields '' when there is no PubMed id; in that case fall
    ;; back to an "unpublished" identifier keyed on Publication.Id.
    (set dct:references
         (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))" pmid)))
               (publication-id (field Publication Id)))
           (if (string-null? pmid)
               (string->identifier "unpublished" (number->string publication-id))
               (ontology 'pubmed: pmid))))
    (set gnt:has_phenotype
         (let ((post-abbrev (blank-p (field Phenotype Post_publication_abbreviation)))
               (pre-abbrev (blank-p (field Phenotype Pre_publication_abbreviation)))
               (post-desc (blank-p (field Phenotype Post_publication_description)))
               ;; Last-resort fallback: the *pre*-publication description.
               (pre-desc (blank-p (field Phenotype Pre_publication_description))))
           (string->identifier "phenotype"
                               (or post-abbrev pre-abbrev post-desc pre-desc)
                               #:separator "_")))
    (set gnt:mean
         (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
                         '^^xsd:double))
    ;; Every character outside [A-Za-z0-9:] in the locus is replaced by
    ;; "_" before the identifier is built.
    (set gnt:locus
         (string->identifier ""
                             (regexp-substitute/global
                              #f "[^A-Za-z0-9:]"
                              (sanitize-rdf-string (field PublishXRef Locus))
                              'pre "_" 'post)
                             #:separator ""
                             #:proc string-capitalize-first))
    ;; The SQL divides LRS by 4.604 to produce the value stored as
    ;; gnt:lod_score.
    (set gnt:lod_score
         (annotate-field (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
                         '^^xsd:double))
    (set gnt:additive
         (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
                         '^^xsd:double))
    (set gnt:sequence
         (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
    (set rdfs:comment
         (sanitize-rdf-string (field PublishXRef comments)))))


;; Driver.
;;   -s / --settings       file whose first datum is `read' and used as
;;                         the connection settings (required)
;;   -o / --output         value passed as #:rdf in `outputs'
;;   -d / --documentation  value passed as #:documentation in `outputs'
(let* ((option-spec
        ;; --settings is marked required so that omitting it produces a
        ;; getopt-long usage error instead of `call-with-input-file'
        ;; being handed #f.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Phenotypes Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix IRI below is the empty string in this
   ;; copy of the file; presumably the real IRIs were lost -- confirm
   ;; against the project's canonical prefix list.
   (prefixes
    '(("dct:" "")
      ("gn:" "")
      ("owl:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("sdmx-measure:" "")
      ("skos:" "")
      ("rdf:" "")
      ("rdfs:" "")
      ("xsd:" "")
      ("qb:" "")
      ("xkos:" "")
      ("pubmed:" "")))
   (inputs
    (list gn:set->gn:dataset
          gn:dataset->gn:trait
          gnc:phenotype->gn:phenotype
          gn:phenotype->metadata
          gn:trait->gn:phenotype))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))