#! /usr/bin/env guile
!#

;;; Phenotype metadata transformer.
;;;
;;; Defines the `phenotypes' transformer, which reads PublishXRef
;;; (left-joined with InbredSet, Publication and Phenotype) and emits
;;; one gnc:Phenotype resource per row together with the schema triples
;;; describing the gnt: properties used.  The script entry point at the
;;; bottom parses the command line and hands the transformer to
;;; `with-documentation'.
;;;
;;; Command-line options:
;;;   -s, --settings FILE        file holding the connection settings as
;;;                              a single readable datum (required)
;;;   -o, --output FILE          passed through as the #:rdf output
;;;   -d, --documentation FILE   passed through as the #:documentation
;;;                              output

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define-transformer phenotypes
  (tables (PublishXRef
           (left-join InbredSet "ON InbredSet.InbredSetId = PublishXRef.InbredSetId")
           (left-join Publication "ON Publication.Id = PublishXRef.PublicationId")
           (left-join Phenotype "ON Phenotype.Id = PublishXRef.PhenotypeId")))
  (schema-triples
   (gnt:traitId a owl:ObjectProperty)
   (gnt:traitId rdfs:domain gnc:Phenotype)
   (gnt:traitId skos:definition "This is the unique trait id assigned from GeneNetwork")
   (gnt:abbreviation a owl:ObjectProperty)
   (gnt:abbreviation rdfs:domain gnc:Phenotype)
   (gnt:abbreviation skos:definition "The abbreviation used for this resource")
   (gnt:labCode a owl:ObjectProperty)
   (gnt:labCode rdfs:domain gnc:Phenotype)
   (gnt:submitter a owl:ObjectProperty)
   (gnt:submitter rdfs:domain gnc:Phenotype)
   (gnt:submitter skos:definition "A person who submitted this resource to GN")
   (gnt:mean a rdf:Property)
   (gnt:mean a qb:MeasureProperty)
   (gnt:mean rdfs:subPropertyOf sdmx-measure:obsValue)
   (gnt:mean rdfs:domain gnc:Phenotype)
   (gnt:mean rdfs:range xsd:double)
   (gnt:lodScore a rdf:Property)
   (gnt:lodScore a qb:MeasureProperty)
   (gnt:lodScore rdfs:subPropertyOf sdmx-measure:obsValue)
   (gnt:lodScore rdfs:domain gnc:Phenotype)
   (gnt:lodScore rdfs:range xsd:double)
   (gnt:lodScore rdfs:label "Peak -logP")
   (gnt:lodScore skos:definition "Statistical measurement assessing the likelihood of genetic linkage between traits or genetic markers.")
   (gnt:locus a rdf:Property)
   (gnt:locus a qb:MeasureProperty)
   (gnt:locus rdfs:subPropertyOf sdmx-measure:obsValue)
   (gnt:locus rdfs:domain gnc:Phenotype)
   ;; NOTE(review): the range here is rdfs:Literal, yet the gnt:locus
   ;; value below is built with `string->identifier' -- confirm which
   ;; of the two is intended.
   (gnt:locus rdfs:range rdfs:Literal)
   (gnt:additive rdfs:domain gnc:Phenotype)
   (gnt:additive rdfs:range xsd:double)
   (gnt:sequence rdfs:domain gnc:Phenotype)
   (gnt:sequence rdfs:range xsd:integer))
  ;; Subject of every triple: "<group code>_<PublishXRef.Id>", where the
  ;; group code falls back to the numeric InbredSetId when
  ;; InbredSet.InbredSetCode is NULL.
  (triples (string->identifier
            "trait"
            (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
                    Phenotype)))
    (set rdf:type 'gnc:Phenotype)
    (set gnt:belongsToGroup
         (string->identifier
          "set" (field InbredSet Name InbredSetName)
          #:separator ""
          #:proc string-capitalize-first))
    ;; This is the trait's name
    (set gnt:traitId
         ;; The id may arrive as a number or as a string; always emit
         ;; it as a string.
         (let ((trait-id (field PublishXRef Id)))
           (if (number? trait-id)
               (number->string trait-id)
               trait-id)))
    ;; Same "<group code>_<id>" expression as the subject above.
    (set skos:altLabel
         (field ("CONCAT(IFNULL(InbredSet.InbredSetCode, PublishXRef.InbredSetId), '_', PublishXRef.Id)"
                 Phenotype)))
    ;; All phenotypes have a post-publication description
    (set dct:description
         (sanitize-rdf-string
          (field Phenotype Post_publication_description)))
    ;; All phenotypes have a post-publication abbreviation
    (set gnt:abbreviation (field Phenotype Post_publication_abbreviation))
    (set gnt:labCode (field Phenotype Lab_code))
    (set gnt:submitter
         (sanitize-rdf-string (field Phenotype Submitter)))
    (set dct:contributor
         (sanitize-rdf-string (field Phenotype Owner)))
    ;; For the numeric columns below a SQL NULL is turned into '' before
    ;; it reaches `annotate-field'.
    (set gnt:mean
         (annotate-field (field ("IFNULL(PublishXRef.mean, '')" mean))
                         '^^xsd:double))
    ;; Every character of the locus name outside [A-Za-z0-9:] is
    ;; replaced with "_" before the identifier is built.
    (set gnt:locus
         (string->identifier
          ""
          (regexp-substitute/global
           #f "[^A-Za-z0-9:]"
           (sanitize-rdf-string (field PublishXRef Locus))
           'pre "_" 'post)
          #:separator ""
          #:proc string-capitalize-first))
    ;; The stored LRS is divided by 4.604 -- presumably the usual
    ;; LRS -> LOD conversion (2 * ln 10 ~= 4.605); confirm the constant.
    (set gnt:lodScore
         (annotate-field (field ("IFNULL((PublishXRef.LRS/4.604), '')" lrs))
                         '^^xsd:double))
    (set gnt:additive
         (annotate-field (field ("IFNULL(PublishXRef.additive, '')" additive))
                         '^^xsd:double))
    (set gnt:sequence
         (annotate-field (field PublishXRef Sequence) '^^xsd:integer))
    ;; Rows whose publication has a PubMed id point at the pubmed:
    ;; resource; the others point at an "unpublished" identifier derived
    ;; from the Publication row id.
    (set dct:isReferencedBy
         (let ((pmid (field ("IF(Publication.PubMed_ID IS NULL, '', CONVERT(Publication.PubMed_Id, INT))"
                             pmid)))
               (publication-id (field Publication Id PublicationId)))
           (if (string-null? pmid)
               (string->identifier "unpublished"
                                   (number->string publication-id))
               (ontology 'pubmed: pmid))))))

;; Script entry point: parse the command line, load the connection
;; settings and run the transformer.
(let* ((option-spec
        ;; --settings is required: without it `option-ref' would yield
        ;; #f and `call-with-input-file' would fail with an opaque
        ;; wrong-type error.  With `required?' getopt-long reports the
        ;; missing option itself.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is read as a single datum.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "Phenotypes Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; Namespace IRIs for every prefix used above.  They were empty
   ;; strings, which cannot name a namespace.
   ;; NOTE(review): the genenetwork.org (gn:, gnc:, gnt:) and pubmed:
   ;; IRIs, and the convention of including the angle brackets in the
   ;; value, should be confirmed against the project's other
   ;; transformers.
   (prefixes
    '(("dct:" "<http://purl.org/dc/terms/>")
      ("gn:" "<http://genenetwork.org/id/>")
      ("owl:" "<http://www.w3.org/2002/07/owl#>")
      ("gnc:" "<http://genenetwork.org/category/>")
      ("gnt:" "<http://genenetwork.org/term/>")
      ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")
      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
      ("qb:" "<http://purl.org/linked-data/cube#>")
      ("xkos:" "<http://rdf-vocabulary.ddialliance.org/xkos#>")
      ("pubmed:" "<http://rdf.ncbi.nlm.nih.gov/pubmed/>")))
   (inputs (list phenotypes))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))