#! /usr/bin/env guile
!#

;;; Genotype metadata transform.
;;;
;;; Reads rows from the Geno table (left-joined with Species) and describes
;;; each one as RDF triples, together with a small set of schema triples for
;;; the terms used.
;;;
;;; Usage: pass the path of a file holding the connection settings as the
;;; first command-line argument; its first datum is read with `read'.

(use-modules (rnrs programs)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

;; Connection settings: the first datum read from the file named by the
;; first command-line argument.  Raises an error if no argument is given.
(define %connection-settings
  (call-with-input-file (list-ref (command-line) 1)
    read))

(define (remap-species-identifiers str)
  "Return the replacement species name for STR when STR is one of the
known non-standard names listed below; otherwise return STR unchanged.
This is a stop-gap: the names should really be corrected in the
database itself."
  (match str
    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
    ;; NOTE(review): this maps one species name onto a different species
    ;; name -- confirm that this is intended.
    ["Macaca mulatta" "Macaca nemestrina"]
    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
    ;; Anything else passes through untouched.
    [_ str]))

(define-transformer genotypes
  (tables (Geno
           (left-join Species "USING (SpeciesId)")))
  ;; NOTE(review): `skos:description' (used for gnc:genotype and gnt:chr) is
  ;; not a term of the SKOS vocabulary; the other entries use
  ;; `skos:definition'.  Left as-is because changing it alters the emitted
  ;; RDF -- confirm with consumers before renaming.
  (schema-triples
   (gnc:genotype a skos:Concept)
   (gnc:genotype
    skos:description
    "This is a set of controlled terms that are used to describe a given genotype")
   (gnt:chr a owl:ObjectProperty)
   (gnt:chr skos:description "This resource is located on a given chromosome")
   (gnt:chr rdfs:domain gnc:genotype)
   (gnt:mb a owl:ObjectProperty)
   (gnt:mb skos:definition "The size of this resource in Mb")
   (gnt:mb rdfs:domain gnc:genotype)
   (gnt:mbMm8 a owl:ObjectProperty)
   (gnt:mbMm8 skos:definition "TODO")
   (gnt:mbMm8 rdfs:domain gnc:genotype)
   (gnt:mb2016 a owl:ObjectProperty)
   (gnt:mb2016 skos:definition "TODO")
   (gnt:mb2016 rdfs:domain gnc:genotype)
   (gnt:hasSequence a owl:ObjectProperty)
   (gnt:hasSequence skos:definition "This resource has a given sequence")
   (gnt:hasSequence rdfs:domain gnc:genotype)
   (gnt:hasSource a owl:ObjectProperty)
   (gnt:hasSource rdfs:domain gnc:genotype)
   (gnt:hasSource
    skos:definition
    "This resource was obtained from this given source")
   (gnt:hasAltSourceName a owl:ObjectProperty)
   (gnt:hasAltSourceName rdfs:domain gnc:genotype)
   (gnt:hasAltSourceName
    skos:definition
    "The alternative name this resource was obtained from")
   (gnt:chrNum a owl:ObjectProperty)
   (gnt:chrNum rdfs:domain gnc:genotype)
   (gnt:chrNum skos:definition "The chromosome number for this resource"))
  (triples
      ;; Subject: built from Geno.Name after replacing every character other
      ;; than ASCII letters, digits and ':' with an underscore.
      (string->identifier
       ""
       (regexp-substitute/global
        #f "[^A-Za-z0-9:]"
        (field Geno Name)
        'pre "_" 'post)
       #:separator ""
       #:proc string-capitalize-first)
    (set rdf:type 'gnc:genotype)
    (set skos:prefLabel (sanitize-rdf-string (field Geno Name)))
    (set gnt:chr (field Geno Chr))
    ;; The three Mb columns are selected through IFNULL so that a NULL
    ;; position comes back as an empty string.
    (set gnt:mb (annotate-field
                 (field ("IFNULL(Geno.Mb, '')" Mb))
                 '^^xsd:double))
    (set gnt:mbMm8 (annotate-field
                    (field ("IFNULL(Geno.Mb_mm8, '')" Mb_mm8))
                    '^^xsd:double))
    (set gnt:mb2016 (annotate-field
                     (field ("IFNULL(Geno.Mb_2016, '')" Mb_2016))
                     '^^xsd:double))
    (set gnt:hasSequence (field Geno Sequence))
    (set gnt:hasSource (field Geno Source))
    ;; Only transform Source2 if it differs from Source
    (set gnt:hasAltSourceName
         (field ("IF((Source2 = Source), NULL, Source2)" Source2)))
    (set gnt:belongsToSpecies
         (string->identifier
          ""
          (remap-species-identifiers (field Species Fullname))
          #:separator ""
          #:proc string-capitalize-first))
    (set gnt:chrNum (annotate-field
                     (field Geno chr_num)
                     '^^xsd:int))
    ;; NOTE(review): the RDFS vocabulary defines `rdfs:comment' (singular).
    ;; Left as-is because changing it alters the emitted RDF -- confirm.
    (set rdfs:comments (field Geno Comments))))

;; NOTE(review): every prefix below is mapped to an empty string -- confirm
;; that the namespace IRIs were not lost from this file.
(with-documentation
 (name "Genotype Metadata")
 (connection %connection-settings)
 (table-metadata? #f)
 (prefixes
  '(("dct:" "")
    ("gn:" "")
    ("gnc:" "")
    ("gnt:" "")
    ("rdf:" "")
    ("rdfs:" "")
    ("owl:" "")
    ("skos:" "")
    ("xsd:" "")))
 (inputs
  (list genotypes))
 (outputs
  '(#:documentation "./docs/genotype.md"
    #:rdf "/export/data/genenetwork-virtuoso/genotype.ttl")))