From 51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Mon, 21 Aug 2023 14:54:21 +0300
Subject: Remove "dump-" prefix

Signed-off-by: Munyoki Kilyungi
---
 examples/genotype.scm | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100755 examples/genotype.scm

(limited to 'examples/genotype.scm')

diff --git a/examples/genotype.scm b/examples/genotype.scm
new file mode 100755
index 0000000..63b85a7
--- /dev/null
+++ b/examples/genotype.scm
@@ -0,0 +1,139 @@
+#! /usr/bin/env guile
+!#
+
+;;; Describes the Geno table, left-joined with Species, as RDF triples.
+;;; The first command-line argument names the connection settings file.
+
+(use-modules (rnrs programs)
+             (rnrs io ports)
+             (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 match)
+             (ice-9 regex)
+             (dump strings)
+             (dump sql)
+             (dump triples)
+             (dump special-forms))
+
+
+;; Connection settings: the first datum read from the file named by the
+;; first command-line argument.
+(define %connection-settings
+  (call-with-input-file (list-ref (command-line) 1)
+    read))
+
+
+(define (remap-species-identifiers str)
+  "Return the standard binomial name for the species name STR, or STR
+itself when no remapping is listed.  Obviously this should be sorted by
+correcting the database!"
+  ;; NOTE(review): "Macaca mulatta" and "Macaca nemestrina" name different
+  ;; species; confirm that this remapping is intended.
+  (match str
+    ["Fly (Drosophila melanogaster dm6)" "Drosophila melanogaster"]
+    ["Oryzias latipes (Japanese medaka)" "Oryzias latipes"]
+    ["Macaca mulatta" "Macaca nemestrina"]
+    ["Bat (Glossophaga soricina)" "Glossophaga soricina"]
+    [_ str]))
+
+;; Transformer over Geno left-joined with Species.  The schema-triples
+;; clause lists the vocabulary terms; the triples clause lists, per
+;; property, the field it is set from.
+(define-transformer genotypes
+  (tables (Geno
+           (left-join Species "USING (SpeciesId)")))
+  (schema-triples
+   (gnc:genotype a skos:Concept)
+   ;; SKOS defines no "description" property; skos:definition is used here
+   ;; and for gnt:chr, matching the other terms below.
+   (gnc:genotype
+    skos:definition
+    "This is a set of controlled terms that are used to describe a given genotype")
+   (gnt:chr a owl:ObjectProperty)
+   (gnt:chr skos:definition "This resource is located on a given chromosome")
+   (gnt:chr rdfs:domain gnc:genotype)
+   (gnt:mb a owl:ObjectProperty)
+   (gnt:mb skos:definition "The size of this resource in Mb")
+   (gnt:mb rdfs:domain gnc:genotype)
+   (gnt:mbMm8 a owl:ObjectProperty)
+   (gnt:mbMm8 skos:definition "TODO")
+   (gnt:mbMm8 rdfs:domain gnc:genotype)
+   (gnt:mb2016 a owl:ObjectProperty)
+   (gnt:mb2016 skos:definition "TODO")
+   (gnt:mb2016 rdfs:domain gnc:genotype)
+   (gnt:hasSequence a owl:ObjectProperty)
+   (gnt:hasSequence skos:definition "This resource has a given sequence")
+   (gnt:hasSequence rdfs:domain gnc:genotype)
+   (gnt:hasSource a owl:ObjectProperty)
+   (gnt:hasSource rdfs:domain gnc:genotype)
+   (gnt:hasSource skos:definition "This resource was obtained from this given source")
+   (gnt:hasAltSourceName a owl:ObjectProperty)
+   (gnt:hasAltSourceName rdfs:domain gnc:genotype)
+   (gnt:hasAltSourceName
+    skos:definition
+    "The alternative name this resource was obtained from")
+   (gnt:chrNum a owl:ObjectProperty)
+   (gnt:chrNum rdfs:domain gnc:genotype)
+   (gnt:chrNum skos:definition "The chromosome number for this resource"))
+  (triples
+      ;; Subject: Geno.Name with every character outside [A-Za-z0-9:]
+      ;; replaced by "_".
+      (string->identifier
+       ""
+       (regexp-substitute/global
+        #f "[^A-Za-z0-9:]"
+        (field Geno Name)
+        'pre "_" 'post)
+       #:separator ""
+       #:proc string-capitalize-first)
+    (set rdf:type 'gnc:genotype)
+    (set skos:prefLabel
+         (sanitize-rdf-string (field Geno Name)))
+    (set gnt:chr (field Geno Chr))
+    ;; IFNULL turns a NULL value into '' in the three Mb fields below.
+    (set gnt:mb (annotate-field
+                 (field ("IFNULL(Geno.Mb, '')" Mb)) '^^xsd:double))
+    (set gnt:mbMm8 (annotate-field (field ("IFNULL(Geno.Mb_mm8, '')" Mb_mm8))
+                                   '^^xsd:double))
+    (set gnt:mb2016
+         (annotate-field (field ("IFNULL(Geno.Mb_2016, '')" Mb_2016))
+                         '^^xsd:double))
+    (set gnt:hasSequence (field Geno Sequence))
+    (set gnt:hasSource (field Geno Source))
+    ;; Only dump Source2 if it differs from Source
+    (set gnt:hasAltSourceName
+         (field ("IF((Source2 = Source), NULL, Source2)"
+                 Source2)))
+    (set gnt:belongsToSpecies
+         (string->identifier
+          "" (remap-species-identifiers (field Species Fullname))
+          #:separator ""
+          #:proc string-capitalize-first))
+    (set gnt:chrNum
+         (annotate-field
+          (field Geno chr_num)
+          '^^xsd:int))
+    ;; rdfs:comment is the property RDF Schema defines.
+    (set rdfs:comment (field Geno Comments))))
+
+
+;; NOTE(review): every prefix IRI below is the empty string; presumably
+;; the real IRIs still need to be filled in -- confirm before use.
+(with-documentation
+ (name "Genotype Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+  '(("dct:" "")
+    ("gn:" "")
+    ("gnc:" "")
+    ("gnt:" "")
+    ("rdf:" "")
+    ("rdfs:" "")
+    ("owl:" "")
+    ("skos:" "")
+    ("xsd:" "")))
+ (inputs
+  (list genotypes))
+ (outputs
+  '(#:documentation "./docs/genotype.md"
+    #:rdf "/export/data/genenetwork-virtuoso/genotype.ttl")))
-- 
cgit v1.2.3