From 51b3c0548c98e0bc05e11a89cbf6b75d31b9f8d5 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Mon, 21 Aug 2023 14:54:21 +0300
Subject: Remove "dump-" prefix

Signed-off-by: Munyoki Kilyungi
---
 examples/probeset.scm | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100755 examples/probeset.scm

(limited to 'examples/probeset.scm')

diff --git a/examples/probeset.scm b/examples/probeset.scm
new file mode 100755
index 0000000..68ddb59
--- /dev/null
+++ b/examples/probeset.scm
@@ -0,0 +1,206 @@
+#! /usr/bin/env guile
+!#
+
+;;; Describes the ProbeSet table (left-joined to GeneChip) as RDF triples.
+;;;
+;;; Usage: probeset.scm CONNECTION-SETTINGS-FILE
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 match)
+             (ice-9 regex)
+             (dump strings)
+             (dump sql)
+             (dump triples)
+             (dump special-forms))
+
+
+
+;; Connection settings: the first datum `read' from the file named by the
+;; first command-line argument.
+(define %connection-settings
+  (call-with-input-file (list-ref (command-line) 1)
+    read))
+
+
+;; Transformer for the ProbeSet table.  `schema-triples' declares the
+;; vocabulary terms; `triples' builds an identifier and then has one
+;; `set' form per predicate.
+(define-transformer probeset
+  (tables (ProbeSet
+           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")))
+  (schema-triples
+   (gnc:probeset a skos:Concept)
+   (gnc:probeset
+    skos:description
+    "This is a set of controlled terms that are used to describe a given probeset")
+   (gnt:hasChip a owl:ObjectProperty)
+   (gnt:hasChip rdfs:domain gnc:probeset)
+   (gnt:hasTargetId a owl:ObjectProperty)
+   (gnt:hasTargetId rdfs:domain gnc:probeset)
+   (gnt:symbol rdfs:domain gnc:probeset)
+   (gnt:targetsRegion a owl:ObjectProperty)
+   (gnt:targetsRegion rdfs:domain gnc:probeset)
+   (gnt:chr rdfs:domain gnc:probeset)
+   (gnt:mb rdfs:domain gnc:probeset)
+   (gnt:mbMm8 rdfs:domain gnc:probeset)
+   (gnt:mb2016 rdfs:domain gnc:probeset)
+   (gnt:hasSpecificity a owl:ObjectProperty)
+   (gnt:hasSpecificity rdfs:domain gnc:probeset)
+   (gnt:hasBlatScore a owl:ObjectProperty)
+   (gnt:hasBlatScore rdfs:domain gnc:probeset)
+   (gnt:hasBlatMbStart a owl:ObjectProperty)
+   (gnt:hasBlatMbStart rdfs:domain gnc:probeset)
+   (gnt:hasBlatMbStart2016 a owl:ObjectProperty)
+   (gnt:hasBlatMbStart2016 rdfs:domain gnc:probeset)
+   (gnt:hasBlatMbEnd a owl:ObjectProperty)
+   (gnt:hasBlatMbEnd rdfs:domain gnc:probeset)
+   (gnt:hasBlatMbEnd2016 a owl:ObjectProperty)
+   (gnt:hasBlatMbEnd2016 rdfs:domain gnc:probeset)
+   (gnt:hasBlatSeq a owl:ObjectProperty)
+   (gnt:hasBlatSeq rdfs:domain gnc:probeset)
+   (gnt:hasTargetSeq a owl:ObjectProperty)
+   (gnt:hasTargetSeq rdfs:domain gnc:probeset)
+   (gnt:hasHomologeneId a owl:ObjectProperty)
+   (gnt:hasHomologeneId rdfs:domain gnc:probeset)
+   ;; gnt:hasUniprotId is emitted under `triples', so declare it here too.
+   (gnt:hasUniprotId a owl:ObjectProperty)
+   (gnt:hasUniprotId rdfs:domain gnc:probeset)
+   (gnt:hasPubChemId a owl:ObjectProperty)
+   (gnt:hasPubChemId rdfs:domain gnc:probeset)
+   (gnt:hasKeggId a owl:ObjectProperty)
+   (gnt:hasKeggId rdfs:domain gnc:probeset)
+   (gnt:hasOmimId a owl:ObjectProperty)
+   (gnt:hasOmimId rdfs:domain gnc:probeset)
+   (gnt:hasChebiId a owl:ObjectProperty)
+   (gnt:hasChebiId rdfs:domain gnc:probeset))
+  (triples
+      ;; Identifier form (presumably the subject of every `set' below):
+      ;; the trimmed ProbeSet.Name with each character outside [A-Za-z0-9:]
+      ;; replaced by "_"; a NULL or blank name falls back to ProbeSet.Id.
+      (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
+                        ProbeSetIdName)))
+            (probeset-id (field ProbeSet Id)))
+        (string->identifier
+         "probeset"
+         (if (string-null? id)
+             (number->string probeset-id)
+             (regexp-substitute/global
+              #f "[^A-Za-z0-9:]"
+              id
+              'pre "_" 'post))))
+    (set rdf:type 'gnc:probeset)
+    (set rdfs:label (field ProbeSet Name))
+    ;; Presumably rewrites CRLF separators in the alias column to "; ".
+    (set skos:altLabel
+         (replace-substrings
+          (field ProbeSet alias)
+          '(("\r\n" . "; "))))
+    (set gnt:hasChip
+         (string->identifier
+          "platform"
+          (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
+    (set gnt:hasTargetId
+         (field ("NULLIF(TRIM(ProbeSet.TargetId), '')"
+                 TargetId)))
+    (set gnt:symbol (field ProbeSet Symbol))
+    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
+    (set gnt:targetsRegion
+         (sanitize-rdf-string
+          (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
+                  Probe_set_target_region))))
+    (set gnt:chr (field ProbeSet Chr))
+    (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
+    (set gnt:mbMm8 (annotate-field (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
+                                   '^^xsd:double))
+    (set gnt:mb2016
+         (annotate-field (field ("IFNULL(ProbeSet.Mb_2016, '')" Mb_2016))
+                         '^^xsd:double))
+    (set gnt:hasSpecificity
+         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
+                 Probe_set_specificity)))
+    (set gnt:hasBlatScore
+         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
+                 Probe_set_BLAT_score)))
+    (set gnt:hasBlatMbStart
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
+                                 Probe_set_Blat_Mb_start))
+                         '^^xsd:double))
+    (set gnt:hasBlatMbStart2016
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start_2016, '')"
+                                 Probe_set_Blat_Mb_start_2016))
+                         '^^xsd:double))
+    (set gnt:hasBlatMbEnd
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
+                                 Probe_set_Blat_Mb_end))
+                         '^^xsd:double))
+    ;; The 2016 end coordinate reads the *_end_2016 column, not *_start_2016.
+    (set gnt:hasBlatMbEnd2016
+         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end_2016, '')"
+                                 Probe_set_Blat_Mb_end_2016))
+                         '^^xsd:double))
+    (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq)))
+    (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq)))
+    (set gnt:hasHomologeneId (ontology 'homologene:
+                                       (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
+                                               HomoloGeneID))))
+    (set gnt:hasUniprotId (ontology 'uniprot:
+                                    (field ("IFNULL(ProbeSet.UniProtID, '')"
+                                            UniProtID))))
+    (set gnt:hasPubChemId (ontology
+                           'pubchem:
+                           (field ("IFNULL(ProbeSet.PubChem_ID, '')"
+                                   PubChem_ID))))
+    (set gnt:hasKeggId (ontology
+                        'kegg:
+                        (field ("IFNULL(ProbeSet.KEGG_ID, '')"
+                                KEGG_ID))))
+    ;; A non-numeric OMIM value is reduced to its digits before use.
+    (set gnt:hasOmimId (ontology
+                        'omim:
+                        (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')"
+                                            OMIM))))
+                          (if (number? omim)
+                              omim
+                              (regexp-substitute/global
+                               #f "[^0-9]"
+                               omim
+                               'pre "" 'post)))))
+    (set gnt:hasChebiId (ontology
+                         'chebi:
+                         (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
+                                 ChEBI_ID))))))
+
+
+
+
+;; Ties together the connection settings, prefix table, transformer
+;; inputs and output paths.
+;; NOTE(review): every prefix IRI below is an empty string; the values
+;; look lost in extraction -- confirm them against the repository.
+(with-documentation
+ (name "ProbeSet Metadata")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+  '(("gn:" "")
+    ("probeset:" "")
+    ("gnc:" "")
+    ("gnt:" "")
+    ("rdf:" "")
+    ("kegg:" "")
+    ("pubchem:" "")
+    ("omim:" "")
+    ("rdfs:" "")
+    ("uniprot:" "")
+    ("chebi:" "")
+    ("dct:" "")
+    ("owl:" "")
+    ("homologene:" "")
+    ("xsd:" "")
+    ("skos:" "")))
+ (inputs
+  (list probeset))
+ (outputs
+  '(#:documentation "./docs/probeset.md"
+    #:rdf "./verified-data/probeset.ttl")))
-- 
cgit v1.2.3