#! /usr/bin/env guile
!#

;;; Transform the ProbeSet table (left-joined with GeneChip) into RDF.
;;;
;;; The script defines a single transformer, `probeset', and then runs it
;;; through `with-documentation' using three command-line options:
;;;
;;;   -s, --settings FILE        file whose first datum (as returned by
;;;                              `read') is used as the connection settings
;;;                              (required)
;;;   -o, --output FILE          passed on as the #:rdf output
;;;   -d, --documentation FILE   passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms))

(define-transformer probeset
  (tables (ProbeSet
           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")))
  ;; Schema-level declarations for the predicates used in the `triples'
  ;; section below.
  (schema-triples
   (gnt:hasChip a owl:ObjectProperty)
   (gnt:hasChip rdfs:domain gnc:Probeset)
   (gnt:hasTargetId a owl:ObjectProperty)
   (gnt:hasTargetId rdfs:domain gnc:Probeset)
   (gnt:symbol rdfs:domain gnc:Probeset)
   (gnt:targetsRegion a owl:ObjectProperty)
   (gnt:targetsRegion rdfs:domain gnc:Probeset)
   (gnt:chr rdfs:domain gnc:Probeset)
   (gnt:mb rdfs:domain gnc:Probeset)
   (gnt:hasSpecificity a owl:ObjectProperty)
   (gnt:hasSpecificity rdfs:domain gnc:Probeset)
   (gnt:hasBlatScore a owl:ObjectProperty)
   (gnt:hasBlatScore rdfs:domain gnc:Probeset)
   (gnt:hasBlatMbStart a owl:ObjectProperty)
   (gnt:hasBlatMbStart rdfs:domain gnc:Probeset)
   (gnt:hasBlatMbEnd a owl:ObjectProperty)
   (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset)
   (gnt:hasBlatSeq a owl:ObjectProperty)
   (gnt:hasBlatSeq rdfs:domain gnc:Probeset)
   (gnt:hasTargetSeq a owl:ObjectProperty)
   (gnt:hasTargetSeq rdfs:domain gnc:Probeset)
   (gnt:hasHomologeneId a owl:ObjectProperty)
   (gnt:hasHomologeneId rdfs:domain gnc:Probeset)
   ;; gnt:hasUniprotId is emitted in the `triples' section, so it is
   ;; declared here in the same way as the other external-ID properties.
   (gnt:hasUniprotId a owl:ObjectProperty)
   (gnt:hasUniprotId rdfs:domain gnc:Probeset)
   (gnt:hasPubChemId a owl:ObjectProperty)
   (gnt:hasPubChemId rdfs:domain gnc:Probeset)
   (gnt:hasKeggId a owl:ObjectProperty)
   (gnt:hasKeggId rdfs:domain gnc:Probeset)
   (gnt:hasOmimId a owl:ObjectProperty)
   (gnt:hasOmimId rdfs:domain gnc:Probeset)
   (gnt:hasChebiId a owl:ObjectProperty)
   (gnt:hasChebiId rdfs:domain gnc:Probeset))
  (triples
      ;; Subject identifier: the trimmed ProbeSet.Name with every character
      ;; outside [A-Za-z0-9:] replaced by "_".  The SQL expression yields ''
      ;; when the trimmed name is NULL or empty; in that case the value of
      ;; ProbeSet.Id is used instead.
      (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
                        ProbeSetIdName)))
            (probeset-id (field ProbeSet Id)))
        (string->identifier
         "probeset"
         (if (string-null? id)
             (number->string probeset-id)
             (regexp-substitute/global
              #f "[^A-Za-z0-9:]"
              id 'pre "_" 'post))))
    (set rdf:type 'gnc:Probeset)
    (set rdfs:label (field ProbeSet Name))
    ;; CRLF sequences in the alias column are turned into "; ".
    (set skos:altLabel
         (replace-substrings
          (field ProbeSet alias)
          '(("\r\n" . "; "))))
    (set gnt:hasChip
         (string->identifier
          "platform"
          (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
    (set gnt:hasTargetId
         (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" TargetId)))
    (set gnt:symbol (field ProbeSet Symbol))
    (set dct:description
         (sanitize-rdf-string
          (field ProbeSet description)))
    (set gnt:targetsRegion
         (sanitize-rdf-string
          (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
                  Probe_set_target_region))))
    (set gnt:chr (field ProbeSet Chr))
    ;; NOTE(review): `annotate-field' with '^^xsd:double presumably tags
    ;; the literal as an xsd:double -- confirm against (transform triples).
    (set gnt:mb
         (annotate-field
          (field ("IFNULL(ProbeSet.Mb, '')" Mb))
          '^^xsd:double))
    (set gnt:hasSpecificity
         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
                 Probe_set_specificity)))
    (set gnt:hasBlatScore
         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
                 Probe_set_BLAT_score)))
    (set gnt:hasBlatMbStart
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
                  Probe_set_Blat_Mb_start))
          '^^xsd:double))
    (set gnt:hasBlatMbEnd
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
                  Probe_set_Blat_Mb_end))
          '^^xsd:double))
    (set gnt:hasBlatSeq
         (sanitize-rdf-string
          (field ProbeSet BlatSeq)))
    (set gnt:hasTargetSeq
         (sanitize-rdf-string
          (field ProbeSet TargetSeq)))
    ;; External database identifiers, each passed to `ontology' together
    ;; with its prefix symbol.
    (set gnt:hasHomologeneId
         (ontology 'homologene:
                   (field ("IFNULL(ProbeSet.HomoloGeneID, '')" HomoloGeneID))))
    (set gnt:hasUniprotId
         (ontology 'uniprot:
                   (field ("IFNULL(ProbeSet.UniProtID, '')" UniProtID))))
    (set gnt:hasPubChemId
         (ontology 'pubchem:
                   (field ("IFNULL(ProbeSet.PubChem_ID, '')" PubChem_ID))))
    (set gnt:hasKeggId
         (ontology 'kegg:
                   (field ("IFNULL(ProbeSet.KEGG_ID, '')" KEGG_ID))))
    ;; A numeric OMIM value is used as is; otherwise every non-digit
    ;; character is stripped from the string.
    (set gnt:hasOmimId
         (ontology 'omim:
                   (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')" OMIM))))
                     (if (number? omim)
                         omim
                         (regexp-substitute/global
                          #f "[^0-9]"
                          omim 'pre "" 'post)))))
    (set gnt:hasChebiId
         (ontology 'chebi:
                   (field ("IFNULL(ProbeSet.ChEBI_ID, '')" ChEBI_ID))))))

;; Entry point: parse the command line, read the connection settings and
;; run the `probeset' transformer.
(let* ((option-spec
        ;; --settings is marked as required so that getopt-long reports a
        ;; missing option itself, rather than `call-with-input-file'
        ;; failing on #f further down.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed with `read', i.e. only its first
       ;; datum is used.
       (%connection-settings
        (call-with-input-file settings read)))
  (with-documentation
   (name "ProbeSet Metadata")
   (connection %connection-settings)
   (table-metadata? #f)
   ;; NOTE(review): every prefix below maps to an empty string in this
   ;; file -- confirm whether the namespace IRIs are meant to be filled in.
   (prefixes
    '(("gn:" "")
      ("probeset:" "")
      ("gnc:" "")
      ("gnt:" "")
      ("rdf:" "")
      ("kegg:" "")
      ("pubchem:" "")
      ("omim:" "")
      ("rdfs:" "")
      ("uniprot:" "")
      ("chebi:" "")
      ("dct:" "")
      ("owl:" "")
      ("homologene:" "")
      ("xsd:" "")
      ("qb:" "")
      ("sdmx-measure:" "")
      ("skos:" "")))
   (inputs
    (list probeset))
   (outputs
    `(#:documentation ,documentation
      #:rdf ,output))))