#! /usr/bin/env guile
!#

;;; Dump ProbeSet metadata as RDF.
;;;
;;; This script declares one transformer, `probeset->metadata', over the
;;; ProbeSet table (left-joined with GeneChip) and then runs it against the
;;; database described by a settings file.
;;;
;;; Command-line options:
;;;   -s, --settings FILE        file whose first datum (read with `read')
;;;                              holds the connection settings (required)
;;;   -o, --output FILE          passed on as the #:rdf output
;;;   -d, --documentation FILE   passed on as the #:documentation output

(use-modules (srfi srfi-1)
             (srfi srfi-26)
             (ice-9 getopt-long)
             (ice-9 match)
             (ice-9 regex)
             (transform strings)
             (transform sql)
             (transform triples)
             (transform special-forms)
             (web uri))

;; Transformer for ProbeSet rows that have a non-blank Name.
;;
;; The subject of each row is built from the literal "probeset" and the
;; ProbeSet.Name column.  The exact semantics of `set', `multiset', `field',
;; `ontology' and `annotate-field' come from the (transform ...) modules and
;; are not visible in this file.
(define-transformer probeset->metadata
  (tables (ProbeSet
           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))
          "WHERE ProbeSet.Name IS NOT NULL AND TRIM(ProbeSet.Name) != ''")
  (triples
      (string->identifier "probeset" (field ProbeSet Name))
    (set rdf:type 'gnc:probeset)
    (set skos:prefLabel (field ProbeSet Name))
    ;; The alias column is split on ';' and every piece is trimmed.
    ;; Presumably `multiset' emits one object per list element -- confirm
    ;; in (transform special-forms).
    (multiset skos:altLabel
              (map string-trim-both
                   (string-split
                    (sanitize-rdf-string (field ProbeSet alias))
                    #\;)))
    (set gnt:uses_genechip
         (string->identifier "platform"
                             (field GeneChip Name)
                             #:separator "_"))
    (set gnt:has_target_id
         (string-trim-both
          (sanitize-rdf-string (field ProbeSet TargetId))))
    (set gnt:symbol
         (string-trim-both (field ProbeSet Symbol)))
    (set dct:description
         (sanitize-rdf-string (field ProbeSet description)))
    (set gnt:targets_region
         (string-trim-both
          (sanitize-rdf-string
           (field ProbeSet Probe_set_target_region))))
    (set gnt:chr (field ProbeSet Chr))
    ;; Columns below are selected through IFNULL(..., '') so that a NULL
    ;; comes back from SQL as the empty string.
    (set gnt:mb
         (annotate-field
          (field ("IFNULL(ProbeSet.Mb, '')" Mb))
          '^^xsd:double))
    (set gnt:mb_mm8
         (annotate-field
          (field ("IFNULL(ProbeSet.Mb_mm8, '')" Mb_mm8))
          '^^xsd:double))
    (set gnt:has_specificity
         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
                 Probe_set_specificity)))
    (set gnt:has_blat_score
         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
                 Probe_set_BLAT_score)))
    (set gnt:has_blat_mb_start
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
                  Probe_set_Blat_Mb_start))
          '^^xsd:double))
    (set gnt:has_blat_mb_end
         (annotate-field
          (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
                  Probe_set_Blat_Mb_end))
          '^^xsd:double))
    (set gnt:has_blat_seq
         (sanitize-rdf-string (field ProbeSet BlatSeq)))
    (set gnt:has_target_seq
         (sanitize-rdf-string (field ProbeSet TargetSeq)))
    ;; External identifiers: each value is URI-encoded and passed to
    ;; `ontology' together with a prefix symbol.
    (set gnt:has_homologene_id
         (ontology 'homologene:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.HomoloGeneID, '')"
                            HomoloGeneID)))))
    (set gnt:has_uniprot_id
         (ontology 'uniprot:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.UniProtID, '')"
                            UniProtID)))))
    (set gnt:has_pub_chem_id
         (ontology 'pubchem:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.PubChem_ID, '')"
                            PubChem_ID)))))
    (set gnt:has_kegg_id
         (ontology 'kegg:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.KEGG_ID, '')"
                            KEGG_ID)))))
    (set gnt:has_omim_id
         (ontology 'omim:
                   (uri-encode
                    (let ((omim (field ("IFNULL(ProbeSet.OMIM, '')" OMIM))))
                      (if (number? omim)
                          ;; `uri-encode' only accepts strings, so a
                          ;; numeric value must be converted first.
                          (number->string omim)
                          ;; Otherwise keep only the decimal digits.
                          (regexp-substitute/global
                           #f "[^0-9]" omim 'pre "" 'post))))))
    (set gnt:has_chebi_id
         (ontology 'chebi:
                   (uri-encode
                    (field ("IFNULL(ProbeSet.ChEBI_ID, '')"
                            ChEBI_ID)))))))

;; Driver: parse the command line, read the connection settings and run the
;; transformer inside `with-documentation'.
(let* ((option-spec
        ;; --settings is mandatory: without it there is no file to read the
        ;; connection settings from, so let getopt-long report the problem
        ;; instead of failing later inside `call-with-input-file'.
        '((settings (single-char #\s) (value #t) (required? #t))
          (output (single-char #\o) (value #t))
          (documentation (single-char #\d) (value #t))))
       (options (getopt-long (command-line) option-spec))
       (settings (option-ref options 'settings #f))
       ;; NOTE(review): these two default to #f when omitted; how
       ;; `with-documentation' treats #f outputs is not visible here.
       (output (option-ref options 'output #f))
       (documentation (option-ref options 'documentation #f))
       ;; The settings file is parsed as a single Scheme datum.
       (%connection-settings
        (call-with-input-file settings read)))
  (call-with-target-database
   %connection-settings
   (lambda (db)
     (with-documentation
      (name "ProbeSet Metadata")
      (connection %connection-settings)
      (table-metadata? #f)
      (total-rows
       (assoc-ref
        (sql-find db "SELECT count(*) AS count from ProbeSet")
        "count"))
      (rows-per-chunk 1000000)
      ;; NOTE(review): every prefix is paired with an empty string here;
      ;; confirm whether the namespace IRIs are intentionally blank.
      (prefixes
       '(("gn:" "")
         ("gnc:" "")
         ("gnt:" "")
         ("rdf:" "")
         ("kegg:" "")
         ("pubchem:" "")
         ("omim:" "")
         ("rdfs:" "")
         ("uniprot:" "")
         ("chebi:" "")
         ("dct:" "")
         ("owl:" "")
         ("homologene:" "")
         ("xsd:" "")
         ("qb:" "")
         ("sdmx-measure:" "")
         ("skos:" "")))
      (inputs
       (list probeset->metadata))
      (outputs
       `(#:documentation ,documentation
         #:rdf ,output))))))