From 6433d66eef6dade517dc2242fc45ed514fb926d0 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Wed, 16 Oct 2024 11:31:36 +0300 Subject: Delete probeset transform. This is still a WIP; and ATM this examples folder is being tested out for a CI job, and the probeset transform is one of the longest. Will re-add it later. Signed-off-by: Munyoki Kilyungi --- examples/probeset.scm | 205 -------------------------------------------------- 1 file changed, 205 deletions(-) delete mode 100755 examples/probeset.scm (limited to 'examples') diff --git a/examples/probeset.scm b/examples/probeset.scm deleted file mode 100755 index caf81aa..0000000 --- a/examples/probeset.scm +++ /dev/null @@ -1,205 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms) - (web uri)) - - -(define-transformer probeset - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId") - (left-join Species "ON GeneChip.SpeciesId = Species.Id")) - "WHERE ProbeSet.Name IS NOT NULL") - (schema-triples - (gnc:omimLink rdfs:Class gnc:ResourceLink) - (gnc:omimLink rdfs:label "OMIM") - (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man") - (gnc:homologeneLink rdfs:Class gnc:ResourceLink) - (gnc:homologeneLink rdfs:label "HomoloGene") - (gnc:homologeneLink rdfs:comments "Find similar genes in other species") - (gnc:uniprot a owl:ObjectProperty) - (gnc:uniprot rdfs:label "UniProt") - (gnc:uniprot rdfs:comments "UniProt resource") - (gnt:hasChip a owl:ObjectProperty) - (gnt:hasChip rdfs:domain gnc:Probeset) - (gnt:hasTargetId a owl:ObjectProperty) - (gnt:hasTargetId rdfs:domain gnc:Probeset) - (gnt:geneSymbol rdfs:domain gnc:Probeset) - (gnt:location rdfs:domain gnc:ProbeSet) - (gnt:location a owl:ObjectProperty) - (gnt:strandPosition rdfs:domain gnc:ProbeSet) - (gnt:strandPosition a owl:ObjectProperty) - (gnt:targetsRegion a owl:ObjectProperty) - (gnt:targetsRegion rdfs:domain gnc:Probeset) - (gnt:chr rdfs:domain gnc:Probeset) - (gnt:mb rdfs:domain gnc:Probeset) - (gnt:hasSpecificity a owl:ObjectProperty) - (gnt:hasSpecificity rdfs:domain gnc:Probeset) - (gnt:hasBlatScore a owl:ObjectProperty) - (gnt:hasBlatScore rdfs:domain gnc:Probeset) - (gnt:hasBlatMbStart a owl:ObjectProperty) - (gnt:hasBlatMbStart rdfs:domain gnc:Probeset) - (gnt:hasBlatMbEnd a owl:ObjectProperty) - (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset) - (gnt:hasBlatSeq a owl:ObjectProperty) - (gnt:hasBlatSeq rdfs:domain gnc:Probeset) - (gnt:hasTargetSeq a owl:ObjectProperty) - (gnt:hasTargetSeq rdfs:domain gnc:Probeset)) - (triples - (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))" - ProbeSetIdName))) - (probeset-id (field ProbeSet Id))) - (string->identifier - "probeset" - (if (string-null? id) - (number->string probeset-id) - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - id - 'pre "_" 'post)))) - (set rdf:type 'gnc:Probeset) - (set rdfs:label (field ProbeSet Name)) - (set skos:altLabel - (replace-substrings - (field ProbeSet alias) - '(("\r\n" . "; ")))) - (set gnt:hasChip - (string->identifier - "platform" - (field ("IFNULL(GeneChip.Name, '')" GeneChipName)))) - (set gnt:hasTargetId - (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" - TargetId))) - (multiset gnt:geneSymbol - (map string-trim (string-split - (field ProbeSet Symbol) - #\,))) - (set dct:description (sanitize-rdf-string (field ProbeSet description))) - (set gnt:targetsRegion - (sanitize-rdf-string - (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')" - Probe_set_target_region)))) - (set gnt:chr (field ProbeSet Chr)) - (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:location - (let* ((mb (field ProbeSet Mb)) - (chr (field ProbeSet Chr)) - (strand-probe (field ProbeSet Strand_Probe)) - (location (list chr mb))) - (match location - (("Un" mb) - (format #f "Not available")) - ((chr "") - (if (string-blank? chr) - (format #f "Not available") - (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? "-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))) - (_ - (format #f "Chr ~a @ ~a Mb ~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? "-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))))) - (set gnt:hasGeneId - (field ProbeSet GeneId)) - ;; OMIM Link - (set dct:references - (let ((omim (field ProbeSet OMIM))) - (if (not (string-blank? omim)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/omim/" - (uri-encode omim) - "a gnc:omimLink")) - ""))) - ;; Homologene Link - (set dct:references - (let ((homologene (field ProbeSet HomoloGeneID))) - (if (not (string-blank? homologene)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/homologene/?term=" - (uri-encode homologene) - "a gnc:homologeneLink")) - ""))) - (set gnt:uniprot - (ontology 'uniprot: (field ProbeSet UniProtID))) - (set gnt:strandProbe - (field ProbeSet Strand_Probe)) - (set gnt:hasSpecificity - (field ("IFNULL(ProbeSet.Probe_set_specificity, '')" - Probe_set_specificity))) - (set gnt:hasBlatScore - (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')" - Probe_set_BLAT_score))) - (set gnt:hasBlatMbStart - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')" - Probe_set_Blat_Mb_start)) - '^^xsd:double)) - (set gnt:hasBlatMbEnd - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')" - Probe_set_Blat_Mb_end)) - '^^xsd:double)) - (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq))) - (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))))) - - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - (with-documentation - (name "ProbeSet Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("gn:" "") - ("probeset:" "") - ("gnc:" "") - ("gene:" "") - ("gnt:" "") - ("rdf:" "") - ("rdfs:" "") - ("dct:" "") - ("uniprot:" "") - ("owl:" "") - ("xsd:" "") - ("qb:" "") - ("sdmx-measure:" "") - ("skos:" ""))) - (inputs - (list probeset)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) -- cgit v1.2.3