aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2024-10-16 11:31:36 +0300
committerMunyoki Kilyungi2024-10-16 12:26:32 +0300
commit6433d66eef6dade517dc2242fc45ed514fb926d0 (patch)
treeb6a8338be9ac8f573c5d0f66d5e5ce31fa821ad2
parent5b9548f3acad39b379f8429df7e85e3023e029ef (diff)
downloadgn-transform-databases-6433d66eef6dade517dc2242fc45ed514fb926d0.tar.gz
Delete probeset transform.
This is still a WIP. At the moment, the examples folder is being tested out for a CI job, and the probeset transform is one of the longest. It will be re-added later. Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-xexamples/probeset.scm205
1 files changed, 0 insertions, 205 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
deleted file mode 100755
index caf81aa..0000000
--- a/examples/probeset.scm
+++ /dev/null
@@ -1,205 +0,0 @@
-#! /usr/bin/env guile
-!#
-
-(use-modules (srfi srfi-1)
- (srfi srfi-26)
- (ice-9 format)
- (ice-9 getopt-long)
- (ice-9 match)
- (ice-9 regex)
- (transform strings)
- (transform sql)
- (transform triples)
- (transform special-forms)
- (web uri))
-
-
-(define-transformer probeset
- (tables (ProbeSet
- (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")
- (left-join Species "ON GeneChip.SpeciesId = Species.Id"))
- "WHERE ProbeSet.Name IS NOT NULL")
- (schema-triples
- (gnc:omimLink rdfs:Class gnc:ResourceLink)
- (gnc:omimLink rdfs:label "OMIM")
- (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man")
- (gnc:homologeneLink rdfs:Class gnc:ResourceLink)
- (gnc:homologeneLink rdfs:label "HomoloGene")
- (gnc:homologeneLink rdfs:comments "Find similar genes in other species")
- (gnc:uniprot a owl:ObjectProperty)
- (gnc:uniprot rdfs:label "UniProt")
- (gnc:uniprot rdfs:comments "UniProt resource")
- (gnt:hasChip a owl:ObjectProperty)
- (gnt:hasChip rdfs:domain gnc:Probeset)
- (gnt:hasTargetId a owl:ObjectProperty)
- (gnt:hasTargetId rdfs:domain gnc:Probeset)
- (gnt:geneSymbol rdfs:domain gnc:Probeset)
- (gnt:location rdfs:domain gnc:ProbeSet)
- (gnt:location a owl:ObjectProperty)
- (gnt:strandPosition rdfs:domain gnc:ProbeSet)
- (gnt:strandPosition a owl:ObjectProperty)
- (gnt:targetsRegion a owl:ObjectProperty)
- (gnt:targetsRegion rdfs:domain gnc:Probeset)
- (gnt:chr rdfs:domain gnc:Probeset)
- (gnt:mb rdfs:domain gnc:Probeset)
- (gnt:hasSpecificity a owl:ObjectProperty)
- (gnt:hasSpecificity rdfs:domain gnc:Probeset)
- (gnt:hasBlatScore a owl:ObjectProperty)
- (gnt:hasBlatScore rdfs:domain gnc:Probeset)
- (gnt:hasBlatMbStart a owl:ObjectProperty)
- (gnt:hasBlatMbStart rdfs:domain gnc:Probeset)
- (gnt:hasBlatMbEnd a owl:ObjectProperty)
- (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset)
- (gnt:hasBlatSeq a owl:ObjectProperty)
- (gnt:hasBlatSeq rdfs:domain gnc:Probeset)
- (gnt:hasTargetSeq a owl:ObjectProperty)
- (gnt:hasTargetSeq rdfs:domain gnc:Probeset))
- (triples
- (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
- ProbeSetIdName)))
- (probeset-id (field ProbeSet Id)))
- (string->identifier
- "probeset"
- (if (string-null? id)
- (number->string probeset-id)
- (regexp-substitute/global
- #f "[^A-Za-z0-9:]"
- id
- 'pre "_" 'post))))
- (set rdf:type 'gnc:Probeset)
- (set rdfs:label (field ProbeSet Name))
- (set skos:altLabel
- (replace-substrings
- (field ProbeSet alias)
- '(("\r\n" . "; "))))
- (set gnt:hasChip
- (string->identifier
- "platform"
- (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
- (set gnt:hasTargetId
- (field ("NULLIF(TRIM(ProbeSet.TargetId), '')"
- TargetId)))
- (multiset gnt:geneSymbol
- (map string-trim (string-split
- (field ProbeSet Symbol)
- #\,)))
- (set dct:description (sanitize-rdf-string (field ProbeSet description)))
- (set gnt:targetsRegion
- (sanitize-rdf-string
- (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
- Probe_set_target_region))))
- (set gnt:chr (field ProbeSet Chr))
- (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
- (set gnt:location
- (let* ((mb (field ProbeSet Mb))
- (chr (field ProbeSet Chr))
- (strand-probe (field ProbeSet Strand_Probe))
- (location (list chr mb)))
- (match location
- (("Un" mb)
- (format #f "Not available"))
- ((chr "")
- (if (string-blank? chr)
- (format #f "Not available")
- (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]"
- chr mb
- (and (string? strand-probe) (or (string=? "+" strand-probe)
- (string=? "-" strand-probe)))
- (cond ((string=? "+" strand-probe)
- "on the plus strand")
- ((string=? "-" strand-probe)
- "on the minus strand")
- (else "")))))
- (_
- (format #f "Chr ~a @ ~a Mb ~:[~;~a~]"
- chr mb
- (and (string? strand-probe) (or (string=? "+" strand-probe)
- (string=? "-" strand-probe)))
- (cond ((string=? "+" strand-probe)
- "on the plus strand")
- ((string=? "-" strand-probe)
- "on the minus strand")
- (else "")))))))
- (set gnt:hasGeneId
- (field ProbeSet GeneId))
- ;; OMIM Link
- (set dct:references
- (let ((omim (field ProbeSet OMIM)))
- (if (not (string-blank? omim))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.ncbi.nlm.nih.gov/omim/"
- (uri-encode omim)
- "a gnc:omimLink"))
- "")))
- ;; Homologene Link
- (set dct:references
- (let ((homologene (field ProbeSet HomoloGeneID)))
- (if (not (string-blank? homologene))
- (string->symbol
- (format #f
- "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a"
- "http://www.ncbi.nlm.nih.gov/homologene/?term="
- (uri-encode homologene)
- "a gnc:homologeneLink"))
- "")))
- (set gnt:uniprot
- (ontology 'uniprot: (field ProbeSet UniProtID)))
- (set gnt:strandProbe
- (field ProbeSet Strand_Probe))
- (set gnt:hasSpecificity
- (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
- Probe_set_specificity)))
- (set gnt:hasBlatScore
- (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
- Probe_set_BLAT_score)))
- (set gnt:hasBlatMbStart
- (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
- Probe_set_Blat_Mb_start))
- '^^xsd:double))
- (set gnt:hasBlatMbEnd
- (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
- Probe_set_Blat_Mb_end))
- '^^xsd:double))
- (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq)))
- (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq)))))
-
-
-
-
-(let* ((option-spec
- '((settings (single-char #\s) (value #t))
- (output (single-char #\o) (value #t))
- (documentation (single-char #\d) (value #t))))
- (options (getopt-long (command-line) option-spec))
- (settings (option-ref options 'settings #f))
- (output (option-ref options 'output #f))
- (documentation (option-ref options 'documentation #f))
- (%connection-settings
- (call-with-input-file settings
- read)))
- (with-documentation
- (name "ProbeSet Metadata")
- (connection %connection-settings)
- (table-metadata? #f)
- (prefixes
- '(("gn:" "<http://genenetwork.org/id/>")
- ("probeset:" "<http://genenetwork.org/probeset/>")
- ("gnc:" "<http://genenetwork.org/category/>")
- ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
- ("gnt:" "<http://genenetwork.org/term/>")
- ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
- ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
- ("dct:" "<http://purl.org/dc/terms/>")
- ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
- ("owl:" "<http://www.w3.org/2002/07/owl#>")
- ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
- ("qb:" "<http://purl.org/linked-data/cube#>")
- ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
- ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
- (inputs
- (list probeset))
- (outputs
- `(#:documentation ,documentation
- #:rdf ,output))))