about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMunyoki Kilyungi2024-10-16 11:31:36 +0300
committerMunyoki Kilyungi2024-10-16 12:26:32 +0300
commit6433d66eef6dade517dc2242fc45ed514fb926d0 (patch)
treeb6a8338be9ac8f573c5d0f66d5e5ce31fa821ad2
parent5b9548f3acad39b379f8429df7e85e3023e029ef (diff)
downloadgn-transform-databases-6433d66eef6dade517dc2242fc45ed514fb926d0.tar.gz
Delete probeset transform.
This is still a work in progress. At the moment this examples folder is
being tested out for a CI job, and the probeset transform is one of the
longest, so it is removed for now.  Will re-add it later.

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-xexamples/probeset.scm205
1 files changed, 0 insertions, 205 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm
deleted file mode 100755
index caf81aa..0000000
--- a/examples/probeset.scm
+++ /dev/null
@@ -1,205 +0,0 @@
-#! /usr/bin/env guile
-!#
-
-(use-modules (srfi srfi-1)
-             (srfi srfi-26)
-             (ice-9 format)
-             (ice-9 getopt-long)
-             (ice-9 match)
-             (ice-9 regex)
-             (transform strings)
-             (transform sql)
-             (transform triples)
-             (transform special-forms)
-             (web uri))
-;; Transform each named ProbeSet row (left-joined to GeneChip and Species)
-;; into RDF triples; schema-triples below declares the vocabulary it uses.
-(define-transformer probeset
-  (tables (ProbeSet
-           (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId")
-           (left-join Species "ON GeneChip.SpeciesId = Species.Id"))
-          "WHERE ProbeSet.Name IS NOT NULL")
-  (schema-triples
-   (gnc:omimLink rdfs:Class gnc:ResourceLink) ; NOTE(review): rdfs:Class is a class, not a predicate; rdfs:subClassOf may be meant (same for homologeneLink) -- confirm
-   (gnc:omimLink rdfs:label "OMIM")
-   (gnc:omimLink rdfs:comment "Summary from Online Mendelian Inheritance in Man")
-   (gnc:homologeneLink rdfs:Class gnc:ResourceLink)
-   (gnc:homologeneLink rdfs:label "HomoloGene")
-   (gnc:homologeneLink rdfs:comment "Find similar genes in other species")
-   (gnc:uniprot a owl:ObjectProperty)
-   (gnc:uniprot rdfs:label "UniProt")
-   (gnc:uniprot rdfs:comment "UniProt resource")
-   (gnt:hasChip a owl:ObjectProperty)
-   (gnt:hasChip rdfs:domain gnc:Probeset)
-   (gnt:hasTargetId a owl:ObjectProperty)
-   (gnt:hasTargetId rdfs:domain gnc:Probeset)
-   (gnt:geneSymbol rdfs:domain gnc:Probeset)
-   (gnt:location rdfs:domain gnc:Probeset) ; same class name as the rdf:type set in the triples section
-   (gnt:location a owl:ObjectProperty)
-   (gnt:strandPosition rdfs:domain gnc:Probeset)
-   (gnt:strandPosition a owl:ObjectProperty)
-   (gnt:targetsRegion a owl:ObjectProperty)
-   (gnt:targetsRegion rdfs:domain gnc:Probeset)
-   (gnt:chr rdfs:domain gnc:Probeset)
-   (gnt:mb rdfs:domain gnc:Probeset)
-   (gnt:hasSpecificity a owl:ObjectProperty)
-   (gnt:hasSpecificity rdfs:domain gnc:Probeset)
-   (gnt:hasBlatScore a owl:ObjectProperty)
-   (gnt:hasBlatScore rdfs:domain gnc:Probeset)
-   (gnt:hasBlatMbStart a owl:ObjectProperty)
-   (gnt:hasBlatMbStart rdfs:domain gnc:Probeset)
-   (gnt:hasBlatMbEnd a owl:ObjectProperty)
-   (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset)
-   (gnt:hasBlatSeq a owl:ObjectProperty)
-   (gnt:hasBlatSeq rdfs:domain gnc:Probeset)
-   (gnt:hasTargetSeq a owl:ObjectProperty)
-   (gnt:hasTargetSeq rdfs:domain gnc:Probeset))
-  (triples
-      (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
-                        ProbeSetIdName)))
-            (probeset-id (field ProbeSet Id)))
-        (string->identifier
-         "probeset"
-         (if (string-null? id) ; empty trimmed name: fall back to the row Id
-             (number->string probeset-id)
-             (regexp-substitute/global
-              #f "[^A-Za-z0-9:]" ; every character outside this set becomes "_"
-              id
-              'pre "_" 'post))))
-    (set rdf:type 'gnc:Probeset)
-    (set rdfs:label (field ProbeSet Name))
-    (set skos:altLabel
-         (replace-substrings
-          (field ProbeSet alias)
-          '(("\r\n" . "; "))))
-    (set gnt:hasChip
-         (string->identifier
-          "platform"
-          (field ("IFNULL(GeneChip.Name, '')" GeneChipName))))
-    (set gnt:hasTargetId
-         (field ("NULLIF(TRIM(ProbeSet.TargetId), '')"
-                 TargetId)))
-    (multiset gnt:geneSymbol ; one value per comma-separated symbol, trimmed
-              (map string-trim (string-split
-                                (field ProbeSet Symbol)
-                                #\,)))
-    (set dct:description (sanitize-rdf-string (field ProbeSet description)))
-    (set gnt:targetsRegion
-         (sanitize-rdf-string
-          (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')"
-                  Probe_set_target_region))))
-    (set gnt:chr (field ProbeSet Chr))
-    (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double))
-    (set gnt:location
-         (let* ((mb (field ProbeSet Mb))
-                (chr (field ProbeSet Chr))
-                (strand-probe (field ProbeSet Strand_Probe))
-                ;; Work out the strand wording once. string=? is only
-                ;; reached after the string? check, so a non-string
-                ;; Strand_Probe value yields #f instead of an error.
-                (strand-text
-                 (and (string? strand-probe)
-                      (cond ((string=? "+" strand-probe)
-                             "on the plus strand")
-                            ((string=? "-" strand-probe)
-                             "on the minus strand")
-                            (else #f))))
-                (location (list chr mb)))
-           (match location
-             ;; Chr is "Un": no location is reported.
-             (("Un" mb)
-              (format #f "Not available"))
-             ;; Mb is the empty string: position unknown (or nothing at all).
-             ((chr "")
-              (if (string-blank? chr)
-                  (format #f "Not available")
-                  ;; ~:[~;~a~] prints the strand text only when it is not #f.
-                  (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]"
-                          chr mb strand-text strand-text)))
-             ;; Otherwise report Chr, Mb and, when known, the strand.
-             (_
-              (format #f "Chr ~a @ ~a Mb ~:[~;~a~]"
-                      chr mb strand-text strand-text)))))
-    (set gnt:hasGeneId
-         (field ProbeSet GeneId))
-    ;; OMIM Link
-    (set dct:references
-         (let ((omim (field ProbeSet OMIM)))
-           (if (not (string-blank? omim))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; ~N@* jumps to argument N, so the URL (args 0 and 1) is printed twice
-                        "http://www.ncbi.nlm.nih.gov/omim/"
-                        (uri-encode omim)
-                        "a gnc:omimLink"))
-               "")))
-    ;; Homologene Link
-    (set dct:references
-         (let ((homologene (field ProbeSet HomoloGeneID)))
-           (if (not (string-blank? homologene))
-               (string->symbol
-                (format #f
-                        "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" ; same layout as the OMIM link above
-                        "http://www.ncbi.nlm.nih.gov/homologene/?term="
-                        (uri-encode homologene)
-                        "a gnc:homologeneLink"))
-               "")))
-    (set gnt:uniprot
-         (ontology 'uniprot: (field ProbeSet UniProtID)))
-    (set gnt:strandProbe ; NOTE(review): schema-triples declares gnt:strandPosition, not gnt:strandProbe -- confirm which is intended
-         (field ProbeSet Strand_Probe))
-    (set gnt:hasSpecificity
-         (field ("IFNULL(ProbeSet.Probe_set_specificity, '')"
-                 Probe_set_specificity)))
-    (set gnt:hasBlatScore
-         (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')"
-                 Probe_set_BLAT_score)))
-    (set gnt:hasBlatMbStart
-         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')"
-                                 Probe_set_Blat_Mb_start))
-                         '^^xsd:double))
-    (set gnt:hasBlatMbEnd
-         (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')"
-                                 Probe_set_Blat_Mb_end))
-                         '^^xsd:double))
-    (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq)))
-    (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq)))))
-
-;; Script entry point: parse the -s/-o/-d command-line options, read the
-;; connection settings from the settings file, then pass the probeset
-;; transformer and the output targets to with-documentation.
-(let* ((option-spec
-        '((settings (single-char #\s) (value #t) (required? #t)) ; required: the file is opened below, and a missing option would otherwise surface as a wrong-type error on #f
-          (output (single-char #\o) (value #t))
-          (documentation (single-char #\d) (value #t))))
-       (options (getopt-long (command-line) option-spec))
-       (settings (option-ref options 'settings #f))
-       (output (option-ref options 'output #f))
-       (documentation (option-ref options 'documentation #f))
-       (%connection-settings
-        (call-with-input-file settings
-          read))) ; the settings file is read as a single Scheme datum
-  (with-documentation
-   (name "ProbeSet Metadata")
-   (connection %connection-settings)
-   (table-metadata? #f)
-   (prefixes
-    '(("gn:" "<http://genenetwork.org/id/>")
-      ("probeset:" "<http://genenetwork.org/probeset/>")
-      ("gnc:" "<http://genenetwork.org/category/>")
-      ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>")
-      ("gnt:" "<http://genenetwork.org/term/>")
-      ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
-      ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
-      ("dct:" "<http://purl.org/dc/terms/>")
-      ("uniprot:" "<http://purl.uniprot.org/uniprot/>")
-      ("owl:" "<http://www.w3.org/2002/07/owl#>")
-      ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")
-      ("qb:" "<http://purl.org/linked-data/cube#>")
-      ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>")
-      ("skos:" "<http://www.w3.org/2004/02/skos/core#>")))
-   (inputs
-    (list probeset))
-   (outputs
-    `(#:documentation ,documentation
-      #:rdf ,output))))