diff options
author | Munyoki Kilyungi | 2024-10-16 11:31:36 +0300 |
---|---|---|
committer | Munyoki Kilyungi | 2024-10-16 12:26:32 +0300 |
commit | 6433d66eef6dade517dc2242fc45ed514fb926d0 (patch) | |
tree | b6a8338be9ac8f573c5d0f66d5e5ce31fa821ad2 | |
parent | 5b9548f3acad39b379f8429df7e85e3023e029ef (diff) | |
download | gn-transform-databases-6433d66eef6dade517dc2242fc45ed514fb926d0.tar.gz |
Delete probeset transform.
This is still a WIP. At the moment, this examples folder is being tested
out for a CI job, and the probeset transform is one of the longest. I will
re-add it later.
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
-rwxr-xr-x | examples/probeset.scm | 205 |
1 files changed, 0 insertions, 205 deletions
diff --git a/examples/probeset.scm b/examples/probeset.scm deleted file mode 100755 index caf81aa..0000000 --- a/examples/probeset.scm +++ /dev/null @@ -1,205 +0,0 @@ -#! /usr/bin/env guile -!# - -(use-modules (srfi srfi-1) - (srfi srfi-26) - (ice-9 format) - (ice-9 getopt-long) - (ice-9 match) - (ice-9 regex) - (transform strings) - (transform sql) - (transform triples) - (transform special-forms) - (web uri)) - - -(define-transformer probeset - (tables (ProbeSet - (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId") - (left-join Species "ON GeneChip.SpeciesId = Species.Id")) - "WHERE ProbeSet.Name IS NOT NULL") - (schema-triples - (gnc:omimLink rdfs:Class gnc:ResourceLink) - (gnc:omimLink rdfs:label "OMIM") - (gnc:omimLink rdfs:comments "Summary from On Mendelion Inheritance in Man") - (gnc:homologeneLink rdfs:Class gnc:ResourceLink) - (gnc:homologeneLink rdfs:label "HomoloGene") - (gnc:homologeneLink rdfs:comments "Find similar genes in other species") - (gnc:uniprot a owl:ObjectProperty) - (gnc:uniprot rdfs:label "UniProt") - (gnc:uniprot rdfs:comments "UniProt resource") - (gnt:hasChip a owl:ObjectProperty) - (gnt:hasChip rdfs:domain gnc:Probeset) - (gnt:hasTargetId a owl:ObjectProperty) - (gnt:hasTargetId rdfs:domain gnc:Probeset) - (gnt:geneSymbol rdfs:domain gnc:Probeset) - (gnt:location rdfs:domain gnc:ProbeSet) - (gnt:location a owl:ObjectProperty) - (gnt:strandPosition rdfs:domain gnc:ProbeSet) - (gnt:strandPosition a owl:ObjectProperty) - (gnt:targetsRegion a owl:ObjectProperty) - (gnt:targetsRegion rdfs:domain gnc:Probeset) - (gnt:chr rdfs:domain gnc:Probeset) - (gnt:mb rdfs:domain gnc:Probeset) - (gnt:hasSpecificity a owl:ObjectProperty) - (gnt:hasSpecificity rdfs:domain gnc:Probeset) - (gnt:hasBlatScore a owl:ObjectProperty) - (gnt:hasBlatScore rdfs:domain gnc:Probeset) - (gnt:hasBlatMbStart a owl:ObjectProperty) - (gnt:hasBlatMbStart rdfs:domain gnc:Probeset) - (gnt:hasBlatMbEnd a owl:ObjectProperty) - (gnt:hasBlatMbEnd rdfs:domain gnc:Probeset) 
- (gnt:hasBlatSeq a owl:ObjectProperty) - (gnt:hasBlatSeq rdfs:domain gnc:Probeset) - (gnt:hasTargetSeq a owl:ObjectProperty) - (gnt:hasTargetSeq rdfs:domain gnc:Probeset)) - (triples - (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))" - ProbeSetIdName))) - (probeset-id (field ProbeSet Id))) - (string->identifier - "probeset" - (if (string-null? id) - (number->string probeset-id) - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - id - 'pre "_" 'post)))) - (set rdf:type 'gnc:Probeset) - (set rdfs:label (field ProbeSet Name)) - (set skos:altLabel - (replace-substrings - (field ProbeSet alias) - '(("\r\n" . "; ")))) - (set gnt:hasChip - (string->identifier - "platform" - (field ("IFNULL(GeneChip.Name, '')" GeneChipName)))) - (set gnt:hasTargetId - (field ("NULLIF(TRIM(ProbeSet.TargetId), '')" - TargetId))) - (multiset gnt:geneSymbol - (map string-trim (string-split - (field ProbeSet Symbol) - #\,))) - (set dct:description (sanitize-rdf-string (field ProbeSet description))) - (set gnt:targetsRegion - (sanitize-rdf-string - (field ("NULLIF(TRIM(ProbeSet.Probe_set_target_region), '')" - Probe_set_target_region)))) - (set gnt:chr (field ProbeSet Chr)) - (set gnt:mb (annotate-field (field ("IFNULL(ProbeSet.Mb, '')" Mb)) '^^xsd:double)) - (set gnt:location - (let* ((mb (field ProbeSet Mb)) - (chr (field ProbeSet Chr)) - (strand-probe (field ProbeSet Strand_Probe)) - (location (list chr mb))) - (match location - (("Un" mb) - (format #f "Not available")) - ((chr "") - (if (string-blank? chr) - (format #f "Not available") - (format #f "Chr ~a @ Unknown position ~a~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? "-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))) - (_ - (format #f "Chr ~a @ ~a Mb ~:[~;~a~]" - chr mb - (and (string? strand-probe) (or (string=? "+" strand-probe) - (string=? 
"-" strand-probe))) - (cond ((string=? "+" strand-probe) - "on the plus strand") - ((string=? "-" strand-probe) - "on the minus strand") - (else ""))))))) - (set gnt:hasGeneId - (field ProbeSet GeneId)) - ;; OMIM Link - (set dct:references - (let ((omim (field ProbeSet OMIM))) - (if (not (string-blank? omim)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/omim/" - (uri-encode omim) - "a gnc:omimLink")) - ""))) - ;; Homologene Link - (set dct:references - (let ((homologene (field ProbeSet HomoloGeneID))) - (if (not (string-blank? homologene)) - (string->symbol - (format #f - "<~0@*~a~1@*~a> .~%<~0@*~a~1@*~a> ~2@*~a" - "http://www.ncbi.nlm.nih.gov/homologene/?term=" - (uri-encode homologene) - "a gnc:homologeneLink")) - ""))) - (set gnt:uniprot - (ontology 'uniprot: (field ProbeSet UniProtID))) - (set gnt:strandProbe - (field ProbeSet Strand_Probe)) - (set gnt:hasSpecificity - (field ("IFNULL(ProbeSet.Probe_set_specificity, '')" - Probe_set_specificity))) - (set gnt:hasBlatScore - (field ("IFNULL(ProbeSet.Probe_set_BLAT_score, '')" - Probe_set_BLAT_score))) - (set gnt:hasBlatMbStart - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_start, '')" - Probe_set_Blat_Mb_start)) - '^^xsd:double)) - (set gnt:hasBlatMbEnd - (annotate-field (field ("IFNULL(ProbeSet.Probe_set_Blat_Mb_end, '')" - Probe_set_Blat_Mb_end)) - '^^xsd:double)) - (set gnt:hasBlatSeq (sanitize-rdf-string (field ProbeSet BlatSeq))) - (set gnt:hasTargetSeq (sanitize-rdf-string (field ProbeSet TargetSeq))))) - - - - -(let* ((option-spec - '((settings (single-char #\s) (value #t)) - (output (single-char #\o) (value #t)) - (documentation (single-char #\d) (value #t)))) - (options (getopt-long (command-line) option-spec)) - (settings (option-ref options 'settings #f)) - (output (option-ref options 'output #f)) - (documentation (option-ref options 'documentation #f)) - (%connection-settings - (call-with-input-file settings - read))) - 
(with-documentation - (name "ProbeSet Metadata") - (connection %connection-settings) - (table-metadata? #f) - (prefixes - '(("gn:" "<http://genenetwork.org/id/>") - ("probeset:" "<http://genenetwork.org/probeset/>") - ("gnc:" "<http://genenetwork.org/category/>") - ("gene:" "<http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=>") - ("gnt:" "<http://genenetwork.org/term/>") - ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>") - ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>") - ("dct:" "<http://purl.org/dc/terms/>") - ("uniprot:" "<http://purl.uniprot.org/uniprot/>") - ("owl:" "<http://www.w3.org/2002/07/owl#>") - ("xsd:" "<http://www.w3.org/2001/XMLSchema#>") - ("qb:" "<http://purl.org/linked-data/cube#>") - ("sdmx-measure:" "<http://purl.org/linked-data/sdmx/2009/measure#>") - ("skos:" "<http://www.w3.org/2004/02/skos/core#>"))) - (inputs - (list probeset)) - (outputs - `(#:documentation ,documentation - #:rdf ,output)))) |