From 304c35e25c33ee6af576c9122e897b0972602481 Mon Sep 17 00:00:00 2001 From: Munyoki Kilyungi Date: Fri, 26 May 2023 13:00:28 +0300 Subject: Move probeset metadata used for a given experiment to its own dump Signed-off-by: Munyoki Kilyungi --- examples/dump-probeset-metadata.scm | 107 ++++++++++++++++++++++++++++++++++++ examples/dump-probeset.scm | 22 -------- 2 files changed, 107 insertions(+), 22 deletions(-) create mode 100755 examples/dump-probeset-metadata.scm (limited to 'examples') diff --git a/examples/dump-probeset-metadata.scm b/examples/dump-probeset-metadata.scm new file mode 100755 index 0000000..ae9931c --- /dev/null +++ b/examples/dump-probeset-metadata.scm @@ -0,0 +1,107 @@ +#! /usr/bin/env guile +!# + +(use-modules (srfi srfi-1) + (srfi srfi-26) + (ice-9 match) + (ice-9 regex) + (dump strings) + (dump sql) + (dump triples) + (dump special-forms)) + + + +(define %connection-settings + (call-with-input-file (list-ref (command-line) 1) + read)) + +(define %dump-directory + (list-ref (command-line) 2)) + + +(define-dump dump-probeset-metadata + (tables (ProbeSetXRef + (left-join ProbeSet "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id") + (left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id"))) + (schema-triples + (gn:probesetData rdfs:range gn:probeset) + (gn:hasProbeset rdfs:range rdfs:Literal)) + (triples + (string->identifier + "probesetData" + (field ("CONCAT(ProbeSetFreeze.Name,':',IFNULL(ProbeSet.Name, ProbeSet.Id))" + ProbeSetName))) + (set rdf:type 'gn:probesetData) + (set gn:hasProbeset + (ontology + 'probeset: + (regexp-substitute/global + #f "[^A-Za-z0-9:]" + (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" + name)) + 'pre "_" 'post))) + (set gn:probesetOfDataset + (ontology + 'probeset: + (regexp-substitute/global #f "[^A-Za-z0-9:]" + (field ProbeSetFreeze Name) + 'pre "_" 'post))) + (set gn:mean + (annotate-field + (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) + '^^xsd:double)) + (set gn:se + (annotate-field + (field 
("IFNULL(ProbeSetXRef.se, '')" se)) + '^^xsd:double)) + (set gn:locus (field ProbeSetXRef Locus)) + (set gn:LRS + (annotate-field + (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) + '^^xsd:double)) + (set gn:pValue + (annotate-field + (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) + '^^xsd:double)) + (set gn:additive + (annotate-field + (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) + '^^xsd:double)) + (set gn:h2 + (annotate-field + (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) + '^^xsd:float)))) + + + +(call-with-target-database + %connection-settings + (lambda (db) + (with-output-to-file (string-append %dump-directory "dump-probeset-metadata.ttl") + (lambda () + (prefix "chebi:" "") + (prefix "dct:" "") + (prefix "foaf:" "") + (prefix "generif:" "") + (prefix "gn:" "") + (prefix "hgnc:" "") + (prefix "homologene:" "") + (prefix "kegg:" "") + (prefix "molecularTrait:" "") + (prefix "nuccore:" "") + (prefix "omim:" "") + (prefix "owl:" "") + (prefix "phenotype:" "") + (prefix "pubchem:" "") + (prefix "pubmed:" "") + (prefix "rdf:" "") + (prefix "rdfs:" "") + (prefix "taxon:" "") + (prefix "uniprot:" "") + (prefix "up:" "") + (prefix "xsd:" "") + (prefix "probeset:" "") + (newline) + (dump-probeset-metadata db)) + #:encoding "utf8"))) diff --git a/examples/dump-probeset.scm b/examples/dump-probeset.scm index e659120..4fa030f 100755 --- a/examples/dump-probeset.scm +++ b/examples/dump-probeset.scm @@ -22,8 +22,6 @@ (define-dump dump-probeset (tables (ProbeSet - (left-join ProbeSetXRef "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id") - (left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id") (left-join GeneChip "ON GeneChip.Id = ProbeSet.ChipId"))) (schema-triples (gn:name rdfs:range rdfs:Literal)) @@ -31,26 +29,6 @@ 'probeset: (field ("IFNULL(ProbeSet.Name, ProbeSet.Id)" name))) - (set gn:probesetOfDataset - (ontology - 'probeset: - (regexp-substitute/global - #f "[^A-Za-z0-9:]" - (field ("IFNULL(ProbeSetFreeze.Name, '')" DatasetName)) - 
'pre "_" 'post))) - (set gn:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean)) - '^^xsd:double)) - (set gn:se (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" se)) - '^^xsd:double)) - (set gn:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" LRS)) - '^^xsd:double)) - (set gn:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue)) - '^^xsd:double)) - (set gn:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive)) - '^^xsd:double)) - (set gn:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2)) - '^^xsd:float)) - (set gn:locus (field ProbeSetXRef Locus)) (set gn:chipOf (string->identifier "platform" (field GeneChip Name))) (set gn:name (field ProbeSet Name)) (set gn:symbol (field ProbeSet Symbol)) -- cgit v1.2.3