From 7dc82124e81c06f59f139870b0c932eafcefafc6 Mon Sep 17 00:00:00 2001
From: Munyoki Kilyungi
Date: Tue, 15 Aug 2023 18:57:10 +0300
Subject: Dump probeset summary stats

Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
---
 examples/dump-probeset-data.scm | 93 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100755 examples/dump-probeset-data.scm

(limited to 'examples')

diff --git a/examples/dump-probeset-data.scm b/examples/dump-probeset-data.scm
new file mode 100755
index 0000000..c4e55ab
--- /dev/null
+++ b/examples/dump-probeset-data.scm
@@ -0,0 +1,93 @@
+#! /usr/bin/env guile
+!#
+
+(use-modules (srfi srfi-1)
+             (srfi srfi-26)
+             (ice-9 match)
+             (ice-9 regex)
+             (dump strings)
+             (dump sql)
+             (dump triples)
+             (dump special-forms))
+
+
+
+(define %connection-settings   ; one datum `read' from the file named by the first CLI argument
+  (let ((settings-path (list-ref (command-line) 1)))
+    (call-with-input-file settings-path read)))
+
+
+
+;; Dump per-probeset summary statistics (mean, locus, LRS, additive, standard
+;; error, p-value, h2) for rows whose dataset is public and non-confidential.
+(define-dump dump-probeset-data
+  (tables (ProbeSetXRef
+           (left-join ProbeSet "ON ProbeSet.Id = ProbeSetXRef.ProbeSetId")
+           ;; Reach the dataset through the xref row's ProbeSetFreezeId key.
+           (left-join ProbeSetFreeze "ON ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id"))
+          ;; Raw SQL filter, passed as a trailing element of the `tables' form.
+          "WHERE ProbeSetFreeze.public > 0 AND ProbeSetFreeze.confidentiality < 1")
+  (schema-triples
+   (gnt:mean rdfs:domain gnc:probeset)
+   (gnt:mean rdfs:range xsd:double)
+   (gnt:locus rdfs:domain gnc:probeset)
+   (gnt:LRS rdfs:domain gnc:probeset)
+   (gnt:LRS rdfs:range xsd:double)
+   (gnt:additive rdfs:domain gnc:probeset)
+   (gnt:additive rdfs:range xsd:double)
+   (gnt:stdErr rdfs:domain gnc:probeset)
+   (gnt:stdErr rdfs:range xsd:double)
+   (gnt:pValue rdfs:domain gnc:probeset)
+   (gnt:pValue rdfs:range xsd:double)
+   (gnt:h2 rdfs:domain gnc:probeset)
+   (gnt:h2 rdfs:range xsd:double))
+  ;; Subject: the sanitised probeset name, or "probeset" + Id when the name is blank.
+  (triples (let ((id (field ("IF(NULLIF(TRIM(ProbeSet.Name), '') IS NULL, '', TRIM(ProbeSet.Name))"
+                             ProbeSetIdName)))
+                 (probeset-id (field ProbeSet Id)))
+             (if (string-null? id)
+                 (string->identifier "probeset" (number->string probeset-id))
+                 (string->identifier
+                  ""
+                  (regexp-substitute/global #f "[^A-Za-z0-9:]" id 'pre "_" 'post)
+                  #:separator ""
+                  #:proc string-capitalize-first)))
+    (set gnt:mean (annotate-field (field ("IFNULL(ProbeSetXRef.mean, '')" mean))
+                                  '^^xsd:double))
+    ;; PublishXRef is never joined above, so Locus is read from ProbeSetXRef.
+    (set gnt:locus (field ProbeSetXRef Locus))
+    (set gnt:LRS (annotate-field (field ("IFNULL(ProbeSetXRef.LRS, '')" lrs))
+                                 '^^xsd:double))
+    (set gnt:additive (annotate-field (field ("IFNULL(ProbeSetXRef.additive, '')" additive))
+                                      '^^xsd:double))
+    (set gnt:stdErr (annotate-field (field ("IFNULL(ProbeSetXRef.se, '')" stdErr))
+                                    '^^xsd:double))
+    (set gnt:pValue (annotate-field (field ("IFNULL(ProbeSetXRef.pValue, '')" pValue))
+                                    '^^xsd:double))
+    (set gnt:h2 (annotate-field (field ("IFNULL(ProbeSetXRef.h2, '')" h2))
+                                '^^xsd:double))
+    (set gnt:belongsToDataset
+         (string->identifier
+          ""
+          (regexp-substitute/global #f "[^A-Za-z0-9:]" (field ProbeSetFreeze Name) 'pre "_" 'post)
+          #:separator ""
+          #:proc string-capitalize-first))))
+
+
+
+;; Run the probeset dump, writing the RDF (Turtle) file and its documentation.
+(dump-with-documentation
+ (name "Probeset Summary Statistics")
+ (connection %connection-settings)
+ (table-metadata? #f)
+ (prefixes
+  '(("gn:" "<http://genenetwork.org/id/>")
+    ("gnc:" "<http://genenetwork.org/category/>")
+    ("gnt:" "<http://genenetwork.org/term/>") ; term namespace, kept distinct from gn:
+    ("rdf:" "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>")
+    ("rdfs:" "<http://www.w3.org/2000/01/rdf-schema#>")
+    ("xsd:" "<http://www.w3.org/2001/XMLSchema#>")))
+ ;; The only dump defined in this file is `dump-probeset-data'.
+ (inputs (list dump-probeset-data))
+ (outputs '(#:documentation "./docs/dump-probeset-summary-stats.md"
+            #:rdf "./verified-data/dump-probeset-summary-stats.ttl")))
-- 
cgit v1.2.3