aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm76
-rwxr-xr-xscripts/precompute/run-gemma.scm23
2 files changed, 76 insertions, 23 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 2c48d83..fbe89a6 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -78,19 +78,50 @@ When that is the case we might as well write the phenotype file because we have
(json)
(rnrs bytevectors)
(srfi srfi-1)
+ (srfi srfi-9)
(srfi srfi-19) ; time
)
+(define (get-trait db probeset-id)
+ (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+ (get-row db)
+ )
+
+#!
+
+The following is produced by gemma-wrapper as metadata
+
+ "meta": {
+ "type": "gemma-wrapper",
+ "version": "0.99.7-pre1",
+ "population": "BXD",
+ "name": "HC_U_0304_R",
+ "trait": "101500_at",
+ "url": "https://genenetwork.org/show_trait?trait_id=101500_at&dataset=HC_U_0304_R",
+ "archive_GRM": "46bfba373fe8c19e68be6156cad3750120280e2e-gemma-cXX.tar.xz",
+ "archive_GWA": "779a54a59e4cd03608178db4068791db4ca44ab3-gemma-GWA.tar.xz",
+ "dataid": 75629,
+ "probesetid": 1097,
+ "probesetfreezeid": 7
+ }
-(define (write-json-ld id recs)
+!#
+
+
+(define (write-json-ld id name trait trait-name probesetfreeze-id probeset-id recs)
;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/
(display id)
+ (display ":")
+ (display name)
+ (display ":")
+ (display trait-name)
(newline)
(let* [(traits (map (lambda (r)
(match r
[(strain-id . value) (cons (bxd-name strain-id) value)]
))
(reverse recs)))
+ (uri (format #f "https://genenetwork.org/show_trait?trait_id=~a&dataset=~a" trait-name name))
(sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits)))))
(json-data `(("@context" . "https://genenetwork.org/resource")
(type . traits)
@@ -98,8 +129,16 @@ When that is the case we might as well write the phenotype file because we have
(steps . ())
(sha256 . ((input-traits . ,sha256)))
(time . ,(date->string (time-utc->date (current-time))))))
- (traits .
- ((,id . ,traits)))))]
+ (data .
+ ((,id .
+ ((group . "BXD")
+ (probesetfreeze-id . ,probesetfreeze-id)
+ (probeset-id . ,probeset-id)
+ (name . ,name)
+ (trait-name . ,trait-name)
+ (uri . ,uri)
+ (traits . ,traits)
+ ))))))]
(call-with-output-file (string-append (number->string id) ".json")
(lambda (port)
(put-string port (scm->json-string json-data))))
@@ -110,23 +149,24 @@ When that is the case we might as well write the phenotype file because we have
(lambda (db)
(begin
;; (let [(bxd-strains (memo-bxd-strain-id-names #:used-for-mapping? #t))]
+ (display "writing-phenotypes...")
(define (run-list-traits-to-compute db num prev-id)
+ ;; ---- Build a query to collect num traits
(let* [(count (if (< batch-size num)
batch-size
num))
(rest (- num count))
(hits (get-precompute-hits db prev-id count))
- (data-ids (map (lambda (hit)
- (let* [(data-id (assoc-ref hit "DataId"))
- ; (data-id-str (int-to-string data-id))
- ]
- data-id))
+ (data-ids (map (lambda (h)
+ (hit-data-id h))
hits))
(data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids))
(data-ids-query (string-join data-str-ids " OR "))
(query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query))
]
+ (display query)
(dbi-query db query)
+ ;; ---- Walk each resulting trait and build a hash of data-id and list of trait values
(let [(id-traits (get-rows db '()))
(nrecs '())]
(for-each (lambda (r)
@@ -140,12 +180,20 @@ When that is the case we might as well write the phenotype file because we have
)]
(set! nrecs (assoc-set! nrecs data-id lst))))
id-traits)
- (for-each (lambda (r)
- (match r
- ((id . recs) (if (has-bxd? recs)
- (write-json-ld id recs)
- ))
- )) nrecs)
+ ;; --- create the json output as a file by walking traits and hits
+ (for-each (lambda (h)
+ (let* [
+ (id (hit-data-id h))
+ (probeset-id (hit-probeset-id h))
+ (probesetfreeze-id (hit-probesetfreeze-id h))
+ (name (dataset-name db probesetfreeze-id))
+ (trait (get-trait db probeset-id))
+ (trait-name (assoc-ref trait "Name"))
+ (recs (assoc-ref nrecs id))
+ ]
+ (if (has-bxd? recs)
+ (write-json-ld id name trait trait-name probesetfreeze-id probeset-id recs)
+ ))) hits)
(if (> rest 0)
(run-list-traits-to-compute db rest (first (reverse data-ids)))) ;; start precompute
)))
diff --git a/scripts/precompute/run-gemma.scm b/scripts/precompute/run-gemma.scm
index e6a4e26..b0db622 100755
--- a/scripts/precompute/run-gemma.scm
+++ b/scripts/precompute/run-gemma.scm
@@ -9,7 +9,7 @@ Run from base dir with
and with some extra paths (for gemma)
-~/opt/guix-pull/bin/guix shell -C -F xz tar time parallel coreutils-minimal guile guile-dbi guile-json ruby --expose=/home/wrk/iwrk/opensource/code/genetics/gemma-wrapper/=/gemma-wrapper --expose=/home/wrk/iwrk/opensource/code/genetics/gemma/=/gemma -- env TMPDIR=tmp GEMMA_COMMAND=/gemma/bin/gemma-0.98.5-linux-static-debug guile -L . -e main -s ./scripts/precompute/run-gemma.scm
+~/opt/guix-pull/bin/guix shell -C -F xz python python-lmdb tar time parallel coreutils-minimal guile guile-dbi guile-json ruby --expose=/home/wrk/iwrk/opensource/code/genetics/gemma-wrapper/=/gemma-wrapper --expose=/home/wrk/iwrk/opensource/code/genetics/gemma/=/gemma -- env TMPDIR=tmp GEMMA_COMMAND=/gemma/bin/gemma-0.98.5-linux-static-debug guile -L . -e main -s ./scripts/precompute/run-gemma.scm test
!#
@@ -39,15 +39,20 @@ and with some extra paths (for gemma)
Usage: list-traits-to-compute [options...]
-h, --help Display this help
"))
- (let [(trait-name "115475")]
- (call-with-input-file "115475.json"
+ (let [(trait-id "115475")
+ (trait-fn "115475.json")
+ ]
+
+ (call-with-input-file trait-fn
(lambda (port)
(let* [(json (json->scm port))
- (dataset (assoc-ref json "traits"))
- (dataset-name (car (car dataset)))
- (traits (assoc-ref dataset dataset-name))
+ (dataset (car (assoc-ref json "data")))
+ (data (cdr dataset))
+ (dataset-name (assoc-ref data "name"))
+ (trait-name (assoc-ref data "trait-name"))
+ (traits (assoc-ref data "traits"))
+ (pheno-fn (string-append trait-id "-pheno.txt"))
]
- (display dataset)
- (write-pheno-file "pheno.txt" traits)
- (invoke-gemma-wrapper-loco dataset-name trait-name "pheno.txt")
+ (write-pheno-file pheno-fn traits)
+ (invoke-gemma-wrapper-loco dataset-name trait-name trait-fn pheno-fn "BXD.8_geno.txt.gz")
))))))