diff options
-rwxr-xr-x | .guix-shell | 2 | ||||
-rwxr-xr-x | scripts/precompute/list-traits-to-compute.scm | 28 |
2 files changed, 19 insertions, 11 deletions
diff --git a/.guix-shell b/.guix-shell index d5e9c38..bc81e06 100755 --- a/.guix-shell +++ b/.guix-shell @@ -4,4 +4,4 @@ echo "Create a shell to run tools." -guix shell -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi time gemma-gn2 $* +guix shell -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi guile-gcrypt guile-hashing time gemma-gn2 $* diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm index 7e0660d..2c48d83 100755 --- a/scripts/precompute/list-traits-to-compute.scm +++ b/scripts/precompute/list-traits-to-compute.scm @@ -38,7 +38,7 @@ you should see Now list the next 1000 trait IDs: - . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --first 0 --next 1000 + . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --next 1000 The current logic is to list all datasets that contain a BXD. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and @@ -71,27 +71,35 @@ When that is the case we might as well write the phenotype file because we have (gn util convert) (gn runner gemma) ; (rnrs base) + (hashing sha-2) (ice-9 getopt-long) (ice-9 match) (ice-9 textual-ports) (json) + (rnrs bytevectors) (srfi srfi-1) (srfi srfi-19) ; time ) -(define (write-json id recs) +(define (write-json-ld id recs) + ;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/ (display id) (newline) (let* [(traits (map (lambda (r) - (match r - [(strain-id . value) (cons (bxd-name strain-id) value)] - )) + (match r + [(strain-id . value) (cons (bxd-name strain-id) value)] + )) (reverse recs))) - (json-data `((meta . - ((id . ,id) - (time . ,(date->string (time-utc->date (current-time)))))) - (traits . ,traits)))] + (sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits))))) + (json-data `(("@context" . "https://genenetwork.org/resource") + (type . traits) + (meta . ((version . "0.1") + (steps . ()) + (sha256 . ((input-traits . ,sha256))) + (time . ,(date->string (time-utc->date (current-time)))))) + (traits . + ((,id . ,traits)))))] (call-with-output-file (string-append (number->string id) ".json") (lambda (port) (put-string port (scm->json-string json-data)))) @@ -135,7 +143,7 @@ When that is the case we might as well write the phenotype file because we have (for-each (lambda (r) (match r ((id . recs) (if (has-bxd? recs) - (write-json id recs) + (write-json-ld id recs) )) )) nrecs) (if (> rest 0) |