about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pjotr Prins, 2024-05-21 07:49:27 -0500
committer: Pjotr Prins, 2024-05-21 07:49:27 -0500
commit: e12150797ca75f1c35d55e16c73848c30937ff77 (patch)
tree: 1374f25f9e5939414d4319f706ef98cf3b186c64
parent: c49852be64533ad5deccfaf64a52017ba3744823 (diff)
download: gn-guile-e12150797ca75f1c35d55e16c73848c30937ff77.tar.gz
Writing json-ld
-rwxr-xr-x  .guix-shell  2
-rwxr-xr-x  scripts/precompute/list-traits-to-compute.scm  28
2 files changed, 19 insertions, 11 deletions
diff --git a/.guix-shell b/.guix-shell
index d5e9c38..bc81e06 100755
--- a/.guix-shell
+++ b/.guix-shell
@@ -4,4 +4,4 @@
echo "Create a shell to run tools."
-guix shell -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi time gemma-gn2 $*
+guix shell -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi guile-gcrypt guile-hashing time gemma-gn2 $*
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 7e0660d..2c48d83 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -38,7 +38,7 @@ you should see
Now list the next 1000 trait IDs:
- . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --first 0 --next 1000
+ . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
The current logic is to list all datasets that contain a
BXD. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and
@@ -71,27 +71,35 @@ When that is the case we might as well write the phenotype file because we have
(gn util convert)
(gn runner gemma)
; (rnrs base)
+ (hashing sha-2)
(ice-9 getopt-long)
(ice-9 match)
(ice-9 textual-ports)
(json)
+ (rnrs bytevectors)
(srfi srfi-1)
(srfi srfi-19) ; time
)
-(define (write-json id recs)
+(define (write-json-ld id recs)
+ ;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/
(display id)
(newline)
(let* [(traits (map (lambda (r)
- (match r
- [(strain-id . value) (cons (bxd-name strain-id) value)]
- ))
+ (match r
+ [(strain-id . value) (cons (bxd-name strain-id) value)]
+ ))
(reverse recs)))
- (json-data `((meta .
- ((id . ,id)
- (time . ,(date->string (time-utc->date (current-time))))))
- (traits . ,traits)))]
+ (sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits)))))
+ (json-data `(("@context" . "https://genenetwork.org/resource")
+ (type . traits)
+ (meta . ((version . "0.1")
+ (steps . ())
+ (sha256 . ((input-traits . ,sha256)))
+ (time . ,(date->string (time-utc->date (current-time))))))
+ (traits .
+ ((,id . ,traits)))))]
(call-with-output-file (string-append (number->string id) ".json")
(lambda (port)
(put-string port (scm->json-string json-data))))
@@ -135,7 +143,7 @@ When that is the case we might as well write the phenotype file because we have
(for-each (lambda (r)
(match r
((id . recs) (if (has-bxd? recs)
- (write-json id recs)
+ (write-json-ld id recs)
))
)) nrecs)
(if (> rest 0)