about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x  .guix-shell  2
-rwxr-xr-x  scripts/precompute/list-traits-to-compute.scm  28
2 files changed, 19 insertions, 11 deletions
diff --git a/.guix-shell b/.guix-shell
index d5e9c38..bc81e06 100755
--- a/.guix-shell
+++ b/.guix-shell
@@ -4,4 +4,4 @@
 
 echo "Create a shell to run tools."
 
-guix shell  -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi time gemma-gn2 $*
+guix shell  -L ~/guix-bioinformatics -C -D -F --network coreutils guile guile-dbi guile-dbd-mysql guile-fibers guile-json guile-gnutls guile-readline guile-redis openssl nss-certs gemma parallel tar xz python python-lmdb python-cffi guile-gcrypt guile-hashing time gemma-gn2 $*
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 7e0660d..2c48d83 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -38,7 +38,7 @@ you should see
 
 Now list the next 1000 trait IDs:
 
-    . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --first 0 --next 1000
+    . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
 
 The current logic is to list all datasets that contain a
 BXD. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and
@@ -71,27 +71,35 @@ When that is the case we might as well write the phenotype file because we have
              (gn util convert)
              (gn runner gemma)
              ; (rnrs base)
+             (hashing sha-2)
              (ice-9 getopt-long)
              (ice-9 match)
              (ice-9 textual-ports)
              (json)
+             (rnrs bytevectors)
              (srfi srfi-1)
              (srfi srfi-19) ; time
              )
 
 
-(define (write-json id recs)
+(define (write-json-ld id recs)
+  ;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/
   (display id)
   (newline)
   (let* [(traits (map (lambda (r)
-                                                (match r
-                                                  [(strain-id . value) (cons (bxd-name strain-id) value)]
-                                                  ))
+                        (match r
+                          [(strain-id . value) (cons (bxd-name strain-id) value)]
+                          ))
                       (reverse recs)))
-         (json-data `((meta .
-                            ((id . ,id)
-                             (time . ,(date->string (time-utc->date (current-time))))))
-                      (traits . ,traits)))]
+         (sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits)))))
+         (json-data `(("@context" . "https://genenetwork.org/resource")
+                      (type . traits)
+                      (meta . ((version . "0.1")
+                               (steps . ())
+                               (sha256 . ((input-traits . ,sha256)))
+                               (time . ,(date->string (time-utc->date (current-time))))))
+                      (traits .
+                      ((,id . ,traits)))))]
     (call-with-output-file (string-append (number->string id) ".json")
       (lambda (port)
         (put-string port (scm->json-string json-data))))
@@ -135,7 +143,7 @@ When that is the case we might as well write the phenotype file because we have
              (for-each (lambda (r)
                          (match r
                            ((id . recs) (if (has-bxd? recs)
-                                              (write-json id recs)
+                                              (write-json-ld id recs)
                                             ))
                             )) nrecs)
              (if (> rest 0)