about summary refs log tree commit diff
path: root/scripts/precompute/list-traits-to-compute.scm
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/precompute/list-traits-to-compute.scm')
-rwxr-xr-x scripts/precompute/list-traits-to-compute.scm 28
1 files changed, 18 insertions, 10 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 7e0660d..2c48d83 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -38,7 +38,7 @@ you should see
 
 Now list the next 1000 trait IDs:
 
-    . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --first 0 --next 1000
+    . .guix-shell -- guile -L . -e main -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
 
 The current logic is to list all datasets that contain a
 BXD. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and
@@ -71,27 +71,35 @@ When that is the case we might as well write the phenotype file because we have
              (gn util convert)
              (gn runner gemma)
              ; (rnrs base)
+             (hashing sha-2)
              (ice-9 getopt-long)
              (ice-9 match)
              (ice-9 textual-ports)
              (json)
+             (rnrs bytevectors)
              (srfi srfi-1)
              (srfi srfi-19) ; time
              )
 
 
-(define (write-json id recs)
+(define (write-json-ld id recs)
+  ;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/
   (display id)
   (newline)
   (let* [(traits (map (lambda (r)
-                                                (match r
-                                                  [(strain-id . value) (cons (bxd-name strain-id) value)]
-                                                  ))
+                        (match r
+                          [(strain-id . value) (cons (bxd-name strain-id) value)]
+                          ))
                       (reverse recs)))
-         (json-data `((meta .
-                            ((id . ,id)
-                             (time . ,(date->string (time-utc->date (current-time))))))
-                      (traits . ,traits)))]
+         (sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits)))))
+         (json-data `(("@context" . "https://genenetwork.org/resource")
+                      (type . traits)
+                      (meta . ((version . "0.1")
+                               (steps . ())
+                               (sha256 . ((input-traits . ,sha256)))
+                               (time . ,(date->string (time-utc->date (current-time))))))
+                      (traits .
+                      ((,id . ,traits)))))]
     (call-with-output-file (string-append (number->string id) ".json")
       (lambda (port)
         (put-string port (scm->json-string json-data))))
@@ -135,7 +143,7 @@ When that is the case we might as well write the phenotype file because we have
              (for-each (lambda (r)
                          (match r
                            ((id . recs) (if (has-bxd? recs)
-                                              (write-json id recs)
+                                              (write-json-ld id recs)
                                             ))
                             )) nrecs)
              (if (> rest 0)