diff options
Diffstat (limited to 'scripts/precompute/list-traits-to-compute.scm')
| -rwxr-xr-x | scripts/precompute/list-traits-to-compute.scm | 80 |
1 files changed, 66 insertions, 14 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm index 2c48d83..102a6fa 100755 --- a/scripts/precompute/list-traits-to-compute.scm +++ b/scripts/precompute/list-traits-to-compute.scm @@ -15,6 +15,10 @@ You may want to forward a mysql port if there is no DB locally ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org +ignore IPv6 message: + + bind [::1]:3306: Cannot assign requested address + test connection with mysql client: mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;" @@ -78,19 +82,50 @@ When that is the case we might as well write the phenotype file because we have (json) (rnrs bytevectors) (srfi srfi-1) + (srfi srfi-9) (srfi srfi-19) ; time ) +(define (get-trait db probeset-id) + (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1")) + (get-row db) + ) + +#! + +The following is produced by gemma-wrapper as metadata + + "meta": { + "type": "gemma-wrapper", + "version": "0.99.7-pre1", + "population": "BXD", + "name": "HC_U_0304_R", + "trait": "101500_at", + "url": "https://genenetwork.org/show_trait?trait_id=101500_at&dataset=HC_U_0304_R", + "archive_GRM": "46bfba373fe8c19e68be6156cad3750120280e2e-gemma-cXX.tar.xz", + "archive_GWA": "779a54a59e4cd03608178db4068791db4ca44ab3-gemma-GWA.tar.xz", + "dataid": 75629, + "probesetid": 1097, + "probesetfreezeid": 7 + } -(define (write-json-ld id recs) +!# + + +(define (write-json-ld id name trait trait-name probesetfreeze-id probeset-id recs) ;; see also https://www.w3.org/2018/jsonld-cg-reports/json-ld/ (display id) + (display ":") + (display name) + (display ":") + (display trait-name) (newline) (let* [(traits (map (lambda (r) (match r [(strain-id . value) (cons (bxd-name strain-id) value)] )) (reverse recs))) + (uri (format #f "https://genenetwork.org/show_trait?trait_id=~a&dataset=~a" trait-name name)) (sha256 (sha-256->string (sha-256 (string->utf8 (scm->json-string traits))))) (json-data `(("@context" . "https://genenetwork.org/resource") (type . traits) @@ -98,8 +133,16 @@ When that is the case we might as well write the phenotype file because we have (steps . ()) (sha256 . ((input-traits . ,sha256))) (time . ,(date->string (time-utc->date (current-time)))))) - (traits . - ((,id . ,traits)))))] + (data . + ((,id . + ((group . "BXD") + (probesetfreeze-id . ,probesetfreeze-id) + (probeset-id . ,probeset-id) + (name . ,name) + (trait-name . ,trait-name) + (uri . ,uri) + (traits . ,traits) + ))))))] (call-with-output-file (string-append (number->string id) ".json") (lambda (port) (put-string port (scm->json-string json-data)))) @@ -111,22 +154,23 @@ When that is the case we might as well write the phenotype file because we have (begin ;; (let [(bxd-strains (memo-bxd-strain-id-names #:used-for-mapping? #t))] (define (run-list-traits-to-compute db num prev-id) + ;; ---- Build a query to collect num traits (let* [(count (if (< batch-size num) batch-size num)) (rest (- num count)) (hits (get-precompute-hits db prev-id count)) - (data-ids (map (lambda (hit) - (let* [(data-id (assoc-ref hit "DataId")) - ; (data-id-str (int-to-string data-id)) - ] - data-id)) + (data-ids (map (lambda (h) + (hit-data-id h)) hits)) (data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids)) (data-ids-query (string-join data-str-ids " OR ")) (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query)) ] + (format #t "writing-phenotypes from ~d to ~d batch ~d to ~d\n" first-id (+ first-id num) prev-id (+ prev-id count)) + (display query) (dbi-query db query) + ;; ---- Walk each resulting trait and build a hash of data-id and list of trait values (let [(id-traits (get-rows db '())) (nrecs '())] (for-each (lambda (r) @@ -140,12 +184,20 @@ When that is the case we might as well write the phenotype file because we have )] (set! nrecs (assoc-set! nrecs data-id lst)))) id-traits) - (for-each (lambda (r) - (match r - ((id . recs) (if (has-bxd? recs) - (write-json-ld id recs) - )) - )) nrecs) + ;; --- create the json output as a file by walking traits and hits + (for-each (lambda (h) + (let* [ + (id (hit-data-id h)) + (probeset-id (hit-probeset-id h)) + (probesetfreeze-id (hit-probesetfreeze-id h)) + (name (dataset-name db probesetfreeze-id)) + (trait (get-trait db probeset-id)) + (trait-name (assoc-ref trait "Name")) + (recs (assoc-ref nrecs id)) + ] + (if (has-bxd? recs) + (write-json-ld id name trait trait-name probesetfreeze-id probeset-id recs) + ))) hits) (if (> rest 0) (run-list-traits-to-compute db rest (first (reverse data-ids)))) ;; start precompute ))) |
