From a950fc8d6c856bf700841514af113d689e30afc5 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 7 May 2024 13:53:32 +0200 Subject: Fetch a batch of traits so we can process faster --- scripts/precompute/list-traits-to-compute.scm | 89 +++++++++++++-------------- 1 file changed, 44 insertions(+), 45 deletions(-) (limited to 'scripts/precompute/list-traits-to-compute.scm') diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm index 68f0711..db12eed 100755 --- a/scripts/precompute/list-traits-to-compute.scm +++ b/scripts/precompute/list-traits-to-compute.scm @@ -35,6 +35,27 @@ Now list the next 1000 trait IDs: . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000 +The current logic is to list all datasets that contain a +BXD. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and +strain names listed in GN. Note that this differs from the actual +genotype file. + +To find the StrainId in a dataset: + +MariaDB [db_webqtl]> SELECT StrainId,value from ProbeSetData WHERE Id=115467; ++----------+---------+ +| StrainId | value | ++----------+---------+ +| 1 | 9.47169 | +| 2 | 9.21621 | +| 3 | 9.728 | +| 4 | 9.28976 | +| 5 | 9.55523 | +| 6 | 9.63562 ... + +To speed things up a little, we batch them up and check whether the BXD is part of it. +When that is the case, we might as well write the phenotype file because we have the trait values. + !# (use-modules (dbi dbi) @@ -49,51 +70,29 @@ Now list the next 1000 trait IDs: (srfi srfi-1) ) - - (call-with-db (lambda (db) (begin - (define bxd-strains (bxd-strain-id-names #:used-for-mapping? 
#t)) - (define (get-trait db probeset-id) - (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1")) - (get-row db)) - (define (run-list-traits-to-compute db prev-id count) - (let [(hits (get-precompute-hits db prev-id count))] - (for-each (lambda (hit) - (let* [(data-id (assoc-ref hit "DataId")) - (data-id-str (int-to-string data-id)) - ;; (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId")) - ;; (probeset-id (assoc-ref hit "ProbeSetId")) - ;; (trait (get-trait db probeset-id)) - ;; (trait-name (assoc-ref trait "Name")) - ;; (name (dataset-name db probesetfreeze-id)) - ] - (display hit) - (newline) - ;; ---- Get strains and phenotypes for this dataset - (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str)) - (define id_traits (get-rows-apply db - (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value"))) - '())) - ;; ---- Now we need to make sure that all strains belong to BXD - (define non-bxd (fold - (lambda (strain lst) - (let* [(id (car strain)) - (name (assoc id bxd-strains))] - (if name - lst - (append lst `(,name))))) - - '() - id_traits)) - (define traits (map - (lambda (t) - (match t - ((id . value) (cons (assoc-ref bxd-strains id) value) - ))) - id_traits)) - #t)) - hits))) - (run-list-traits-to-compute db 0 5) ;; start precompute - ))) + (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? 
#t))] + (define (run-list-traits-to-compute db prev-id count) + (let* [(hits (get-precompute-hits db prev-id count)) + (data-ids (map (lambda (hit) + (let* [(data-id (assoc-ref hit "DataId")) + ; (data-id-str (int-to-string data-id)) + ] + data-id)) + hits)) + ;; (data-ids-query (fold (lambda (id query) "" (string-append query "Id=" (int-to-string id) " OR ")) "" data-ids)) + (data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids)) + (data-ids-query (string-join data-str-ids " OR ")) + (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query)) + ] + ; (display data-str-ids) + ; (display data-ids-query) + ; (display data-ids) + (display query) + )) + ; (display data-ids) + (run-list-traits-to-compute db 0 50) ;; start precompute + ;; (write bxd-strains) + )))) -- cgit v1.2.3