From a950fc8d6c856bf700841514af113d689e30afc5 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 7 May 2024 13:53:32 +0200
Subject: Fetch a batch of traits so we can process faster

---
 scripts/precompute/list-traits-to-compute.scm | 89 +++++++++++++--------------
 1 file changed, 44 insertions(+), 45 deletions(-)

(limited to 'scripts/precompute/list-traits-to-compute.scm')

diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 68f0711..db12eed 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -35,6 +35,27 @@ Now list the next 1000 trait IDs:
 
     . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
 
+The current logic is to list all datasets that contain a BXD
+strain. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids
+and strain names listed in GN. Note that this list differs from the
+strains in the actual genotype file.
+
+To find the StrainId in a dataset:
+
+MariaDB [db_webqtl]> SELECT StrainId,value from ProbeSetData WHERE Id=115467;
++----------+---------+
+| StrainId | value   |
++----------+---------+
+|        1 | 9.47169 |
+|        2 | 9.21621 |
+|        3 |   9.728 |
+|        4 | 9.28976 |
+|        5 | 9.55523 |
+|        6 | 9.63562 ...
+
+To speed things up a little we fetch the traits in batches and check whether BXD strains are part of each one.
+When that is the case we might as well write the phenotype file, because we already have the trait values.
+
 !#
 
 (use-modules (dbi dbi)
@@ -49,51 +70,29 @@ Now list the next 1000 trait IDs:
              (srfi srfi-1)
              )
 
-
-
 (call-with-db
  (lambda (db)
    (begin
-     (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
-     (define (get-trait db probeset-id)
-       (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
-       (get-row db))
-     (define (run-list-traits-to-compute db prev-id count)
-       (let [(hits (get-precompute-hits db prev-id count))]
-         (for-each (lambda (hit)
-                     (let* [(data-id (assoc-ref hit "DataId"))
-                            (data-id-str (int-to-string data-id))
-                            ;; (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
-                            ;; (probeset-id (assoc-ref hit "ProbeSetId"))
-                            ;; (trait (get-trait db probeset-id))
-                            ;; (trait-name (assoc-ref trait "Name"))
-                            ;; (name (dataset-name db probesetfreeze-id))
-                            ]
-                       (display hit)
-                       (newline)
-                       ;; ---- Get strains and phenotypes for this dataset
-                       (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
-                       (define id_traits (get-rows-apply db
-                                                         (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
-                                                         '()))
-                       ;; ---- Now we need to make sure that all strains belong to BXD
-                       (define non-bxd (fold
-                                        (lambda (strain lst)
-                                          (let* [(id (car strain))
-                                                 (name (assoc id bxd-strains))]
-                                            (if name
-                                                lst
-                                                (append lst `(,name)))))
-
-                                        '()
-                                        id_traits))
-                       (define traits (map
-                                       (lambda (t)
-                                         (match t
-                                           ((id . value) (cons (assoc-ref bxd-strains id) value)
-                                            )))
-                                       id_traits))
-                       #t))
-                   hits)))
-         (run-list-traits-to-compute db 0 5) ;; start precompute
-       )))
+     (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))]
+       (define (run-list-traits-to-compute db prev-id count) ; build and print one batched ProbeSetData query
+         (let* [(hits (get-precompute-hits db prev-id count)) ; helper defined elsewhere; presumably up to `count` hits after prev-id -- TODO confirm
+                (data-ids (map (lambda (hit) ; collect the DataId field of every hit
+                                 (let* [(data-id (assoc-ref hit "DataId"))
+                                        ; (data-id-str (int-to-string data-id))
+                                        ]
+                                   data-id))
+                               hits))
+                ;; (data-ids-query (fold (lambda (id query) "" (string-append query "Id=" (int-to-string id) " OR ")) "" data-ids))
+                (data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids)) ; one Id=<n> term per data id
+                (data-ids-query (string-join data-str-ids " OR ")) ; terms joined with OR; empty string when there are no hits
+                (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query)) ; one SELECT covering the whole batch
+                ]
+           ; (display data-str-ids)
+           ; (display data-ids-query)
+           ; (display data-ids)
+           (display query) ; the query is only printed here, it is not executed yet
+           ))
+       ; (display data-ids)
+       (run-list-traits-to-compute db 0 50) ;; start precompute
+       ;; (write bxd-strains)
+       ))))
-- 
cgit v1.2.3