aboutsummaryrefslogtreecommitdiff
path: root/scripts/precompute/list-traits-to-compute.scm
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/precompute/list-traits-to-compute.scm')
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm89
1 files changed, 44 insertions, 45 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 68f0711..db12eed 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -35,6 +35,27 @@ Now list the next 1000 trait IDs:
. .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+The current logic is to list all datasets that contain a BXD
+strain. (bxd-strain-id-names #:used-for-mapping? #t) fetches all ids and
+strain names listed in GN. Note that this list differs from the one in
+the actual genotype file.
+
+To find the StrainId in a dataset:
+
+MariaDB [db_webqtl]> SELECT StrainId,value from ProbeSetData WHERE Id=115467;
++----------+---------+
+| StrainId | value |
++----------+---------+
+| 1 | 9.47169 |
+| 2 | 9.21621 |
+| 3 | 9.728 |
+| 4 | 9.28976 |
+| 5 | 9.55523 |
+| 6 | 9.63562 ...
+
+To speed things up a little we batch the data IDs into a single query and check whether a BXD strain is part of each dataset.
+When that is the case we might as well write the phenotype file because we already have the trait values.
+
!#
(use-modules (dbi dbi)
@@ -49,51 +70,29 @@ Now list the next 1000 trait IDs:
(srfi srfi-1)
)
-
-
(call-with-db
(lambda (db)
(begin
- (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
- (define (get-trait db probeset-id)
- (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
- (get-row db))
- (define (run-list-traits-to-compute db prev-id count)
- (let [(hits (get-precompute-hits db prev-id count))]
- (for-each (lambda (hit)
- (let* [(data-id (assoc-ref hit "DataId"))
- (data-id-str (int-to-string data-id))
- ;; (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
- ;; (probeset-id (assoc-ref hit "ProbeSetId"))
- ;; (trait (get-trait db probeset-id))
- ;; (trait-name (assoc-ref trait "Name"))
- ;; (name (dataset-name db probesetfreeze-id))
- ]
- (display hit)
- (newline)
- ;; ---- Get strains and phenotypes for this dataset
- (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
- (define id_traits (get-rows-apply db
- (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
- '()))
- ;; ---- Now we need to make sure that all strains belong to BXD
- (define non-bxd (fold
- (lambda (strain lst)
- (let* [(id (car strain))
- (name (assoc id bxd-strains))]
- (if name
- lst
- (append lst `(,name)))))
-
- '()
- id_traits))
- (define traits (map
- (lambda (t)
- (match t
- ((id . value) (cons (assoc-ref bxd-strains id) value)
- )))
- id_traits))
- #t))
- hits)))
- (run-list-traits-to-compute db 0 5) ;; start precompute
- )))
+ (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))]
+ (define (run-list-traits-to-compute db prev-id count) ; Build and print one batched ProbeSetData query for the hits returned for (db prev-id count).
+ (let* [(hits (get-precompute-hits db prev-id count)) ; hits: records fetched by the external helper; each is read below with assoc-ref
+ (data-ids (map (lambda (hit) ; collect the "DataId" field of every hit
+ (let* [(data-id (assoc-ref hit "DataId"))
+ ; (data-id-str (int-to-string data-id))
+ ]
+ data-id))
+ hits))
+ ;; (data-ids-query (fold (lambda (id query) "" (string-append query "Id=" (int-to-string id) " OR ")) "" data-ids))
+ (data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids)) ; one "Id=<n>" condition per data id; int-to-string is defined elsewhere, presumably int -> decimal string
+ (data-ids-query (string-join data-str-ids " OR ")) ; join the conditions so a single query covers all ids
+ (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query)) ; NOTE(review): with no hits this ends in a bare "WHERE " -- guard before executing it
+ ]
+ ; (display data-str-ids)
+ ; (display data-ids-query)
+ ; (display data-ids)
+ (display query) ; the query is only printed here; it is not sent to the database in this procedure
+ ))
+ ; (display data-ids)
+ (run-list-traits-to-compute db 0 50) ;; start precompute
+ ;; (write bxd-strains)
+ ))))