aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2024-05-07 13:53:32 +0200
committerPjotr Prins2024-05-07 13:53:32 +0200
commita950fc8d6c856bf700841514af113d689e30afc5 (patch)
tree0d874172b57c65075898cf9981386884a9ba257e
parent91ed2e9f0f1c5bd96b4378db0cdfe88fdbb4d01a (diff)
downloadgn-guile-a950fc8d6c856bf700841514af113d689e30afc5.tar.gz
Fetch a batch of traits so we can process faster
-rw-r--r--gn/data/strains.scm17
-rw-r--r--gn/db/mysql.scm4
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm89
3 files changed, 58 insertions, 52 deletions
diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index f1348ac..4a251d4 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -17,17 +17,17 @@
bxd-strain-id-names
))
-(define* (strain-id-names inbred-set #:key (used-for-mapping? #f))
+(define* (strain-id-names inbred-set #:key (used-for-mapping? #t))
"Return assoc list of tuples of strain id+names:
((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)...
-used-for-mapping? will say whether the strains/individuals are used for mapping.
+used-for-mapping? is meant to restrict the result to strains/individuals used for mapping; the filter is currently disabled, so all strains are returned either way. FIXME
"
(call-with-db
(lambda (db)
(dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
(if used-for-mapping?
- " AND Used_for_mapping='Y'"
+ "" ;; FIXME: filter disabled, was " AND Used_for_mapping='Y'"; keep an explicit then-branch so #f does not hand an unspecified value to string-append
"")
" ORDER BY StrainId;"))
(get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
@@ -35,5 +35,12 @@ used-for-mapping? will say whether the strains/individuals are used for mapping.
(define* (bxd-strain-id-names #:key (used-for-mapping? #f))
"Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
-used-for-mapping? will say whether the strains/individuals are used for mapping."
- (strain-id-names 1))
+used-for-mapping? will say whether the strains/individuals are used for mapping. Always True, FIXME"
+ ;; Calls strain-id-names with inbred-set 1 and forwards used-for-mapping? unchanged.
+ ;; A pair is kept when its id is below 42 or its name contains "BXD"; every other
+ ;; pair is mapped to #f and then dropped by the outer filter.
+ ;; NOTE(review): the meaning of the id < 42 cutoff is not visible here -- confirm.
+ (filter (lambda (l) l)
+ (map (lambda (l)
+ (let [(id (car l))
+ (name (cdr l))]
+ (if (or (< id 42) (string-contains name "BXD"))
+ l
+ #f))
+ ) (strain-id-names 1 #:used-for-mapping? used-for-mapping?))))
diff --git a/gn/db/mysql.scm b/gn/db/mysql.scm
index 66f28db..ccd414a 100644
--- a/gn/db/mysql.scm
+++ b/gn/db/mysql.scm
@@ -29,8 +29,8 @@
(define (db-open)
(begin
- (display "===> OPENING DB")
- (newline)
+ ;; (display "===> OPENING DB")
+ ;; (newline)
(let [(db (dbi-open "mysql" "webqtlout:webqtlout:db_webqtl:tcp:127.0.0.1:3306"))]
(ensure db)
db
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
index 68f0711..db12eed 100755
--- a/scripts/precompute/list-traits-to-compute.scm
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -35,6 +35,27 @@ Now list the next 1000 trait IDs:
. .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+The current logic is to list all datasets that contain a BXD
+strain. (bxd-strain-id-names #:used-for-mapping? #t) fetches all BXD
+strain ids and names listed in GN. Note that this list differs from
+the actual genotype file.
+
+To find the StrainId in a dataset:
+
+MariaDB [db_webqtl]> SELECT StrainId,value from ProbeSetData WHERE Id=115467;
++----------+---------+
+| StrainId | value |
++----------+---------+
+| 1 | 9.47169 |
+| 2 | 9.21621 |
+| 3 | 9.728 |
+| 4 | 9.28976 |
+| 5 | 9.55523 |
+| 6 | 9.63562 ...
+
+To speed things up a little, we batch the traits into one query and check whether a BXD is part of each.
+When that is the case we might as well write the phenotype file, because we already have the trait values.
+
!#
(use-modules (dbi dbi)
@@ -49,51 +70,29 @@ Now list the next 1000 trait IDs:
(srfi srfi-1)
)
-
-
(call-with-db
(lambda (db)
(begin
- (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
- (define (get-trait db probeset-id)
- (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
- (get-row db))
- (define (run-list-traits-to-compute db prev-id count)
- (let [(hits (get-precompute-hits db prev-id count))]
- (for-each (lambda (hit)
- (let* [(data-id (assoc-ref hit "DataId"))
- (data-id-str (int-to-string data-id))
- ;; (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
- ;; (probeset-id (assoc-ref hit "ProbeSetId"))
- ;; (trait (get-trait db probeset-id))
- ;; (trait-name (assoc-ref trait "Name"))
- ;; (name (dataset-name db probesetfreeze-id))
- ]
- (display hit)
- (newline)
- ;; ---- Get strains and phenotypes for this dataset
- (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
- (define id_traits (get-rows-apply db
- (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
- '()))
- ;; ---- Now we need to make sure that all strains belong to BXD
- (define non-bxd (fold
- (lambda (strain lst)
- (let* [(id (car strain))
- (name (assoc id bxd-strains))]
- (if name
- lst
- (append lst `(,name)))))
-
- '()
- id_traits))
- (define traits (map
- (lambda (t)
- (match t
- ((id . value) (cons (assoc-ref bxd-strains id) value)
- )))
- id_traits))
- #t))
- hits)))
- (run-list-traits-to-compute db 0 5) ;; start precompute
- )))
+ ;; Fetch the BXD (id . name) pairs once up front; the only remaining reference
+ ;; to bxd-strains below is the commented-out (write bxd-strains).
+ (let [(bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))]
+ ;; Ask get-precompute-hits for hits (passing prev-id and count), collect the
+ ;; DataId of every hit and build one ProbeSetData query covering all of them.
+ (define (run-list-traits-to-compute db prev-id count)
+ (let* [(hits (get-precompute-hits db prev-id count))
+ (data-ids (map (lambda (hit)
+ (let* [(data-id (assoc-ref hit "DataId"))
+ ; (data-id-str (int-to-string data-id))
+ ]
+ data-id))
+ hits))
+ ;; (data-ids-query (fold (lambda (id query) "" (string-append query "Id=" (int-to-string id) " OR ")) "" data-ids))
+ ;; Turn every id into an "Id=<n>" term and join the terms with " OR " to form the WHERE clause.
+ (data-str-ids (map (lambda (id) (string-append "Id=" (int-to-string id))) data-ids))
+ (data-ids-query (string-join data-str-ids " OR "))
+ (query (string-append "SELECT Id,StrainId,value FROM ProbeSetData WHERE " data-ids-query))
+ ]
+ ; (display data-str-ids)
+ ; (display data-ids-query)
+ ; (display data-ids)
+ ;; The query is only printed here; nothing in this procedure executes it.
+ (display query)
+ ))
+ ; (display data-ids)
+ (run-list-traits-to-compute db 0 50) ;; start precompute
+ ;; (write bxd-strains)
+ ))))