aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gn/data/strains.scm12
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm105
2 files changed, 112 insertions, 5 deletions
diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index 39fe71e..f1348ac 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -17,21 +17,23 @@
bxd-strain-id-names
))
-(define* (strain-id-names inbred-set #:key (map? #f))
+(define* (strain-id-names inbred-set #:key (used-for-mapping? #f))
"Return assoc list of tuples of strain id+names:
((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)...
-map? will say whether the strains/individuals are used for mapping.
+When used-for-mapping? is true, only strains/individuals with Used_for_mapping='Y' are returned.
"
(call-with-db
(lambda (db)
(dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
- (if map?
+ (if used-for-mapping?
" AND Used_for_mapping='Y'"
"")
" ORDER BY StrainId;"))
(get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
-(define* (bxd-strain-id-names #:key (map? #f))
- "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD"
- (strain-id-names 1))
+(define* (bxd-strain-id-names #:key (used-for-mapping? #f))
+ "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
+(inbred set 1).
+used-for-mapping? is passed on to strain-id-names to keep only the strains/individuals used for mapping."
+ (strain-id-names 1 #:used-for-mapping? used-for-mapping?))
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
new file mode 100755
index 0000000..3cba292
--- /dev/null
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -0,0 +1,105 @@
+#!
+
+Step p1 lists traits that need to be computed.
+
+This is a script that fetches trait IDs from the GN database
+directly. The direct database calls are used right now and ought to be
+turned into a REST API.
+
+Run from base dir with
+
+ . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm
+
+You may want to forward a mysql port if there is no DB locally
+
+ ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
+
+test connection with mysql client:
+
+ mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
+
+to create a clean slate, for now, update Locus_old with
+
+ update ProbeSetXRef set Locus_old=NULL;
+
+you should see
+
+ MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not NULL limit 5;
+ +------------------+
+ | count(Locus_old) |
+ +------------------+
+ | 0 |
+ +------------------+
+
+Now list the next 1000 trait IDs:
+
+ . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+
+!#
+
+(use-modules (dbi dbi)
+ (gn db mysql)
+ (gn data dataset)
+ (gn data hits)
+ (gn data strains)
+ (gn util convert)
+ (gn runner gemma)
+ ; (rnrs base)
+ (ice-9 match)
+ (srfi srfi-1)
+ )
+
+
+
+(call-with-db
+ (lambda (db)
+ (begin
+ (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
+ (define (get-trait db probeset-id) ; query ProbeSet for the row whose Id is probeset-id
+ (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+ (get-row db)) ; result of get-row on db after that query -- presumably the single matching row; confirm in (gn db mysql)
+     ;; Process the hit that get-precompute-hit returns for PREV-ID: mark it
+     ;; GEMMA-START when every StrainId of its data is in bxd-strains, NON-BXD
+     ;; otherwise, then recurse with the hit's DataId as the new PREV-ID.
+     ;; COUNT is the number of further hits to visit after this one, so COUNT n
+     ;; visits at most n+1 hits; the walk also stops when no hit is returned.
+     (define (run-list-traits-to-compute db prev-id count)
+       (let [(hit (get-precompute-hit db prev-id))]
+         (when hit ;; a false hit ends the walk
+           (let* [(data-id (assoc-ref hit "DataId"))
+                  (data-id-str (int-to-string data-id))
+                  (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
+                  (probeset-id (assoc-ref hit "ProbeSetId"))
+                  ;; trait, trait-name and name are fetched but not used yet
+                  (trait (get-trait db probeset-id))
+                  (trait-name (assoc-ref trait "Name"))
+                  (name (dataset-name db probesetfreeze-id))]
+             (display hit)
+             (newline)
+             ;; ---- Get strains and phenotypes for this dataset
+             (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
+             ;; The rows are bound in a nested let* instead of internal defines, so
+             ;; no definition follows an expression inside a body.
+             (let* [(id_traits (get-rows-apply db
+                                 (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
+                                 '()))
+                    ;; ---- Now we need to make sure that all strains belong to BXD:
+                    ;; non-bxd lists the StrainIds that are missing from bxd-strains
+                    (non-bxd (map car
+                                  (remove (lambda (strain) (assoc (car strain) bxd-strains))
+                                          id_traits)))
+                    ;; (strain name . value) pairs; computed but not used yet
+                    (traits (map
+                             (lambda (t)
+                               (match t
+                                 ((id . value) (cons (assoc-ref bxd-strains id) value))))
+                             id_traits))]
+               (if (null? non-bxd)
+                   (set-precompute-hit-status! db data-id-str "GEMMA-START")
+                   ;; disable precompute if non-bxd, for now, so it won't try again
+                   (set-precompute-hit-status! db data-id-str "NON-BXD"))
+               ;; next round, continuing after this hit's DataId
+               (when (> count 0)
+                 (run-list-traits-to-compute db data-id (- count 1))))))))
+ (run-list-traits-to-compute db 0 5) ;; start precompute
+)))