aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorPjotr Prins2024-05-05 13:35:33 +0200
committerPjotr Prins2024-05-05 13:35:33 +0200
commita1511cae7937ea60abdaf56e759f1066c2e83b13 (patch)
treed589baa53aaa4ce769da123197cb1a959f210fa7 /scripts
parente192578a501d09f2405db41a6ca541eb57db3368 (diff)
downloadgn-guile-a1511cae7937ea60abdaf56e759f1066c2e83b13.tar.gz
List data-ids (trait ids) for a batch
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm105
1 files changed, 105 insertions, 0 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
new file mode 100755
index 0000000..3cba292
--- /dev/null
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -0,0 +1,105 @@
+#!
+
+Step p1 lists traits that need to be computed.
+
+This is a script that fetches trait IDs from the GN database
+directly. The direct database calls are used right now and ought to be
+turned into a REST API.
+
+Run from base dir with
+
+ . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm
+
+You may want to forward a mysql port if there is no DB locally
+
+ ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
+
+test connection with mysql client:
+
+ mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
+
+to create a clean slate, for now, update Locus_old with
+
+ update ProbeSetXRef set Locus_old=NULL;
+
+you should see
+
+ MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not NULL limit 5;
+ +------------------+
+ | count(Locus_old) |
+ +------------------+
+ | 0 |
+ +------------------+
+
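+Now list the next 1000 trait IDs (note: --next is not parsed by this script yet; the batch size is hard-coded in the call at the bottom):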
+
+ . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+
+!#
+
+(use-modules (dbi dbi)
+ (gn db mysql)
+ (gn data dataset)
+ (gn data hits)
+ (gn data strains)
+ (gn util convert)
+ (gn runner gemma)
+ ; (rnrs base)
+ (ice-9 match)
+ (srfi srfi-1)
+ )
+
+
+
+;; Entry point: with an open GN database handle, walk a small batch of
+;; precompute hits, print each hit and record in the database whether it
+;; is handed to GEMMA ("GEMMA-START") or skipped ("NON-BXD").
+(call-with-db
+ (lambda (db)
+   ;; Alist keyed by strain id (it is searched with assoc/assoc-ref below),
+   ;; requested for the BXD strains that are used for mapping.  Computed
+   ;; once and shared by every round.
+   (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
+
+   ;; Query the ProbeSet table for PROBESET-ID and return whatever
+   ;; `get-row' yields for that query.
+   (define (get-trait db probeset-id)
+     (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+     (get-row db))
+
+   ;; Return the strain ids of the (StrainId . value) pairs in ID-TRAITS
+   ;; that have no entry in `bxd-strains'.  The previous fold appended the
+   ;; failed lookup result (always #f) instead of the offending id.
+   (define (non-bxd-strain-ids id-traits)
+     (map car
+          (remove (lambda (strain) (assoc (car strain) bxd-strains))
+                  id-traits)))
+
+   ;; Process at most COUNT precompute hits.  Each round asks
+   ;; `get-precompute-hit' for the hit following PREV-ID, prints it, loads
+   ;; its (StrainId . value) rows from ProbeSetData and sets the hit status
+   ;; to "GEMMA-START" when every strain is a BXD strain, or to "NON-BXD"
+   ;; otherwise.  The next round continues from the DataId just handled.
+   ;; Stops early when no further hit is returned.
+   (define (run-list-traits-to-compute db prev-id count)
+     ;; Test COUNT before fetching, so that exactly COUNT hits are handled
+     ;; (testing after the work handled COUNT+1 hits).
+     (when (> count 0)
+       (let [(hit (get-precompute-hit db prev-id))]
+         (when hit
+           ;; NOTE(review): trait, trait-name, name and (further down) traits
+           ;; are computed but not used anywhere else in this script yet.
+           (let* [(data-id (assoc-ref hit "DataId"))
+                  (data-id-str (int-to-string data-id))
+                  (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
+                  (probeset-id (assoc-ref hit "ProbeSetId"))
+                  (trait (get-trait db probeset-id))
+                  (trait-name (assoc-ref trait "Name"))
+                  (name (dataset-name db probesetfreeze-id))]
+             (display hit)
+             (newline)
+             ;; ---- Get strains and phenotypes for this dataset
+             (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
+             (let* [(id_traits (get-rows-apply db
+                                               (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
+                                               '()))
+                    ;; ---- Now we need to make sure that all strains belong to BXD
+                    (non-bxd (non-bxd-strain-ids id_traits))
+                    ;; Same rows with the strain id replaced by its bxd-strains entry
+                    (traits (map (lambda (t)
+                                   (match t
+                                     ((id . value) (cons (assoc-ref bxd-strains id) value))))
+                                 id_traits))]
+               (if (null? non-bxd)
+                   (set-precompute-hit-status! db data-id-str "GEMMA-START")
+                   ;; disable precompute if non-bxd, for now, so it won't try again
+                   (set-precompute-hit-status! db data-id-str "NON-BXD"))
+               ;; next round, continuing after the hit we just handled
+               (run-list-traits-to-compute db data-id (- count 1))))))))
+
+   (run-list-traits-to-compute db 0 5))) ;; start precompute