#!

Step p1 lists traits that need to be computed.

This is a script that fetches trait IDs from the GN database
directly. The direct database calls are used right now and ought to be
turned into a REST API.

Run from base dir with

    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm

You may want to forward a mysql port if there is no DB locally

    ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org

test connection with mysql client:

    mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"

to create a clean slate, for now, update Locus_old with (this deliberately
touches every row, hence no WHERE clause)

    update ProbeSetXRef set Locus_old=NULL;

you should see (note the `is not null`: a comparison such as
`Locus_old != NULL` is never true in SQL, so it would report 0 no matter
what the table contains)

    MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not null limit 5;
    +------------------+
    | count(Locus_old) |
    +------------------+
    |                0 |
    +------------------+

Now list the next 1000 trait IDs:

    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000

NOTE(review): no command line parsing is visible in this version of the
script; the batch size is the literal passed to run-list-traits-to-compute
at the bottom of the file. Confirm before relying on --next.

!#

;; Provenance: commit a1511cae7937ea60abdaf56e759f1066c2e83b13 by Pjotr Prins,
;; Sun, 5 May 2024, "List data-ids (trait ids) for a batch"
;; (scripts/precompute/list-traits-to-compute.scm).

(use-modules (dbi dbi)
             (gn db mysql)
             (gn data dataset)
             (gn data hits)
             (gn data strains)
             (gn util convert)
             (gn runner gemma)
             ; (rnrs base)
             (ice-9 match)
             (srfi srfi-1)
             )

(call-with-db
 (lambda (db)
   ;; Strain table used for the BXD membership test. It is used below as an
   ;; association list keyed by StrainId (see the assoc / assoc-ref calls).
   (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))

   ;; Query the ProbeSet row for PROBESET-ID and return the result of get-row.
   (define (get-trait db probeset-id)
     (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
     (get-row db))

   ;; Return a list of (StrainId . value) pairs from ProbeSetData for the
   ;; data id given as the string DATA-ID-STR.
   (define (strain-values db data-id-str)
     (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
     (get-rows-apply db
                     (lambda (r)
                       (cons (assoc-ref r "StrainId") (assoc-ref r "value")))
                     '()))

   ;; Return the StrainIds in ID-VALUES that have no entry in bxd-strains, in
   ;; their original order. The previous fold appended the failed lookup
   ;; result (always #f) instead of the strain id, and rebuilt the list with
   ;; append on every step.
   (define (non-bxd-strain-ids id-values)
     (remove (lambda (strain-id) (assoc strain-id bxd-strains))
             (map car id-values)))

   ;; Process one precompute hit and recurse to the next.
   ;;
   ;; DB      - open database handle
   ;; PREV-ID - data id handed to get-precompute-hit (presumably "fetch the
   ;;           hit following this id"; defined in another module - confirm)
   ;; COUNT   - number of further rounds to run after this one
   ;;
   ;; Each round prints the hit, loads its strain values and marks the hit
   ;; "GEMMA-START" when every strain is in bxd-strains, otherwise "NON-BXD".
   ;; Nothing happens when get-precompute-hit returns #f.
   (define (run-list-traits-to-compute db prev-id count)
     (let ((hit (get-precompute-hit db prev-id)))
       (when hit
         (let* ((data-id (assoc-ref hit "DataId"))
                (data-id-str (int-to-string data-id))
                (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
                (probeset-id (assoc-ref hit "ProbeSetId"))
                ;; NOTE(review): trait, trait-name and name are looked up but
                ;; not consumed yet; kept so the database calls made per hit
                ;; stay the same as before.
                (trait (get-trait db probeset-id))
                (trait-name (assoc-ref trait "Name"))
                (name (dataset-name db probesetfreeze-id)))
           (display hit)
           (newline)
           ;; ---- Get strains and phenotypes for this dataset
           (let* ((id-traits (strain-values db data-id-str))
                  ;; ---- Now we need to make sure that all strains belong to BXD
                  (non-bxd (non-bxd-strain-ids id-traits))
                  ;; (strain-name . value) pairs; the name is #f for a strain
                  ;; missing from bxd-strains.
                  ;; NOTE(review): not consumed yet in this version.
                  (traits (map (lambda (t)
                                 (match t
                                   ((id . value)
                                    (cons (assoc-ref bxd-strains id) value))))
                               id-traits)))
             (if (null? non-bxd)
                 (set-precompute-hit-status! db data-id-str "GEMMA-START")
                 ;; disable precompute if non-bxd, for now, so it won't try again
                 (set-precompute-hit-status! db data-id-str "NON-BXD"))
             (when (> count 0)
               (run-list-traits-to-compute db data-id (- count 1)))))))) ;; next round

   ;; start precompute
   ;; NOTE(review): one hit is handled per call and the recursion continues
   ;; while count > 0, so a count of 5 handles up to 6 hits - confirm intent.
   (run-list-traits-to-compute db 0 5)))