about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
authorPjotr Prins2024-05-05 13:35:33 +0200
committerPjotr Prins2024-05-05 13:35:33 +0200
commita1511cae7937ea60abdaf56e759f1066c2e83b13 (patch)
treed589baa53aaa4ce769da123197cb1a959f210fa7 /scripts
parente192578a501d09f2405db41a6ca541eb57db3368 (diff)
downloadgn-guile-a1511cae7937ea60abdaf56e759f1066c2e83b13.tar.gz
List data-ids (trait ids) for a batch
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm105
1 files changed, 105 insertions, 0 deletions
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
new file mode 100755
index 0000000..3cba292
--- /dev/null
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -0,0 +1,105 @@
+#!
+
+Step p1 lists traits that need to be computed.
+
+This is a script that fetches trait IDs from the GN database
+directly. The direct database calls are used right now and ought to be
+turned into a REST API.
+
+Run from base dir with
+
+    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm
+
+You may want to forward a mysql port if there is no DB locally
+
+    ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
+
+test connection with mysql client:
+
+    mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
+
+to create a clean slate, for now, update Locus_old with
+
+    update ProbeSetXRef set Locus_old=NULL;
+
+you should see
+
+    MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not NULL limit 5;
+    +------------------+
+    | count(Locus_old) |
+    +------------------+
+    |                0 |
+    +------------------+
+
+Now list the next trait IDs (NOTE: --next is not parsed yet; the batch size is hard-coded in the code below):
+
+    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+
+!#
+
+(use-modules (dbi dbi)
+             (gn db mysql)
+             (gn data dataset)
+             (gn data hits)
+             (gn data strains)
+             (gn util convert)
+             (gn runner gemma)
+             ; (rnrs base)
+             (ice-9 match)
+             (srfi srfi-1)
+             )
+
+
+
+;; Open the database and walk a small batch of precompute hits, marking
+;; each one as "GEMMA-START" or "NON-BXD" via set-precompute-hit-status!.
+(call-with-db
+ (lambda (db)
+   (begin
+     ;; Alist keyed by strain id (looked up with assoc / assoc-ref below).
+     (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
+     ;; Query the ProbeSet row for probeset-id and return what get-row yields.
+     (define (get-trait db probeset-id)
+       (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+       (get-row db))
+     ;; Fetch the hit that get-precompute-hit returns for prev-id, set its
+     ;; status, then recurse with that hit's DataId while count > 0.
+     ;; NOTE(review): a count of 5 handles up to 6 hits -- confirm intent.
+     (define (run-list-traits-to-compute db prev-id count)
+       (let [(hit (get-precompute-hit db prev-id))]
+         (if hit ; a false hit ends the walk
+             (let* [(data-id (assoc-ref hit "DataId"))
+                    (data-id-str (int-to-string data-id))
+                    (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
+                    (probeset-id (assoc-ref hit "ProbeSetId"))
+                    ;; trait, trait-name and name are not used further down;
+                    ;; they are kept so the same lookups still run
+                    (trait (get-trait db probeset-id))
+                    (trait-name (assoc-ref trait "Name"))
+                    (name (dataset-name db probesetfreeze-id))]
+               (display hit)
+               (newline)
+               ;; ---- Get strains and phenotypes for this dataset
+               (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
+               (define id_traits (get-rows-apply db
+                                                 (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
+                                                 '()))
+               ;; ---- Now we need to make sure that all strains belong to BXD:
+               ;; collect the strain ids that have no entry in bxd-strains
+               (define non-bxd (remove (lambda (id) (assoc id bxd-strains))
+                                       (map car id_traits)))
+               ;; pairs each value with the bxd-strains lookup of its strain id
+               (define traits (map
+                               (lambda (t)
+                                 (match t
+                                   ((id . value) (cons (assoc-ref bxd-strains id) value))))
+                               id_traits))
+               (if (null? non-bxd)
+                   (set-precompute-hit-status! db data-id-str "GEMMA-START")
+                   ;; disable precompute if non-bxd, for now, so it won't try again
+                   (set-precompute-hit-status! db data-id-str "NON-BXD"))
+               ;; next round: continue from the hit that was just handled
+               (if (> count 0)
+                   (run-list-traits-to-compute db data-id (- count 1)))))))
+     (run-list-traits-to-compute db 0 5) ;; start precompute
+     )))