From a1511cae7937ea60abdaf56e759f1066c2e83b13 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 5 May 2024 13:35:33 +0200
Subject: List data-ids (trait ids) for a batch

---
 gn/data/strains.scm                           |  14 ++--
 scripts/precompute/list-traits-to-compute.scm | 105 ++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 6 deletions(-)
 create mode 100755 scripts/precompute/list-traits-to-compute.scm

diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index 39fe71e..f1348ac 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -17,21 +17,23 @@
             bxd-strain-id-names
             ))
 
-(define* (strain-id-names inbred-set #:key (map? #f))
+(define* (strain-id-names inbred-set #:key (used-for-mapping? #f))
   "Return assoc list of tuples of strain id+names:
    ((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)...
 
-map? will say whether the strains/individuals are used for mapping.
+used-for-mapping? will say whether the strains/individuals are used for mapping.
 "
   (call-with-db
    (lambda (db)
      (dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
-                                  (if map?
+                                  (if used-for-mapping?
                                       " AND Used_for_mapping='Y'"
                                       "")
                                   " ORDER BY StrainId;"))
      (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
 
-(define* (bxd-strain-id-names #:key (map? #f))
-  "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD"
-  (strain-id-names 1))
+(define* (bxd-strain-id-names #:key (used-for-mapping? #f))
+  "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
+
+used-for-mapping? will say whether the strains/individuals are used for mapping."
+  (strain-id-names 1 #:used-for-mapping? used-for-mapping?)) ;; 1 is passed as the inbred-set id; forward the flag instead of dropping it
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
new file mode 100755
index 0000000..3cba292
--- /dev/null
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -0,0 +1,105 @@
+#!
+
+Step p1 lists traits that need to be computed.
+
+This is a script that fetches trait IDs from the GN database
+directly. The direct database calls are used right now and ought to be
+turned into a REST API.
+
+Run from base dir with
+
+  . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm
+
+You may want to forward a mysql port if there is no DB locally
+
+  ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
+
+test connection with mysql client:
+
+  mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
+
+to create a clean slate, for now, update Locus_old with
+
+  update ProbeSetXRef set Locus_old=NULL;
+
+you should see
+
+  MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not NULL limit 5;
+  +------------------+
+  | count(Locus_old) |
+  +------------------+
+  |                0 |
+  +------------------+
+
+Now list the next 1000 trait IDs:
+
+  . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+(Note: --next is not parsed by this script yet; the batch size is hard-coded in the call at the bottom.)
+!#
+
+(use-modules (dbi dbi)
+             (gn db mysql)
+             (gn data dataset)
+             (gn data hits)
+             (gn data strains)
+             (gn util convert)
+             (gn runner gemma)
+             ; (rnrs base)
+             (ice-9 match)
+             (srfi srfi-1)
+             )
+;; Walk up to count+1 precompute hits, starting from prev-id 0. Each hit is
+;; marked GEMMA-START when every strain in its data is listed in bxd-strains,
+;; and NON-BXD otherwise so that it is not picked up again.
+(call-with-db
+ (lambda (db)
+   (begin
+     (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
+     (define (get-trait db probeset-id) ;; fetch the ProbeSet row for probeset-id
+       (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+       (get-row db))
+     (define (run-list-traits-to-compute db prev-id count) ;; process the hit returned for prev-id, then recurse on its DataId while count > 0
+       (let [(hit (get-precompute-hit db prev-id))]
+         (if hit
+             (let* [(data-id (assoc-ref hit "DataId"))
+                    (data-id-str (int-to-string data-id))
+                    (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
+                    (probeset-id (assoc-ref hit "ProbeSetId"))
+                    (trait (get-trait db probeset-id))
+                    (trait-name (assoc-ref trait "Name"))
+                    (name (dataset-name db probesetfreeze-id))
+                    ]
+               (display hit)
+               (newline)
+               ;; ---- Get strains and phenotypes for this dataset
+               (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
+               (define id_traits (get-rows-apply db
+                                                 (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
+                                                 '()))
+               ;; ---- Now we need to make sure that all strains belong to BXD
+               (define non-bxd (fold
+                                (lambda (strain lst)
+                                  (let* [(id (car strain))
+                                         (name (assoc id bxd-strains))]
+                                    (if name
+                                        lst
+                                        (append lst `(,id))))) ;; record the offending strain id (name is #f in this branch)
+                                ;; fold seed: no non-BXD strains seen yet
+                                '()
+                                id_traits))
+               (define traits (map
+                               (lambda (t)
+                                 (match t
+                                   ((id . value) (cons (assoc-ref bxd-strains id) value)
+                                    )))
+                               id_traits))
+               (if (null? non-bxd)
+                   (set-precompute-hit-status! db data-id-str "GEMMA-START")
+                   ;; disable precompute if non-bxd, for now, so it won't try again
+                   (set-precompute-hit-status! db data-id-str "NON-BXD"))
+               (if (> count 0)
+                   (run-list-traits-to-compute db data-id (- count 1)) ;; next round
+                   )
+               ))))
+     (run-list-traits-to-compute db 0 5) ;; start precompute
+)))
-- 
cgit v1.2.3