about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2024-05-05 13:35:33 +0200
committerPjotr Prins2024-05-05 13:35:33 +0200
commita1511cae7937ea60abdaf56e759f1066c2e83b13 (patch)
treed589baa53aaa4ce769da123197cb1a959f210fa7
parente192578a501d09f2405db41a6ca541eb57db3368 (diff)
downloadgn-guile-a1511cae7937ea60abdaf56e759f1066c2e83b13.tar.gz
List data-ids (trait ids) for a batch
-rw-r--r--gn/data/strains.scm12
-rwxr-xr-xscripts/precompute/list-traits-to-compute.scm105
2 files changed, 112 insertions, 5 deletions
diff --git a/gn/data/strains.scm b/gn/data/strains.scm
index 39fe71e..f1348ac 100644
--- a/gn/data/strains.scm
+++ b/gn/data/strains.scm
@@ -17,21 +17,23 @@
             bxd-strain-id-names
             ))
 
-(define* (strain-id-names inbred-set #:key (map? #f))
+(define* (strain-id-names inbred-set #:key (used-for-mapping? #f))
   "Return assoc list of tuples of strain id+names:
    ((4 . BXD1) (5 . BXD2) (6 . BXD5) (7 . BXD6)...
 
-map? will say whether the strains/individuals are used for mapping.
+If used-for-mapping? is true, only strains/individuals with Used_for_mapping='Y' are returned.
 "
   (call-with-db
    (lambda (db)
      (dbi-query db (string-append "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = " (int-to-string inbred-set)
-                                  (if map?
+                                  (if used-for-mapping?
                                       " AND Used_for_mapping='Y'"
                                       "")
                                   " ORDER BY StrainId;"))
       (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))))
 
-(define* (bxd-strain-id-names #:key (map? #f))
-  "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD"
+(define* (bxd-strain-id-names #:key (used-for-mapping? #f))
+  "Return assoc list of tuples of strain id + names. Same as strain-id-names, but just for the BXD
+
+used-for-mapping? is passed on to strain-id-names: when true, only strains/individuals used for mapping are returned."
-  (strain-id-names 1))
+  (strain-id-names 1 #:used-for-mapping? used-for-mapping?))
diff --git a/scripts/precompute/list-traits-to-compute.scm b/scripts/precompute/list-traits-to-compute.scm
new file mode 100755
index 0000000..3cba292
--- /dev/null
+++ b/scripts/precompute/list-traits-to-compute.scm
@@ -0,0 +1,105 @@
+#!
+
+Step p1 lists traits that need to be computed.
+
+This is a script that fetches trait IDs from the GN database
+directly. The direct database calls are used right now and ought to be
+turned into a REST API.
+
+Run from base dir with
+
+    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm
+
+You may want to forward a mysql port if there is no DB locally
+
+    ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org
+
+test connection with mysql client:
+
+    mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
+
+to create a clean slate, for now, update Locus_old with
+
+    update ProbeSetXRef set Locus_old=NULL;
+
+you should see
+
+    MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old IS NOT NULL limit 5;
+    +------------------+
+    | count(Locus_old) |
+    +------------------+
+    |                0 |
+    +------------------+
+
+Now list the next 1000 trait IDs (note: --next is not parsed by this script yet; the batch size is hard-coded in the final call):
+
+    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000
+
+!#
+
+(use-modules (dbi dbi)
+             (gn db mysql)
+             (gn data dataset)
+             (gn data hits)
+             (gn data strains)
+             (gn util convert)
+             (gn runner gemma)
+             ; (rnrs base)
+             (ice-9 match)
+             (srfi srfi-1)
+             )
+
+
+
+(call-with-db
+ (lambda (db)
+   (begin
+     ;; Assoc list of (strain-id . name) for the BXD strains used for mapping.
+     (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))
+     ;; Query the ProbeSet table for PROBESET-ID and return the result of get-row.
+     (define (get-trait db probeset-id)
+       (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
+       (get-row db))
+     ;; Process precompute hits one at a time, starting from PREV-ID: display
+     ;; the hit, fetch its ProbeSetData rows and set its status to
+     ;; "GEMMA-START" when every strain id is found in bxd-strains, or to
+     ;; "NON-BXD" otherwise.  Stops when get-precompute-hit returns #f or
+     ;; when COUNT is no longer positive.
+     ;; NOTE(review): COUNT is tested after a hit has been processed, so an
+     ;; initial COUNT of n handles up to n+1 hits -- confirm this is intended.
+     (define (run-list-traits-to-compute db prev-id count)
+       (let [(hit (get-precompute-hit db prev-id))]
+         (when hit
+           (let* [(data-id (assoc-ref hit "DataId"))
+                  (data-id-str (int-to-string data-id))
+                  (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
+                  (probeset-id (assoc-ref hit "ProbeSetId"))
+                  ;; trait, trait-name and name are not used further down yet
+                  (trait (get-trait db probeset-id))
+                  (trait-name (assoc-ref trait "Name"))
+                  (name (dataset-name db probesetfreeze-id))]
+             (display hit)
+             (newline)
+             ;; ---- Get strains and phenotypes for this dataset
+             (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
+             (define id_traits (get-rows-apply db
+                                               (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
+                                               '()))
+             ;; ---- Now we need to make sure that all strains belong to BXD:
+             ;; collect the strain ids that are missing from bxd-strains
+             ;; (the ids themselves, so the list is usable for reporting)
+             (define non-bxd (remove (lambda (id) (assoc id bxd-strains))
+                                     (map car id_traits)))
+             ;; (strain-name . value) pairs; not used further down yet
+             (define traits (map
+                             (lambda (t)
+                               (match t
+                                 ((id . value) (cons (assoc-ref bxd-strains id) value))))
+                             id_traits))
+             (if (null? non-bxd)
+                 (set-precompute-hit-status! db data-id-str "GEMMA-START")
+                 ;; disable precompute if non-bxd, for now, so it won't try again
+                 (set-precompute-hit-status! db data-id-str "NON-BXD"))
+             (when (> count 0)
+               (run-list-traits-to-compute db data-id (- count 1))))))) ;; next round
+     (run-list-traits-to-compute db 0 5) ;; start precompute
+     )))