aboutsummaryrefslogtreecommitdiff
path: root/scripts/precompute/list-traits-to-compute.scm
blob: 68f0711bef1fdc14cd67695877a0b407097a3277 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!

Step p1 lists traits that need to be computed.

This is a script that fetches trait IDs from the GN database
directly. The direct database calls are used right now and ought to be
turned into a REST API.

Run from base dir with

    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm

You may want to forward a mysql port if there is no DB locally

    ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org

test connection with mysql client:

    mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"

to create a clean slate, for now, update Locus_old with

    update ProbeSetXRef set Locus_old=NULL;

you should see

    MariaDB [db_webqtl]> select count(Locus_old) from ProbeSetXRef where Locus_old is not NULL limit 5;
    +------------------+
    | count(Locus_old) |
    +------------------+
    |                0 |
    +------------------+

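Now list the next 1000 trait IDs (note: the `--next` option is not parsed
yet; the script currently hard-codes prev-id 0 and count 5):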

    . .guix-shell -- guile -L . -s ./scripts/precompute/list-traits-to-compute.scm --next 1000

!#

(use-modules (dbi dbi)
             (gn db mysql)
             (gn data dataset)
             (gn data hits)
             (gn data strains)
             (gn util convert)
             (gn runner gemma)
             ; (rnrs base)
             (ice-9 match)
             (srfi srfi-1)
             )



;; Script entry point: with a database handle from call-with-db, list the
;; first precompute hits and fetch the per-strain values for each of them.
(call-with-db
 (lambda (db)
   ;; Strains usable for mapping.  It is looked up by strain ID below, so it
   ;; is presumably an alist of (strain-id . strain-name) -- TODO confirm
   ;; against (gn data strains).
   (define bxd-strains (bxd-strain-id-names #:used-for-mapping? #t))

   ;; Query the ProbeSet table for PROBESET-ID and return the result of
   ;; (get-row db).  Not called at the moment; it is only referenced from the
   ;; commented-out bindings in run-list-traits-to-compute.
   (define (get-trait db probeset-id)
     (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
     (get-row db))

   ;; Fetch hits with (get-precompute-hits db PREV-ID COUNT), print every hit
   ;; on its own line and load the (StrainId . value) pairs of its DataId.
   (define (run-list-traits-to-compute db prev-id count)
     (let [(hits (get-precompute-hits db prev-id count))]
       (for-each
        (lambda (hit)
          (let* [(data-id (assoc-ref hit "DataId"))
                 (data-id-str (int-to-string data-id))
                 ;; (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
                 ;; (probeset-id (assoc-ref hit "ProbeSetId"))
                 ;; (trait (get-trait db probeset-id))
                 ;; (trait-name (assoc-ref trait "Name"))
                 ;; (name (dataset-name db probesetfreeze-id))
                 ]
            (display hit)
            (newline)
            ;; ---- Get strains and phenotypes for this dataset
            (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
            ;; The values are bound with let* instead of internal defines
            ;; placed after expressions, which only Guile 3.0+ accepts.
            (let* [(id_traits (get-rows-apply db
                                              (lambda (r)
                                                `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
                                              '()))
                   ;; ---- Now we need to make sure that all strains belong to BXD
                   ;; Strain IDs that have no entry in bxd-strains.  The
                   ;; previous fold appended the failed lookup result (#f)
                   ;; instead of the ID, so offenders could not be identified.
                   (non-bxd (filter-map
                             (lambda (strain)
                               (let [(id (car strain))]
                                 (and (not (assoc id bxd-strains)) id)))
                             id_traits))
                   ;; Same pairs keyed by the bxd-strains lookup result
                   ;; instead of the strain ID (#f when the ID is unknown).
                   (traits (map
                            (lambda (t)
                              (match t
                                ((id . value) (cons (assoc-ref bxd-strains id) value))))
                            id_traits))]
              ;; NOTE(review): non-bxd and traits are computed but not consumed
              ;; yet; nothing is reported or skipped for non-BXD strains.
              #t)))
        hits)))

   (run-list-traits-to-compute db 0 5))) ;; start precompute