aboutsummaryrefslogtreecommitdiff
path: root/scripts/precompute/precompute-hits.scm
blob: a18b33db75be57ea594a675309aa3abe5e1c4c95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#! Precompute hits

This is the obsolete script to compute GEMMA directly from the DB. We have split out
this step into fetching traits and running gemma without DB. This allows scaling up
on HPC etc.

Run from base dir with

. .guix-shell -- guile -L . -s ./scripts/precompute/precompute-hits.scm

and with some extra paths

. .guix-shell ruby --expose=/home/wrk/iwrk/opensource/code/genetics/gemma-wrapper/=/gemma-wrapper --expose=/home/wrk/iwrk/opensource/code/genetics/gemma/=/gemma -- env TMPDIR=tmp guile -L . -s ./scripts/precompute/precompute-hits.scm

You may want to forward a mysql port if there is no DB locally

ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org

!#

(use-modules (dbi dbi)
             (gn db mysql)
             (gn data dataset)
             (gn data hits)
             (gn data strains)
             (gn util convert)
             (gn runner gemma)
             ; (rnrs base)
             (ice-9 match)
             (srfi srfi-1)
             )


;; potentially you want to test connection with mysql client:
;;
;;    mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;"
;;
;; for now update Locus_old with
;;
;;    update ProbeSetXRef set Locus_old=NULL;

(call-with-db
 (lambda (db)
   (begin
     ;(newline)
     ; (dbi-query db "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = 1 AND Used_for_mapping='Y' ORDER BY StrainId;")
     ; (let [(row (get-row db))]
     ;  (display row)
     ;  )
     ;(let [(result (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))]
     ;  (display (car result)))

     ;; ---- fetch all known BXD sample/individual/strain names
     (define bxd-strains (bxd-strain-id-names #:map? #t))
     ;(display (assoc 64728 bxd-strains))
     ;(newline)
     ;(newline)
     ;(dbi-query db "SELECT * FROM ProbeSetXRef LIMIT 3")
     ;(let [(row (get-row db))]
     ;  (display row)
     ;  )
     ;(db-check2 db)
     ;(newline)
     ;; ---- get first available dataset for precompute:
     ;; @@ order by dataid - recurse

     (define (get-trait db probeset-id)
       (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1"))
       (get-row db)
   )

     (define (run-precompute db prev-id)
       (let [(hit (get-precompute-hit db prev-id))]
         (if hit
             (let* [(data-id (assoc-ref hit "DataId"))
                    (data-id-str (int-to-string data-id))
                    (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId"))
                    (probeset-id (assoc-ref hit "ProbeSetId"))
                    (trait (get-trait db probeset-id))
                    (trait-name (assoc-ref trait "Name"))
                    (name (dataset-name db probesetfreeze-id))
                    ]
               (display hit)
               (display data-id)
               (newline)
               ;; ---- Get strains and phenotypes for this dataset
               (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str))
               (define id_traits (get-rows-apply db
                                                 (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value")))
                                                 '()))
               ;; ---- Now we need to make sure that all strains belong to BXD
               (define non-bxd (fold
                                (lambda (strain lst)
                                  (let* [(id (car strain))
                                         (name (assoc id bxd-strains))]
                                    (if name
                                        lst
                                        (append lst `(,name)))))

                                '()
                                id_traits))
               (define traits (map
                               (lambda (t)
                                 (match t
                                   ((id . value) (cons (assoc-ref bxd-strains id) value)
                                    )))
                               id_traits))
               ;; (display traits)
               ;; (if (= 0 (length non-bxd))
               (if (eq? non-bxd '())
                   (begin
                     (set-precompute-hit-status! db data-id-str "GEMMA-START")
                     (run-gemma "BXD" data-id probeset-id probesetfreeze-id name trait-name traits)
                     (set-precompute-hit-status! db data-id-str "GEMMA-DONE"))
                   ;; disable precompute if non-bxd, for now, so it won't try again
                   (set-precompute-hit-status! db data-id-str "NON-BXD"))
               (run-precompute db data-id) ;; next round
               ))))
         (run-precompute db 0) ;; start precompute
)))