#! Precompute hits This is the *obsolete* script to compute GEMMA directly from the DB. We have split out this step into separate steps for (1) fetching traits and (2) running gemma without DB. This allows scaling up on HPC etc. Run from base dir with . .guix-shell -- guile -L . -s ./scripts/precompute/precompute-hits.scm and with some extra paths . .guix-shell ruby --expose=/home/wrk/iwrk/opensource/code/genetics/gemma-wrapper/=/gemma-wrapper --expose=/home/wrk/iwrk/opensource/code/genetics/gemma/=/gemma -- env TMPDIR=tmp guile -L . -s ./scripts/precompute/precompute-hits.scm You may want to forward a mysql port if there is no DB locally ssh -L 3306:127.0.0.1:3306 -f -N tux02.genenetwork.org !# (use-modules (dbi dbi) (gn db mysql) (gn data dataset) (gn data hits) (gn data strains) (gn util convert) (gn runner gemma) ; (rnrs base) (ice-9 match) (srfi srfi-1) ) ;; potentially you want to test connection with mysql client: ;; ;; mysql -uwebqtlout -pwebqtlout -A -h 127.0.0.1 -P 3306 db_webqtl -e "show tables;" ;; ;; for now update Locus_old with ;; ;; update ProbeSetXRef set Locus_old=NULL; (call-with-db (lambda (db) (begin ;(newline) ; (dbi-query db "SELECT StrainId,Strain.Name FROM Strain, StrainXRef WHERE StrainXRef.StrainId = Strain.Id AND StrainXRef.InbredSetId = 1 AND Used_for_mapping='Y' ORDER BY StrainId;") ; (let [(row (get-row db))] ; (display row) ; ) ;(let [(result (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "Name"))) '()))] ; (display (car result))) ;; ---- fetch all known BXD sample/individual/strain names (define bxd-strains (bxd-strain-id-names #:map? #t)) ;(display (assoc 64728 bxd-strains)) ;(newline) ;(newline) ;(dbi-query db "SELECT * FROM ProbeSetXRef LIMIT 3") ;(let [(row (get-row db))] ; (display row) ; ) ;(db-check2 db) ;(newline) ;; ---- get first available dataset for precompute: ;; @@ order by dataid - recurse (define (get-trait db probeset-id) (dbi-query db (string-append "select Id,Chr,Mb,Name,Symbol,description from ProbeSet where Id=" (int-to-string probeset-id) " limit 1")) (get-row db) ) (define (run-precompute db prev-id) (let [(hit (get-precompute-hit db prev-id))] (if hit (let* [(data-id (assoc-ref hit "DataId")) (data-id-str (int-to-string data-id)) (probesetfreeze-id (assoc-ref hit "ProbeSetFreezeId")) (probeset-id (assoc-ref hit "ProbeSetId")) (trait (get-trait db probeset-id)) (trait-name (assoc-ref trait "Name")) (name (dataset-name db probesetfreeze-id)) ] (display hit) (display data-id) (newline) ;; ---- Get strains and phenotypes for this dataset (dbi-query db (string-append "SELECT StrainId,value from ProbeSetData WHERE Id=" data-id-str)) (define id_traits (get-rows-apply db (lambda (r) `(,(assoc-ref r "StrainId") . ,(assoc-ref r "value"))) '())) ;; ---- Now we need to make sure that all strains belong to BXD (define non-bxd (fold (lambda (strain lst) (let* [(id (car strain)) (name (assoc id bxd-strains))] (if name lst (append lst `(,name))))) '() id_traits)) (define traits (map (lambda (t) (match t ((id . value) (cons (assoc-ref bxd-strains id) value) ))) id_traits)) ;; (display traits) ;; (if (= 0 (length non-bxd)) (if (eq? non-bxd '()) (begin (set-precompute-hit-status! db data-id-str "GEMMA-START") (run-gemma "BXD" data-id probeset-id probesetfreeze-id name trait-name traits) (set-precompute-hit-status! db data-id-str "GEMMA-DONE")) ;; disable precompute if non-bxd, for now, so it won't try again (set-precompute-hit-status! db data-id-str "NON-BXD")) (run-precompute db data-id) ;; next round )))) (run-precompute db 0) ;; start precompute )))