diff options
Diffstat (limited to 'test')
| -rw-r--r-- | test/performance/releases.org | 103 | ||||
| -rwxr-xr-x | test/runner | 18 | ||||
| -rwxr-xr-x | test/test-mdb-integration.scm | 51 | ||||
| -rwxr-xr-x | test/test-uvlmm-integration.scm | 52 |
4 files changed, 224 insertions, 0 deletions
diff --git a/test/performance/releases.org b/test/performance/releases.org index b208e54..b9c451d 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -1,5 +1,108 @@ * GEMMA performance stats +** GEMMA 1.00-pre1 + + +Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz. + +Introducing mdb genotype format led to a 30% speed increase on the small mouse set: + +#+begin_src sh +real 0m6.403s +user 0m11.529s +sys 0m6.325s +#+end_src sh + +that may not look like much, but we are only starting! + +** Picking up the pieces + +We are facing a time regression. + +#+begin_src sh +premake5 gmake && make verbose=1 config=release -j 8 gemma && time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g ./example/mouse_hs1940.geno.txt.mdb -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check +#+end_src sh + +With openblas 0.3.21 we go a bit faster. Still 10% behind though, there is room for tweaking. It may actually be a new SSD. I want to run some bigger files first. + +#+begin_src sh +Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025 +Reading Files ... +## number of total individuals = 1940 +## number of analyzed individuals = 1410 +## number of covariates = 1 +## number of phenotypes = 1 +## number of total SNPs/var = 12226 +## number of analyzed SNPs = 10768 +Start Eigen-Decomposition... +pve estimate =0.608801 +se(pve) =0.032774 +================================================== 100% +real 0m9.017s +user 0m13.168s +sys 0m5.919s +#+end_src sh + +Before it was + +#+begin_src sh +Pangemma --- GEMMA 0.98.5 compatible executable 1.0.0 (2025-11-22) with guile 3.0.9 by Xiang Zhou, Pjotr Prins and team (C) 2012-2025 +Reading Files ... 
+## number of total individuals = 1940 +## number of analyzed individuals = 1410 +## number of covariates = 1 +## number of phenotypes = 1 +## number of total SNPs/var = 12226 +## number of analyzed SNPs = 10768 +Start Eigen-Decomposition... +pve estimate =0.608801 +se(pve) =0.032774 +================================================== 100% +real 0m16.772s +user 0m25.443s +sys 0m0.901s +#+end_src sh + +The output looks the same. Good. So far the first difference is a much newer openblas 0.3.30 (up from 0.3.9). In the source code we added checkpoints and more debugging, particularly write statements. I disabled the latter, but still no dice. + +When compiled with the profiler library, prefix the gemma run with CPUPROFILE=gemma.prof: + +#+begin_src sh +premake5 gmake && make verbose=1 config=debug -j 8 gemma && time CPUPROFILE=gemma.prof LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check -debug +CPUPROFILE=gemma.prof +pprof --text build/bin/Debug/gemma gemma.prof + + 1007 49.2% 49.2% 1015 49.6% dot_compute + 94 4.6% 53.8% 94 4.6% rpcc + 74 3.6% 57.5% 74 3.6% gsl_vector_div + 62 3.0% 60.5% 92 4.5% ____strtod_l_internal + 42 2.1% 62.5% 42 2.1% dgemm_kernel_ZEN +#+end_src sh + +This led me to try the newer openblas on the older gemma - and indeed, the regression is coming from the openblas version. Even though it says 'OpenBLAS 0.3.30 DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=128' I suspect the dynamic arch is not really optimizing. + +Well, at least I found the problem. Time for a special openblas build like I used to do. 
+ + +*** Bigger run + +We translate this 10 GB (gzip compressed) job from our pangenome precompute: + +``` +/bin/gemma -loco 3 -k /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -o a3248cec40b3fe6b9e8672352b3ab2d7280c426c.3.assoc.txt -p pheno.json.txt -g pangenome-13M-genotypes.txt -a snps-matched.txt -lmm 9 -maf 0.1 -n 2 -outdir /export2/data/wrk/services/gemma-wrapper/tmp/tmp/panlmm/d20251126-4190721-c8bbo8 +``` + +to + +``` +time LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Release/gemma -g tmp/pangenome-13M-genotypes.txt -p tmp/pheno.json.txt -n 1 -a tmp/snps-matched.txt -k tmp/93f6b39ec06c09fb9ba9ca628b5fb990921b6c60.3.cXX.txt.cXX.txt -lmm 9 -no-check +real 20m4.687s +user 23m42.508s +sys 9m51.929s +``` + +On my AMD Ryzen 7 3700X it uses about 10 GB of RAM. With the -debug switch it failed because of sqrt(NaN). There is a lot that can be gained with better IO and multi-core use. + ** GEMMA 0.98.5-pre1 Measurements taken on a recent AMD Ryzen 7 3700X 8-Core Processor @2.195GHz. 
diff --git a/test/runner b/test/runner new file mode 100755 index 0000000..5002d80 --- /dev/null +++ b/test/runner @@ -0,0 +1,18 @@ +#!/bin/sh +# -*- mode: scheme; -*- +exec guile --debug -s "$0" "$@" +!# + +(define-module (test-runner) + #:use-module (ice-9 match) + #:use-module (srfi srfi-1) ; for last + #:use-module (srfi srfi-13) + #:use-module (srfi srfi-64) ; for tests + #:use-module (ice-9 rdelim) + ) + +(test-begin "all-tests") + +(load "test-uvlmm-integration.scm") + +(test-end "all-tests") diff --git a/test/test-mdb-integration.scm b/test/test-mdb-integration.scm new file mode 100755 index 0000000..006c241 --- /dev/null +++ b/test/test-mdb-integration.scm @@ -0,0 +1,51 @@ +#!/bin/sh +# -*- mode: scheme; -*- +exec guile --debug -s "$0" "$@" +!# + +(define-module (test-runner) + #:use-module (ice-9 match) + #:use-module (srfi srfi-1) ; for last + #:use-module (srfi srfi-13) + #:use-module (srfi srfi-64) ; for tests + #:use-module (ice-9 rdelim) + ) + +(define kinship-fn "./output/mouse_hs1940.cXX.txt") +(define gwa-fn "./output/mouse_hs1940.assoc.txt") + +(test-begin "uvlmm-mdb-kinship-run") + +(when (file-exists? kinship-fn) + (delete-file kinship-fn)) +(let [(err (system "./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.mdb -p ./example/mouse_hs1940.pheno.txt -gk -o mouse_hs1940 -debug"))] + (test-eqv 0 err)) + +(test-end "uvlmm-mdb-kinship-run") + +(test-begin "uvlmm-mdb-gwa-run") + +(when (file-exists? gwa-fn) + (delete-file gwa-fn)) +;; The following integration test runs gemma uvlmm and adds up the output column as a check. 
+;; It uses the kinship-run matrix from the earlier test +(let [(err (system (string-append "./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.mdb -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k " kinship-fn " -o mouse_hs1940 -lmm 9 -debug")))] + (test-eqv 0 err)) +(call-with-input-file gwa-fn + (lambda (port) + (read-line port) ; skip first line + (let* ((fields (string-split (read-line port) #\tab)) + (last-field (last fields))) + (test-eqv 208.0 (truncate (* 1000 (string->number last-field))))) + (test-eqv 5720672.0 + (let loop ((line (read-line port)) + (sum 208.0)) + (if (eof-object? line) + sum + (let* ((fields (string-split line #\tab)) + (last-field (last fields)) + (value (string->number last-field))) + (loop (read-line port) + (+ sum (truncate (* 1000 value)))))))))) + +(test-end "uvlmm-mdb-gwa-run") diff --git a/test/test-uvlmm-integration.scm b/test/test-uvlmm-integration.scm new file mode 100755 index 0000000..91eb14a --- /dev/null +++ b/test/test-uvlmm-integration.scm @@ -0,0 +1,52 @@ +#!/bin/sh +# -*- mode: scheme; -*- +exec guile --debug -s "$0" "$@" +!# + +(define-module (test-runner) + #:use-module (ice-9 match) + #:use-module (srfi srfi-1) ; for last + #:use-module (srfi srfi-13) + #:use-module (srfi srfi-64) ; for tests + #:use-module (ice-9 rdelim) + ) + +(define kinship-fn "./output/mouse_hs1940.cXX.txt") +(define gwa-fn "./output/mouse_hs1940.assoc.txt") + +(test-begin "uvlmm-bimbam-kinship-run") + +(when (file-exists? kinship-fn) + (delete-file kinship-fn)) +(let [(err (system "./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.txt.gz -gk -p ./example/mouse_hs1940.pheno.txt -o mouse_hs1940 -debug"))] + (test-eqv 0 err)) + +(test-end "uvlmm-bimbam-kinship-run") + + +(test-begin "uvlmm-bimbam-gwa-run") + +(when (file-exists? gwa-fn) + (delete-file gwa-fn)) +;; The following integration test runs gemma uvlmm and adds up the output column as a check. 
+;; It uses the kinship-run matrix from the earlier test +(let [(err (system (string-append "./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k " kinship-fn " -o mouse_hs1940 -lmm 9 -debug")))] + (test-eqv 0 err)) +(call-with-input-file gwa-fn + (lambda (port) + (read-line port) ; skip first line + (let* ((fields (string-split (read-line port) #\tab)) + (last-field (last fields))) + (test-eqv 208.0 (truncate (* 1000 (string->number last-field))))) + (test-eqv 5720672.0 + (let loop ((line (read-line port)) + (sum 208.0)) + (if (eof-object? line) + sum + (let* ((fields (string-split line #\tab)) + (last-field (last fields)) + (value (string->number last-field))) + (loop (read-line port) + (+ sum (truncate (* 1000 value)))))))))) + +(test-end "uvlmm-bimbam-gwa-run") |
