diff options
| -rw-r--r-- | guix.scm | 92 | ||||
| -rw-r--r-- | premake5.lua | 4 | ||||
| -rw-r--r-- | test/performance/releases.org | 12 |
3 files changed, 100 insertions, 8 deletions
diff --git a/guix.scm b/guix.scm index e142d7e..6dcaa29 100644 --- a/guix.scm +++ b/guix.scm @@ -19,6 +19,8 @@ #:use-module (guix packages) #:use-module (guix git-download) #:use-module (guix build-system gnu) + #:use-module (guix utils) + #:use-module (gnu packages algebra) #:use-module (gnu packages base) #:use-module (gnu packages build-tools) @@ -27,6 +29,7 @@ #:use-module (gnu packages check) #:use-module (gnu packages cpp) #:use-module (gnu packages databases) + #:use-module (gnu packages gcc) #:use-module (gnu packages gdb) #:use-module (gnu packages guile) #:use-module (gnu packages guile-xyz) @@ -47,6 +50,90 @@ (define %pangemma-version (read-string (open-pipe "cat VERSION|tr -d $'\n'" OPEN_READ))) +(define-public openblas-pangemma +;; we are fixating on an older openblas, for now + (package + (name "openblas-pangemma") + (version "0.3.21") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/xianyi/OpenBLAS") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0yx1axiki12y0xz0d5s76vvl7ds36k0npv1sww08k2qslhz1g9qp")))) + (build-system gnu-build-system) + (properties `((tunable? . #t))) + (arguments + (list + #:tests? #f ;; skip tests + #:test-target "test" + ;; No default baseline is supplied for powerpc-linux. + #:substitutable? (not (target-ppc32?)) + #:make-flags + #~(list (string-append "PREFIX=" #$output) + (string-append "CFLAGS=-O3 -g -Wno-error -Wno-error=implicit-function-declaration") + "SHELL=bash" + "MAKE_NB_JOBS=0" ;use jobserver for submakes + + ;; This is the maximum number of threads OpenBLAS will ever use (that + ;; is, if $OPENBLAS_NUM_THREADS is greater than that, then NUM_THREADS + ;; is used.) If we don't set it, the makefile sets it to the number + ;; of cores of the build machine, which is obviously wrong. + "NUM_THREADS=128" + + ;; DYNAMIC_ARCH is only supported on some architectures. 
+ ;; DYNAMIC_ARCH combined with TARGET=GENERIC provides a library + ;; which uses the optimizations for the detected CPU. This can + ;; be overridden at runtime with the environment variable + ;; OPENBLAS_CORETYPE=<type>, where "type" is a supported CPU + ;; type. On other architectures we target only the baseline CPU + ;; supported by Guix. + #$@(cond + ((or (target-x86-64?) + (target-x86-32?) + (target-ppc64le?) + (target-aarch64?)) + ;; Dynamic older enables a few extra CPU architectures + ;; on x86_64 that were released before 2010. + '("DYNAMIC_ARCH=1" "DYNAMIC_OLDER=1" "TARGET=GENERIC")) + ;; On some of these architectures the CPU type can't be detected. + ;; We list the oldest CPU core we want to have support for. + ;; On MIPS we force the "SICORTEX" TARGET, as for the other + ;; two available MIPS targets special extended instructions + ;; for Loongson cores are used. + ((target-mips64el?) + '("TARGET=SICORTEX")) + ((target-arm32?) + '("TARGET=ARMV7")) + ((target-riscv64?) + '("TARGET=RISCV64_GENERIC")) + (else '()))) + ;; no configure script + #:phases + #~(modify-phases %standard-phases + (delete 'configure) + (add-before 'build 'set-extralib + (lambda* (#:key inputs #:allow-other-keys) + ;; Get libgfortran found when building in utest. + (setenv "FEXTRALIB" + (string-append + "-L" + (dirname + (search-input-file inputs "/lib/libgfortran.so"))))))))) + (inputs + (list `(,gfortran "lib"))) + (native-inputs + (list cunit gfortran perl)) + (home-page "https://www.openblas.net/") + (synopsis "Optimized BLAS library based on GotoBLAS") + (description + "OpenBLAS is a BLAS library forked from the GotoBLAS2-1.13 BSD version.") + (license license:bsd-3))) + (define-public pangemma-base-git "Pangemma base build package" (package @@ -54,10 +141,9 @@ (version (git-version %pangemma-version "HEAD" %git-commit)) (source (local-file %source-dir #:recursive? #t)) (build-system gnu-build-system) - (properties `((tunable? . 
#t))) (inputs (list gsl - openblas + openblas-pangemma guile-3.0 `(,guile-3.0 "debug") ;; `(,guile-3.0 "dev") @@ -129,7 +215,7 @@ genome-wide association studies (GWAS).") (list catch2 gdb gsl - openblas + openblas-pangemma zlib)) ;; ("gsl-static" ,gsl-static) ;; ("zlib:static" ,zlib "static") diff --git a/premake5.lua b/premake5.lua index 1091cd8..3ccbdbc 100644 --- a/premake5.lua +++ b/premake5.lua @@ -39,10 +39,14 @@ workspace "PanGemma" filter "configurations:Debug" defines { "DEBUG" } + buildoptions { pkg_cpp_flags } + linkoptions { pkg_linker_flags } symbols "On" filter "configurations:Release" defines { "NDEBUG", "HAVE_INLINE" } + buildoptions { pkg_cpp_flags } + linkoptions { pkg_linker_flags } buildoptions { "-pthread", "-Wall" } optimize "Speed" diff --git a/test/performance/releases.org b/test/performance/releases.org index c973607..af0cbb7 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -29,16 +29,18 @@ sys 0m0.901s The output looks the same. Good. So far the first difference is a much later openblas 0.3.30 (over 0.3.9). In the source code we added checkpoints and more debugging, particularly write statements. I disabled the latter, but still no dice. 
-When compiled with the profile library prefix the gemma run with +When compiled with the profiler library, prefix the gemma run with #+begin_src sh +premake5 gmake2 && make verbose=1 config=debug -j 8 gemma && time CPUPROFILE=gemma.prof LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check -debug CPUPROFILE=gemma.prof pprof --text build/bin/Debug/gemma gemma.prof - 1024 50.7% 50.7% 1024 50.7% dcopy_k_ZEN - 99 4.9% 55.6% 99 4.9% openblas_read_env - 67 3.3% 58.9% 107 5.3% ____strtod_l_internal - 67 3.3% 62.3% 67 3.3% gsl_vector_div + 1007 49.2% 49.2% 1015 49.6% dot_compute + 94 4.6% 53.8% 94 4.6% rpcc + 74 3.6% 57.5% 74 3.6% gsl_vector_div + 62 3.0% 60.5% 92 4.5% ____strtod_l_internal + 42 2.1% 62.5% 42 2.1% dgemm_kernel_ZEN #+end_src sh This led me to try the newer openblas on the older gemma - and indeed, the regression is coming from the openblas version. Even though it says 'OpenBLAS 0.3.30 DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=128', I suspect the dynamic arch is not really optimizing. |
