about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2025-11-25 14:38:44 +0100
committerPjotr Prins2025-11-25 14:38:44 +0100
commitbe88d7a399660f3c4ddd6bce502f5b39c61b3298 (patch)
tree708638e5f2e97c746659414944196121409264de
parentf03c82ea21acda54de8cced07ba8150cfafb3769 (diff)
downloadpangemma-be88d7a399660f3c4ddd6bce502f5b39c61b3298.tar.gz
Fixing openblas regression by bringing in optimized package
-rw-r--r--guix.scm92
-rw-r--r--premake5.lua4
-rw-r--r--test/performance/releases.org12
3 files changed, 100 insertions, 8 deletions
diff --git a/guix.scm b/guix.scm
index e142d7e..6dcaa29 100644
--- a/guix.scm
+++ b/guix.scm
@@ -19,6 +19,8 @@
   #:use-module (guix packages)
   #:use-module (guix git-download)
   #:use-module (guix build-system gnu)
+  #:use-module (guix utils)
+
   #:use-module (gnu packages algebra)
   #:use-module (gnu packages base)
   #:use-module (gnu packages build-tools)
@@ -27,6 +29,7 @@
   #:use-module (gnu packages check)
   #:use-module (gnu packages cpp)
   #:use-module (gnu packages databases)
+  #:use-module (gnu packages gcc)
   #:use-module (gnu packages gdb)
   #:use-module (gnu packages guile)
   #:use-module (gnu packages guile-xyz)
@@ -47,6 +50,90 @@
 (define %pangemma-version
     (read-string (open-pipe "cat VERSION|tr -d $'\n'" OPEN_READ)))
 
+(define-public openblas-pangemma
+;; Pin OpenBLAS to the older 0.3.21 release for now (works around the openblas regression).
+  (package
+    (name "openblas-pangemma")
+    (version "0.3.21")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/xianyi/OpenBLAS")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0yx1axiki12y0xz0d5s76vvl7ds36k0npv1sww08k2qslhz1g9qp"))))
+    (build-system gnu-build-system)
+    (properties `((tunable? . #t)))
+    (arguments
+     (list
+      #:tests? #f ;; skip tests
+      #:test-target "test"
+      ;; No default baseline is supplied for powerpc-linux.
+      #:substitutable? (not (target-ppc32?))
+      #:make-flags
+      #~(list (string-append "PREFIX=" #$output)
+              (string-append "CFLAGS=-O3 -g -Wno-error -Wno-error=implicit-function-declaration")
+              "SHELL=bash"
+              "MAKE_NB_JOBS=0"          ;use jobserver for submakes
+
+              ;; This is the maximum number of threads OpenBLAS will ever use (that
+              ;; is, if $OPENBLAS_NUM_THREADS is greater than that, then NUM_THREADS
+              ;; is used.)  If we don't set it, the makefile sets it to the number
+              ;; of cores of the build machine, which is obviously wrong.
+              "NUM_THREADS=128"
+
+              ;; DYNAMIC_ARCH is only supported on some architectures.
+              ;; DYNAMIC_ARCH combined with TARGET=GENERIC provides a library
+              ;; which uses the optimizations for the detected CPU.  This can
+              ;; be overridden at runtime with the environment variable
+              ;; OPENBLAS_CORETYPE=<type>, where "type" is a supported CPU
+              ;; type.  On other architectures we target only the baseline CPU
+              ;; supported by Guix.
+              #$@(cond
+                    ((or (target-x86-64?)
+                         (target-x86-32?)
+                         (target-ppc64le?)
+                         (target-aarch64?))
+                     ;; DYNAMIC_OLDER enables a few extra CPU architectures
+                     ;; on x86_64 that were released before 2010.
+                     '("DYNAMIC_ARCH=1" "DYNAMIC_OLDER=1" "TARGET=GENERIC"))
+                    ;; On some of these architectures the CPU type can't be detected.
+                    ;; We list the oldest CPU core we want to have support for.
+                    ;; On MIPS we force the "SICORTEX" TARGET, as for the other
+                    ;; two available MIPS targets special extended instructions
+                    ;; for Loongson cores are used.
+                    ((target-mips64el?)
+                     '("TARGET=SICORTEX"))
+                    ((target-arm32?)
+                     '("TARGET=ARMV7"))
+                    ((target-riscv64?)
+                     '("TARGET=RISCV64_GENERIC"))
+                    (else '())))
+      ;; There is no configure script, hence the configure phase is deleted below.
+      #:phases
+      #~(modify-phases %standard-phases
+          (delete 'configure)
+          (add-before 'build 'set-extralib
+            (lambda* (#:key inputs #:allow-other-keys)
+              ;; Let utest find libgfortran: point FEXTRALIB at the directory holding libgfortran.so.
+              (setenv "FEXTRALIB"
+                      (string-append
+                       "-L"
+                       (dirname
+                        (search-input-file inputs "/lib/libgfortran.so")))))))))
+    (inputs
+     (list `(,gfortran "lib")))
+    (native-inputs
+     (list cunit gfortran perl))
+    (home-page "https://www.openblas.net/")
+    (synopsis "Optimized BLAS library based on GotoBLAS")
+    (description
+     "OpenBLAS is a BLAS library forked from the GotoBLAS2-1.13 BSD version.")
+    (license license:bsd-3)))
+
 (define-public pangemma-base-git
   "Pangemma base build package"
   (package
@@ -54,10 +141,9 @@
     (version (git-version %pangemma-version "HEAD" %git-commit))
     (source (local-file %source-dir #:recursive? #t))
     (build-system gnu-build-system)
-    (properties `((tunable? . #t)))
     (inputs
      (list gsl
-           openblas
+           openblas-pangemma
            guile-3.0
            `(,guile-3.0 "debug")
            ;; `(,guile-3.0 "dev")
@@ -129,7 +215,7 @@ genome-wide association studies (GWAS).")
      (list catch2
            gdb
            gsl
-           openblas
+           openblas-pangemma
            zlib))
        ;; ("gsl-static" ,gsl-static)
        ;; ("zlib:static" ,zlib "static")
diff --git a/premake5.lua b/premake5.lua
index 1091cd8..3ccbdbc 100644
--- a/premake5.lua
+++ b/premake5.lua
@@ -39,10 +39,14 @@ workspace "PanGemma"
 
    filter "configurations:Debug"
       defines { "DEBUG" }
+      buildoptions { pkg_cpp_flags }
+      linkoptions { pkg_linker_flags }
       symbols "On"
 
    filter "configurations:Release"
       defines { "NDEBUG", "HAVE_INLINE" }
+      buildoptions { pkg_cpp_flags }
+      linkoptions { pkg_linker_flags }
       buildoptions { "-pthread", "-Wall" }
       optimize "Speed"
 
diff --git a/test/performance/releases.org b/test/performance/releases.org
index c973607..af0cbb7 100644
--- a/test/performance/releases.org
+++ b/test/performance/releases.org
@@ -29,16 +29,18 @@ sys     0m0.901s
 
 The output looks the same. Good. So far the first difference is a much later openblas 0.3.30 (over 0.3.9). In the source code we added checkpoints and more debugging, particularly write statements. I disabled the latter, but still no dice.
 
-When compiled with the profile library prefix the gemma run with
+When compiled with the profiler library, prefix the gemma run with =CPUPROFILE=gemma.prof=:
 
 #+begin_src sh
+premake5 gmake2 && make verbose=1 config=debug -j 8 gemma && time CPUPROFILE=gemma.prof LD_LIBRARY_PATH=$GUIX_ENVIRONMENT/lib ./build/bin/Debug/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check -debug
 CPUPROFILE=gemma.prof
 pprof --text build/bin/Debug/gemma gemma.prof
 
-    1024  50.7%  50.7%     1024  50.7% dcopy_k_ZEN
-      99   4.9%  55.6%       99   4.9% openblas_read_env
-      67   3.3%  58.9%      107   5.3% ____strtod_l_internal
-      67   3.3%  62.3%       67   3.3% gsl_vector_div
+    1007  49.2%  49.2%     1015  49.6% dot_compute
+      94   4.6%  53.8%       94   4.6% rpcc
+      74   3.6%  57.5%       74   3.6% gsl_vector_div
+      62   3.0%  60.5%       92   4.5% ____strtod_l_internal
+      42   2.1%  62.5%       42   2.1% dgemm_kernel_ZEN
 #+end_src
 
 This led me to try the newer openblas on the older gemma - and indeed, the regression is coming from the openblas version. Even though it reports 'OpenBLAS 0.3.30 DYNAMIC_ARCH NO_AFFINITY Zen MAX_THREADS=128', I suspect the dynamic arch is not really optimizing.