From 01fa01a3553eeadbdd56e11f5fcd020f4dd71310 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 28 Nov 2025 15:24:19 +0100 Subject: Simplify a bit and make guix.scm work without running out of RAM --- guix.scm | 251 --------------------------------------------------------- guix/guix.scm | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/param.cpp | 9 +-- src/param.h | 3 - 4 files changed, 257 insertions(+), 259 deletions(-) delete mode 100644 guix.scm create mode 100644 guix/guix.scm diff --git a/guix.scm b/guix.scm deleted file mode 100644 index 1efe946..0000000 --- a/guix.scm +++ /dev/null @@ -1,251 +0,0 @@ -;; To use this file to build HEAD of gemma: -;; -;; guix build -f guix.scm # default builds pangemma-git -;; -;; To get a development container (e.g., run in emacs shell). -;; -;; guix shell -C -D -F -v 3 -f guix.scm # pangemma-shell-git -;; -;; optimized for arch: -;; -;; guix shell --tune=native -C -D -F -v 3 # pangemma-shell-git -;; -;; see premake5.lua header for examples. -;; -;; To optimize use guix --tune=march-type (e.g. 
--tune=native) - -(define-module (guix) - #:use-module ((guix licenses) #:prefix license:) - #:use-module (guix gexp) - #:use-module (guix packages) - #:use-module (guix git-download) - #:use-module (guix build-system gnu) - #:use-module (guix utils) - - #:use-module (gnu packages algebra) - #:use-module (gnu packages base) - #:use-module (gnu packages build-tools) - #:use-module (gnu packages compression) - #:use-module (gnu packages commencement) - #:use-module (gnu packages check) - #:use-module (gnu packages cpp) - #:use-module (gnu packages databases) - #:use-module (gnu packages gcc) - #:use-module (gnu packages gdb) - #:use-module (gnu packages guile) - #:use-module (gnu packages guile-xyz) - #:use-module (gnu packages maths) - #:use-module (gnu packages ninja) - #:use-module (gnu packages perl) - #:use-module (gnu packages pkg-config) - #:use-module (gnu packages ruby) - #:use-module (srfi srfi-1) - #:use-module (ice-9 popen) - #:use-module (ice-9 rdelim)) - -(define %source-dir (dirname (current-filename))) - -(define %git-commit - (read-string (open-pipe "git describe --always --tags --long|tr -d $'\n'" OPEN_READ))) - -(define %pangemma-version - (read-string (open-pipe "cat VERSION|tr -d $'\n'" OPEN_READ))) - -(define-public openblas-pangemma -;; we are fixating on an older openblas, for now - (package - (name "openblas-pangemma") - (version "0.3.21") - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/xianyi/OpenBLAS") - (commit (string-append "v" version)))) - (file-name (git-file-name name version)) - (sha256 - (base32 - "0yx1axiki12y0xz0d5s76vvl7ds36k0npv1sww08k2qslhz1g9qp")))) - (build-system gnu-build-system) - (properties `((tunable? . #t))) - (arguments - (list - #:tests? #f ;; skip tests - #:test-target "test" - ;; No default baseline is supplied for powerpc-linux. - #:substitutable? 
(not (target-ppc32?)) - #:make-flags - #~(list (string-append "PREFIX=" #$output) - (string-append "CFLAGS=-O3 -g -Wno-incompatible-pointer-types -Wno-error=implicit-function-declaration") - "COPT=" - "COMMON_OPT=" - "DYNAMIC_ARCH=" - "SHELL=bash" - "MAKE_NB_JOBS=0" ;use jobserver for submakes - - ;; This is the maximum number of threads OpenBLAS will ever use (that - ;; is, if $OPENBLAS_NUM_THREADS is greater than that, then NUM_THREADS - ;; is used.) If we don't set it, the makefile sets it to the number - ;; of cores of the build machine, which is obviously wrong. - "NUM_THREADS=128" - - ;; DYNAMIC_ARCH is only supported on some architectures. - ;; DYNAMIC_ARCH combined with TARGET=GENERIC provides a library - ;; which uses the optimizations for the detected CPU. This can - ;; be overridden at runtime with the environment variable - ;; OPENBLAS_CORETYPE=<type>, where "type" is a supported CPU - ;; type. On other architectures we target only the baseline CPU - ;; supported by Guix. - #$@(cond - ((or (target-x86-64?) - (target-x86-32?) - (target-ppc64le?) - (target-aarch64?)) - ;; Dynamic older enables a few extra CPU architectures - ;; on x86_64 that were released before 2010. - '("DYNAMIC_ARCH=1" "TARGET=GENERIC")) - ;; '("DYNAMIC_ARCH=" "TARGET_CORE=ZEN")) - ;; On some of these architectures the CPU type can't be detected. - ;; We list the oldest CPU core we want to have support for. - ;; On MIPS we force the "SICORTEX" TARGET, as for the other - ;; two available MIPS targets special extended instructions - ;; for Loongson cores are used. - ((target-mips64el?) - '("TARGET=SICORTEX")) - ((target-arm32?) - '("TARGET=ARMV7")) - ((target-riscv64?) - '("TARGET=RISCV64_GENERIC")) - (else '()))) - ;; no configure script - #:phases - #~(modify-phases %standard-phases - (delete 'configure) - (add-before 'build 'set-extralib - (lambda* (#:key inputs #:allow-other-keys) - ;; Get libgfortran found when building in utest. 
- (setenv "FEXTRALIB" - (string-append - "-L" - (dirname - (search-input-file inputs "/lib/libgfortran.so"))))))))) - (inputs - (list `(,gfortran "lib"))) - (native-inputs - (list cunit gfortran perl)) - (home-page "https://www.openblas.net/") - (synopsis "Optimized BLAS library based on GotoBLAS") - (description - "OpenBLAS is a BLAS library forked from the GotoBLAS2-1.13 BSD version.") - (license license:bsd-3))) - -(define-public pangemma-base-git - "Pangemma base build package" - (package - (name "pangemma-git") - (version (git-version %pangemma-version "HEAD" %git-commit)) - (source (local-file %source-dir #:recursive? #t)) - (build-system gnu-build-system) - (inputs - (list gsl - openblas-pangemma - guile-3.0 - `(,guile-3.0 "debug") - ;; `(,guile-3.0 "dev") - guile-lmdb - lmdb - pkg-config - ninja - ruby - zlib)) - ;; (propagated-inputs - ;; (list - ;; `("guile" ,guile-3.0-latest) - ;; `("guile-debug" ,guile-3.0-latest "debug") - ;; `("guile" ,guile-3.0-latest "dev"))) - - ;; ("gsl-static" ,gsl-static) - ;; ("zlib:static" ,zlib "static") - (arguments - `(#:phases - (modify-phases %standard-phases - (delete 'configure) - (delete 'validate-runpath) - (add-before 'build 'bin-mkdir - (lambda _ - (mkdir-p "bin") - )) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "bin/gemma" (string-append out "/bin")))))) - #:tests? #t - #:parallel-tests? 
#f)) - (home-page "https://git.genenetwork.org/pangemma/") - (synopsis "Tool for genome-wide efficient mixed model association") - (description "New version of Genome-wide Efficient Mixed Model Association (PANGEMMA) -provides a standard linear mixed model resolver with application in -genome-wide association studies (GWAS).") - (license license:gpl3))) - -(define-public pangemma-shell-git - "Shell version for development" - (package - (inherit pangemma-base-git) - (name "pangemma-shell-git") - (build-system gnu-build-system) - (propagated-inputs - (modify-inputs (package-inputs pangemma-base-git) - (append which binutils coreutils gcc-toolchain premake5 gnu-make gdb gperftools ;; for the shell - ))) - (arguments - `(#:phases (modify-phases %standard-phases - (delete 'configure) - (delete 'build) - (delete 'package) - (delete 'check) - (delete 'install)))) - (description "Pangemma shell for development") - )) - -;; ---- legacy build ----------------------------------------------------------------- -(define-public gemma-git - "Original legacy gemma -- for as long as it compiles" - (package - (name "gemma-git") - (version (git-version %pangemma-version "HEAD" %git-commit)) - (source (local-file %source-dir #:recursive? #t)) - (build-system gnu-build-system) - (inputs - (list catch2 - gdb - gsl - openblas-pangemma - zlib)) - ;; ("gsl-static" ,gsl-static) - ;; ("zlib:static" ,zlib "static") - (native-inputs ; for running tests - (list perl which)) - (arguments - `(#:phases - (modify-phases %standard-phases - (delete 'configure) - (delete 'validate-runpath) - (add-before 'build 'bin-mkdir - (lambda _ - (mkdir-p "bin") - )) - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((out (assoc-ref outputs "out"))) - (install-file "bin/gemma" (string-append out "/bin")))))) - #:tests? #t - #:parallel-tests? 
#f)) - (home-page "https://github.com/genetics-statistics") - (synopsis "Tool for genome-wide efficient mixed model association") - (description "Genome-wide Efficient Mixed Model Association (GEMMA) -provides a standard linear mixed model resolver with application in -genome-wide association studies (GWAS).") - (license license:gpl3))) - -pangemma-shell-git diff --git a/guix/guix.scm b/guix/guix.scm new file mode 100644 index 0000000..90ef001 --- /dev/null +++ b/guix/guix.scm @@ -0,0 +1,253 @@ +;; To use this file to build HEAD of gemma: +;; +;; guix build -f guix/guix.scm # default builds pangemma-git +;; +;; To get a development container (e.g., run in emacs shell). +;; +;; guix shell -C -D -F -f guix/guix.scm # pangemma-shell-git +;; +;; see premake5.lua for build/test instructions +;; +;; optimized for arch: +;; +;; guix shell --tune=native -C -D -F # pangemma-shell-git +;; +;; see premake5.lua header for examples. +;; +;; To optimize use guix --tune=march-type (e.g. --tune=native) + +(define-module (guix) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (guix gexp) + #:use-module (guix packages) + #:use-module (guix git-download) + #:use-module (guix build-system gnu) + #:use-module (guix utils) + + #:use-module (gnu packages algebra) + #:use-module (gnu packages base) + #:use-module (gnu packages build-tools) + #:use-module (gnu packages compression) + #:use-module (gnu packages commencement) + #:use-module (gnu packages check) + #:use-module (gnu packages cpp) + #:use-module (gnu packages databases) + #:use-module (gnu packages gcc) + #:use-module (gnu packages gdb) + #:use-module (gnu packages guile) + #:use-module (gnu packages guile-xyz) + #:use-module (gnu packages maths) + #:use-module (gnu packages ninja) + #:use-module (gnu packages perl) + #:use-module (gnu packages pkg-config) + #:use-module (gnu packages ruby) + #:use-module (srfi srfi-1) + #:use-module (ice-9 popen) + #:use-module (ice-9 rdelim)) + +(define %source-dir (dirname 
(current-filename))) + +(define %git-commit + (read-string (open-pipe "git describe --always --tags --long|tr -d $'\n'" OPEN_READ))) + +(define %pangemma-version + (read-string (open-pipe "cat VERSION|tr -d $'\n'" OPEN_READ))) + +(define-public openblas-pangemma +;; we are fixating on an older openblas, for now + (package + (name "openblas-pangemma") + (version "0.3.21") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/xianyi/OpenBLAS") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0yx1axiki12y0xz0d5s76vvl7ds36k0npv1sww08k2qslhz1g9qp")))) + (build-system gnu-build-system) + (properties `((tunable? . #t))) + (arguments + (list + #:tests? #f ;; skip tests + #:test-target "test" + ;; No default baseline is supplied for powerpc-linux. + #:substitutable? (not (target-ppc32?)) + #:make-flags + #~(list (string-append "PREFIX=" #$output) + (string-append "CFLAGS=-O3 -g -Wno-incompatible-pointer-types -Wno-error=implicit-function-declaration") + "COPT=" + "COMMON_OPT=" + "DYNAMIC_ARCH=" + "SHELL=bash" + "MAKE_NB_JOBS=0" ;use jobserver for submakes + + ;; This is the maximum number of threads OpenBLAS will ever use (that + ;; is, if $OPENBLAS_NUM_THREADS is greater than that, then NUM_THREADS + ;; is used.) If we don't set it, the makefile sets it to the number + ;; of cores of the build machine, which is obviously wrong. + "NUM_THREADS=128" + + ;; DYNAMIC_ARCH is only supported on some architectures. + ;; DYNAMIC_ARCH combined with TARGET=GENERIC provides a library + ;; which uses the optimizations for the detected CPU. This can + ;; be overridden at runtime with the environment variable + ;; OPENBLAS_CORETYPE=<type>, where "type" is a supported CPU + ;; type. On other architectures we target only the baseline CPU + ;; supported by Guix. + #$@(cond + ((or (target-x86-64?) + (target-x86-32?) + (target-ppc64le?) 
+ (target-aarch64?)) + ;; Dynamic older enables a few extra CPU architectures + ;; on x86_64 that were released before 2010. + '("DYNAMIC_ARCH=1" "TARGET=GENERIC")) + ;; '("DYNAMIC_ARCH=" "TARGET_CORE=ZEN")) + ;; On some of these architectures the CPU type can't be detected. + ;; We list the oldest CPU core we want to have support for. + ;; On MIPS we force the "SICORTEX" TARGET, as for the other + ;; two available MIPS targets special extended instructions + ;; for Loongson cores are used. + ((target-mips64el?) + '("TARGET=SICORTEX")) + ((target-arm32?) + '("TARGET=ARMV7")) + ((target-riscv64?) + '("TARGET=RISCV64_GENERIC")) + (else '()))) + ;; no configure script + #:phases + #~(modify-phases %standard-phases + (delete 'configure) + (add-before 'build 'set-extralib + (lambda* (#:key inputs #:allow-other-keys) + ;; Get libgfortran found when building in utest. + (setenv "FEXTRALIB" + (string-append + "-L" + (dirname + (search-input-file inputs "/lib/libgfortran.so"))))))))) + (inputs + (list `(,gfortran "lib"))) + (native-inputs + (list cunit gfortran perl)) + (home-page "https://www.openblas.net/") + (synopsis "Optimized BLAS library based on GotoBLAS") + (description + "OpenBLAS is a BLAS library forked from the GotoBLAS2-1.13 BSD version.") + (license license:bsd-3))) + +(define-public pangemma-base-git + "Pangemma base build package" + (package + (name "pangemma-git") + (version (git-version %pangemma-version "HEAD" %git-commit)) + (source (local-file %source-dir #:recursive? 
#t)) + (build-system gnu-build-system) + (inputs + (list gsl + openblas-pangemma + guile-3.0 + `(,guile-3.0 "debug") + ;; `(,guile-3.0 "dev") + guile-lmdb + lmdb + pkg-config + ninja + ruby + zlib)) + ;; (propagated-inputs + ;; (list + ;; `("guile" ,guile-3.0-latest) + ;; `("guile-debug" ,guile-3.0-latest "debug") + ;; `("guile" ,guile-3.0-latest "dev"))) + + ;; ("gsl-static" ,gsl-static) + ;; ("zlib:static" ,zlib "static") + (arguments + `(#:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'validate-runpath) + (add-before 'build 'bin-mkdir + (lambda _ + (mkdir-p "bin") + )) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (install-file "bin/gemma" (string-append out "/bin")))))) + #:tests? #t + #:parallel-tests? #f)) + (home-page "https://git.genenetwork.org/pangemma/") + (synopsis "Tool for genome-wide efficient mixed model association") + (description "New version of Genome-wide Efficient Mixed Model Association (PANGEMMA) +provides a standard linear mixed model resolver with application in +genome-wide association studies (GWAS).") + (license license:gpl3))) + +(define-public pangemma-shell-git + "Shell version for development" + (package + (inherit pangemma-base-git) + (name "pangemma-shell-git") + (build-system gnu-build-system) + (propagated-inputs + (modify-inputs (package-inputs pangemma-base-git) + (append which binutils coreutils gcc-toolchain premake5 gnu-make gdb gperftools ;; for the shell + ))) + (arguments + `(#:phases (modify-phases %standard-phases + (delete 'configure) + (delete 'build) + (delete 'package) + (delete 'check) + (delete 'install)))) + (description "Pangemma shell for development") + )) + +;; ---- legacy build ----------------------------------------------------------------- +(define-public gemma-git + "Original legacy gemma -- for as long as it compiles" + (package + (name "gemma-git") + (version (git-version %pangemma-version "HEAD" %git-commit)) + 
(source (local-file %source-dir #:recursive? #t)) + (build-system gnu-build-system) + (inputs + (list catch2 + gdb + gsl + openblas-pangemma + zlib)) + ;; ("gsl-static" ,gsl-static) + ;; ("zlib:static" ,zlib "static") + (native-inputs ; for running tests + (list perl which)) + (arguments + `(#:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'validate-runpath) + (add-before 'build 'bin-mkdir + (lambda _ + (mkdir-p "bin") + )) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (install-file "bin/gemma" (string-append out "/bin")))))) + #:tests? #t + #:parallel-tests? #f)) + (home-page "https://github.com/genetics-statistics") + (synopsis "Tool for genome-wide efficient mixed model association") + (description "Genome-wide Efficient Mixed Model Association (GEMMA) +provides a standard linear mixed model resolver with application in +genome-wide association studies (GWAS).") + (license license:gpl3))) + +pangemma-shell-git diff --git a/src/param.cpp b/src/param.cpp index 034b25c..017d588 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -323,7 +323,7 @@ void PARAM::ReadFiles(void) { trim_individuals(indicator_idv, ni_max); trim_individuals(indicator_cvt, ni_max); // The following reads the geno file to get the SNPs - only for BIMBAM - if (is_bimbam && ReadFile_bimbam_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp, + if (ReadFile_bimbam_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { error = true; @@ -1285,8 +1285,7 @@ void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_ error = true; } } else { - // Read BIMBAM - if (is_bimbam && ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, calc_K) == false) { + if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, calc_K) == false) { error = true; } } @@ -1311,10 
+1310,10 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) { file_str = file_geno; if (is_mdb) error = !BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, - matrix_kin, ni_max == 0) == false); + matrix_kin, ni_max == 0); else error = !BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, - matrix_kin, ni_max == 0) == false); + matrix_kin, ni_max == 0); } diff --git a/src/param.h b/src/param.h index 978196c..38b9430 100644 --- a/src/param.h +++ b/src/param.h @@ -372,7 +372,4 @@ public: size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt); -#define is_bimbam (!is_mdb) -#define compute_kinship (a_mode == M_KIN || a_mode == M_KIN2) - #endif -- cgit 1.4.1