aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml58
-rw-r--r--INSTALL.md44
-rw-r--r--Makefile39
-rw-r--r--README.md45
-rw-r--r--src/bslmmdap.cpp19
-rw-r--r--src/debug.cpp72
-rw-r--r--src/debug.h41
-rw-r--r--src/eigenlib.cpp13
-rw-r--r--src/eigenlib.h4
-rw-r--r--src/fastblas.cpp210
-rw-r--r--src/fastblas.h14
-rw-r--r--src/fastopenblas.h24
-rw-r--r--src/gemma.cpp303
-rw-r--r--src/io.cpp1088
-rw-r--r--src/io.h20
-rw-r--r--src/ldr.cpp7
-rw-r--r--src/lm.cpp246
-rw-r--r--src/lm.h3
-rw-r--r--src/lmm.cpp827
-rw-r--r--src/lmm.h24
-rw-r--r--src/logistic.cpp29
-rw-r--r--src/main.cpp2
-rw-r--r--src/mathfunc.cpp55
-rw-r--r--src/mathfunc.h9
-rw-r--r--src/mvlmm.cpp586
-rw-r--r--src/param.cpp97
-rw-r--r--src/param.h18
-rw-r--r--src/prdt.cpp2
-rw-r--r--src/vc.cpp39
-rwxr-xr-xtest/dev_test_suite.sh48
-rw-r--r--test/src/unittests-math.cpp117
-rwxr-xr-xtest/test_suite.sh6
32 files changed, 1381 insertions, 2728 deletions
diff --git a/.travis.yml b/.travis.yml
index ec2d049..3607992 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,46 +1,52 @@
language: C++
-compiler: gcc
matrix:
+ # OSX testing is under development
+ # allow_failures:
+ # - os: osx
include:
- os: linux
+ compiler: gcc
addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
+ # Our dev environment is a more recent GNU C++ and GSL2
- g++-4.9
+ - libopenblas-dev
+ - zlib1g-dev
+ - libeigen3-dev
+ - libgsl0-dev
+ - liblapack-dev
+ # - gfortran-dev for static
env:
- - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
- - os: linux
- addons:
- apt:
- sources:
- - ubuntu-toolchain-r-test
- packages:
- - g++-6
+ - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9 && EIGEN_INCLUDE_PATH=/usr/include/eigen3"
+ - os: osx
+ compiler: clang
env:
- - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
+ - MATRIX_EVAL="EIGEN_INCLUDE_PATH=/usr/local/include/eigen3"
+# - os: linux
+# addons:
+# apt:
+# sources:
+# - ubuntu-toolchain-r-test
+# packages:
+# - g++-6
+# env:
+# - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
before_install:
- - sudo apt-get -qq update
- - sudo apt-get install -y libopenblas-dev zlib1g-dev
- - sudo apt-get install -y libeigen3-dev
- - sudo apt-get install -y libgsl0-dev
- - sudo apt-get install -y liblapack-dev
- # for the static release version we need the following
- # - sudo apt-get install -y gfortran-dev
- - dpkg -l
- - eval "${MATRIX_EVAL}"
- - $CXX --version
+ - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install gsl openblas zlib eigen lapack ; fi
script:
+ - echo $MATRIX_EVAL
- eval "${MATRIX_EVAL}"
- $CXX --version
# build and test debug version
- - make CXX=$CXX WITH_OPENBLAS=1 -j 4
- - time make CXX=$CXX WITH_OPENBLAS=1 check
- - make clean
- # build and test release version
- - make CXX=$CXX FORCE_DYNAMIC=1 WITH_OPENBLAS=1 -j 4
- - time make CXX=$CXX WITH_OPENBLAS=1 DEBUG= check
+ - make CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 -j 4
+ - time make CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 check
+ # - make clean
+ # build and test release version (integration test mostly)
+ # - make CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH DEBUG= FORCE_DYNAMIC=1 WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 -j 4
+ # - time make CXX=$CXX DEBUG= WITH_OPENBLAS=1 fast-check
# build static release (fast-check only)
# - make clean
# - make CXX=$CXX TRAVIS_CI=1 -j 4 fast-check
diff --git a/INSTALL.md b/INSTALL.md
index e450a2a..bfd10bb 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -14,7 +14,7 @@ GEMMA runs on Linux and MAC OSX and the runtime has the following
dependencies:
* C++ tool chain >= 4.9
-* GNU Science library (GSL) 1.x (GEMMA does not currently work with GSL >= 2).
+* GNU Science library (GSL) 1.x (note that 2.x is not yet supported)
* blas/openblas
* lapack
* [Eigen3 library](http://eigen.tuxfamily.org/dox/)
@@ -88,6 +88,9 @@ should compile with GCC_FLAGS="" to disable optimizations (-O3). E.g.
make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 GCC_FLAGS=
+If you get errors with an older OpenBlas version you may need to add
+OPENBLAS_LEGACY=1.
+
Other options, such as compiling with warnings, are listed in the
Makefile.
@@ -100,3 +103,42 @@ GEMMA includes the shunit2 test framework (version 2.0).
or
./run_tests.sh
+
+## Optimizing performance
+
+### OpenBlas
+
+Linking against a built-from-source OpenBlas is a first optimization
+step because it will optimize code for the local architecture (on my
+workstation it easily doubles speed). When you check the output .log
+file of GEMMA after a run, it will tell you how the linked-in OpenBlas
+was compiled.
+
+To link a new version, compile OpenBlas as per
+[instructions](http://www.openblas.net/). You can start with the default:
+
+ make -j 4
+
+or play with the switches
+
+ make USE_THREAD=1 NUM_THREADS=16 -j 4
+
+rendering for example:
+
+ OpenBLAS build complete. (BLAS CBLAS)
+ OS ... Linux
+ Architecture ... x86_64
+ BINARY ... 64bit
+ C compiler ... GCC (command line : gcc)
+ Library Name ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 16)
+
+ To install the library, you can run "make PREFIX=/path/to/your/installation install".
+
+This generates a static library which you can link, using its full path,
+with the GEMMA Makefile:
+
+ time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz -llapack" WITH_OPENBLAS=1 -j 4 fast-check
+
+ make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz -llapack" WITH_OPENBLAS=1 -j 4 unittests
+
+NOTE: we should make this easier.
diff --git a/Makefile b/Makefile
index 176dd2c..c57567b 100644
--- a/Makefile
+++ b/Makefile
@@ -26,6 +26,14 @@
#
# make check
#
+# Run quick (development) tests with
+#
+# make fast-check
+#
+# Run full (lengthy) tests with
+#
+# make check-all
+#
# See also the INSTALL.md document in the source tree at
#
# https://github.com/genetics-statistics/GEMMA/blob/master/INSTALL.md
@@ -34,14 +42,15 @@
SYS = LNX # LNX|MAC (Linux is the default)
# Leave blank after "=" to disable; put "= 1" to enable
DIST_NAME = gemma-0.97.3
-DEBUG = 1 # DEBUG mode, set DEBUG=0 for a release
+DEBUG = 1 # DEBUG mode, set DEBUG= (blank) for a release
SHOW_COMPILER_WARNINGS =
WITH_LAPACK = 1
-WITH_OPENBLAS = # Defaults to LAPACK - OPENBLAS may be faster
-FORCE_STATIC = # Static linking of libraries
-GCC_FLAGS = -O3 # extra flags -Wl,--allow-multiple-definition
-TRAVIS_CI = # used by TRAVIS for testing
-EIGEN_INCLUDE_PATH=/usr/include/eigen3
+WITH_OPENBLAS = # Defaults to LAPACK - OPENBLAS may be faster
+OPENBLAS_LEGACY = # Using older OpenBlas
+FORCE_STATIC = # Static linking of libraries
+GCC_FLAGS = -O3 -std=gnu++11 # extra flags -Wl,--allow-multiple-definition
+TRAVIS_CI = # used by TRAVIS for testing
+EIGEN_INCLUDE_PATH = /usr/include/eigen3
# --------------------------------------------------------------------
# Edit below this line with caution
@@ -58,15 +67,25 @@ else
CPP = g++
endif
-ifdef OPENBLAS
- WITH_LAPACK = # OPENBLAS usually includes LAPACK
+ifeq ($(CPP), clang++)
+ # macOS Homebrew settings (as used on Travis-CI)
+ GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem//usr/local/opt/openblas/include -isystem//usr/local/include/eigen3 -Wl,-L/usr/local/opt/openblas/lib
+endif
+
+# When WITH_OPENBLAS is set (non-empty) the OPENBLAS preprocessor symbol
+# is defined for the C++ sources. Setting OPENBLAS_LEGACY as well also
+# defines OPENBLAS_LEGACY, for builds against an older OpenBlas.
+ifdef WITH_OPENBLAS
+ OPENBLAS=1
+ # WITH_LAPACK = # OPENBLAS usually includes LAPACK
+ CPPFLAGS += -DOPENBLAS
+ ifdef OPENBLAS_LEGACY
+ CPPFLAGS += -DOPENBLAS_LEGACY
+ endif
endif
ifdef DEBUG
- CPPFLAGS = -g $(GCC_FLAGS) -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+ CPPFLAGS += -g $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
else
# release mode
- CPPFLAGS = -DNDEBUG $(GCC_FLAGS) -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+ CPPFLAGS += -DNDEBUG $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
endif
ifdef SHOW_COMPILER_WARNINGS
diff --git a/README.md b/README.md
index d6209a8..d5d2f38 100644
--- a/README.md
+++ b/README.md
@@ -162,15 +162,47 @@ unpack the file.
LAPACK and BLAS libraries. There is no need to install these
libraries.
-### Building from source
+### Optimizing performance
+
+Precompiled binaries and libraries may not be optimal for your particular
+hardware. See [INSTALL.md](INSTALL.md) for tips on speeding up GEMMA.
-*Note that GEMMA currently does not work with GSL 2.x. We recommend
-linking to the latest version of GSL 1.x, which is GSL 1.16 as of this
-writing.*
+### Building from source
More information on source code, dependencies and installation can be
found in [INSTALL.md](INSTALL.md).
+## Reporting a GEMMA bug or issue
+
+GEMMA has an
+[issue tracker](https://github.com/genetics-statistics/GEMMA/issues)
+on github. Before posting an issue search the issue tracker first. It
+is likely someone may have encountered something similar. Also try
+running the latest version of GEMMA to make sure it has not been fixed
+already. When reporting an issue include the output of the program and
+the contents of the .log.txt file in the output directory. We may ask
+you for your data to resolve the issue (treated confidentially). Check
+list:
+
+1. [X] I have found an issue with GEMMA
+2. [ ] I have searched for it on the [issue tracker](https://github.com/genetics-statistics/GEMMA/issues) (incl. closed issues)
+3. [ ] I have tried the latest release of GEMMA
+4. [ ] I have posted a new issue on the issue tracker or added to an existing one
+5. [ ] I have included the output of GEMMA
+6. [ ] I have included the relevant .log.txt file in the output directory
+7. [ ] I have made available the data to reproduce the problem (optional)
+
+Please always remember that GEMMA is written and maintained by
+volunteers with good intentions. Our time is valuable too. By helping
+us as much as possible we can provide this tool for everyone to use.
+
+## Code of conduct
+
+By using GEMMA and communicating with its community you implicitly
+agree to abide by the
+[code of conduct](https://software-carpentry.org/conduct/) as
+published by the Software Carpentry initiative.
+
## Credits
The *GEMMA* software was developed by:
@@ -180,7 +212,8 @@ Dept. of Biostatistics<br>
University of Michigan<br>
2012-2017
-Peter Carbonetto, Tim Flutre, Matthew Stephens, Pjotr Prins and others
-have also contributed to the development of this software.
+Peter Carbonetto, Tim Flutre, Matthew Stephens,
+[Pjotr Prins](http://thebird.nl/) and others have also contributed to
+the development of this software.
[latest_release]: https://github.com/genetics-statistics/GEMMA/releases "Most recent stable releases"
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index 7aac1d4..6f9aba7 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -116,16 +116,16 @@ void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
getline(infile, line);
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
vec_sa2.push_back(atof(ch_ptr));
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
vec_sb2.push_back(atof(ch_ptr));
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
vec_wab.push_back(atof(ch_ptr));
}
@@ -160,11 +160,11 @@ void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
while (!safeGetline(infile, line).eof()) {
flag_block = 0;
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
vec_rs.push_back(rs);
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (t == 0) {
block = ch_ptr;
} else {
@@ -223,7 +223,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
// Use the header to determine the number of categories.
@@ -238,7 +238,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
// Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
if (header.rs_col == 0) {
rs = chr + ":" + pos;
@@ -248,6 +248,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
catd.clear();
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
} else if (header.chr_col != 0 && header.chr_col == i + 1) {
diff --git a/src/debug.cpp b/src/debug.cpp
index 0d3c9cc..82d2be0 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -18,9 +18,71 @@
#include "debug.h"
#include "mathfunc.h"
-// Helper function called by macro validate_K(K, check)
-void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line) {
- if (do_check) {
+// Run-time switches shared by the whole program. They are local to this
+// file and are only read or changed through the accessors below.
+static bool debug_mode   = false; // debug mode
+static bool debug_check  = true;  // check data/algorithms
+static bool debug_strict = false; // fail on error
+static bool debug_quiet  = false; // quiet mode
+static uint debug_issue  = 0;     // github issue under test; 0 until set
+static bool debug_legacy = false; // legacy mode
+
+// ---- Setters
+
+void debug_set_debug_mode(bool setting) {
+  debug_mode = setting;
+}
+
+// Note the inversion: passing true switches checking OFF.
+void debug_set_no_check_mode(bool setting) {
+  debug_check = !setting;
+}
+
+void debug_set_strict_mode(bool setting) {
+  debug_strict = setting;
+}
+
+void debug_set_quiet_mode(bool setting) {
+  debug_quiet = setting;
+}
+
+void debug_set_issue(uint issue) {
+  debug_issue = issue;
+}
+
+void debug_set_legacy_mode(bool setting) {
+  debug_legacy = setting;
+}
+
+// ---- Getters
+
+bool is_debug_mode() {
+  return debug_mode;
+}
+
+// True when checking has been switched off; always the opposite of
+// is_check_mode().
+bool is_no_check_mode() {
+  return !debug_check;
+}
+
+bool is_check_mode() {
+  return debug_check;
+}
+
+bool is_strict_mode() {
+  return debug_strict;
+}
+
+bool is_quiet_mode() {
+  return debug_quiet;
+}
+
+// True when `issue` is the issue number stored by debug_set_issue().
+bool is_issue(uint issue) {
+  return debug_issue == issue;
+}
+
+bool is_legacy_mode() {
+  return debug_legacy;
+}
+
+/*
+  Allocate a rows x cols gsl_matrix and check the result with
+  enforce_msg, so that a failed allocation is caught even when no GSL
+  error handler is set. Because gsl_matrix_alloc does not initialize
+  the values, every element is set to NaN when checking is enabled
+  (NO-CHECKS not set, the default) and debug mode is on (i.e. the
+  -debug option).
+*/
+gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols) {
+  gsl_matrix *m = gsl_matrix_alloc(rows,cols);
+  enforce_msg(m,"Not enough memory"); // in case no error handler is set
+  const bool fill_with_nan = is_check_mode() && is_debug_mode();
+  if (fill_with_nan)
+    gsl_matrix_set_all(m, nan(""));
+  return m;
+}
+
+/*
+  Allocate a gsl_vector of length n and check the result with
+  enforce_msg, so that a failed allocation is caught even when no GSL
+  error handler is set. Because gsl_vector_alloc does not initialize
+  the values, every element is set to NaN when checking is enabled
+  (NO-CHECKS not set) and debug mode is on (i.e. the -debug option).
+*/
+gsl_vector *gsl_vector_safe_alloc(size_t n) {
+  gsl_vector *v = gsl_vector_alloc(n);
+  enforce_msg(v,"Not enough memory"); // in case no error handler is set
+  const bool fill_with_nan = is_check_mode() && is_debug_mode();
+  if (fill_with_nan)
+    gsl_vector_set_all(v, nan(""));
+  return v;
+}
+
+// Wrapper around strtok, normally reached through the strtok_safe macro
+// which supplies the calling function, file and line. When strtok finds
+// no token, debug or strict mode stops the program with a message naming
+// the call site; in other modes NULL is returned to the caller as plain
+// strtok would do.
+char *do_strtok_safe(char *tokenize, const char *delimiters, const char *__pretty_function, const char *__file, int __line) {
+  char *token = strtok(tokenize,delimiters);
+  if (token != NULL)
+    return token;
+  if (is_debug_mode() || is_strict_mode())
+    fail_at_msg(__file,__line,string("strtok failed in ") + __pretty_function);
+  return NULL;
+}
+
+// Helper function called by macro validate_K(K, check). K is validated
+// unless -no-check option is used.
+void do_validate_K(const gsl_matrix *K, const char *__file, int __line) {
+ if (is_check_mode()) {
// debug_msg("Validating K");
auto eigenvalues = getEigenValues(K);
const uint count_small = count_small_values(eigenvalues,EIGEN_MINVALUE);
@@ -33,13 +95,13 @@ void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *
if (!isMatrixIllConditioned(eigenvalues))
warning_at_msg(__file,__line,"K is ill conditioned!");
if (!isMatrixSymmetric(K))
- fail_at_msg(strict,__file,__line,"K is not symmetric!" );
+ warnfail_at_msg(is_strict_mode(),__file,__line,"K is not symmetric!" );
const bool negative_values = has_negative_values_but_one(eigenvalues);
if (negative_values) {
warning_at_msg(__file,__line,"K has more than one negative eigenvalues!");
}
if (count_small>1 && negative_values && !isMatrixPositiveDefinite(K))
- fail_at_msg(strict,__file,__line,"K is not positive definite!");
+ warnfail_at_msg(is_strict_mode(),__file,__line,"K is not positive definite!");
gsl_vector_free(eigenvalues);
}
}
diff --git a/src/debug.h b/src/debug.h
index 06ca5cb..b3ec17b 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -10,16 +10,36 @@ void gemma_gsl_error_handler (const char * reason,
const char * file,
int line, int gsl_errno);
+void debug_set_debug_mode(bool setting);
+void debug_set_no_check_mode(bool setting);
+void debug_set_strict_mode(bool setting);
+void debug_set_quiet_mode(bool setting);
+void debug_set_issue(uint issue);
+void debug_set_legacy_mode(bool setting);
+
+bool is_debug_mode();
+bool is_no_check_mode();
+bool is_check_mode();
+bool is_strict_mode();
+bool is_quiet_mode();
+bool is_issue(uint issue);
+bool is_legacy_mode();
+
+gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols);
+gsl_vector *gsl_vector_safe_alloc(size_t n);
+
+char *do_strtok_safe(char *tokenize, const char *delimiters, const char *__pretty_function, const char *__file, int __line);
+#define strtok_safe(string,delimiters) do_strtok_safe(string,delimiters,__PRETTY_FUNCTION__,__FILE__,__LINE__)
// Validation routines
-void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line);
+void do_validate_K(const gsl_matrix *K, const char *__file, int __line);
#define ROUND(f) round(f * 10000.)/10000
-#define validate_K(K,check,strict) do_validate_K(K,check,strict,__FILE__,__LINE__)
+#define validate_K(K) do_validate_K(K,__FILE__,__LINE__)
#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl;
-inline void fail_at_msg(bool strict, const char *__file, int __line, const char *msg) {
+inline void warnfail_at_msg(bool strict, const char *__file, int __line, const char *msg) {
if (strict)
std::cerr << "**** STRICT FAIL: ";
else
@@ -29,10 +49,25 @@ inline void fail_at_msg(bool strict, const char *__file, int __line, const char
exit(1);
}
+// Print msg followed by the given source file and line to stderr and
+// terminate the program with exit status 1.
+inline void fail_at_msg(const char *__file, int __line, std::string msg) {
+ std::cerr << msg << " in " << __file << " at line " << __line << std::endl;
+ exit(1);
+}
+
# ifndef __ASSERT_VOID_CAST
# define __ASSERT_VOID_CAST (void)
# endif
+// Print "**** FAILED: " followed by msg to stderr and terminate the
+// program with exit status 5 (C string overload).
+inline void fail_msg(const char *msg) {
+ std::cerr << "**** FAILED: " << msg << std::endl;
+ exit(5);
+}
+
+// Same as above for a std::string message.
+inline void fail_msg(std::string msg) {
+ std::cerr << "**** FAILED: " << msg << std::endl;
+ exit(5);
+}
+
#if defined NDEBUG
#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl;
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index a8c545c..4d6aacc 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -17,16 +17,18 @@
*/
#include "Eigen/Dense"
-#include "gsl/gsl_linalg.h"
+// #include "gsl/gsl_linalg.h"
#include "gsl/gsl_matrix.h"
-#include "gsl/gsl_vector.h"
+// #include "gsl/gsl_vector.h"
#include <cmath>
#include <iostream>
#include <vector>
+#include <cblas.h>
using namespace std;
using namespace Eigen;
+
// On two different clusters, compare eigen vs lapack/gsl:
//
// dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
@@ -57,8 +59,6 @@ void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat;
}
}
-
- return;
}
void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
@@ -75,15 +75,12 @@ void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
} else {
y_vec = alpha * A_mat.transpose() * x_vec + beta * y_vec;
}
-
- return;
}
void eigenlib_invert(gsl_matrix *A) {
Map<Matrix<double, Dynamic, Dynamic, RowMajor>> A_mat(A->data, A->size1,
A->size2);
A_mat = A_mat.inverse();
- return;
}
void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) {
@@ -92,7 +89,6 @@ void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) {
Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic>> b_vec(
b->data, b->size, OuterStride<Dynamic>(b->stride));
A_mat = alpha * b_vec * b_vec.transpose() + A_mat;
- return;
}
void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) {
@@ -108,5 +104,4 @@ void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) {
abort();
eval_vec = es.eigenvalues();
U_mat = es.eigenvectors();
- return;
}
diff --git a/src/eigenlib.h b/src/eigenlib.h
index b29fa63..7fb69ad 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -19,9 +19,9 @@
#ifndef __EIGENLIB_H__
#define __EIGENLIB_H__
-#include <vector>
+// #include <vector>
-using namespace std;
+// using namespace std;
void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
const gsl_matrix *A, const gsl_matrix *B, const double beta,
diff --git a/src/fastblas.cpp b/src/fastblas.cpp
new file mode 100644
index 0000000..de7e27b
--- /dev/null
+++ b/src/fastblas.cpp
@@ -0,0 +1,210 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011-2017, Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "gsl/gsl_matrix.h"
+#include <algorithm> // std::min
+#include <cmath>
+#include <iomanip>
+#include <vector>
+#include <cblas.h>
+#include "debug.h"
+#include "fastblas.h"
+#include "mathfunc.h"
+#include <string.h>
+#include "eigenlib.h"
+
+using namespace std;
+
+/*
+  Reasonably fast function to copy data from a standard C array into a
+  gsl_matrix. Avoid it for performance critical sections.
+
+  m   : destination matrix, already allocated; its size1 x size2
+        dimensions determine how much is copied.
+  mem : source buffer holding size1*size2 doubles, row after row.
+
+  Returns m. In strict mode the values are copied one element at a
+  time; otherwise each row is copied through a temporary vector.
+*/
+gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) {
+  // Use size_t indices: the bounds are size_t, so a deduced int index
+  // would mix signed and unsigned and overflow on very large matrices.
+  const size_t rows = m->size1;
+  const size_t cols = m->size2;
+  if (is_strict_mode()) { // slower correct version
+    for (size_t r=0; r<rows; r++) {
+      for (size_t c=0; c<cols; c++) {
+        gsl_matrix_set(m,r,c,mem[r*cols+c]);
+      }
+    }
+  } else { // faster goes by row
+    auto v = gsl_vector_calloc(cols);
+    enforce(v); // just to be sure
+    for (size_t r=0; r<rows; r++) {
+      // memcpy below relies on the vector being contiguous
+      assert(v->size == cols);
+      assert(v->block->size == cols);
+      assert(v->stride == 1);
+      memcpy(v->block->data,&mem[r*cols],cols*sizeof(double));
+      gsl_matrix_set_row(m,r,v);
+    }
+    gsl_vector_free(v);
+  }
+  return m;
+}
+
+/*
+  Helper function fast_cblas_dgemm runs the local dgemm: it forwards all
+  arguments unchanged to cblas_dgemm. When compiled without NDEBUG and
+  with debug mode switched on it first prints the call parameters, one
+  sample element and the sums of A, B and C to stdout.
+*/
+void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
+                      const enum CBLAS_TRANSPOSE TransA,
+                      const enum CBLAS_TRANSPOSE TransB,
+                      const size_t M,
+                      const size_t N,
+                      const size_t K,
+                      const double alpha,
+                      const double *A,
+                      const size_t lda,
+                      const double *B,
+                      const size_t ldb,
+                      const double beta,
+                      double *C,
+                      const size_t ldc) {
+#ifndef NDEBUG
+  size_t i,j;
+  if (is_debug_mode()) {
+    #ifdef DISABLED
+    printf (" Top left corner of matrix A: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(K,6); j++) {
+        printf ("%12.0f", A[j+i*K]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix B: \n");
+    for (i=0; i<min(K,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.0f", B[j+i*N]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix C: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.5G", C[j+i*N]);
+      }
+      printf ("\n");
+    }
+    #endif
+
+    // Element [3] is only a sample value for the trace. Report NaN
+    // instead of reading past the end of an operand that holds fewer
+    // than four elements.
+    const double A03 = (M*K > 3 ? A[3] : nan(""));
+    const double B03 = (K*N > 3 ? B[3] : nan(""));
+    const double C03 = (M*N > 3 ? C[3] : nan(""));
+
+    cout << scientific << setprecision(3) << "* RowMajor " << Order << "\t" ;
+    cout << "transA " << TransA << "\t" ;
+    cout << "transB " << TransB << "\t" ;
+    cout << "m " << M << "\t" ;
+    cout << "n " << N << "\t" ;
+    cout << "k " << K << "\n" ;
+    cout << "* lda " << lda << "\t" ;
+    cout << "ldb " << ldb << "\t" ;
+    cout << "ldc " << ldc << "\t" ;
+    cout << "alpha " << alpha << "\t" ;
+    cout << "beta " << beta << "\n" ;
+    cout << "* A03 " << A03 << "\t" ;
+    cout << "B03 " << B03 << "\t" ;
+    cout << "C03 " << C03 << "\t" ;
+    cout << "Asum " << sum(A,M,K) << "\t" ;
+    cout << "Bsum " << sum(B,K,N) << "\n" ;
+    cout << "Csum " << sum(C,M,N) << "\n" ;
+  }
+#endif // NDEBUG
+
+  cblas_dgemm(Order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
+
+#ifndef NDEBUG
+  #ifdef DISABLED
+  if (is_debug_mode()) {
+    printf (" Top left corner of matrix A (cols=k %i, rows=m %i): \n",K,M);
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(K,6); j++) {
+        printf ("%12.0f", A[j+i*K]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix B: \n");
+    for (i=0; i<min(K,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.0f", B[j+i*N]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix C: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.5G", C[j+i*N]);
+      }
+      printf ("\n");
+    }
+  }
+  #endif
+#endif // NDEBUG
+}
+
+/*
+  Helper function fast_cblas_dgemm converts a GEMMA layout to
+  cblas_dgemm. TransA/TransB select the plain matrix when they start
+  with 'N' or 'n' and the transposed matrix otherwise. The dimensions of
+  the (possibly transposed) operands must match those of C; if they do
+  not, the program stops with "Range error in dgemm".
+*/
+static void fast_cblas_dgemm(const char *TransA, const char *TransB, const double alpha,
+                             const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                             gsl_matrix *C) {
+  // C++ is row-major
+  const bool plainA = (*TransA == 'N' || *TransA == 'n');
+  const bool plainB = (*TransB == 'N' || *TransB == 'n');
+  const auto transA = (plainA ? CblasNoTrans : CblasTrans);
+  const auto transB = (plainB ? CblasNoTrans : CblasTrans);
+
+  // Dimensions of the result and of the operands after transposition
+  const size_t M = C->size1;
+  const size_t N = C->size2;
+  const size_t rowsA = plainA ? A->size1 : A->size2;
+  const size_t colsA = plainA ? A->size2 : A->size1;
+  const size_t rowsB = plainB ? B->size1 : B->size2;
+  const size_t colsB = plainB ? B->size2 : B->size1;
+
+  /* [MxN] = [rowsA x colsA][rowsB x colsB] */
+  const bool conforming = (M == rowsA && N == colsB && colsA == rowsB);
+  if (!conforming) {
+    fail_msg("Range error in dgemm");
+    return;
+  }
+
+  cblas_dgemm (CblasRowMajor, transA, transB, M, N, colsA,
+               alpha, A->data, A->tda, B->data, B->tda, beta,
+               C->data, C->tda);
+}
+
+
+/*
+ Use the fastest supported way to call BLAS dgemm: in legacy mode the
+ product is computed by eigenlib_dgemm, otherwise by the cblas based
+ fast_cblas_dgemm wrapper. The result is written to C.
+*/
+
+void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
+ const gsl_matrix *A, const gsl_matrix *B, const double beta,
+ gsl_matrix *C) {
+ if (is_legacy_mode()) {
+ eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C);
+ } else {
+ fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
+
+ // NOTE(review): the cross-check below is compiled out unless DISABLE
+ // (not DISABLED, as used elsewhere in this file) is defined. C1 is
+ // allocated without being initialized, so the comparison presumably
+ // only holds for beta == 0 - confirm before enabling.
+ #ifdef DISABLE
+ if (is_check_mode()) {
+ // ---- validate with original implementation
+ gsl_matrix *C1 = gsl_matrix_alloc(C->size1,C->size2);
+ eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C1);
+ enforce_msg(gsl_matrix_equal(C,C1),"dgemm outcomes are not equal for fast & eigenlib");
+ gsl_matrix_free(C1);
+ }
+ #endif
+ }
+}
diff --git a/src/fastblas.h b/src/fastblas.h
new file mode 100644
index 0000000..d0f5c14
--- /dev/null
+++ b/src/fastblas.h
@@ -0,0 +1,14 @@
+#ifndef __FASTBLAS_H__
+#define __FASTBLAS_H__
+
+#include <assert.h>
+#include <iostream>
+#include "gsl/gsl_matrix.h"
+
+// Copy the values in mem, stored row after row, into the already
+// allocated matrix m and return m.
+gsl_matrix *fast_copy(gsl_matrix *m, const double *mem);
+
+// dgemm on gsl matrices; TransA/TransB starting with 'N' or 'n' select
+// the untransposed operand. The result is written to C.
+void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
+ const gsl_matrix *A, const gsl_matrix *B, const double beta,
+ gsl_matrix *C);
+
+#endif
diff --git a/src/fastopenblas.h b/src/fastopenblas.h
new file mode 100644
index 0000000..969a2e0
--- /dev/null
+++ b/src/fastopenblas.h
@@ -0,0 +1,24 @@
+#ifndef __FASTOPENBLAS_H__
+#define __FASTOPENBLAS_H__
+
+#include <assert.h>
+#include <iostream>
+#include <cblas.h> // For OpenBlas
+#include "gsl/gsl_matrix.h"
+
+// Wrapper around cblas_dgemm taking the same arguments, with the
+// dimensions and leading dimensions passed as size_t.
+void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB,
+ const size_t M,
+ const size_t N,
+ const size_t K,
+ const double alpha,
+ const double *A,
+ const size_t lda,
+ const double *B,
+ const size_t ldb,
+ const double beta,
+ double *C,
+ const size_t ldc);
+
+#endif // __FASTOPENBLAS_H__
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 24173c3..f9a2fc9 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -23,6 +23,17 @@
#include <iostream>
#include <string>
#include <sys/stat.h>
+#ifdef OPENBLAS
+#pragma message "Compiling with OPENBLAS"
+extern "C" {
+ // these functions are defined in cblas.h - but if we include that
+ // header it conflicts with other BLAS includes
+ int openblas_get_num_threads(void);
+ int openblas_get_parallel(void);
+ char* openblas_get_config(void);
+ char* openblas_get_corename(void);
+}
+#endif
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
@@ -310,11 +321,6 @@ void GEMMA::PrintHelp(size_t option) {
cout << " rs#2, base_position, chr_number" << endl;
cout << " ..." << endl;
- // WJA added.
- cout << " -oxford [prefix] "
- << " specify input Oxford genotype bgen file prefix." << endl;
- cout << " requires: *.bgen, *.sample files" << endl;
-
cout << " -gxe [filename] "
<< " specify input file that contains a column of environmental "
"factor for g by e tests"
@@ -429,8 +435,8 @@ void GEMMA::PrintHelp(size_t option) {
"default 1)"
<< endl;
cout << " -pace [num] "
- << " specify terminal display update pace (default 100000 SNPs or "
- "100000 iterations)."
+ << " specify terminal display update pace (default 1,000 SNPs or "
+ "1,000 iterations)."
<< endl;
cout << " -outdir [path] "
<< " specify output directory path (default \"./output/\")" << endl;
@@ -715,6 +721,7 @@ void GEMMA::PrintHelp(size_t option) {
cout << " -debug debug output" << endl;
cout << " -nind [num] read up to num individuals" << endl;
cout << " -issue [num] enable tests relevant to issue tracker" << endl;
+ cout << " -legacy run gemma in legacy mode" << endl;
cout << endl;
}
@@ -760,7 +767,7 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
str.assign(argv[i]);
cPar.file_mbfile = str;
} else if (strcmp(argv[i], "-silence") == 0) {
- cPar.mode_silence = true;
+ debug_set_quiet_mode(true);
} else if (strcmp(argv[i], "-g") == 0) {
if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
continue;
@@ -793,18 +800,6 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
str.clear();
str.assign(argv[i]);
cPar.file_anno = str;
- }
-
- // WJA added.
- else if (strcmp(argv[i], "-oxford") == 0 ||
- strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) {
- if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
- continue;
- }
- ++i;
- str.clear();
- str.assign(argv[i]);
- cPar.file_oxford = str;
} else if (strcmp(argv[i], "-gxe") == 0) {
if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
continue;
@@ -1373,8 +1368,9 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
++i;
str.clear();
str.assign(argv[i]);
- cPar.issue = atoi(str.c_str()); // for testing purposes
- enforce(cPar.issue > 0);
+ auto issue = atoi(str.c_str()); // for testing purposes
+ enforce(issue > 0);
+ debug_set_issue(issue);
} else if (strcmp(argv[i], "-emp") == 0) {
if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
continue;
@@ -1594,11 +1590,16 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
str.assign(argv[i]);
cPar.window_ns = atoi(str.c_str());
} else if (strcmp(argv[i], "-debug") == 0) {
- cPar.mode_debug = true;
+ // cPar.mode_debug = true;
+ debug_set_debug_mode(true);
} else if (strcmp(argv[i], "-no-check") == 0) {
- cPar.mode_check = false;
+ // cPar.mode_check = false;
+ debug_set_no_check_mode(true);
} else if (strcmp(argv[i], "-strict") == 0) {
- cPar.mode_strict = true;
+ // cPar.mode_strict = true;
+ debug_set_strict_mode(true);
+ } else if (strcmp(argv[i], "-legacy") == 0) {
+ debug_set_legacy_mode(true);
} else {
cout << "error! unrecognized option: " << argv[i] << endl;
cPar.error = true;
@@ -1635,7 +1636,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (cPar.a_mode == 41 || cPar.a_mode == 42) {
gsl_vector *y_prdt;
- y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test);
+ y_prdt = gsl_vector_safe_alloc(cPar.ni_total - cPar.ni_test);
// set to zero
gsl_vector_set_zero(y_prdt);
@@ -1647,8 +1648,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
cout << "Adding Breeding Values ... " << endl;
- gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
- gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test);
+ gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
+ gsl_vector *u_hat = gsl_vector_safe_alloc(cPar.ni_test);
// read kinship matrix and set u_hat
vector<int> indicator_all;
@@ -1706,25 +1707,25 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (cPar.a_mode == 43) {
// first, use individuals with full phenotypes to obtain estimates of Vg and
// Ve
- gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
- gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
- gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
- gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
- gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
- gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
- gsl_vector *eval = gsl_vector_alloc(Y->size1);
+ gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+ gsl_matrix *UtW = gsl_matrix_safe_alloc(Y->size1, W->size2);
+ gsl_matrix *UtY = gsl_matrix_safe_alloc(Y->size1, Y->size2);
+ gsl_vector *eval = gsl_vector_safe_alloc(Y->size1);
- gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
- gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+ gsl_matrix *Y_full = gsl_matrix_safe_alloc(cPar.ni_cvt, cPar.n_ph);
+ gsl_matrix *W_full = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_cvt);
// set covariates matrix W and phenotype matrix Y
// an intercept should be included in W,
cPar.CopyCvtPhen(W, Y, 0);
cPar.CopyCvtPhen(W_full, Y_full, 1);
- gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
- gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
- gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+ gsl_matrix *Y_hat = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_ph);
+ gsl_matrix *G_full = gsl_matrix_safe_alloc(Y_full->size1, Y_full->size1);
+ gsl_matrix *H_full = gsl_matrix_safe_alloc(Y_full->size1 * Y_hat->size2,
Y_full->size1 * Y_hat->size2);
// read relatedness matrix G, and matrix G_full
@@ -1745,7 +1746,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
CenterMatrix(G_full);
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
// eigen-decomposition and calculate trace_G
cout << "Start Eigen-Decomposition..." << endl;
@@ -1760,8 +1761,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
// calculate variance component and beta estimates
// and then obtain predicted values
if (cPar.n_ph == 1) {
- gsl_vector *beta = gsl_vector_alloc(W->size2);
- gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+ gsl_vector *beta = gsl_vector_safe_alloc(W->size2);
+ gsl_vector *se_beta = gsl_vector_safe_alloc(W->size2);
double lambda, logl, vg, ve;
gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
@@ -1791,10 +1792,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
gsl_vector_free(beta);
gsl_vector_free(se_beta);
} else {
- gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
- gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
- gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
- gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+ gsl_matrix *Vg = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *Ve = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
+ gsl_matrix *B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
+ gsl_matrix *se_B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
// obtain estimates
CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
@@ -1872,7 +1873,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (cPar.a_mode == 21 || cPar.a_mode == 22) {
cout << "Calculating Relatedness Matrix ... " << endl;
- gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+ gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
enforce_msg(G, "allocate G"); // just to be sure
time_start = clock();
@@ -1885,7 +1886,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
}
// Now we have the Kinship matrix test it
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
if (cPar.a_mode == 21) {
cPar.WriteMatrix(G, "cXX");
@@ -1917,8 +1918,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (cPar.a_mode == 25 || cPar.a_mode == 26) {
cout << "Calculating the S Matrix ... " << endl;
- gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
- gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_vector *ns = gsl_vector_safe_alloc(cPar.n_vc + 1);
gsl_matrix_set_zero(S);
gsl_vector_set_zero(ns);
@@ -1927,13 +1928,13 @@ void GEMMA::BatchRun(PARAM &cPar) {
gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
- gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
- gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
gsl_matrix_set_zero(K);
gsl_matrix_set_zero(A);
- gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
- gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+ gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
cPar.CopyCvtPhen(W, y, 0);
@@ -1970,9 +1971,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
// Compute the q vector, that is used for variance component estimation using
// summary statistics
if (cPar.a_mode == 27 || cPar.a_mode == 28) {
- gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
- gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
- gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
gsl_vector_set_zero(q);
gsl_vector_set_zero(s);
@@ -2028,8 +2029,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
// LM.
if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
cPar.a_mode == 54) { // Fit LM
- gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
- gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
// set covariates matrix W and phenotype matrix Y
// an intercept should be included in W,
@@ -2047,8 +2048,6 @@ void GEMMA::BatchRun(PARAM &cPar) {
&Y_col.vector); // y is the predictor, not the phenotype
} else if (!cPar.file_bfile.empty()) {
cLm.AnalyzePlink(W, &Y_col.vector);
- } else if (!cPar.file_oxford.empty()) {
- cLm.Analyzebgen(W, &Y_col.vector);
} else {
cLm.AnalyzeBimbam(W, &Y_col.vector);
}
@@ -2083,16 +2082,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.UpdateSNP(mapRS2wK);
// Setup matrices and vectors.
- gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
- gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
- gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
- gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
- gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
- gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+ gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
- gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
- gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+ gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
gsl_matrix_set_zero(K);
gsl_matrix_set_zero(A);
@@ -2219,16 +2218,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.n_vc = cPar.n_vc - 1;
- gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
- gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
- // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+ gsl_matrix *S = gsl_matrix_safe_alloc(2 * cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+ // gsl_matrix *V=gsl_matrix_safe_alloc (cPar.n_vc+1,
// (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
- // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+ // gsl_matrix *Vslope=gsl_matrix_safe_alloc (n_lines+1,
// (n_lines*(n_lines+1))/2*(n_lines+1) );
- gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
- gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
- gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
- gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+ gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+ gsl_vector *s_study = gsl_vector_safe_alloc(cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
+ gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
gsl_matrix_set_zero(S);
gsl_matrix_view S_mat =
@@ -2287,9 +2286,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
gsl_vector_free(s_ref);
gsl_vector_free(s);
} else {
- gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
- gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
- gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+ gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1 * cPar.n_vc);
// set covariates matrix W and phenotype matrix Y
// an intercept should be included in W,
@@ -2328,7 +2327,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
(cPar.v_traceG).clear();
double d = 0;
@@ -2366,9 +2365,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
// the genotypes
if (cPar.a_mode == 66 || cPar.a_mode == 67) {
// read reference file first
- gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
- gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
- gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+ gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_matrix *Svar = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+ gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
gsl_matrix_set_zero(S);
gsl_matrix_set_zero(Svar);
@@ -2393,14 +2392,14 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.ObtainWeight(setSnps_beta, mapRS2wK);
// set up matrices and vector
- gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
- gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *Xz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
+ gsl_matrix *XWz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
gsl_matrix *XtXWz =
- gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
- gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
- gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
- gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
- gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+ gsl_matrix_safe_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+ gsl_vector *w = gsl_vector_safe_alloc(mapRS2wK.size());
+ gsl_vector *w1 = gsl_vector_safe_alloc(mapRS2wK.size());
+ gsl_vector *z = gsl_vector_safe_alloc(mapRS2wK.size());
+ gsl_vector *s_vec = gsl_vector_safe_alloc(cPar.n_vc);
vector<size_t> vec_cat, vec_size;
vector<double> vec_z;
@@ -2524,20 +2523,20 @@ void GEMMA::BatchRun(PARAM &cPar) {
if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
cPar.a_mode == 4 || cPar.a_mode == 5 ||
cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
- gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+ gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
enforce_msg(Y, "allocate Y"); // just to be sure
- gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
- gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+ gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+ gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
// matrix
- gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
- gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
- gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+ gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
+ gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+ gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
gsl_vector *eval = gsl_vector_calloc(Y->size1);
- gsl_vector *env = gsl_vector_alloc(Y->size1);
- gsl_vector *weight = gsl_vector_alloc(Y->size1);
- assert_issue(cPar.issue == 26, UtY->data[0] == 0.0);
+ gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
+ gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
+ assert_issue(is_issue(26), UtY->data[0] == 0.0);
// set covariates matrix W and phenotype matrix Y
// an intercept should be included in W,
@@ -2557,7 +2556,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
// if residual weights are provided, then
if (!cPar.file_weight.empty()) {
@@ -2638,7 +2637,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
CalcUtX(U, W, UtW);
CalcUtX(U, Y, UtY);
- assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+ assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
LMM cLmm;
cLmm.CopyFromParam(cPar);
@@ -2655,7 +2654,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// calculate UtW and Uty
CalcUtX(U, W, UtW);
CalcUtX(U, Y, UtY);
- assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+ assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
// calculate REMLE/MLE estimate and pve for univariate model
if (cPar.n_ph == 1) { // one phenotype
@@ -2663,31 +2662,27 @@ void GEMMA::BatchRun(PARAM &cPar) {
gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
- assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+ assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
assert(!std::isnan(UtY->data[0]));
- assert(!std::isnan(B->data[0]));
- assert(!std::isnan(se_B->data[0]));
CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
&se_beta.vector);
assert(!std::isnan(UtY->data[0]));
- assert(!std::isnan(B->data[0]));
- assert(!std::isnan(se_B->data[0]));
cPar.beta_mle_null.clear();
cPar.se_beta_mle_null.clear();
+ assert(!std::isnan(B->data[0]));
+ assert(!std::isnan(se_B->data[0]));
for (size_t i = 0; i < B->size2; i++) {
cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
}
assert(!std::isnan(UtY->data[0]));
- assert(!std::isnan(B->data[0]));
- assert(!std::isnan(se_B->data[0]));
assert(!std::isnan(cPar.beta_mle_null.front()));
assert(!std::isnan(cPar.se_beta_mle_null.front()));
@@ -2699,6 +2694,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.beta_remle_null.clear();
cPar.se_beta_remle_null.clear();
+ assert(!std::isnan(B->data[0]));
+ assert(!std::isnan(se_B->data[0]));
+
for (size_t i = 0; i < B->size2; i++) {
cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
@@ -2710,11 +2708,11 @@ void GEMMA::BatchRun(PARAM &cPar) {
// calculate and output residuals
if (cPar.a_mode == 5) {
- gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
- gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
- gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
- gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
- gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+ gsl_vector *Utu_hat = gsl_vector_safe_alloc(Y->size1);
+ gsl_vector *Ute_hat = gsl_vector_safe_alloc(Y->size1);
+ gsl_vector *u_hat = gsl_vector_safe_alloc(Y->size1);
+ gsl_vector *e_hat = gsl_vector_safe_alloc(Y->size1);
+ gsl_vector *y_hat = gsl_vector_safe_alloc(Y->size1);
// obtain Utu and Ute
gsl_vector_memcpy(y_hat, &UtY_col.vector);
@@ -2755,18 +2753,18 @@ void GEMMA::BatchRun(PARAM &cPar) {
gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
if (!cPar.file_bfile.empty()) {
+ // PLINK analysis
if (cPar.file_gxe.empty()) {
cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
- &Y_col.vector);
- } else {
+ &Y_col.vector, cPar.setGWASnps);
+ }
+ else {
cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
&Y_col.vector, env);
}
}
- // WJA added
- else if (!cPar.file_oxford.empty()) {
- cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
- } else {
+ else {
+ // BIMBAM analysis
if (cPar.file_gxe.empty()) {
cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
&Y_col.vector, cPar.setGWASnps);
@@ -2788,8 +2786,6 @@ void GEMMA::BatchRun(PARAM &cPar) {
} else {
cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
}
- } else if (!cPar.file_oxford.empty()) {
- cMvlmm.Analyzebgen(U, eval, UtW, UtY);
} else {
if (cPar.file_gxe.empty()) {
cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
@@ -2819,10 +2815,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
// BSLMM
if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
- gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
- gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
- gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
- gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+ gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
// set covariates matrix W and phenotype vector y
// an intercept should be included in W,
@@ -2845,10 +2841,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
cBslmm.CopyToParam(cPar);
// else, if rho!=1
} else {
- gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
- gsl_vector *eval = gsl_vector_alloc(y->size);
- gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
- gsl_vector *Uty = gsl_vector_alloc(y->size);
+ gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_safe_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
// read relatedness matrix G
if (!(cPar.file_kin).empty()) {
@@ -2864,7 +2860,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
} else {
cPar.ReadGenotypes(UtX, G, true);
}
@@ -2929,10 +2925,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
// BSLMM-DAP
if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
if (cPar.a_mode == 14) {
- gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
- gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
- gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
- gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+ gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+ gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
+ gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
+ gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
// set covariates matrix W and phenotype vector y
// an intercept should be included in W,
@@ -2956,10 +2952,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
cBslmm.CopyToParam(cPar);
// else, if rho!=1
} else {
- gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
- gsl_vector *eval = gsl_vector_alloc(y->size);
- gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
- gsl_vector *Uty = gsl_vector_alloc(y->size);
+ gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
+ gsl_vector *eval = gsl_vector_safe_alloc(y->size);
+ gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
+ gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
// read relatedness matrix G
if (!(cPar.file_kin).empty()) {
@@ -2975,7 +2971,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check,cPar.mode_strict);
+ validate_K(G);
} else {
cPar.ReadGenotypes(UtX, G, true);
@@ -3090,6 +3086,11 @@ void GEMMA::BatchRun(PARAM &cPar) {
return;
}
+#include "Eigen/Dense"
+#if defined(OPENBLAS) && !defined(OPENBLAS_LEGACY)
+#include <openblas_config.h>
+#endif
+
void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
string file_str;
file_str = cPar.path_out + "/" + cPar.file_out;
@@ -3102,9 +3103,21 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
}
outfile << "##" << endl;
- outfile << "## GEMMA Version = " << version << endl;
- outfile << "## GSL Version = " << GSL_VERSION << endl;
- outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
+ outfile << "## GEMMA Version = " << version << endl;
+ outfile << "## GSL Version = " << GSL_VERSION << endl;
+ outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
+#ifdef OPENBLAS
+
+ // Record which OpenBLAS build this binary is linked against.
+ #ifndef OPENBLAS_LEGACY
+ outfile << "## OpenBlas =" << OPENBLAS_VERSION << " - " << openblas_get_config() << endl;
+ outfile << "## arch = " << openblas_get_corename() << endl;
+ outfile << "## threads = " << openblas_get_num_threads() << endl;
+ #else
+ outfile << "## OpenBlas = " << openblas_get_config() << endl;
+ #endif
+ // Static lookup table: the previous `new string[4]` array was never deleted
+ // and therefore leaked on every call.
+ static const char *const parallel_type[] = { "sequential", "threaded", "openmp" };
+ // Range-check the index so an unexpected value cannot read past the table.
+ const int parallel = openblas_get_parallel();
+ outfile << "## parallel type = "
+ << ((parallel >= 0 && parallel <= 2) ? parallel_type[parallel] : "unknown") << endl;
+#endif
outfile << "##" << endl;
outfile << "## Command Line Input = ";
diff --git a/src/io.cpp b/src/io.cpp
index 1dc5642..35a59ee 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -41,6 +41,7 @@
#include "debug.h"
#include "eigenlib.h"
+#include "fastblas.h"
#include "gzstream.h"
#include "io.h"
#include "lapack.h"
@@ -49,43 +50,17 @@
using namespace std;
// Print progress bar.
-void ProgressBar(string str, double p, double total) {
- double progress = (100.0 * p / total);
- int barsize = (int)(progress / 2.0);
- char bar[51];
-
- cout << str;
- for (int i = 0; i < 50; i++) {
- if (i < barsize) {
- bar[i] = '=';
- } else {
- bar[i] = ' ';
- }
- cout << bar[i];
- }
- cout << setprecision(2) << fixed << progress << "%\r" << flush;
-
- return;
-}
-
-// Print progress bar with acceptance ratio.
void ProgressBar(string str, double p, double total, double ratio) {
- double progress = (100.0 * p / total);
- int barsize = (int)(progress / 2.0);
- char bar[51];
-
- cout << str;
- for (int i = 0; i < 50; i++) {
- if (i < barsize) {
- bar[i] = '=';
- } else {
- bar[i] = ' ';
- }
- cout << bar[i];
- }
- cout << setprecision(2) << fixed << progress << "% " << ratio << "\r"
- << flush;
- return;
+ // Draws a 50-column text bar for p out of total, followed by the
+ // percentage; ratio is appended unless it equals the -1.0 sentinel.
+ // The line ends with '\r' so the next call overwrites it.
+ assert(p<=total);
+ const double progress = (100.0 * p / total);
+ // Clamp in floating point before converting to an unsigned count: with
+ // NDEBUG the assert above is compiled out, and p > total (or total == 0,
+ // which yields NaN/inf) would otherwise make 50-barsize wrap around to a
+ // huge length and std::string would throw.
+ double filled = progress / 2.0; // characters
+ if (!(filled >= 0.0)) filled = 0.0; // negative or NaN
+ if (filled > 50.0) filled = 50.0;
+ const size_t barsize = (size_t)filled;
+ cout << str << " ";
+ cout << std::string(barsize,'=');
+ cout << std::string(50-barsize,' ');
+ cout << setprecision(0) << fixed << " " << progress << "%";
+ if (ratio != -1.0)
+ cout << setprecision(2) << " " << ratio;
+ cout << "\r" << flush;
}
bool isBlankLine(char const *line) {
@@ -177,7 +152,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
@@ -233,7 +208,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
size_t flag = 0;
while (getline(infile, line)) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
ch_ptr = strtok(NULL, " , \t");
if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) {
@@ -241,7 +216,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) {
ch_ptr = strtok(NULL, " , \t");
if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
pheno_mean = atof(ch_ptr);
flag = 1;
}
@@ -339,7 +314,7 @@ bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
string id;
double p;
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
for (int i = 0; i < (p_column - 1); ++i) {
ch_ptr = strtok(NULL, " , \t");
}
@@ -511,17 +486,17 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
string minor;
while (getline(infile, line)) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " \t");
chr = ch_ptr;
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
rs = ch_ptr;
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
cM = atof(ch_ptr);
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
b_pos = atol(ch_ptr);
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
minor = ch_ptr;
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
major = ch_ptr;
SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
@@ -567,12 +542,12 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
}
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
id = ch_ptr;
- ch_ptr = strtok(NULL, " \t");
- ch_ptr = strtok(NULL, " \t");
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
ch_ptr = strtok(NULL, " \t");
size_t i = 0;
@@ -620,7 +595,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
const double &r2_level, map<string, string> &mapRS2chr,
map<string, long int> &mapRS2bp,
map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
- size_t &ns_test, bool debug) {
+ size_t &ns_test) {
debug_msg("entered");
indicator_snp.clear();
snpInfo.clear();
@@ -631,12 +606,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
return false;
}
- gsl_vector *genotype = gsl_vector_alloc(W->size1);
- gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
+ gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -674,11 +649,11 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
file_pos = 0;
auto count_warnings = 0;
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
minor = ch_ptr;
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
major = ch_ptr;
if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
@@ -693,7 +668,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
}
if (mapRS2bp.count(rs) == 0) {
- if (debug && count_warnings++ < 10) {
+ if (is_debug_mode() && count_warnings++ < 10) {
std::string msg = "Can't figure out position for ";
msg += rs;
debug_msg(msg);
@@ -719,7 +694,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
c_idv = 0;
gsl_vector_set_zero(genotype_miss);
for (int i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0)
continue;
@@ -842,12 +817,12 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
return false;
}
- gsl_vector *genotype = gsl_vector_alloc(W->size1);
- gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
+ gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -1029,13 +1004,13 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
bool flag = false;
for (size_t i = 0; i < inc; i++) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
}
if (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
geno_mean = 0.0;
double d;
@@ -1043,7 +1018,7 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
vector<size_t> geno_miss;
for (size_t i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -1159,9 +1134,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0;
while (getline(infile, line)) {
if (i_total == ni_total) {
- cout << "error! number of rows in the kinship "
- << "file is larger than the number of phentypes." << endl;
- error = true;
+ fail_msg("number of rows in the kinship file is larger than the number of individuals");
}
if (indicator_idv[i_total] == 0) {
@@ -1174,10 +1147,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
ch_ptr = strtok((char *)line.c_str(), " , \t");
while (ch_ptr != NULL) {
if (j_total == ni_total) {
- cout << "error! number of columns in the "
- << "kinship file is larger than the number"
- << " of phenotypes for row = " << i_total << endl;
- error = true;
+ fail_msg(string("number of columns in the kinship file is larger than the number of individuals for row = ")+to_string(i_total));
}
d = atof(ch_ptr);
@@ -1190,18 +1160,14 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
ch_ptr = strtok(NULL, " , \t");
}
if (j_total != ni_total) {
- cout << "error! number of columns in the kinship "
- << "file do not match the number of phentypes for "
- << "row = " << i_total << endl;
- error = true;
+ string msg = "number of columns in the kinship file does not match the number of individuals for row = " + to_string( i_total );
+ fail_msg(msg);
}
i_total++;
i_test++;
}
if (i_total != ni_total) {
- cout << "error! number of rows in the kinship file do "
- << "not match the number of phenotypes." << endl;
- error = true;
+ fail_msg("number of rows in the kinship file does not match the number of individuals.");
}
} else {
map<size_t, size_t> mapID2ID;
@@ -1218,11 +1184,11 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
size_t n_id1, n_id2;
while (getline(infile, line)) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
id1 = ch_ptr;
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
id2 = ch_ptr;
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
d = atof(ch_ptr);
if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) {
continue;
@@ -1237,9 +1203,10 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
Cov_d = gsl_matrix_get(G, n_id1, n_id2);
if (Cov_d != 0 && Cov_d != d) {
- cout << "error! redundant and unequal terms in the "
+ cerr << "error! redundant and unequal terms in the "
<< "kinship file, for id1 = " << id1 << " and id2 = " << id2
<< endl;
+ fail_msg("");
} else {
gsl_matrix_set(G, n_id1, n_id2, d);
gsl_matrix_set(G, n_id2, n_id1, d);
@@ -1278,7 +1245,6 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
infile.close();
infile.clear();
- return;
}
void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) {
@@ -1354,7 +1320,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
error = true;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
d = atof(ch_ptr);
ch_ptr = strtok(NULL, " , \t");
@@ -1391,12 +1357,12 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
bool process_ksnps = ksnps.size();
size_t ni_total = matrix_kin->size1;
- gsl_vector *geno = gsl_vector_alloc(ni_total);
- gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
+ gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
+ gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total);
// Xlarge contains inds x markers
const size_t msize = K_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+ gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, msize);
enforce_msg(Xlarge, "allocate Xlarge");
gsl_matrix_set_zero(Xlarge);
@@ -1405,9 +1371,9 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
size_t ns_test = 0;
for (size_t t = 0; t < indicator_snp.size(); ++t) {
string line;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
}
if (indicator_snp[t] == 0)
continue;
@@ -1480,12 +1446,12 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
// compute kinship matrix and return in matrix_kin a SNP at a time
if (ns_test % msize == 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
}
if (ns_test % msize != 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
}
cout << endl;
@@ -1531,14 +1497,14 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
double d, geno_mean, geno_var;
size_t ni_total = matrix_kin->size1;
- gsl_vector *geno = gsl_vector_alloc(ni_total);
+ gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
size_t ns_test = 0;
int n_bit;
// Create a large matrix.
const size_t msize = K_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+ gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, msize);
gsl_matrix_set_zero(Xlarge);
// Calculate n_bit and c, the number of bit for each snp.
@@ -1556,7 +1522,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
for (size_t t = 0; t < indicator_snp.size(); ++t) {
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
@@ -1626,13 +1592,13 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
ns_test++;
if (ns_test % msize == 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
}
if (ns_test % msize != 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
}
cout << endl;
@@ -1659,7 +1625,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
// genotype and calculate K.
bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
- const bool calc_K, bool debug) {
+ const bool calc_K) {
debug_msg("entered");
igzstream infile(file_geno.c_str(), igzstream::in);
if (!infile) {
@@ -1674,8 +1640,8 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
gsl_matrix_set_zero(K);
}
- gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
- gsl_vector *genotype_miss = gsl_vector_alloc(UtX->size1);
+ gsl_vector *genotype = gsl_vector_safe_alloc(UtX->size1);
+ gsl_vector *genotype_miss = gsl_vector_safe_alloc(UtX->size1);
double geno, geno_mean;
size_t n_miss;
@@ -1687,21 +1653,21 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
int c_idv = 0, c_snp = 0;
for (int i = 0; i < ns_total; ++i) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (indicator_snp[i] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
c_idv = 0;
geno_mean = 0;
n_miss = 0;
gsl_vector_set_zero(genotype_miss);
for (int j = 0; j < ni_total; ++j) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[j] == 0) {
continue;
}
@@ -1764,7 +1730,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp,
vector<vector<unsigned char>> &Xt, gsl_matrix *K,
const bool calc_K, const size_t ni_test,
- const size_t ns_test, bool debug) {
+ const size_t ns_test) {
debug_msg("entered");
igzstream infile(file_geno.c_str(), igzstream::in);
if (!infile) {
@@ -1785,8 +1751,8 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
gsl_matrix_set_zero(K);
}
- gsl_vector *genotype = gsl_vector_alloc(ni_test);
- gsl_vector *genotype_miss = gsl_vector_alloc(ni_test);
+ gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
+ gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test);
double geno, geno_mean;
size_t n_miss;
@@ -1796,21 +1762,21 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
size_t c_idv = 0, c_snp = 0;
for (size_t i = 0; i < ns_total; ++i) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (indicator_snp[i] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
c_idv = 0;
geno_mean = 0;
n_miss = 0;
gsl_vector_set_zero(genotype_miss);
for (uint j = 0; j < ni_total; ++j) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[j] == 0) {
continue;
}
@@ -1904,7 +1870,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
gsl_matrix_set_zero(K);
}
- gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
+ gsl_vector *genotype = gsl_vector_safe_alloc(UtX->size1);
double geno, geno_mean;
size_t n_miss;
@@ -2040,7 +2006,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
gsl_matrix_set_zero(K);
}
- gsl_vector *genotype = gsl_vector_alloc(ni_test);
+ gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
double geno, geno_mean;
size_t n_miss;
@@ -2160,7 +2126,7 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
size_t n = *max_element(est_column.begin(), est_column.end());
while (getline(infile, line)) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " \t");
alpha = 0.0;
beta = 0.0;
@@ -2179,7 +2145,7 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
gamma = atof(ch_ptr);
}
if (i < n) {
- ch_ptr = strtok(NULL, " \t");
+ ch_ptr = strtok_safe(NULL, " \t");
}
}
@@ -2237,7 +2203,7 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
getline(infile, line);
while (getline(infile, line)) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
ch_ptr = strtok(NULL, " , \t");
@@ -2274,759 +2240,6 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
return true;
}
-// WJA Added
-// Read Oxford sample file.
-bool ReadFile_sample(const string &file_sample,
- vector<vector<int>> &indicator_pheno,
- vector<vector<double>> &pheno,
- const vector<size_t> &p_column, vector<int> &indicator_cvt,
- vector<vector<double>> &cvt, size_t &n_cvt) {
- debug_msg("entered");
- indicator_pheno.clear();
- pheno.clear();
- indicator_cvt.clear();
-
- igzstream infile(file_sample.c_str(), igzstream::in);
-
- if (!infile) {
- cout << "error! fail to open sample file: " << file_sample << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string id;
- double p, d;
-
- vector<double> pheno_row;
- vector<int> ind_pheno_row;
- int flag_na = 0;
-
- size_t num_cols = 0;
- size_t num_p_in_file = 0;
- size_t num_cvt_in_file = 0;
-
- map<size_t, size_t> mapP2c;
- for (size_t i = 0; i < p_column.size(); i++) {
- mapP2c[p_column[i]] = i;
- pheno_row.push_back(-9);
- ind_pheno_row.push_back(0);
- }
-
- // Read header line1.
- if (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
- if (strcmp(ch_ptr, "ID_1") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "ID_2") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "missing") != 0) {
- return false;
- }
- while (ch_ptr != NULL) {
- num_cols++;
- ch_ptr = strtok(NULL, " \t");
- }
- num_cols--;
- }
-
- vector<map<uint32_t, size_t>> cvt_factor_levels;
-
- char col_type[num_cols];
-
- // Read header line2.
- if (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- size_t it = 0;
- ch_ptr = strtok(NULL, " \t");
- if (ch_ptr != NULL)
- while (ch_ptr != NULL) {
- col_type[it++] = ch_ptr[0];
- if (ch_ptr[0] == 'D') {
- cvt_factor_levels.push_back(map<uint32_t, size_t>());
- num_cvt_in_file++;
- }
- if (ch_ptr[0] == 'C') {
- num_cvt_in_file++;
- }
- if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) {
- num_p_in_file++;
- }
- ch_ptr = strtok(NULL, " \t");
- }
- }
-
- while (!safeGetline(infile, line).eof()) {
-
- ch_ptr = strtok((char *)line.c_str(), " \t");
-
- for (int it = 0; it < 3; it++) {
- ch_ptr = strtok(NULL, " \t");
- }
-
- size_t i = 0;
- size_t p_i = 0;
- size_t fac_cvt_i = 0;
-
- while (i < num_cols) {
-
- if ((col_type[i] == 'P') || (col_type[i] == 'B')) {
- if (mapP2c.count(p_i + 1) != 0) {
- if (strcmp(ch_ptr, "NA") == 0) {
- ind_pheno_row[mapP2c[p_i + 1]] = 0;
- pheno_row[mapP2c[p_i + 1]] = -9;
- } else {
- p = atof(ch_ptr);
- ind_pheno_row[mapP2c[p_i + 1]] = 1;
- pheno_row[mapP2c[p_i + 1]] = p;
- }
- }
- p_i++;
- }
- if (col_type[i] == 'D') {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
- // IS INTEGRAL i.e for atoi error.
- if (strcmp(ch_ptr, "NA") != 0) {
- uint32_t level = atoi(ch_ptr);
- if (cvt_factor_levels[fac_cvt_i].count(level) == 0) {
- cvt_factor_levels[fac_cvt_i][level] =
- cvt_factor_levels[fac_cvt_i].size();
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr = strtok(NULL, " \t");
- i++;
- }
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
- }
-
- // Close and reopen the file.
- infile.close();
- infile.clear();
-
- if (num_cvt_in_file > 0) {
- igzstream infile2(file_sample.c_str(), igzstream::in);
-
- if (!infile2) {
- cout << "error! fail to open sample file: " << file_sample << endl;
- return false;
- }
-
- // Skip header.
- safeGetline(infile2, line);
- safeGetline(infile2, line);
-
- // Pull in the covariates now we now the number of
- // factor levels.
- while (!safeGetline(infile2, line).eof()) {
-
- vector<double> v_d;
- flag_na = 0;
- ch_ptr = strtok((char *)line.c_str(), " \t");
-
- for (int it = 0; it < 3; it++) {
- ch_ptr = strtok(NULL, " \t");
- }
-
- size_t i = 0;
- size_t fac_cvt_i = 0;
- size_t num_fac_levels;
- while (i < num_cols) {
-
- if (col_type[i] == 'C') {
- if (strcmp(ch_ptr, "NA") == 0) {
- flag_na = 1;
- d = -9;
- } else {
- d = atof(ch_ptr);
- }
-
- v_d.push_back(d);
- }
-
- if (col_type[i] == 'D') {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE
- // LEVEL IS INTEGRAL i.e for atoi error.
- num_fac_levels = cvt_factor_levels[fac_cvt_i].size();
- if (num_fac_levels > 1) {
- if (strcmp(ch_ptr, "NA") == 0) {
- flag_na = 1;
- for (size_t it = 0; it < num_fac_levels - 1; it++) {
- v_d.push_back(-9);
- }
- } else {
- uint32_t level = atoi(ch_ptr);
- for (size_t it = 0; it < num_fac_levels - 1; it++) {
- cvt_factor_levels[fac_cvt_i][level] == it + 1
- ? v_d.push_back(1.0)
- : v_d.push_back(0.0);
- }
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr = strtok(NULL, " \t");
- i++;
- }
-
- if (flag_na == 0) {
- indicator_cvt.push_back(1);
- } else {
- indicator_cvt.push_back(0);
- }
- cvt.push_back(v_d);
- }
-
- if (indicator_cvt.empty()) {
- n_cvt = 0;
- } else {
- flag_na = 0;
- for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
- if (indicator_cvt[i] == 0) {
- continue;
- }
-
- if (flag_na == 0) {
- flag_na = 1;
- n_cvt = cvt[i].size();
- }
- if (flag_na != 0 && n_cvt != cvt[i].size()) {
- cout << "error! number of covariates in row " << i
- << " do not match other rows." << endl;
- return false;
- }
- }
- }
-
- infile2.close();
- infile2.clear();
- }
- return true;
-}
-
-// WJA Added.
-// Read bgen file, the first time.
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test) {
-
- debug_msg("entered");
- indicator_snp.clear();
-
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return false;
- }
-
- gsl_vector *genotype = gsl_vector_alloc(W->size1);
- gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
- gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp(WtW, pmt, &sig);
- LUInvert(WtW, pmt, WtWi);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
- bool LongIds = bgen_flags & 0x4;
-
- if (!LongIds) {
- return false;
- }
-
- infile.ignore(bgen_snp_block_offset);
-
- ns_test = 0;
-
- size_t ns_total = static_cast<size_t>(bgen_nsnps);
-
- snpInfo.clear();
- string rs;
- long int b_pos;
- string chr;
- string major;
- string minor;
- string id;
-
- double v_x, v_w;
- int c_idv = 0;
-
- double maf, geno, geno_old;
- size_t n_miss;
- size_t n_0, n_1, n_2;
- int flag_poly;
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- // Total number of samples in phenotype file.
- size_t ni_total = indicator_idv.size();
-
- // Number of samples to use in test.
- size_t ni_test = 0;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
-
- for (size_t i = 0; i < ni_total; ++i) {
- ni_test += indicator_idv[i];
- }
-
- for (size_t t = 0; t < ns_total; ++t) {
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- // Should we switch according to MAF?
- minor = bgen_B_allele;
- major = bgen_A_allele;
- b_pos = static_cast<long int>(bgen_SNP_pos);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
- SNPINFO sInfo = {
- "-9", rs, -9, -9, minor, major, static_cast<size_t>(-9),
- -9, (long int)-9};
-
- snpInfo.push_back(sInfo);
- indicator_snp.push_back(0);
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- maf = 0;
- n_miss = 0;
- flag_poly = 0;
- geno_old = -9;
- n_0 = 0;
- n_1 = 0;
- n_2 = 0;
- c_idv = 0;
- gsl_vector_set_zero(genotype_miss);
- for (size_t i = 0; i < bgen_N; ++i) {
-
- // CHECK this set correctly!
- if (indicator_idv[i] == 0) {
- continue;
- }
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-
- // CHECK 0.1 OK.
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(genotype_miss, c_idv, 1);
- n_miss++;
- c_idv++;
- continue;
- }
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
- if (geno >= 0 && geno <= 0.5) {
- n_0++;
- }
- if (geno > 0.5 && geno < 1.5) {
- n_1++;
- }
- if (geno >= 1.5 && geno <= 2.0) {
- n_2++;
- }
-
- gsl_vector_set(genotype, c_idv, geno);
-
- // CHECK WHAT THIS DOES.
- if (flag_poly == 0) {
- geno_old = geno;
- flag_poly = 2;
- }
- if (flag_poly == 2 && geno != geno_old) {
- flag_poly = 1;
- }
-
- maf += geno;
-
- c_idv++;
- }
-
- maf /= 2.0 * static_cast<double>(ni_test - n_miss);
-
- SNPINFO sInfo = {chr, rs, -9, b_pos,
- minor, major, n_miss, (double)n_miss / (double)ni_test,
- maf};
- snpInfo.push_back(sInfo);
-
- if ((double)n_miss / (double)ni_test > miss_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (flag_poly != 1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (hwe_level != 0 && maf_level != -1) {
- if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
- indicator_snp.push_back(0);
- continue;
- }
- }
-
- // Filter SNP if it is correlated with W
- // unless W has only one column, of 1s.
- for (size_t i = 0; i < genotype->size; ++i) {
- if (gsl_vector_get(genotype_miss, i) == 1) {
- geno = maf * 2.0;
- gsl_vector_set(genotype, i, geno);
- }
- }
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
- gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot(genotype, genotype, &v_x);
- gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
-
- if (W->size2 != 1 && v_w / v_x >= r2_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- indicator_snp.push_back(1);
- ns_test++;
- }
-
- return true;
-}
-
-// Read oxford genotype file and calculate kinship matrix.
-bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
- const int k_mode, const int display_pace, gsl_matrix *matrix_kin) {
- debug_msg("entered");
- string file_bgen = file_oxford;
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return false;
- }
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- double genotype;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_total = matrix_kin->size1;
- gsl_vector *geno = gsl_vector_alloc(ni_total);
- gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
-
- size_t ns_test = 0;
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
-
- if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading bgen SNPs ", t, indicator_snp.size() - 1);
- }
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (indicator_snp[t] == 0) {
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- geno_mean = 0.0;
- n_miss = 0;
- geno_var = 0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- for (size_t i = 0; i < bgen_N; ++i) {
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
- // WJA
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(geno_miss, i, 0.0);
- n_miss++;
- } else {
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
- gsl_vector_set(geno, i, genotype);
- gsl_vector_set(geno_miss, i, 1.0);
- geno_mean += genotype;
- geno_var += genotype * genotype;
- }
- }
-
- geno_mean /= (double)(ni_total - n_miss);
- geno_var += geno_mean * geno_mean * (double)n_miss;
- geno_var /= (double)ni_total;
- geno_var -= geno_mean * geno_mean;
-
- for (size_t i = 0; i < ni_total; ++i) {
- if (gsl_vector_get(geno_miss, i) == 0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant(geno, -1.0 * geno_mean);
-
- if (geno_var != 0) {
- if (k_mode == 1) {
- gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin);
- } else if (k_mode == 2) {
- gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin);
- } else {
- cout << "Unknown kinship mode." << endl;
- }
- }
-
- ns_test++;
- }
- cout << endl;
-
- gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
-
- for (size_t i = 0; i < ni_total; ++i) {
- for (size_t j = 0; j < i; ++j) {
- d = gsl_matrix_get(matrix_kin, j, i);
- gsl_matrix_set(matrix_kin, i, j, d);
- }
- }
-
- gsl_vector_free(geno);
- gsl_vector_free(geno_miss);
-
- infile.close();
- infile.clear();
-
- return true;
-}
-
// Read header to determine which column contains which item.
bool ReadHeader_io(const string &line, HEADER &header) {
debug_msg("entered");
@@ -3314,7 +2527,7 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
// Use the header to count the number of categories.
@@ -3340,10 +2553,11 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
// Read the following lines to record mapRS2cat.
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
i_cat = 0;
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
} else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -3436,13 +2650,13 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
double d, geno_mean, geno_var;
size_t ni_test = matrix_kin->size1;
- gsl_vector *geno = gsl_vector_alloc(ni_test);
- gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+ gsl_vector *geno = gsl_vector_safe_alloc(ni_test);
+ gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_test);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -3459,21 +2673,21 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
// Create a large matrix.
const size_t msize = K_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+ gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_test, msize * n_vc);
gsl_matrix_set_zero(Xlarge);
size_t ns_test = 0;
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
}
if (indicator_snp[t] == 0)
continue;
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
rs = snpInfo[t].rs_number; // This line is new.
@@ -3487,7 +2701,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
if (indicator_idv[i] == 0) {
continue;
}
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (strcmp(ch_ptr, "NA") == 0) {
gsl_vector_set(geno_miss, i, 0);
n_miss++;
@@ -3536,7 +2750,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
ns_vec[0]++;
if (ns_vec[0] % msize == 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
} else if (mapRS2cat.count(rs) != 0) {
@@ -3553,7 +2767,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
gsl_matrix_view kin_sub = gsl_matrix_submatrix(
matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
- eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+ fast_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
&kin_sub.matrix);
gsl_matrix_set_zero(&X_sub.matrix);
@@ -3569,7 +2783,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
gsl_matrix_view kin_sub =
gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
- eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+ fast_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
&kin_sub.matrix);
}
}
@@ -3628,12 +2842,12 @@ bool PlinkKin(const string &file_bed, const int display_pace,
size_t ni_test = matrix_kin->size1;
size_t ni_total = indicator_idv.size();
- gsl_vector *geno = gsl_vector_alloc(ni_test);
+ gsl_vector *geno = gsl_vector_safe_alloc(ni_test);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+ gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -3653,7 +2867,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
// Create a large matrix.
const size_t msize = K_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+ gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_test, msize * n_vc);
gsl_matrix_set_zero(Xlarge);
// Calculate n_bit and c, the number of bit for each SNP.
@@ -3671,7 +2885,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
for (size_t t = 0; t < indicator_snp.size(); ++t) {
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
+ ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
@@ -3762,7 +2976,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
ns_vec[0]++;
if (ns_vec[0] % msize == 0) {
- eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+ fast_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
gsl_matrix_set_zero(Xlarge);
}
} else if (mapRS2cat.count(rs) != 0) {
@@ -3779,7 +2993,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
gsl_matrix_view kin_sub = gsl_matrix_submatrix(
matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
- eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+ fast_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
&kin_sub.matrix);
gsl_matrix_set_zero(&X_sub.matrix);
@@ -3795,7 +3009,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
gsl_matrix_view kin_sub =
gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
- eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+ fast_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
&kin_sub.matrix);
}
}
@@ -3852,8 +3066,8 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
string file_name;
- gsl_matrix *kin_tmp = gsl_matrix_alloc(matrix_kin->size1, matrix_kin->size2);
- gsl_vector *ns_tmp = gsl_vector_alloc(vector_ns->size);
+ gsl_matrix *kin_tmp = gsl_matrix_safe_alloc(matrix_kin->size1, matrix_kin->size2);
+ gsl_vector *ns_tmp = gsl_vector_safe_alloc(vector_ns->size);
size_t l = 0;
double d;
@@ -3929,9 +3143,9 @@ bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) {
double weight;
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
weight = atof(ch_ptr);
mapRS2weight[rs] = weight;
}
@@ -3960,17 +3174,18 @@ bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
while (!safeGetline(infile, line).eof()) {
if (isBlankLine(line)) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
size_t t = 0;
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
} else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -4052,7 +3267,7 @@ void ReadFile_beta(const string &file_beta,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
if (header.n_col == 0) {
@@ -4074,7 +3289,7 @@ void ReadFile_beta(const string &file_beta,
if (isBlankLine(line)) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
z = 0;
beta = 0;
@@ -4089,6 +3304,7 @@ void ReadFile_beta(const string &file_beta,
af = 0;
var_x = 0;
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -4234,7 +3450,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_io(line, header);
if (header.n_col == 0) {
@@ -4255,7 +3471,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
if (isBlankLine(line)) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
z = 0;
beta = 0;
@@ -4270,6 +3486,7 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
af = 0;
var_x = 0;
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -4540,8 +3757,8 @@ void ReadFile_vector(const string &file_vec, gsl_vector *vec) {
char *ch_ptr;
for (size_t i = 0; i < vec->size; i++) {
- !safeGetline(infile, line).eof();
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ safeGetline(infile, line).eof();
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
gsl_vector_set(vec, i, atof(ch_ptr));
}
@@ -4563,9 +3780,10 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) {
char *ch_ptr;
for (size_t i = 0; i < mat->size1; i++) {
- !safeGetline(infile, line).eof();
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ safeGetline(infile, line).eof();
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
for (size_t j = 0; j < mat->size2; j++) {
+ enforce(ch_ptr);
gsl_matrix_set(mat, i, j, atof(ch_ptr));
ch_ptr = strtok(NULL, " , \t");
}
@@ -4590,18 +3808,20 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1,
char *ch_ptr;
for (size_t i = 0; i < mat1->size1; i++) {
- !safeGetline(infile, line).eof();
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ safeGetline(infile, line).eof();
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
for (size_t j = 0; j < mat1->size2; j++) {
+ enforce(ch_ptr);
gsl_matrix_set(mat1, i, j, atof(ch_ptr));
ch_ptr = strtok(NULL, " , \t");
}
}
for (size_t i = 0; i < mat2->size1; i++) {
- !safeGetline(infile, line).eof();
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ safeGetline(infile, line).eof();
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
for (size_t j = 0; j < mat2->size2; j++) {
+ enforce(ch_ptr);
gsl_matrix_set(mat2, i, j, atof(ch_ptr));
ch_ptr = strtok(NULL, " , \t");
}
@@ -4621,7 +3841,7 @@ void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat,
string sfile = file_study + ".size.txt";
string qfile = file_study + ".q.txt";
- gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+ gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
ReadFile_matrix(Vqfile, Vq_mat);
ReadFile_vector(sfile, s);
@@ -4646,7 +3866,7 @@ void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
string sfile = file_ref + ".size.txt";
string Sfile = file_ref + ".S.txt";
- gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+ gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
ReadFile_vector(sfile, s);
ReadFile_matrix(Sfile, S_mat, Svar_mat);
@@ -4672,9 +3892,9 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat,
gsl_vector_set_zero(s_vec);
ni = 0;
- gsl_matrix *Vq_sub = gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
- gsl_vector *q_sub = gsl_vector_alloc(q_vec->size);
- gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+ gsl_matrix *Vq_sub = gsl_matrix_safe_alloc(Vq_mat->size1, Vq_mat->size2);
+ gsl_vector *q_sub = gsl_vector_safe_alloc(q_vec->size);
+ gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
igzstream infile(file_mstudy.c_str(), igzstream::in);
if (!infile) {
@@ -4763,9 +3983,9 @@ void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
gsl_vector_set_zero(s_vec);
ni = 0;
- gsl_matrix *S_sub = gsl_matrix_alloc(S_mat->size1, S_mat->size2);
- gsl_matrix *Svar_sub = gsl_matrix_alloc(Svar_mat->size1, Svar_mat->size2);
- gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+ gsl_matrix *S_sub = gsl_matrix_safe_alloc(S_mat->size1, S_mat->size2);
+ gsl_matrix *Svar_sub = gsl_matrix_safe_alloc(Svar_mat->size1, Svar_mat->size2);
+ gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
igzstream infile(file_mref.c_str(), igzstream::in);
if (!infile) {
diff --git a/src/io.h b/src/io.h
index d9253e3..215e8ba 100644
--- a/src/io.h
+++ b/src/io.h
@@ -32,8 +32,8 @@
using namespace std;
-void ProgressBar(string str, double p, double total);
-void ProgressBar(string str, double p, double total, double ratio);
+void ProgressBar(string str, double p, double total, double ratio = -1.0);
+
std::istream &safeGetline(std::istream &is, std::string &t);
bool ReadFile_snps(const string file_snps, set<string> &setSnps);
@@ -64,7 +64,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
const double &r2_level, map<string, string> &mapRS2chr,
map<string, long int> &mapRS2bp,
map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
- size_t &ns_test, bool debug);
+ size_t &ns_test);
bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
const gsl_matrix *W, vector<int> &indicator_idv,
vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
@@ -94,7 +94,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
- const bool calc_K, bool debug);
+ const bool calc_K);
bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
const bool calc_K);
@@ -102,7 +102,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp,
vector<vector<unsigned char>> &Xt, gsl_matrix *K,
const bool calc_K, const size_t ni_test,
- const size_t ns_test, bool debug);
+ const size_t ns_test);
bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
gsl_matrix *K, const bool calc_K, const size_t ni_test,
@@ -176,16 +176,6 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq,
gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
-
-// WJA added.
-bool bgenKin(const string &file_geno, vector<int> &indicator_snp,
- const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test);
bool ReadFile_sample(const string &file_sample,
vector<vector<int>> &indicator_pheno,
vector<vector<double>> &pheno,
diff --git a/src/ldr.cpp b/src/ldr.cpp
index 3554efa..f70eb85 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -29,7 +29,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_eigen.h"
@@ -46,7 +46,7 @@
#include "param.h"
using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
void LDR::CopyFromParam(PARAM &cPar) {
a_mode = cPar.a_mode;
@@ -70,8 +70,10 @@ void LDR::CopyFromParam(PARAM &cPar) {
return;
}
+
void LDR::CopyToParam(PARAM &cPar) { return; }
+/*
// X is a p by n matrix.
void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
const gsl_vector *y_gsl) {
@@ -107,3 +109,4 @@ void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
return;
}
+*/
diff --git a/src/lm.cpp b/src/lm.cpp
index 0c2a2bb..b94a426 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -55,8 +55,6 @@ void LM::CopyFromParam(PARAM &cPar) {
file_out = cPar.file_out;
path_out = cPar.path_out;
file_gene = cPar.file_gene;
- // WJA added
- file_oxford = cPar.file_oxford;
time_opt = 0.0;
@@ -333,14 +331,14 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
for (size_t t = 0; t < ng_total; t++) {
getline(infile, line);
if (t % d_pace == 0 || t == ng_total - 1) {
- ProgressBar("Performing Analysis ", t, ng_total - 1);
+ ProgressBar("Performing Analysis", t, ng_total - 1);
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
c_phen = 0;
for (size_t i = 0; i < indicator_idv.size(); ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -381,232 +379,6 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
return;
}
-// WJA added
-void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
- debug_msg("entering");
- string file_bgen = file_oxford + ".bgen";
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return;
- }
-
- clock_t time_start = clock();
-
- string line;
- char *ch_ptr;
-
- double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate some basic quantities.
- double yPwy, xPwy, xPwx;
- double df = (double)W->size1 - (double)W->size2 - 1.0;
-
- gsl_vector *x = gsl_vector_alloc(W->size1);
- gsl_vector *x_miss = gsl_vector_alloc(W->size1);
-
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wty = gsl_vector_alloc(W->size2);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp(WtW, pmt, &sig);
- LUInvert(WtW, pmt, WtWi);
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
- CalcvPv(WtWi, Wty, y, yPwy);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout << "Warning: WJA hard coded SNP missingness "
- << "threshold of 10%" << std::endl;
-
- // Start reading genotypes and analyze.
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
- }
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (indicator_snp[t] == 0) {
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- x_mean = 0.0;
- c_phen = 0;
- n_miss = 0;
- gsl_vector_set_zero(x_miss);
- for (size_t i = 0; i < bgen_N; ++i) {
- if (indicator_idv[i] == 0) {
- continue;
- }
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
- // WJA
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- } else {
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean += geno;
- }
- c_phen++;
- }
-
- x_mean /= static_cast<double>(ni_test - n_miss);
-
- for (size_t i = 0; i < ni_test; ++i) {
- if (gsl_vector_get(x_miss, i) == 0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno = gsl_vector_get(x, i);
- }
-
- // Calculate statistics.
- time_start = clock();
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
- p_lrt, p_score);
-
- time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- // Store summary data.
- SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
- sumStat.push_back(SNPs);
- }
- cout << endl;
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_vector_free(Wty);
- gsl_vector_free(Wtx);
- gsl_permutation_free(pmt);
-
- infile.close();
- infile.clear();
-
- return;
-}
-
void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
debug_msg("entering");
igzstream infile(file_geno.c_str(), igzstream::in);
@@ -649,22 +421,22 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
for (size_t t = 0; t < indicator_snp.size(); ++t) {
getline(infile, line);
if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
+ ProgressBar("Reading SNPs", t, ns_total - 1);
}
if (indicator_snp[t] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
x_mean = 0.0;
c_phen = 0;
n_miss = 0;
gsl_vector_set_zero(x_miss);
for (size_t i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -775,7 +547,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
if (t % d_pace == 0 || t == snpInfo.size() - 1) {
- ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
diff --git a/src/lm.h b/src/lm.h
index cb22d3b..030e6f9 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -67,9 +67,6 @@ public:
void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x);
void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y);
void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y);
- // WJA added.
- void Analyzebgen(const gsl_matrix *W, const gsl_vector *y);
-
void WriteFiles();
};
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 134fbf9..ae8b747 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -39,8 +39,10 @@
#include "gsl/gsl_vector.h"
#include "eigenlib.h"
+
#include "gzstream.h"
#include "io.h"
+#include "fastblas.h"
#include "lapack.h"
#include "lmm.h"
@@ -56,9 +58,6 @@ void LMM::CopyFromParam(PARAM &cPar) {
path_out = cPar.path_out;
file_gene = cPar.file_gene;
- // WJA added.
- file_oxford = cPar.file_oxford;
-
l_min = cPar.l_min;
l_max = cPar.l_max;
n_region = cPar.n_region;
@@ -107,10 +106,10 @@ void LMM::WriteFiles() {
}
auto common_header = [&] () {
- if (a_mode != 2)
+ if (a_mode != 2) {
outfile << "beta" << "\t";
-
- outfile << "se" << "\t";
+ outfile << "se" << "\t";
+ }
outfile << "logl_H1" << "\t"; // we may make this an option
@@ -139,10 +138,10 @@ void LMM::WriteFiles() {
auto sumstats = [&] (SUMSTAT st) {
outfile << scientific << setprecision(6);
- if (a_mode != 2)
+ if (a_mode != 2) {
outfile << st.beta << "\t";
-
- outfile << st.se << "\t";
+ outfile << st.se << "\t";
+ }
outfile << st.logl_H1 << "\t";
@@ -364,9 +363,9 @@ double LogL_f(double l, void *params) {
double f = 0.0, logdet_h = 0.0, d;
size_t index_yy;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -414,11 +413,11 @@ double LogL_dev1(double l, void *params) {
double dev1 = 0.0, trace_Hi = 0.0;
size_t index_yy;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -477,13 +476,13 @@ double LogL_dev2(double l, void *params) {
double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
size_t index_yy;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -554,13 +553,13 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) {
double trace_Hi = 0.0, trace_HiHi = 0.0;
size_t index_yy;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -637,10 +636,10 @@ double LogRL_f(double l, void *params) {
double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
size_t index_ww;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *Iab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -702,11 +701,11 @@ double LogRL_dev1(double l, void *params) {
double dev1 = 0.0, trace_Hi = 0.0;
size_t index_ww;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -778,13 +777,13 @@ double LogRL_dev2(double l, void *params) {
double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
size_t index_ww;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -868,13 +867,13 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {
double trace_Hi = 0.0, trace_HiHi = 0.0;
size_t index_ww;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
- gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
gsl_vector_memcpy(v_temp, p->eval);
gsl_vector_scale(v_temp, l);
@@ -948,9 +947,9 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
int df = (int)ni_test - (int)n_cvt - 1;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
- gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
gsl_vector_memcpy(v_temp, params.eval);
gsl_vector_scale(v_temp, l);
@@ -990,9 +989,9 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
int df = (int)ni_test - (int)n_cvt - 1;
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
- gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
gsl_vector_memcpy(v_temp, params.eval);
gsl_vector_scale(v_temp, l);
@@ -1031,7 +1030,7 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
size_t index_ab;
size_t n_cvt = UtW->size2;
- gsl_vector *u_a = gsl_vector_alloc(Uty->size);
+ gsl_vector *u_a = gsl_vector_safe_alloc(Uty->size);
for (size_t a = 1; a <= n_cvt + 2; ++a) {
if (a == n_cvt + 1) {
@@ -1097,8 +1096,8 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
size_t n_cvt = W->size2;
double d;
- gsl_vector *v_a = gsl_vector_alloc(y->size);
- gsl_vector *v_b = gsl_vector_alloc(y->size);
+ gsl_vector *v_a = gsl_vector_safe_alloc(y->size);
+ gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
for (size_t a = 1; a <= n_cvt + 2; ++a) {
if (a == n_cvt + 1) {
@@ -1142,7 +1141,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
size_t n_cvt = W->size2;
double d;
- gsl_vector *v_b = gsl_vector_alloc(y->size);
+ gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
for (size_t b = 1; b <= n_cvt + 2; ++b) {
index_ab = GetabIndex(n_cvt + 1, b, n_cvt);
@@ -1167,6 +1166,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_vector *Utx,
const gsl_matrix *W, const gsl_vector *x) {
+ debug_msg(file_gene);
igzstream infile(file_gene.c_str(), igzstream::in);
if (!infile) {
cout << "error reading gene expression file:" << file_gene << endl;
@@ -1188,25 +1188,25 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
// Calculate basic quantities.
size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_vector *y = gsl_vector_alloc(U->size1);
- gsl_vector *Uty = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_vector *y = gsl_vector_safe_alloc(U->size1);
+ gsl_vector *Uty = gsl_vector_safe_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
// Header.
getline(infile, line);
for (size_t t = 0; t < ng_total; t++) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % d_pace == 0 || t == ng_total - 1) {
- ProgressBar("Performing Analysis ", t, ng_total - 1);
+ ProgressBar("Performing Analysis", t, ng_total - 1);
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
rs = ch_ptr;
c_phen = 0;
for (size_t i = 0; i < indicator_idv.size(); ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -1271,35 +1271,37 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
return;
}
-void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y,
- const set<string> gwasnps) {
- debug_msg("entering");
+
// Format-independent LMM association loop. The genotypes of SNP t are obtained
// by calling fetch_snp(t), which returns the SNP name together with one value
// per individual (NaN marks a missing genotype). When gwasnps is non-empty,
// only SNPs whose name is in that set are analysed.
+void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
+ const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const set<string> gwasnps) {
clock_t time_start = clock();
- // LOCO support
+ // Subset/LOCO support: a non-empty gwasnps restricts the analysis to those SNPs
bool process_gwasnps = gwasnps.size(); // true when a SNP subset was supplied
if (process_gwasnps)
- debug_msg("AnalyzeBimbam w. LOCO");
+ debug_msg("Analyze subset of SNPs (LOCO)");
// Calculate basic quantities.
size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // (n_cvt+2)(n_cvt+3)/2: size of a triangle over n_cvt + 2 items
const size_t inds = U->size1; // number of rows of U
- gsl_vector *x = gsl_vector_alloc(inds); // #inds
- gsl_vector *x_miss = gsl_vector_alloc(inds);
- gsl_vector *Utx = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ enforce(inds == ni_test); // U must have exactly one row per tested individual
+ gsl_vector *x = gsl_vector_safe_alloc(inds); // genotypes of the current SNP, one entry per tested individual
+ gsl_vector *x_miss = gsl_vector_safe_alloc(inds); // per-individual flag used to mark missing genotypes
+ gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
// Create a large matrix with LMM_BATCH_SIZE columns for batched processing
// const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE);
const size_t msize = LMM_BATCH_SIZE; // number of SNP columns buffered before a batch is computed
- gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize);
- gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize);
-
+ gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize);
+ gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize);
enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
+ enforce(Xlarge->size1 == inds); // the batch buffer has one row per tested individual
gsl_matrix_set_zero(Xlarge);
gsl_matrix_set_zero(Uab);
CalcUab(UtW, Uty, Uab);
@@ -1307,9 +1309,6 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
// start reading genotypes and analyze
size_t c = 0;
- igzstream infile(file_geno.c_str(), igzstream::in);
- enforce_msg(infile, "error reading genotype file");
-
auto batch_compute = [&](size_t l) { // using a C++ closure
// Compute SNPs in batch, note the computations are independent per SNP
gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
@@ -1317,7 +1316,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
&UtXlarge_sub.matrix);
time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
@@ -1332,8 +1331,8 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
time_start = clock();
FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
- double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
- double p_lrt = 0, p_score = 0;
+ double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0;
+ double p_lrt = 0.0, p_score = 0.0;
double logl_H1 = 0.0;
// 3 is before 1.
@@ -1361,54 +1360,69 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
}
};
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- // for every SNP
- string line;
- safeGetline(infile, line);
- if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
+ const auto num_snps = indicator_snp.size();
+ const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace); // max(num_snps/50, d_pace): redraw the bar about 50 times at most, never more often than every d_pace SNPs
+
+ for (size_t t = 0; t < num_snps; ++t) {
+ if (t % progress_step == 0 || t == (num_snps - 1)) {
+ ProgressBar("Reading SNPs", t, num_snps - 1);
}
if (indicator_snp[t] == 0)
continue;
- char *ch_ptr = strtok((char *)line.c_str(), " , \t");
- auto snp = string(ch_ptr);
+ auto tup = fetch_snp(t); // (SNP name, genotype values) for SNP t; only called for SNPs that passed indicator_snp
+ auto snp = get<0>(tup);
+ auto gs = get<1>(tup);
+
// check whether SNP is included in gwasnps (used by LOCO)
if (process_gwasnps && gwasnps.count(snp) == 0)
continue;
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
- double x_mean = 0.0;
- int c_phen = 0;
- int n_miss = 0;
+ // copy the genotypes of included individuals into x; missing values are flagged in x_miss and replaced by the mean afterwards
+ double x_total = 0.0; // sum genotype values to compute x_mean
+ uint pos = 0; // position in target vector
+ uint n_miss = 0; // number of missing genotypes among included individuals
gsl_vector_set_zero(x_miss);
for (size_t i = 0; i < ni_total; ++i) {
// get the genotypes per individual and compute stats per SNP
- ch_ptr = strtok(NULL, " , \t");
- if (indicator_idv[i] == 0)
+ if (indicator_idv[i] == 0) // skip individual
continue;
- if (strcmp(ch_ptr, "NA") == 0) {
- gsl_vector_set(x_miss, c_phen, 0.0);
+ double geno = gs[i]; // gs is indexed by position among all ni_total individuals, not by pos
+ if (std::isnan(geno)) {
+ gsl_vector_set(x_miss, pos, 1.0); // 1.0 now flags a missing genotype (the removed code used 0.0 for missing)
n_miss++;
} else {
- double geno = atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean += geno;
+ gsl_vector_set(x, pos, geno);
+ x_total += geno;
}
- c_phen++;
+ pos++;
}
+ enforce(pos == ni_test); // the number of included individuals must equal ni_test
- x_mean /= (double)(ni_test - n_miss);
+ const double x_mean = x_total/(double)(ni_test - n_miss); // mean over non-missing genotypes; NOTE(review): the denominator is zero when every genotype is missing
+ // plug x_mean back into missing values
for (size_t i = 0; i < ni_test; ++i) {
- if (gsl_vector_get(x_miss, i) == 0) {
+ if (gsl_vector_get(x_miss, i) == 1.0) {
gsl_vector_set(x, i, x_mean);
}
}
+
+ /* For reference, this is what the GxE code below does:
+ for (size_t i = 0; i < ni_test; ++i) {
+ auto geno = gsl_vector_get(x, i);
+ if (std::isnan(geno)) {
+ gsl_vector_set(x, i, x_mean);
+ geno = x_mean;
+ }
+ if (x_mean > 1.0) {
+ gsl_vector_set(x, i, 2 - geno);
+ }
+ }
+ */
+ enforce(x->size == ni_test);
+
// copy genotype values for SNP into Xlarge cache
gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); // column c % msize of the batch buffer
gsl_vector_memcpy(&Xlarge_col.vector, x);
@@ -1418,6 +1432,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
batch_compute(msize);
}
batch_compute(c % msize);
+ ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
// cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
cout << endl;
@@ -1430,114 +1445,111 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix_free(Xlarge);
gsl_matrix_free(UtXlarge);
- infile.close();
- infile.clear();
-
- return;
}
-void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y) {
- debug_msg("entering");
- string file_bed = file_bfile + ".bed";
- ifstream infile(file_bed.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bed file:" << file_bed << endl;
- return;
- }
// BIMBAM front end: opens the genotype file named by file_geno, wraps
// line-by-line parsing in a fetch_snp callback and hands the actual
// association analysis to LMM::Analyze.
+void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const set<string> gwasnps) {
+ debug_msg(file_geno);
- clock_t time_start = clock();
+ igzstream infile(file_geno.c_str(), igzstream::in);
+ enforce_msg(infile, "error reading genotype file");
+ size_t prev_line = 0; // number of genotype lines consumed from infile so far
- char ch[1];
- bitset<8> b;
+ std::vector <double> gs; // genotype buffer reused for every SNP, one entry per individual
+ gs.resize(ni_total);
- double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
- double p_lrt = 0, p_score = 0;
- double logl_H1 = 0.0;
- int n_bit, n_miss, ci_total, ci_test;
- double geno, x_mean;
+ // fetch_snp(num) parses row num of the genotype file and returns (SNP name, genotype values)
+ std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) {
+ string line;
+ while (prev_line <= num) { // the stream is only read forward, so rows must be requested in increasing order
+ // also read SNPs that were skipped
+ safeGetline(infile, line);
+ prev_line++;
+ }
+ char *ch_ptr = strtok((char *)line.c_str(), " , \t"); // first token is the SNP name; strtok modifies line's buffer in place
+ enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL
- // Calculate basic quantities.
- size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+ auto snp = string(ch_ptr);
+ ch_ptr = strtok_safe(NULL, " , \t"); // skip column
+ ch_ptr = strtok_safe(NULL, " , \t"); // skip column
- gsl_vector *x = gsl_vector_alloc(U->size1);
- gsl_vector *Utx = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gs.assign (ni_total,nan("")); // reset every genotype to NaN (= missing)
- // Create a large matrix.
- size_t msize = LMM_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
+ for (size_t i = 0; i < ni_total; ++i) {
+ ch_ptr = strtok(NULL, " , \t");
+ enforce_msg(ch_ptr,line.c_str()); // fails when the row has fewer than ni_total genotype columns; NOTE(review): line was already cut at the first delimiter by strtok, so the message is presumably just the SNP name
+ if (strcmp(ch_ptr, "NA") != 0) // "NA" entries keep their NaN value
+ gs[i] = atof(ch_ptr);
+ }
+ return std::make_tuple(snp,gs);
+ };
- gsl_matrix_set_zero(Uab);
- CalcUab(UtW, Uty, Uab);
+ LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps); // shared analysis loop; pulls SNPs through fetch_snp
+ infile.close();
+ infile.clear();
+}
+
+void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const set<string> gwasnps) {
+ string file_bed = file_bfile + ".bed";
+ debug_msg(file_bed);
+
+ ifstream infile(file_bed.c_str(), ios::binary);
+ enforce_msg(infile,"error reading genotype (.bed) file");
+
+ bitset<8> bset8;
+ char ch[1]; // buffer
// Calculate n_bit and c, the number of bit for each SNP.
- if (ni_total % 4 == 0) {
- n_bit = ni_total / 4;
- } else {
- n_bit = ni_total / 4 + 1;
- }
+ const size_t n_bit = (ni_total % 4 == 0 ? ni_total / 4 : ni_total / 4 + 1);
- // Print the first three magic numbers.
+ // first three magic numbers.
for (int i = 0; i < 3; ++i) {
infile.read(ch, 1);
- b = ch[0];
+ const bitset<8> b = ch[0];
}
- size_t c = 0, t_last = 0;
- for (size_t t = 0; t < snpInfo.size(); ++t) {
- if (indicator_snp[t] == 0)
- continue;
- t_last++;
- }
- for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
- if (t % d_pace == 0 || t == snpInfo.size() - 1) {
- ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
- }
- if (indicator_snp[t] == 0) {
- continue;
- }
+ std::vector <double> gs;
+ gs.resize(ni_total);
+ // fetch_snp(num) decodes SNP row num of the .bed file into gs and returns (name, gs); gs is indexed by position among all ni_total individuals, which is how LMM::Analyze reads it
+ std::function<SnpNameValues(size_t)> fetch_snp = [&](size_t num) {
+ gs.assign (ni_total,nan("")); // reset every genotype to NaN (= missing)
// n_bit, and 3 is the number of magic numbers.
+ auto t = num;
infile.seekg(t * n_bit + 3);
-
- // Read genotypes.
- x_mean = 0.0;
- n_miss = 0;
- ci_total = 0;
- ci_test = 0;
+ auto ci_total = 0; // index of the current individual among all ni_total individuals
+ auto ci_test = 0; // counts included individuals only; deliberately not used to index gs
+ // ---- for all genotypes
for (int i = 0; i < n_bit; ++i) {
infile.read(ch, 1);
- b = ch[0];
+ bset8 = ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
for (size_t j = 0; j < 4; ++j) {
if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
break;
}
- if (indicator_idv[ci_total] == 0) {
+ if (indicator_idv[ci_total] == 0) { // skip individual; its gs entry stays NaN and is never read by Analyze
ci_total++;
continue;
}
- if (b[2 * j] == 0) {
- if (b[2 * j + 1] == 0) {
- gsl_vector_set(x, ci_test, 2);
- x_mean += 2.0;
+ if (bset8[2 * j] == 0) {
+ if (bset8[2 * j + 1] == 0) {
+ gs[ci_total] = 2.0; // index by ci_total: Analyze looks genotypes up as gs[i] with i running over all individuals
} else {
- gsl_vector_set(x, ci_test, 1);
- x_mean += 1.0;
+ gs[ci_total] = 1.0;
}
} else {
- if (b[2 * j + 1] == 1) {
- gsl_vector_set(x, ci_test, 0);
+ if (bset8[2 * j + 1] == 1) {
+ gs[ci_total] = 0.0;
} else {
- gsl_vector_set(x, ci_test, -9);
- n_miss++;
+ gs[ci_total] = nan(""); // already set to NaN - originally was -9.0
}
}
@@ -1545,367 +1557,14 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
ci_test++;
}
}
+ string snp="unknown"; // no SNP name is read here; NOTE(review): a non-empty gwasnps filter in Analyze would then skip every SNP unless it contains "unknown"
+ return std::make_tuple(snp,gs);
+ };
- x_mean /= (double)(ni_test - n_miss);
-
- for (size_t i = 0; i < ni_test; ++i) {
- geno = gsl_vector_get(x, i);
- if (geno == -9) {
- gsl_vector_set(x, i, x_mean);
- geno = x_mean;
- }
- }
-
- gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
- gsl_vector_memcpy(&Xlarge_col.vector, x);
- c++;
-
- if (c % msize == 0 || c == t_last) {
- size_t l = 0;
- if (c % msize == 0) {
- l = msize;
- } else {
- l = c % msize;
- }
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- gsl_matrix_set_zero(Xlarge);
-
- for (size_t i = 0; i < l; i++) {
- gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
- gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
- CalcUab(UtW, Uty, Utx, Uab);
-
- time_start = clock();
- FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
- // 3 is before 1, for beta.
- if (a_mode == 3 || a_mode == 4) {
- CalcRLScore(l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode == 1 || a_mode == 4) {
- CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
- logl_H1);
- CalcRLWald(lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode == 2 || a_mode == 4) {
- CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
- }
-
- time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- // Store summary data.
- SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score, logl_H1};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout << endl;
-
- gsl_vector_free(x);
- gsl_vector_free(Utx);
- gsl_matrix_free(Uab);
- gsl_vector_free(ab);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- infile.close();
- infile.clear();
-
- return;
-}
-
-// WJA added.
-void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y) {
- debug_msg("entering");
- string file_bgen = file_oxford + ".bgen";
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return;
- }
-
- clock_t time_start = clock();
- double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
- double p_lrt = 0, p_score = 0;
- double logl_H1 = 0.0;
- int n_miss, c_phen;
- double geno, x_mean;
-
- // Calculate basic quantities.
- size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
-
- gsl_vector *x = gsl_vector_alloc(U->size1);
- gsl_vector *x_miss = gsl_vector_alloc(U->size1);
- gsl_vector *Utx = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
-
- // Create a large matrix.
- size_t msize = LMM_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- gsl_matrix_set_zero(Uab);
- CalcUab(UtW, Uty, Uab);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
- double bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout << "Warning: WJA hard coded SNP missingness "
- << "threshold of 10%" << std::endl;
-
- // Start reading genotypes and analyze.
- size_t c = 0, t_last = 0;
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- if (indicator_snp[t] == 0) {
- continue;
- }
- t_last++;
- }
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
- }
- if (indicator_snp[t] == 0) {
- continue;
- }
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (indicator_snp[t] == 0) {
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- x_mean = 0.0;
- c_phen = 0;
- n_miss = 0;
- gsl_vector_set_zero(x_miss);
- for (size_t i = 0; i < bgen_N; ++i) {
- if (indicator_idv[i] == 0) {
- continue;
- }
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
- // WJA.
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- } else {
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean += geno;
- }
- c_phen++;
- }
-
- x_mean /= static_cast<double>(ni_test - n_miss);
-
- for (size_t i = 0; i < ni_test; ++i) {
- if (gsl_vector_get(x_miss, i) == 0) {
- gsl_vector_set(x, i, x_mean);
- }
- geno = gsl_vector_get(x, i);
- }
-
- gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
- gsl_vector_memcpy(&Xlarge_col.vector, x);
- c++;
-
- if (c % msize == 0 || c == t_last) {
- size_t l = 0;
- if (c % msize == 0) {
- l = msize;
- } else {
- l = c % msize;
- }
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- gsl_matrix_set_zero(Xlarge);
-
- for (size_t i = 0; i < l; i++) {
- gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
- gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
- CalcUab(UtW, Uty, Utx, Uab);
-
- time_start = clock();
- FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
- // 3 is before 1.
- if (a_mode == 3 || a_mode == 4) {
- CalcRLScore(l_mle_null, param1, beta, se, p_score);
- }
-
- if (a_mode == 1 || a_mode == 4) {
- CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
- logl_H1);
- CalcRLWald(lambda_remle, param1, beta, se, p_wald);
- }
-
- if (a_mode == 2 || a_mode == 4) {
- CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
- }
-
- time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- // Store summary data.
- SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle,
- p_wald, p_lrt, p_score, logl_H1};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout << endl;
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
- gsl_vector_free(Utx);
- gsl_matrix_free(Uab);
- gsl_vector_free(ab);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
+ LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
infile.close();
infile.clear();
-
- return;
}
void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
@@ -1914,10 +1573,10 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
vector<pair<size_t, double>> &pos_loglr) {
double logl_H0, logl_H1, log_lr, lambda0, lambda1;
- gsl_vector *w = gsl_vector_alloc(Uty->size);
- gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1);
- gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6);
- gsl_vector *ab = gsl_vector_alloc(6);
+ gsl_vector *w = gsl_vector_safe_alloc(Uty->size);
+ gsl_matrix *Utw = gsl_matrix_safe_alloc(Uty->size, 1);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(Uty->size, 6);
+ gsl_vector *ab = gsl_vector_safe_alloc(6);
gsl_vector_set_zero(ab);
gsl_vector_set_all(w, 1.0);
@@ -2122,8 +1781,8 @@ void CalcLambda(const char func_name, const gsl_vector *eval,
size_t n_cvt = UtW->size2, ni_test = UtW->size1;
size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
gsl_matrix_set_zero(Uab);
CalcUab(UtW, Uty, Uab);
@@ -2145,8 +1804,8 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
size_t n_cvt = UtW->size2, ni_test = UtW->size1;
size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
gsl_matrix_set_zero(Uab);
CalcUab(UtW, Uty, Uab);
@@ -2172,15 +1831,15 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
size_t n_cvt = UtW->size2, ni_test = UtW->size1;
size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
- gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
- gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
- gsl_vector *Hi_eval = gsl_vector_alloc(eval->size);
- gsl_vector *v_temp = gsl_vector_alloc(eval->size);
- gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2);
- gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2);
- gsl_vector *WHiy = gsl_vector_alloc(UtW->size2);
- gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
+ gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+ gsl_vector *Hi_eval = gsl_vector_safe_alloc(eval->size);
+ gsl_vector *v_temp = gsl_vector_safe_alloc(eval->size);
+ gsl_matrix *HiW = gsl_matrix_safe_alloc(eval->size, UtW->size2);
+ gsl_matrix *WHiW = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
+ gsl_vector *WHiy = gsl_vector_safe_alloc(UtW->size2);
+ gsl_matrix *Vbeta = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
gsl_matrix_set_zero(Uab);
CalcUab(UtW, Uty, Uab);
@@ -2262,13 +1921,13 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
// Calculate basic quantities.
size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
- gsl_vector *x = gsl_vector_alloc(U->size1);
- gsl_vector *x_miss = gsl_vector_alloc(U->size1);
- gsl_vector *Utx = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+ gsl_vector *x_miss = gsl_vector_safe_alloc(U->size1);
+ gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
- gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+ gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
gsl_matrix_view UtW_expand_mat =
gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
@@ -2278,24 +1937,24 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
// Start reading genotypes and analyze.
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
+ ProgressBar("Reading SNPs", t, ns_total - 1);
}
if (indicator_snp[t] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
x_mean = 0.0;
c_phen = 0;
n_miss = 0;
gsl_vector_set_zero(x_miss);
for (size_t i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -2390,8 +2049,8 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_vector *Uty,
const gsl_matrix *W, const gsl_vector *y,
const gsl_vector *env) {
- debug_msg("entering");
string file_bed = file_bfile + ".bed";
+ debug_msg(file_bed);
ifstream infile(file_bed.c_str(), ios::binary);
if (!infile) {
cout << "error reading bed file:" << file_bed << endl;
@@ -2412,12 +2071,12 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
// Calculate basic quantities.
size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
- gsl_vector *x = gsl_vector_alloc(U->size1);
- gsl_vector *Utx = gsl_vector_alloc(U->size2);
- gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
- gsl_vector *ab = gsl_vector_alloc(n_index);
+ gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+ gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+ gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+ gsl_vector *ab = gsl_vector_safe_alloc(n_index);
- gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+ gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
gsl_matrix_view UtW_expand_mat =
gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
@@ -2440,7 +2099,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
if (t % d_pace == 0 || t == snpInfo.size() - 1) {
- ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
diff --git a/src/lmm.h b/src/lmm.h
index 4d57ab1..9c46fae 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -23,10 +23,12 @@
#include "gsl/gsl_vector.h"
#include "io.h"
#include "param.h"
+#include <functional>
+#include <tuple>
using namespace std;
-#define LMM_BATCH_SIZE 10000 // used for batch processing
+#define LMM_BATCH_SIZE 20000 // used for batch processing
class FUNC_PARAM {
@@ -40,6 +42,8 @@ public:
size_t e_mode;
};
+typedef std::tuple<string,std::vector<double> > SnpNameValues; // (SNP name, one genotype value per individual; NaN = missing), as returned by the fetch_snp callbacks
+
class LMM {
public:
@@ -53,8 +57,6 @@ public:
string path_out;
string file_gene;
- // WJA added
- string file_oxford;
// LMM related parameters
double l_min;
@@ -91,17 +93,19 @@ public:
void AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_vector *Utx,
const gsl_matrix *W, const gsl_vector *x);
- void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y);
- // WJA added.
- void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_vector *Uty,
- const gsl_matrix *W, const gsl_vector *y);
+ void Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
+ const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const set<string> gwasnps);
void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_vector *Uty,
const gsl_matrix *W, const gsl_vector *y,
const set<string> gwasnps);
+ void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+ const gsl_matrix *UtW, const gsl_vector *Uty,
+ const gsl_matrix *W, const gsl_vector *y,
+ const set<string> gwasnps);
void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_vector *Uty,
const gsl_matrix *W, const gsl_vector *y,
diff --git a/src/logistic.cpp b/src/logistic.cpp
index 2dd0402..a936682 100644
--- a/src/logistic.cpp
+++ b/src/logistic.cpp
@@ -7,6 +7,7 @@
#include <stdio.h>
#include "logistic.h"
+#include "debug.h"
// I need to bundle all the data that goes to the function to optimize
// together.
@@ -135,7 +136,7 @@ void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params,
int K = p->X->size2;
int Kc = p->Xc->size2;
int npar = beta->size;
- gsl_vector *gn = gsl_vector_alloc(npar); // gn
+ gsl_vector *gn = gsl_vector_safe_alloc(npar); // gn
// Intitialize Hessian out necessary ???
gsl_matrix_set_zero(out);
@@ -226,11 +227,11 @@ int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
// Initial fit.
mLogLik = wgsl_mixed_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_safe_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
for (iter = 0; iter < 100; iter++) {
wgsl_mixed_optim_hessian(beta, &p, myH); // Calculate Hessian.
@@ -456,11 +457,11 @@ int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
// Initial fit.
mLogLik = wgsl_cat_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_safe_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
for (iter = 0; iter < 100; iter++) {
wgsl_cat_optim_hessian(beta, &p, myH); // Calculate Hessian.
@@ -596,7 +597,7 @@ void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params,
int n = p->y->size;
int Kc = p->Xc->size2;
int npar = beta->size;
- gsl_vector *gn = gsl_vector_alloc(npar); // gn.
+ gsl_vector *gn = gsl_vector_safe_alloc(npar); // gn.
// Intitialize Hessian out necessary ???
@@ -673,11 +674,11 @@ int logistic_cont_fit(gsl_vector *beta,
// Initial fit.
mLogLik = wgsl_cont_optim_f(beta, &p);
- gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
- gsl_vector *stBeta = gsl_vector_alloc(npar); // Direction to move.
+ gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+ gsl_vector *stBeta = gsl_vector_safe_alloc(npar); // Direction to move.
- gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
- gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+ gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+ gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
for (iter = 0; iter < 100; iter++) {
wgsl_cont_optim_hessian(beta, &p, myH); // Calculate Hessian.
diff --git a/src/main.cpp b/src/main.cpp
index 92c4d90..d752a72 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -61,7 +61,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
- if (cPar.mode_silence) {
+ if (is_quiet_mode()) {
stringstream ss;
cout.rdbuf(ss.rdbuf());
}
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 4203837..e7dff73 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -32,7 +32,7 @@
#include <tuple>
#include <vector>
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
#include "gsl/gsl_version.h"
@@ -49,11 +49,12 @@
#include "debug.h"
#include "eigenlib.h"
+#include "fastblas.h"
#include "lapack.h"
#include "mathfunc.h"
using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
bool has_nan(const vector<double> v) {
for (const auto& e: v) {
@@ -79,8 +80,8 @@ double VectorVar(const gsl_vector *v) {
// Center the matrix G.
void CenterMatrix(gsl_matrix *G) {
double d;
- gsl_vector *w = gsl_vector_alloc(G->size1);
- gsl_vector *Gw = gsl_vector_alloc(G->size1);
+ gsl_vector *w = gsl_vector_safe_alloc(G->size1);
+ gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
gsl_vector_set_all(w, 1.0);
gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -104,7 +105,7 @@ void CenterMatrix(gsl_matrix *G) {
// Center the matrix G.
void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
double d, wtw;
- gsl_vector *Gw = gsl_vector_alloc(G->size1);
+ gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
gsl_blas_ddot(w, w, &wtw);
gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -126,12 +127,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
// Center the matrix G.
void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
- gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
- gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
+ gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
+ gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -226,7 +227,7 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
bool isMatrixPositiveDefinite(const gsl_matrix *G) {
enforce(G->size1 == G->size2);
- auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+ auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
enforce_gsl(gsl_matrix_memcpy(G2,G));
auto handler = gsl_set_error_handler_off();
#if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
@@ -241,11 +242,11 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G) {
gsl_vector *getEigenValues(const gsl_matrix *G) {
enforce(G->size1 == G->size2);
- auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+ auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
enforce_gsl(gsl_matrix_memcpy(G2,G));
auto eworkspace = gsl_eigen_symm_alloc(G->size1);
enforce(eworkspace);
- gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+ gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
gsl_eigen_symm_free(eworkspace);
gsl_matrix_free(G2);
@@ -313,6 +314,13 @@ bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
return ret_valid;
}
+double sum(const double *m, size_t rows, size_t cols) { // Sum of the rows*cols doubles starting at m.
+ double sum = 0.0;
+ for (size_t i = 0; i < rows * cols; i++) // size_t index matches the bound's type: no signed/unsigned mix, no int overflow.
+ sum += m[i];
+ return sum;
+}
+
double SumVector(const gsl_vector *v) {
double sum = 0;
for (int i = 0; i < v->size; i++ ) {
@@ -337,9 +345,9 @@ double CenterVector(gsl_vector *y) {
// Center the vector y.
void CenterVector(gsl_vector *y, const gsl_matrix *W) {
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wty = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
+ gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
@@ -379,22 +387,18 @@ void StandardizeVector(gsl_vector *y) {
// Calculate UtX.
void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
- gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
+ gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
gsl_matrix_memcpy(X, UtX);
- eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+ fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
gsl_matrix_free(X);
-
- return;
}
void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
- eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
- return;
+ fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
}
void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
- return;
}
// Kronecker product.
@@ -520,6 +524,7 @@ unsigned char Double02ToUchar(const double dosage) {
return (int)(dosage * 100);
}
+/*
void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
const size_t i_row, VectorXd &x_row) {
if (i_row < X.size()) {
@@ -531,3 +536,5 @@ void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
exit(1);
}
}
+
+*/
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 6e20b37..1319a64 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -19,7 +19,7 @@
#ifndef __MATHFUNC_H__
#define __MATHFUNC_H__
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
@@ -27,7 +27,7 @@
#define EIGEN_MINVALUE 1e-10
using namespace std;
-using namespace Eigen;
+
bool has_nan(const vector<double> v);
@@ -43,6 +43,7 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G);
bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO);
bool isMatrixSymmetric(const gsl_matrix *G);
gsl_vector *getEigenValues(const gsl_matrix *G);
+double sum(const double *m, size_t rows, size_t cols);
double SumVector(const gsl_vector *v);
double CenterVector(gsl_vector *y);
void CenterVector(gsl_vector *y, const gsl_matrix *W);
@@ -56,7 +57,7 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
double UcharToDouble02(const unsigned char c);
unsigned char Double02ToUchar(const double dosage);
-void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
- const size_t i_row, VectorXd &x_row);
+// void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+// const size_t i_row, Eigen::VectorXd &x_row);
#endif
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index c5efb6e..bdcbe5b 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -39,6 +39,7 @@
#include "gsl/gsl_vector.h"
#include "eigenlib.h"
+#include "fastblas.h"
#include "gzstream.h"
#include "io.h"
#include "lapack.h"
@@ -54,7 +55,6 @@ void MVLMM::CopyFromParam(PARAM &cPar) {
file_bfile = cPar.file_bfile;
file_geno = cPar.file_geno;
- file_oxford = cPar.file_oxford;
file_out = cPar.file_out;
path_out = cPar.path_out;
@@ -2950,556 +2950,6 @@ double PCRT(const size_t mode, const size_t d_size, const double p_value,
return p_crt;
}
-// WJA added.
-void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
- const gsl_matrix *UtW, const gsl_matrix *UtY) {
- debug_msg("entering");
- string file_bgen = file_oxford + ".bgen";
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return;
- }
-
- clock_t time_start = clock();
- time_UtX = 0;
- time_opt = 0;
-
- string line;
-
- // Create a large matrix.
- size_t msize = LMM_BATCH_SIZE;
- gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
- gsl_matrix_set_zero(Xlarge);
-
- double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
- double crt_a, crt_b, crt_c;
- int n_miss, c_phen;
- double geno, x_mean;
- size_t c = 0;
- size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
-
- size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
-
- // Large matrices for EM.
- gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
-
- // Large matrices for NR. Each dxd block is H_k^{-1}.
- gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
-
- // Each column is H_k^{-1}y_k.
- gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
-
- // Each dcxdc block is x_k\otimes H_k^{-1}.
- gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
- gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
- gsl_vector *x = gsl_vector_alloc(n_size);
- gsl_vector *x_miss = gsl_vector_alloc(n_size);
-
- gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
- gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
- gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
- gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
- gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
- gsl_vector *beta = gsl_vector_alloc(d_size);
- gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
-
- // Null estimates for initial values.
- gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
- gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
- gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
- gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
-
- gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
- gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
- gsl_matrix_view xHi_all_sub =
- gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
-
- gsl_matrix_transpose_memcpy(Y, UtY);
-
- gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
-
- gsl_vector_view X_row = gsl_matrix_row(X, c_size);
- gsl_vector_set_zero(&X_row.vector);
- gsl_vector_view B_col = gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
-
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
- l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
- OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
- V_e, &B_sub.matrix);
- logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
- &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
- crt_c);
- MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
- se_B_null);
-
- c = 0;
- Vg_remle_null.clear();
- Ve_remle_null.clear();
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = i; j < d_size; j++) {
- Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
- Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
- VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
- VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
- c++;
- }
- }
- beta_remle_null.clear();
- se_beta_remle_null.clear();
- for (size_t i = 0; i < se_B_null->size1; i++) {
- for (size_t j = 0; j < se_B_null->size2; j++) {
- beta_remle_null.push_back(gsl_matrix_get(B, i, j));
- se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
- }
- }
- logl_remle_H0 = logl_H0;
-
- cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
- cout.precision(4);
-
- cout << "REMLE estimate for Vg in the null model: " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- cout << gsl_matrix_get(V_g, i, j) << "\t";
- }
- cout << endl;
- }
- cout << "se(Vg): " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- c = GetIndex(i, j, d_size);
- cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
- }
- cout << endl;
- }
- cout << "REMLE estimate for Ve in the null model: " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- cout << gsl_matrix_get(V_e, i, j) << "\t";
- }
- cout << endl;
- }
- cout << "se(Ve): " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- c = GetIndex(i, j, d_size);
- cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
- }
- cout << endl;
- }
- cout << "REMLE likelihood = " << logl_H0 << endl;
-
- logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
- OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
- V_e, &B_sub.matrix);
- logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
- &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
- crt_c);
- MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
- se_B_null);
-
- c = 0;
- Vg_mle_null.clear();
- Ve_mle_null.clear();
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = i; j < d_size; j++) {
- Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
- Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
- VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
- VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
- c++;
- }
- }
- beta_mle_null.clear();
- se_beta_mle_null.clear();
- for (size_t i = 0; i < se_B_null->size1; i++) {
- for (size_t j = 0; j < se_B_null->size2; j++) {
- beta_mle_null.push_back(gsl_matrix_get(B, i, j));
- se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
- }
- }
- logl_mle_H0 = logl_H0;
-
- cout << "MLE estimate for Vg in the null model: " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- cout << gsl_matrix_get(V_g, i, j) << "\t";
- }
- cout << endl;
- }
- cout << "se(Vg): " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- c = GetIndex(i, j, d_size);
- cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
- }
- cout << endl;
- }
- cout << "MLE estimate for Ve in the null model: " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- cout << gsl_matrix_get(V_e, i, j) << "\t";
- }
- cout << endl;
- }
- cout << "se(Ve): " << endl;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = 0; j <= i; j++) {
- c = GetIndex(i, j, d_size);
- cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
- }
- cout << endl;
- }
- cout << "MLE likelihood = " << logl_H0 << endl;
-
- vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
- for (size_t i = 0; i < d_size; i++) {
- v_beta.push_back(0.0);
- }
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = i; j < d_size; j++) {
- v_Vg.push_back(0.0);
- v_Ve.push_back(0.0);
- v_Vbeta.push_back(0.0);
- }
- }
-
- gsl_matrix_memcpy(V_g_null, V_g);
- gsl_matrix_memcpy(V_e_null, V_e);
- gsl_matrix_memcpy(B_null, B);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
- double bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- std::cout << "Warning: WJA hard coded SNP missingness threshold "
- << "of 10%" << std::endl;
-
- // Start reading genotypes and analyze.
- size_t csnp = 0, t_last = 0;
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- if (indicator_snp[t] == 0) {
- continue;
- }
- t_last++;
- }
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
- if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
- }
- if (indicator_snp[t] == 0) {
- continue;
- }
-
- // Read SNP header.
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (indicator_snp[t] == 0) {
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
-
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- x_mean = 0.0;
- c_phen = 0;
- n_miss = 0;
- gsl_vector_set_zero(x_miss);
- for (size_t i = 0; i < bgen_N; ++i) {
- if (indicator_idv[i] == 0) {
- continue;
- }
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
- // WJA.
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(x_miss, c_phen, 0.0);
- n_miss++;
- } else {
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
- x_mean += geno;
- }
- c_phen++;
- }
-
- x_mean /= static_cast<double>(ni_test - n_miss);
-
- for (size_t i = 0; i < ni_test; ++i) {
- if (gsl_vector_get(x_miss, i) == 0) {
- gsl_vector_set(x, i, x_mean);
- }
- }
-
- gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
- gsl_vector_memcpy(&Xlarge_col.vector, x);
- csnp++;
-
- if (csnp % msize == 0 || csnp == t_last) {
- size_t l = 0;
- if (csnp % msize == 0) {
- l = msize;
- } else {
- l = csnp % msize;
- }
-
- gsl_matrix_view Xlarge_sub =
- gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
- gsl_matrix_view UtXlarge_sub =
- gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
- time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
- time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- gsl_matrix_set_zero(Xlarge);
-
- for (size_t i = 0; i < l; i++) {
- gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
- gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
-
- // Initial values.
- gsl_matrix_memcpy(V_g, V_g_null);
- gsl_matrix_memcpy(V_e, V_e_null);
- gsl_matrix_memcpy(B, B_null);
-
- time_start = clock();
-
- // 3 is before 1.
- if (a_mode == 3 || a_mode == 4) {
- p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
- V_e_null, UltVehiY, beta, Vbeta);
- if (p_score < p_nr && crt == 1) {
- logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
- Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
- }
- }
-
- if (a_mode == 2 || a_mode == 4) {
- logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, B);
-
- // Calculate beta and Vbeta.
- p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
- UltVehiY, beta, Vbeta);
- p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
-
- if (p_lrt < p_nr) {
- logl_H1 =
- MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-
- // Calculate beta and Vbeta.
- p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
- UltVehiY, beta, Vbeta);
- p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
-
- if (crt == 1) {
- p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
- }
- }
-
- if (a_mode == 1 || a_mode == 4) {
- logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
- E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
- UltVehiE, V_g, V_e, B);
- p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
- UltVehiY, beta, Vbeta);
-
- if (p_wald < p_nr) {
- logl_H1 =
- MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
- xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
- p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
- UltVehiY, beta, Vbeta);
-
- if (crt == 1) {
- p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
- }
- }
- }
-
- time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
- // Store summary data.
- for (size_t i = 0; i < d_size; i++) {
- v_beta[i] = gsl_vector_get(beta, i);
- }
-
- c = 0;
- for (size_t i = 0; i < d_size; i++) {
- for (size_t j = i; j < d_size; j++) {
- v_Vg[c] = gsl_matrix_get(V_g, i, j);
- v_Ve[c] = gsl_matrix_get(V_e, i, j);
- v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
- c++;
- }
- }
-
- MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
- sumStat.push_back(SNPs);
- }
- }
- }
- cout << endl;
-
- infile.close();
- infile.clear();
-
- gsl_matrix_free(U_hat);
- gsl_matrix_free(E_hat);
- gsl_matrix_free(OmegaU);
- gsl_matrix_free(OmegaE);
- gsl_matrix_free(UltVehiY);
- gsl_matrix_free(UltVehiBX);
- gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
- gsl_matrix_free(Hi_all);
- gsl_matrix_free(Hiy_all);
- gsl_matrix_free(xHi_all);
- gsl_matrix_free(Hessian);
-
- gsl_vector_free(x);
- gsl_vector_free(x_miss);
-
- gsl_matrix_free(Y);
- gsl_matrix_free(X);
- gsl_matrix_free(V_g);
- gsl_matrix_free(V_e);
- gsl_matrix_free(B);
- gsl_vector_free(beta);
- gsl_matrix_free(Vbeta);
-
- gsl_matrix_free(V_g_null);
- gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
- gsl_matrix_free(se_B_null);
-
- gsl_matrix_free(Xlarge);
- gsl_matrix_free(UtXlarge);
-
- return;
-}
-
void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
const gsl_matrix *UtW, const gsl_matrix *UtY) {
debug_msg("entering");
@@ -3739,24 +3189,24 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
t_last++;
}
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
+ ProgressBar("Reading SNPs", t, ns_total - 1);
}
if (indicator_snp[t] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
x_mean = 0.0;
c_phen = 0;
n_miss = 0;
gsl_vector_set_zero(x_miss);
for (size_t i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -3801,8 +3251,8 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
- &UtXlarge_sub.matrix);
+ fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ &UtXlarge_sub.matrix);
time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
gsl_matrix_set_zero(Xlarge);
@@ -4190,7 +3640,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
}
for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
if (t % d_pace == 0 || t == snpInfo.size() - 1) {
- ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
@@ -4268,7 +3718,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
time_start = clock();
- eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+ fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
&UtXlarge_sub.matrix);
time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
@@ -4716,24 +4166,24 @@ void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
// Start reading genotypes and analyze.
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % d_pace == 0 || t == (ns_total - 1)) {
- ProgressBar("Reading SNPs ", t, ns_total - 1);
+ ProgressBar("Reading SNPs", t, ns_total - 1);
}
if (indicator_snp[t] == 0) {
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
x_mean = 0.0;
c_phen = 0;
n_miss = 0;
gsl_vector_set_zero(x_miss);
for (size_t i = 0; i < ni_total; ++i) {
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (indicator_idv[i] == 0) {
continue;
}
@@ -5175,7 +4625,7 @@ void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
if (t % d_pace == 0 || t == snpInfo.size() - 1) {
- ProgressBar("Reading SNPs ", t, snpInfo.size() - 1);
+ ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
}
if (indicator_snp[t] == 0) {
continue;
diff --git a/src/param.cpp b/src/param.cpp
index 3b319e9..1a27a53 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -16,12 +16,12 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <iostream>
+#include <string>
#include <algorithm>
#include <cmath>
#include <cstring>
#include <fstream>
-#include <iostream>
-#include <string>
#include <sys/stat.h>
#include "gsl/gsl_blas.h"
@@ -66,7 +66,7 @@ void LOCO_set_Snps(set<string> &ksnps, set<string> &gwasnps,
// (indicator_idv[x] == 1). This should match indicator_cvt etc. If
// this gives problems with certain sets we can simply trim to size.
-void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
+void trim_individuals(vector<int> &idvs, size_t ni_max) {
if (ni_max) {
size_t count = 0;
for (auto ind = idvs.begin(); ind != idvs.end(); ++ind) {
@@ -76,7 +76,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
break;
}
if (count != idvs.size()) {
- if (debug)
+ if (is_debug_mode())
cout << "**** TEST MODE: trim individuals from " << idvs.size()
<< " to " << count << endl;
idvs.resize(count);
@@ -87,7 +87,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
// ---- PARAM class implementation
PARAM::PARAM(void)
- : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
+ : a_mode(0), k_mode(1), d_pace(DEFAULT_PACE),
file_out("result"), path_out("./output/"), miss_level(0.05),
maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5),
n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001),
@@ -221,7 +221,7 @@ void PARAM::ReadFiles(void) {
} else {
n_cvt = 1;
}
- trim_individuals(indicator_cvt, ni_max, mode_debug);
+ trim_individuals(indicator_cvt, ni_max);
if (!file_gxe.empty()) {
if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
@@ -234,38 +234,7 @@ void PARAM::ReadFiles(void) {
}
}
- trim_individuals(indicator_idv, ni_max, mode_debug);
-
- // WJA added.
- // Read genotype and phenotype file for bgen format.
- if (!file_oxford.empty()) {
- file_str = file_oxford + ".sample";
- if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
- indicator_cvt, cvt, n_cvt) == false) {
- error = true;
- }
- if ((indicator_cvt).size() == 0) {
- n_cvt = 1;
- }
-
- // Post-process covariates and phenotypes, obtain
- // ni_test, save all useful covariates.
- ProcessCvtPhen();
-
- // Obtain covariate matrix.
- gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
- CopyCvt(W);
-
- file_str = file_oxford + ".bgen";
- if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp,
- snpInfo, maf_level, miss_level, hwe_level, r2_level,
- ns_test) == false) {
- error = true;
- }
- gsl_matrix_free(W);
-
- ns_total = indicator_snp.size();
- }
+ trim_individuals(indicator_idv, ni_max);
// Read genotype and phenotype file for PLINK format.
if (!file_bfile.empty()) {
@@ -333,11 +302,11 @@ void PARAM::ReadFiles(void) {
gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
CopyCvt(W);
- trim_individuals(indicator_idv, ni_max, mode_debug);
- trim_individuals(indicator_cvt, ni_max, mode_debug);
+ trim_individuals(indicator_idv, ni_max);
+ trim_individuals(indicator_cvt, ni_max);
if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
- mapRS2bp, mapRS2cM, snpInfo, ns_test, mode_debug) == false) {
+ mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
error = true;
}
gsl_matrix_free(W);
@@ -447,7 +416,7 @@ void PARAM::ReadFiles(void) {
while (!safeGetline(infile, file_name).eof()) {
if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
- mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp, mode_debug) == false) {
+ mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
error = true;
}
@@ -741,19 +710,6 @@ void PARAM::CheckParam(void) {
}
}
- if (!file_oxford.empty()) {
- str = file_oxford + ".bgen";
- if (stat(str.c_str(), &fileInfo) == -1) {
- cout << "error! fail to open .bgen file: " << str << endl;
- error = true;
- }
- str = file_oxford + ".sample";
- if (stat(str.c_str(), &fileInfo) == -1) {
- cout << "error! fail to open .sample file: " << str << endl;
- error = true;
- }
- }
-
if ((!file_geno.empty() || !file_gene.empty())) {
str = file_pheno;
if (stat(str.c_str(), &fileInfo) == -1) {
@@ -864,11 +820,6 @@ void PARAM::CheckParam(void) {
flag++;
}
- // WJA added.
- if (!file_oxford.empty()) {
- flag++;
- }
-
if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 &&
a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 &&
a_mode != 63 && a_mode != 66 && a_mode != 67) {
@@ -948,8 +899,7 @@ void PARAM::CheckParam(void) {
if (!loco.empty()) {
enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22,
"LOCO only works with LMM and K");
- enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
- enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)");
+ // enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)");
enforce_msg(!file_anno.empty(),
"LOCO requires annotation file (-a switch)");
@@ -1056,14 +1006,6 @@ void PARAM::CheckParam(void) {
void PARAM::CheckData(void) {
- // WJA NOTE: I added this condition so that covariates can be added
- // through sample, probably not exactly what is wanted.
- if (file_oxford.empty()) {
- if ((file_cvt).empty() || (indicator_cvt).size() == 0) {
- n_cvt = 1;
- }
- }
-
if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) {
cout << "error! the number of pve estimates does not equal to "
<< "the number of categories in the cat file:" << v_pve.size() << " "
@@ -1208,7 +1150,7 @@ void PARAM::CheckData(void) {
}
// Set d_pace to 1000 for gene expression.
- if (!file_gene.empty() && d_pace == 100000) {
+ if (!file_gene.empty() && d_pace == DEFAULT_PACE) {
d_pace = 1000;
}
@@ -1340,7 +1282,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
}
} else {
if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
- calc_K, mode_debug) == false) {
+ calc_K) == false) {
error = true;
}
}
@@ -1360,7 +1302,7 @@ void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
}
} else {
if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
- ni_test, ns_test, mode_debug) == false) {
+ ni_test, ns_test) == false) {
error = true;
}
}
@@ -1375,18 +1317,11 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) {
if (!file_bfile.empty()) {
file_str = file_bfile + ".bed";
- enforce_msg(loco.empty(), "FIXME: LOCO nyi");
+ // enforce_msg(loco.empty(), "FIXME: LOCO nyi");
if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
false) {
error = true;
}
- } else if (!file_oxford.empty()) {
- file_str = file_oxford + ".bgen";
- enforce_msg(loco.empty(), "FIXME: LOCO nyi");
- if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
- false) {
- error = true;
- }
} else {
file_str = file_geno;
if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace,
diff --git a/src/param.h b/src/param.h
index ff279bd..c4316bb 100644
--- a/src/param.h
+++ b/src/param.h
@@ -26,7 +26,8 @@
#include <set>
#include <vector>
-#define K_BATCH_SIZE 10000 // #snps used for batched K
+#define K_BATCH_SIZE 20000 // #snps used for batched K
+#define DEFAULT_PACE 1000 // for display only
using namespace std;
@@ -115,16 +116,16 @@ public:
class PARAM {
public:
// IO-related parameters
- bool mode_check = true; // run data checks (slower)
- bool mode_strict = false; // exit on some data checks
- bool mode_silence;
- bool mode_debug = false;
- uint issue; // enable tests for issue on github tracker
+ // bool mode_check = true; // run data checks (slower)
+ // bool mode_strict = false; // exit on some data checks
+ // bool mode_silence;
+ // bool mode_debug = false;
+ // uint issue; // enable tests for issue on github tracker
uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; // Which phenotype column needs analysis.
- size_t d_pace; // Display pace
+ size_t d_pace = DEFAULT_PACE; // Display pace (-pace switch)
string file_bfile, file_mbfile;
string file_geno, file_mgeno;
@@ -155,9 +156,6 @@ public:
string file_ksnps; // File SNPs for computing K
string file_gwasnps; // File SNPs for computing GWAS
- // WJA added.
- string file_oxford;
-
// QC-related parameters.
double miss_level;
double maf_level;
diff --git a/src/prdt.cpp b/src/prdt.cpp
index 9dc84bc..fc0abe8 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -227,7 +227,7 @@ void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) {
// Start reading genotypes and analyze.
for (size_t t = 0; t < ns_total; ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % d_pace == 0 || t == (ns_total - 1)) {
ProgressBar("Reading SNPs ", t, ns_total - 1);
}
diff --git a/src/vc.cpp b/src/vc.cpp
index 1465f16..f4cd650 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -41,7 +41,7 @@
#include "gsl/gsl_min.h"
#include "gsl/gsl_multiroots.h"
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
#include "eigenlib.h"
#include "gzstream.h"
@@ -53,7 +53,7 @@
#include "vc.h"
using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
// In this file, X, Y are already transformed (i.e. UtX and UtY).
void VC::CopyFromParam(PARAM &cPar) {
@@ -663,7 +663,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
HEADER header;
// Header.
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_vc(line, header);
if (header.n_col == 0) {
@@ -678,7 +678,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
while (!safeGetline(infile, line).eof()) {
// do not read cor values this time; upto col_n-1.
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
n_total = 0;
n_mis = 0;
@@ -688,6 +688,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
d_cm = 0;
d_pos = 0;
for (size_t i = 0; i < header.coln - 1; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -822,7 +823,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
// Read header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_vc(line, header);
if (header.n_col == 0) {
@@ -844,7 +845,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
}
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
z = 0;
beta = 0;
@@ -857,6 +858,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
af = 0;
var_x = 0;
for (size_t i = 0; i < header.coln; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -1055,7 +1057,7 @@ void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
// Header.
HEADER header;
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
ReadHeader_vc(line, header);
while (!safeGetline(infile, line).eof()) {
@@ -1063,8 +1065,9 @@ void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
// Do not read cor values this time; upto col_n-1.
d_pos1 = 0;
d_cm1 = 0;
- ch_ptr = strtok((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
for (size_t i = 0; i < header.coln - 1; i++) {
+ enforce(ch_ptr);
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -2238,7 +2241,7 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
gsl_vector_mul(wz, w);
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
}
@@ -2246,9 +2249,9 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
geno_mean = 0.0;
n_miss = 0;
@@ -2260,7 +2263,7 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
if (indicator_idv[i] == 0) {
continue;
}
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (strcmp(ch_ptr, "NA") == 0) {
gsl_vector_set(geno_miss, i, 0);
n_miss++;
@@ -2491,7 +2494,7 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
for (size_t t = 0; t < indicator_snp.size(); ++t) {
- !safeGetline(infile, line).eof();
+ safeGetline(infile, line).eof();
if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
ProgressBar("Reading SNPs ", t, indicator_snp.size() - 1);
}
@@ -2499,9 +2502,9 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
continue;
}
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- ch_ptr = strtok(NULL, " , \t");
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
geno_mean = 0.0;
n_miss = 0;
@@ -2513,7 +2516,7 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
if (indicator_idv[i] == 0) {
continue;
}
- ch_ptr = strtok(NULL, " , \t");
+ ch_ptr = strtok_safe(NULL, " , \t");
if (strcmp(ch_ptr, "NA") == 0) {
gsl_vector_set(geno_miss, i, 0);
n_miss++;
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
index 0fc4423..284c9aa 100755
--- a/test/dev_test_suite.sh
+++ b/test/dev_test_suite.sh
@@ -11,7 +11,8 @@ testBXDStandardRelatednessMatrixKSingularError() {
-c ../example/BXD_covariates.txt \
-a ../example/BXD_snps.txt \
-gk \
- -debug -o $outn
+ -debug \
+ -o $outn
assertEquals 22 $? # should show singular error
}
@@ -44,7 +45,7 @@ testBXDLMMLikelihoodRatio() {
assertEquals 0 $?
outfn=output/$outn.assoc.txt
- assertEquals "80498" `wc -w < $outfn`
+ assertEquals "73180" `wc -w < $outfn`
assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
@@ -83,6 +84,49 @@ testUnivariateLinearMixedModelLOCO1() {
assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
+testPlinkCenteredRelatednessMatrixKLOCO1() {
+    # Runs gemma with -gk and -loco 1 on the PLINK mouse_hs1940 example
+    # (-bfile), then checks the exit status, the log file and the resulting
+    # output/$outn.cXX.txt file.
+    #
+    # The test is switched off by the unconditional return below; nothing
+    # after it runs until that line is removed.
+    return 0
+    outn=mouse_hs1940_Plink_LOCO1
+    outfn=output/$outn.cXX.txt
+    rm -f output/$outn.*
+    $gemma -bfile ../example/mouse_hs1940 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -snps ../example/mouse_hs1940_snps.txt \
+           -nind 400 \
+           -loco 1 \
+           -gk \
+           -debug \
+           -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    # This check has to follow grep directly: $? always holds the status of
+    # the most recent command, so an assignment in between would make the
+    # assertion pass unconditionally.
+    assertEquals 0 $?
+    assertEquals "400" `wc -l < $outfn`
+    assertEquals "0.312" `head -c 5 $outfn`
+    assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+
+testPlinkUnivariateLinearMixedModelLOCO1() {
+    # Runs gemma -lmm with -loco 1 on the PLINK mouse_hs1940 example, reading
+    # the kinship file output/mouse_hs1940_Plink_LOCO1.cXX.txt (the file name
+    # that testPlinkCenteredRelatednessMatrixKLOCO1 writes), then checks the
+    # exit status, the log file and the assoc.txt output.
+    #
+    # Switched off for now: the unconditional return skips everything below.
+    return 0
+    outn=mouse_hs1940_CD8_Plink_LOCO1_lmm
+    rm -f output/$outn.*
+    $gemma -bfile ../example/mouse_hs1940 -n 1 -loco 1 \
+           -k ./output/mouse_hs1940_Plink_LOCO1.cXX.txt \
+           -a ../example/mouse_hs1940.anno.txt \
+           -snps ../example/mouse_hs1940_snps.txt -lmm \
+           -nind 400 -debug -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    assertEquals 0 $?
+    outfn=output/$outn.assoc.txt
+    assertEquals "68" $(wc -l < $outfn)
+    assertEquals "15465346.22" $(perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn)
+}
+
shunit2=`which shunit2`
if [ -x "$shunit2" ]; then
diff --git a/test/src/unittests-math.cpp b/test/src/unittests-math.cpp
index ac4c180..757c2dc 100644
--- a/test/src/unittests-math.cpp
+++ b/test/src/unittests-math.cpp
@@ -1,14 +1,23 @@
#include <catch.hpp>
#include <iostream>
#include "gsl/gsl_matrix.h"
-#include "mathfunc.h"
+#include <cblas.h>
+
#include <algorithm>
#include <limits>
#include <numeric>
+#include "debug.h"
+#include "mathfunc.h"
+#include "fastblas.h"
+#include "fastopenblas.h"
+
using namespace std;
TEST_CASE( "Math functions", "[math]" ) {
+ debug_set_debug_mode(true);
+ debug_set_no_check_mode(false);
+ debug_set_strict_mode(true);
double data[] = { 2,-1, 0,
-1, 2,-1,
0,-1, 2};
@@ -51,3 +60,109 @@ TEST_CASE( "Math functions", "[math]" ) {
REQUIRE (std::isnan(v3[2]));
REQUIRE(has_nan(v3));
}
+
+TEST_CASE("cblas_dgemm", "[math]") {
+  // Multiplies A (m x k) by B (k x n) into C (m x n) through
+  // fast_cblas_dgemm (row-major, no transposition, alpha = 1, beta = 0)
+  // and spot-checks three entries of C against hard-coded values.
+  //
+  // NOTE(review): the printed banners mention Intel(R) MKL and 64-byte
+  // aligned memory, but this code uses plain malloc and fast_cblas_dgemm;
+  // the text looks carried over from an example -- confirm before relying
+  // on it.
+  const int m = 2000, k = 200, n = 1000;
+  const double alpha = 1.0, beta = 0.0;
+  int i;
+
+  printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
+          " Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"
+          " alpha and beta are double precision scalars\n\n");
+
+  printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
+          " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
+
+  printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
+          " performance \n\n");
+  double *A = (double *)malloc( m*k*sizeof( double ));
+  double *B = (double *)malloc( k*n*sizeof( double ));
+  double *C = (double *)malloc( m*n*sizeof( double ));
+  // Fail the test case instead of dereferencing a null pointer below.
+  REQUIRE((A != NULL && B != NULL && C != NULL));
+
+  printf (" Intializing matrix data \n\n");
+  // A holds 1, 2, 3, ... and B holds -1, -2, -3, ... in storage order.
+  for (i = 0; i < (m*k); i++) {
+    A[i] = (double)(i+1);
+  }
+
+  for (i = 0; i < (k*n); i++) {
+    B[i] = (double)(-i-1);
+  }
+
+  for (i = 0; i < (m*n); i++) {
+    C[i] = 0.0;
+  }
+
+  printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
+  // The expected values below are only valid for these dimensions.
+  REQUIRE(m == 2000);
+  REQUIRE(k == 200);
+  REQUIRE(n == 1000);
+  fast_cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+                   m, n, k, alpha, A, k, B, n, beta, C, n);
+
+  // Copy out the entries under test and release the buffers first: a
+  // failing REQUIRE leaves the test case immediately, which would skip any
+  // cleanup placed after it.
+  const double c0 = trunc(C[0]);
+  const double c1 = trunc(C[1]);
+  const double c2003 = trunc(C[2003]);
+  free(A);
+  free(B);
+  free(C);
+
+  REQUIRE(c0 == -2666620100.0 );
+  REQUIRE(c1 == -2666640200.0 );
+  REQUIRE(c2003 == -10627000400.0 );
+
+}
+
+TEST_CASE("fast_dgemm", "[math]") {
+  // Builds the same A (m x k), B (k x n) and zeroed C (m x n) data as raw
+  // arrays, loads them into gsl_matrix objects with fast_copy, multiplies
+  // with fast_dgemm("N","N",...) and spot-checks three entries of the
+  // result against hard-coded values.
+  //
+  // NOTE(review): the printed banners mention Intel(R) MKL and 64-byte
+  // aligned memory, but this code uses plain malloc and fast_dgemm; the
+  // text looks carried over from an example -- confirm.
+  const int m = 2000, k = 200, n = 1000;
+  const double alpha = 1.0, beta = 0.0;
+  int i;
+
+  printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
+          " Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"
+          " alpha and beta are double precision scalars\n\n");
+
+  printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
+          " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
+
+  printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
+          " performance \n\n");
+  double *A = (double *)malloc( m*k*sizeof( double ));
+  double *B = (double *)malloc( k*n*sizeof( double ));
+  double *C = (double *)malloc( m*n*sizeof( double ));
+  // Fail the test case instead of dereferencing a null pointer below.
+  REQUIRE((A != NULL && B != NULL && C != NULL));
+
+  printf (" Intializing matrix data \n\n");
+  // A holds 1, 2, 3, ... and B holds -1, -2, -3, ... in storage order.
+  for (i = 0; i < (m*k); i++) {
+    A[i] = (double)(i+1);
+  }
+
+  for (i = 0; i < (k*n); i++) {
+    B[i] = (double)(-i-1);
+  }
+
+  for (i = 0; i < (m*n); i++) {
+    C[i] = 0.0;
+  }
+
+  printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
+  gsl_matrix *AM = gsl_matrix_safe_alloc(m,k); // rows x cols
+  gsl_matrix *BM = gsl_matrix_safe_alloc(k,n);
+  gsl_matrix *CM = gsl_matrix_calloc(m,n);
+
+  fast_copy(AM,A);
+  fast_copy(BM,B);
+  fast_copy(CM,C);
+  fast_dgemm("N","N",alpha,AM,BM,beta,CM);
+  printf ("\n Computations completed.\n\n");
+
+  // Read the entries under test straight from the result matrix, then
+  // release everything before the REQUIREs: a failing REQUIRE leaves the
+  // test case immediately, which would skip any cleanup placed after it.
+  const double c0 = trunc(CM->data[0]);
+  const double c1 = trunc(CM->data[1]);
+  const double c2003 = trunc(CM->data[2003]);
+
+  // NOTE(review): assumes gsl_matrix_safe_alloc returns an ordinary
+  // gsl_matrix that gsl_matrix_free may release, and that fast_copy copies
+  // the raw buffer into the matrix rather than aliasing it -- confirm
+  // against the debug/fastblas sources.
+  gsl_matrix_free(AM);
+  gsl_matrix_free(BM);
+  gsl_matrix_free(CM);
+  free(A);
+  free(B);
+  free(C);
+
+  REQUIRE(c0 == -2666620100.0 );
+  REQUIRE(c1 == -2666640200.0 );
+  REQUIRE(c2003 == -10627000400.0 );
+
+}
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 350fc27..dc6053a 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -62,7 +62,7 @@ testUnivariateLinearMixedModel() {
assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
-testMultivariateLinearMixedModel() {
+testLinearMixedModelPhenotypes() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
-p ../example/mouse_hs1940.pheno.txt \
-n 1 6 \
@@ -92,8 +92,8 @@ testPlinkStandardRelatednessMatrixK() {
# Test for https://github.com/genetics-statistics/GEMMA/issues/58
# fixed GSLv2 NaN's that appeared with covariates.
-testPlinkMultivariateLinearMixedModel() {
- testname=testPlinkMultivariateLinearMixedModel
+testPlinkLinearMixedModelCovariates() {
+ testname=testPlinkLinearMixedModelCovariates
datadir=../example
$gemma -bfile $datadir/HLC \
-k output/testPlinkStandardRelatednessMatrixK.sXX.txt \