aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/debug.h4
-rw-r--r--src/io.cpp51
-rw-r--r--src/io.h6
-rw-r--r--src/lapack.cpp23
-rw-r--r--src/lapack.h29
-rw-r--r--src/param.cpp8
6 files changed, 57 insertions, 64 deletions
diff --git a/src/debug.h b/src/debug.h
index f47cb4c..e910a25 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -36,8 +36,8 @@ inline void fail_at_msg(bool strict, const char *__file, int __line, const char
#else // DEBUG
-#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
-#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
+#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl;
+#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl;
#define assert_issue(is_issue, expr) \
((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0))
diff --git a/src/io.cpp b/src/io.cpp
index fedf69a..80adbe6 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -133,7 +133,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) {
}
}
-// Read SNP file.
+// Read SNP file. A single column of SNP names.
bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
setSnps.clear();
@@ -148,6 +148,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
while (getline(infile, line)) {
ch_ptr = strtok((char *)line.c_str(), " , \t");
+ enforce_msg(ch_ptr,"Problem reading SNP file");
setSnps.insert(ch_ptr);
}
@@ -157,6 +158,9 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
return true;
}
+// Read SNP file using a header. The header determines how the
+// values for each row are parsed. A valid header can be, for example,
+// RS POS CHR
bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
setSnps.clear();
@@ -175,7 +179,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
ReadHeader_io(line, header);
if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
- cout << "missing rs id in the hearder" << endl;
+ cout << "missing rs id in the header" << endl;
}
while (!safeGetline(infile, line).eof()) {
@@ -185,6 +189,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
ch_ptr = strtok((char *)line.c_str(), " , \t");
for (size_t i = 0; i < header.coln; i++) {
+ enforce_msg(ch_ptr,"Problem reading SNP file");
if (header.rs_col != 0 && header.rs_col == i + 1) {
rs = ch_ptr;
}
@@ -250,7 +255,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
return true;
}
-// Read bimbam annotation file.
+// Read bimbam annotation file which consists of rows of SNP, POS and CHR
bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
map<string, long int> &mapRS2bp,
map<string, double> &mapRS2cM) {
@@ -264,33 +269,39 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
}
string line;
- char *ch_ptr;
-
- string rs;
- long int b_pos;
- string chr;
- double cM;
while (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " , \t");
- rs = ch_ptr;
+ const char *ch_ptr = strtok((char *)line.c_str(), " , \t");
+ enforce_str(ch_ptr, line + " Bad RS format");
+ const string rs = ch_ptr;
+ enforce_str(rs != "", line + " Bad RS format");
+
ch_ptr = strtok(NULL, " , \t");
+ enforce_str(ch_ptr, line + " Bad format");
+ ulong b_pos;
if (strcmp(ch_ptr, "NA") == 0) {
b_pos = -9;
} else {
b_pos = atol(ch_ptr);
}
+ enforce_str(b_pos,line + " Bad pos format (is zero)");
+
+ string chr;
ch_ptr = strtok(NULL, " , \t");
if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
chr = "-9";
} else {
chr = ch_ptr;
+ enforce_str(chr != "", line + " Bad chr format");
}
+
+ double cM;
ch_ptr = strtok(NULL, " , \t");
if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) {
cM = -9;
} else {
cM = atof(ch_ptr);
+ enforce_str(b_pos, line + "Bad cM format (is zero)");
}
mapRS2chr[rs] = chr;
@@ -377,9 +388,9 @@ bool ReadFile_pheno(const string &file_pheno,
while (!safeGetline(infile, line).eof()) {
ch_ptr = strtok((char *)line.c_str(), " , \t");
-
size_t i = 0;
while (i < p_max) {
+ enforce_msg(ch_ptr,"Wrong number of phenotypes");
if (mapP2c.count(i + 1) != 0) {
if (strcmp(ch_ptr, "NA") == 0) {
ind_pheno_row[mapP2c[i + 1]] = 0;
@@ -597,7 +608,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
const double &r2_level, map<string, string> &mapRS2chr,
map<string, long int> &mapRS2bp,
map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
- size_t &ns_test) {
+ size_t &ns_test, bool debug) {
indicator_snp.clear();
snpInfo.clear();
@@ -618,7 +629,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
int sig;
LUDecomp(WtW, pmt, &sig);
- LUInvert(WtW, pmt, WtWi);
+
+ LUInvert(WtW, pmt, WtWi); // @@
double v_x, v_w;
int c_idv = 0;
@@ -667,6 +679,11 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
}
if (mapRS2bp.count(rs) == 0) {
+ if (debug) {
+ std::string msg = "Can't figure out position for ";
+ msg += rs;
+ debug_msg(msg);
+ }
chr = "-9";
b_pos = -9;
cM = -9;
@@ -1617,7 +1634,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
// genotype and calculate K.
bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
- const bool calc_K) {
+ const bool calc_K, bool debug) {
igzstream infile(file_geno.c_str(), igzstream::in);
if (!infile) {
cout << "error reading genotype file:" << file_geno << endl;
@@ -1721,7 +1738,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp,
vector<vector<unsigned char>> &Xt, gsl_matrix *K,
const bool calc_K, const size_t ni_test,
- const size_t ns_test) {
+ const size_t ns_test, bool debug) {
igzstream infile(file_geno.c_str(), igzstream::in);
if (!infile) {
cout << "error reading genotype file:" << file_geno << endl;
@@ -2979,7 +2996,7 @@ bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
bool ReadHeader_io(const string &line, HEADER &header) {
string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS",
"snpid", "SNPID", "rsid", "RSID", "MarkerName"};
- set<string> rs_set(rs_ptr, rs_ptr + 11);
+ set<string> rs_set(rs_ptr, rs_ptr + 11); // create a set of 11 items
string chr_ptr[] = {"chr", "CHR"};
set<string> chr_set(chr_ptr, chr_ptr + 2);
string pos_ptr[] = {
diff --git a/src/io.h b/src/io.h
index 9edc5eb..d9253e3 100644
--- a/src/io.h
+++ b/src/io.h
@@ -64,7 +64,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
const double &r2_level, map<string, string> &mapRS2chr,
map<string, long int> &mapRS2bp,
map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
- size_t &ns_test);
+ size_t &ns_test, bool debug);
bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
const gsl_matrix *W, vector<int> &indicator_idv,
vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
@@ -94,7 +94,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
- const bool calc_K);
+ const bool calc_K, bool debug);
bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
const bool calc_K);
@@ -102,7 +102,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
vector<int> &indicator_snp,
vector<vector<unsigned char>> &Xt, gsl_matrix *K,
const bool calc_K, const size_t ni_test,
- const size_t ns_test);
+ const size_t ns_test, bool debug);
bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
gsl_matrix *K, const bool calc_K, const size_t ni_test,
diff --git a/src/lapack.cpp b/src/lapack.cpp
index b7c6f74..0dbfed6 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -24,6 +24,7 @@
#include <vector>
#include "debug.h"
+#include "lapack.h"
#include "mathfunc.h"
using namespace std;
@@ -520,7 +521,7 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
// LU decomposition.
void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
- gsl_linalg_LU_decomp(LU, p, signum);
+ enforce_gsl(gsl_linalg_LU_decomp(LU, p, signum));
return;
}
@@ -551,11 +552,13 @@ void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
}
*/
-// LU invert.
-void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
- gsl_matrix *inverse) {
- gsl_linalg_LU_invert(LU, p, inverse);
- return;
+// LU invert. Returns inverse. Note that GSL does not recommend using
+// this function
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse) {
+ auto det = LULndet(LU);
+ assert(det != 1.0);
+
+ enforce_gsl(gsl_linalg_LU_invert(LU, p, ret_inverse));
}
/*
@@ -590,10 +593,8 @@ void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
*/
// LU lndet.
-double LULndet(gsl_matrix *LU) {
- double d;
- d = gsl_linalg_LU_lndet(LU);
- return d;
+double LULndet(const gsl_matrix *LU) {
+ return gsl_linalg_LU_lndet((gsl_matrix *)LU);
}
/*
@@ -620,7 +621,7 @@ double LULndet(gsl_matrix_float *LU) {
// LU solve.
void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
const gsl_vector *b, gsl_vector *x) {
- gsl_linalg_LU_solve(LU, p, b, x);
+ enforce_gsl(gsl_linalg_LU_solve(LU, p, b, x));
return;
}
diff --git a/src/lapack.h b/src/lapack.h
index 9596667..9115201 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -23,47 +23,22 @@
using namespace std;
-void lapack_float_cholesky_decomp(gsl_matrix_float *A);
void lapack_cholesky_decomp(gsl_matrix *A);
-void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
- gsl_vector_float *x);
void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x);
-void lapack_sgemm(char *TransA, char *TransB, float alpha,
- const gsl_matrix_float *A, const gsl_matrix_float *B,
- float beta, gsl_matrix_float *C);
void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
const gsl_matrix *B, double beta, gsl_matrix *C);
-void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
- gsl_matrix_float *evec,
- const size_t flag_largematrix);
void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
const size_t flag_largematrix);
-
double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
const size_t flag_largematrix);
double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
const size_t flag_largematrix);
-double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
- gsl_vector_float *eval, const size_t flag_largematrix);
-
double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty);
-double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
- gsl_vector_float *OiXty);
-
void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum);
-void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum);
-void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
- gsl_matrix *inverse);
-void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
- gsl_matrix_float *inverse);
-double LULndet(gsl_matrix *LU);
-double LULndet(gsl_matrix_float *LU);
+void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse);
+double LULndet(const gsl_matrix *LU);
void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
const gsl_vector *b, gsl_vector *x);
-void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
- const gsl_vector_float *b, gsl_vector_float *x);
-
bool lapack_ddot(vector<double> &x, vector<double> &y, double &v);
-bool lapack_sdot(vector<float> &x, vector<float> &y, double &v);
#endif
diff --git a/src/param.cpp b/src/param.cpp
index c81eaf5..dce55e0 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -337,7 +337,7 @@ void PARAM::ReadFiles(void) {
trim_individuals(indicator_cvt, ni_max, mode_debug);
if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
- mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
+ mapRS2bp, mapRS2cM, snpInfo, ns_test, mode_debug) == false) {
error = true;
}
gsl_matrix_free(W);
@@ -445,7 +445,7 @@ void PARAM::ReadFiles(void) {
while (!safeGetline(infile, file_name).eof()) {
if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
- mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
+ mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp, mode_debug) == false) {
error = true;
}
@@ -1338,7 +1338,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
}
} else {
if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
- calc_K) == false) {
+ calc_K, mode_debug) == false) {
error = true;
}
}
@@ -1358,7 +1358,7 @@ void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
}
} else {
if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
- ni_test, ns_test) == false) {
+ ni_test, ns_test, mode_debug) == false) {
error = true;
}
}