diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/debug.h | 4 | ||||
-rw-r--r-- | src/io.cpp | 51 | ||||
-rw-r--r-- | src/io.h | 6 | ||||
-rw-r--r-- | src/lapack.cpp | 23 | ||||
-rw-r--r-- | src/lapack.h | 29 | ||||
-rw-r--r-- | src/param.cpp | 8 |
6 files changed, 57 insertions, 64 deletions
diff --git a/src/debug.h b/src/debug.h index f47cb4c..e910a25 100644 --- a/src/debug.h +++ b/src/debug.h @@ -36,8 +36,8 @@ inline void fail_at_msg(bool strict, const char *__file, int __line, const char #else // DEBUG -#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; -#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; +#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl; +#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl; #define assert_issue(is_issue, expr) \ ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0)) @@ -133,7 +133,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) { } } -// Read SNP file. +// Read SNP file. A single column of SNP names. bool ReadFile_snps(const string file_snps, set<string> &setSnps) { setSnps.clear(); @@ -148,6 +148,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) { while (getline(infile, line)) { ch_ptr = strtok((char *)line.c_str(), " , \t"); + enforce_msg(ch_ptr,"Problem reading SNP file"); setSnps.insert(ch_ptr); } @@ -157,6 +158,9 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) { return true; } +// Read SNP file using a header. The header determines how the +// values for each row are parsed. A valid header can be, for example, +// RS POS CHR bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { setSnps.clear(); @@ -175,7 +179,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { ReadHeader_io(line, header); if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) { - cout << "missing rs id in the hearder" << endl; + cout << "missing rs id in the header" << endl; } while (!safeGetline(infile, line).eof()) { @@ -185,6 +189,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) { ch_ptr = strtok((char *)line.c_str(), " , \t"); for (size_t i = 0; i < header.coln; i++) { + enforce_msg(ch_ptr,"Problem reading SNP file"); if (header.rs_col != 0 && header.rs_col == i + 1) { rs = ch_ptr; } @@ -250,7 +255,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) { return true; } -// Read bimbam annotation file. +// Read bimbam annotation file which consists of rows of SNP, POS and CHR bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM) { @@ -264,33 +269,39 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr, } string line; - char *ch_ptr; - - string rs; - long int b_pos; - string chr; - double cM; while (!safeGetline(infile, line).eof()) { - ch_ptr = strtok((char *)line.c_str(), " , \t"); - rs = ch_ptr; + const char *ch_ptr = strtok((char *)line.c_str(), " , \t"); + enforce_str(ch_ptr, line + " Bad RS format"); + const string rs = ch_ptr; + enforce_str(rs != "", line + " Bad RS format"); + ch_ptr = strtok(NULL, " , \t"); + enforce_str(ch_ptr, line + " Bad format"); + ulong b_pos; if (strcmp(ch_ptr, "NA") == 0) { b_pos = -9; } else { b_pos = atol(ch_ptr); } + enforce_str(b_pos,line + " Bad pos format (is zero)"); + + string chr; ch_ptr = strtok(NULL, " , \t"); if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) { chr = "-9"; } else { chr = ch_ptr; + enforce_str(chr != "", line + " Bad chr format"); } + + double cM; ch_ptr = strtok(NULL, " , \t"); if (ch_ptr == NULL || strcmp(ch_ptr, "NA") == 0) { cM = -9; } else { cM = atof(ch_ptr); + enforce_str(b_pos, line + "Bad cM format (is zero)"); } mapRS2chr[rs] = chr; @@ -377,9 +388,9 @@ bool ReadFile_pheno(const string &file_pheno, while (!safeGetline(infile, line).eof()) { ch_ptr = strtok((char *)line.c_str(), " , \t"); - size_t i = 0; while (i < p_max) { + enforce_msg(ch_ptr,"Wrong number of phenotypes"); if (mapP2c.count(i + 1) != 0) { if (strcmp(ch_ptr, "NA") == 0) { ind_pheno_row[mapP2c[i + 1]] = 0; @@ -597,7 +608,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, const double &r2_level, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, - size_t &ns_test) { + size_t &ns_test, bool debug) { indicator_snp.clear(); snpInfo.clear(); @@ -618,7 +629,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); int sig; LUDecomp(WtW, pmt, &sig); - LUInvert(WtW, pmt, WtWi); + + LUInvert(WtW, pmt, WtWi); // @@ double v_x, v_w; int c_idv = 0; @@ -667,6 +679,11 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, } if (mapRS2bp.count(rs) == 0) { + if (debug) { + std::string msg = "Can't figure out position for "; + msg += rs; + debug_msg(msg); + } chr = "-9"; b_pos = -9; cM = -9; @@ -1617,7 +1634,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, // genotype and calculate K. bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, - const bool calc_K) { + const bool calc_K, bool debug) { igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -1721,7 +1738,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, - const size_t ns_test) { + const size_t ns_test, bool debug) { igzstream infile(file_geno.c_str(), igzstream::in); if (!infile) { cout << "error reading genotype file:" << file_geno << endl; @@ -2979,7 +2996,7 @@ bool bgenKin(const string &file_oxford, vector<int> &indicator_snp, bool ReadHeader_io(const string &line, HEADER &header) { string rs_ptr[] = {"rs", "RS", "snp", "SNP", "snps", "SNPS", "snpid", "SNPID", "rsid", "RSID", "MarkerName"}; - set<string> rs_set(rs_ptr, rs_ptr + 11); + set<string> rs_set(rs_ptr, rs_ptr + 11); // create a set of 11 items string chr_ptr[] = {"chr", "CHR"}; set<string> chr_set(chr_ptr, chr_ptr + 2); string pos_ptr[] = { @@ -64,7 +64,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps, const double &r2_level, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, - size_t &ns_test); + size_t &ns_test, bool debug); bool ReadFile_bed(const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, @@ -94,7 +94,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp, bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, - const bool calc_K); + const bool calc_K, bool debug); bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); @@ -102,7 +102,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, - const size_t ns_test); + const size_t ns_test, bool debug); bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, diff --git a/src/lapack.cpp b/src/lapack.cpp index b7c6f74..0dbfed6 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -24,6 +24,7 @@ #include <vector> #include "debug.h" +#include "lapack.h" #include "mathfunc.h" using namespace std; @@ -520,7 +521,7 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, // LU decomposition. void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { - gsl_linalg_LU_decomp(LU, p, signum); + enforce_gsl(gsl_linalg_LU_decomp(LU, p, signum)); return; } @@ -551,11 +552,13 @@ void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) { } */ -// LU invert. -void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, - gsl_matrix *inverse) { - gsl_linalg_LU_invert(LU, p, inverse); - return; +// LU invert. Returns inverse. Note that GSL does not recommend using +// this function +void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse) { + auto det = LULndet(LU); + assert(det != 1.0); + + enforce_gsl(gsl_linalg_LU_invert(LU, p, ret_inverse)); } /* @@ -590,10 +593,8 @@ void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, */ // LU lndet. -double LULndet(gsl_matrix *LU) { - double d; - d = gsl_linalg_LU_lndet(LU); - return d; +double LULndet(const gsl_matrix *LU) { + return gsl_linalg_LU_lndet((gsl_matrix *)LU); } /* @@ -620,7 +621,7 @@ double LULndet(gsl_matrix_float *LU) { // LU solve. void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, const gsl_vector *b, gsl_vector *x) { - gsl_linalg_LU_solve(LU, p, b, x); + enforce_gsl(gsl_linalg_LU_solve(LU, p, b, x)); return; } diff --git a/src/lapack.h b/src/lapack.h index 9596667..9115201 100644 --- a/src/lapack.h +++ b/src/lapack.h @@ -23,47 +23,22 @@ using namespace std; -void lapack_float_cholesky_decomp(gsl_matrix_float *A); void lapack_cholesky_decomp(gsl_matrix *A); -void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b, - gsl_vector_float *x); void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x); -void lapack_sgemm(char *TransA, char *TransB, float alpha, - const gsl_matrix_float *A, const gsl_matrix_float *B, - float beta, gsl_matrix_float *C); void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A, const gsl_matrix *B, double beta, gsl_matrix *C); -void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval, - gsl_matrix_float *evec, - const size_t flag_largematrix); void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, const size_t flag_largematrix); - double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix); double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix); -double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, - gsl_vector_float *eval, const size_t flag_largematrix); - double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty); -double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, - gsl_vector_float *OiXty); - void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum); -void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum); -void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, - gsl_matrix *inverse); -void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, - gsl_matrix_float *inverse); -double LULndet(gsl_matrix *LU); -double LULndet(gsl_matrix_float *LU); +void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse); +double LULndet(const gsl_matrix *LU); void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, const gsl_vector *b, gsl_vector *x); -void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p, - const gsl_vector_float *b, gsl_vector_float *x); - bool lapack_ddot(vector<double> &x, vector<double> &y, double &v); -bool lapack_sdot(vector<float> &x, vector<float> &y, double &v); #endif diff --git a/src/param.cpp b/src/param.cpp index c81eaf5..dce55e0 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -337,7 +337,7 @@ void PARAM::ReadFiles(void) { trim_individuals(indicator_cvt, ni_max, mode_debug); if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, - mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { + mapRS2bp, mapRS2cM, snpInfo, ns_test, mode_debug) == false) { error = true; } gsl_matrix_free(W); @@ -445,7 +445,7 @@ void PARAM::ReadFiles(void) { while (!safeGetline(infile, file_name).eof()) { if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, - mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) { + mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp, mode_debug) == false) { error = true; } @@ -1338,7 +1338,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { } } else { if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, - calc_K) == false) { + calc_K, mode_debug) == false) { error = true; } } @@ -1358,7 +1358,7 @@ void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, } } else { if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K, - ni_test, ns_test) == false) { + ni_test, ns_test, mode_debug) == false) { error = true; } } |