diff options
author | Pjotr Prins | 2017-10-05 13:01:27 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-10-05 13:01:27 +0000 |
commit | 4d5cb9ae3847192c98ff585b9ad48f6103b2417b (patch) | |
tree | 7f3c089d78686eb32e2efddc1a97f93c3c876b87 /src | |
parent | 86323ccaf26ad0a3b706a67a0014dd04b9965823 (diff) | |
download | pangemma-4d5cb9ae3847192c98ff585b9ad48f6103b2417b.tar.gz |
Removed Oxford format as per https://github.com/genetics-statistics/GEMMA/issues/46
Diffstat (limited to 'src')
-rw-r--r-- | src/gemma.cpp | 36 | ||||
-rw-r--r-- | src/io.cpp | 753 | ||||
-rw-r--r-- | src/io.h | 10 | ||||
-rw-r--r-- | src/lm.cpp | 228 | ||||
-rw-r--r-- | src/lm.h | 3 | ||||
-rw-r--r-- | src/lmm.cpp | 281 | ||||
-rw-r--r-- | src/lmm.h | 6 | ||||
-rw-r--r-- | src/mvlmm.cpp | 551 | ||||
-rw-r--r-- | src/param.cpp | 65 | ||||
-rw-r--r-- | src/param.h | 3 |
10 files changed, 5 insertions, 1931 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 2af8f8e..5fbd86c 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -310,11 +310,6 @@ void GEMMA::PrintHelp(size_t option) { cout << " rs#2, base_position, chr_number" << endl; cout << " ..." << endl; - // WJA added. - cout << " -oxford [prefix] " - << " specify input Oxford genotype bgen file prefix." << endl; - cout << " requires: *.bgen, *.sample files" << endl; - cout << " -gxe [filename] " << " specify input file that contains a column of environmental " "factor for g by e tests" @@ -793,18 +788,6 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.file_anno = str; - } - - // WJA added. - else if (strcmp(argv[i], "-oxford") == 0 || - strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) { - if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { - continue; - } - ++i; - str.clear(); - str.assign(argv[i]); - cPar.file_oxford = str; } else if (strcmp(argv[i], "-gxe") == 0) { if (argv[i + 1] == NULL || argv[i + 1][0] == '-') { continue; @@ -2047,8 +2030,6 @@ void GEMMA::BatchRun(PARAM &cPar) { &Y_col.vector); // y is the predictor, not the phenotype } else if (!cPar.file_bfile.empty()) { cLm.AnalyzePlink(W, &Y_col.vector); - } else if (!cPar.file_oxford.empty()) { - cLm.Analyzebgen(W, &Y_col.vector); } else { cLm.AnalyzeBimbam(W, &Y_col.vector); } @@ -2763,17 +2744,12 @@ void GEMMA::BatchRun(PARAM &cPar) { &Y_col.vector, env); } } - // WJA added - else if (!cPar.file_oxford.empty()) { - cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); + if (cPar.file_gxe.empty()) { + cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, cPar.setGWASnps); } else { - if (cPar.file_gxe.empty()) { - cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, cPar.setGWASnps); - } else { - cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, - &Y_col.vector, env); - } + cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W, + &Y_col.vector, env); } cLmm.WriteFiles(); @@ -2788,8 +2764,6 @@ void GEMMA::BatchRun(PARAM &cPar) { } else { cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env); } - } else if (!cPar.file_oxford.empty()) { - cMvlmm.Analyzebgen(U, eval, UtW, UtY); } else { if (cPar.file_gxe.empty()) { cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY); @@ -2274,759 +2274,6 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read, return true; } -// WJA Added -// Read Oxford sample file. -bool ReadFile_sample(const string &file_sample, - vector<vector<int>> &indicator_pheno, - vector<vector<double>> &pheno, - const vector<size_t> &p_column, vector<int> &indicator_cvt, - vector<vector<double>> &cvt, size_t &n_cvt) { - debug_msg("entered"); - indicator_pheno.clear(); - pheno.clear(); - indicator_cvt.clear(); - - igzstream infile(file_sample.c_str(), igzstream::in); - - if (!infile) { - cout << "error! fail to open sample file: " << file_sample << endl; - return false; - } - - string line; - char *ch_ptr; - - string id; - double p, d; - - vector<double> pheno_row; - vector<int> ind_pheno_row; - int flag_na = 0; - - size_t num_cols = 0; - size_t num_p_in_file = 0; - size_t num_cvt_in_file = 0; - - map<size_t, size_t> mapP2c; - for (size_t i = 0; i < p_column.size(); i++) { - mapP2c[p_column[i]] = i; - pheno_row.push_back(-9); - ind_pheno_row.push_back(0); - } - - // Read header line1. - if (!safeGetline(infile, line).eof()) { - ch_ptr = strtok((char *)line.c_str(), " \t"); - if (strcmp(ch_ptr, "ID_1") != 0) { - return false; - } - ch_ptr = strtok(NULL, " \t"); - if (strcmp(ch_ptr, "ID_2") != 0) { - return false; - } - ch_ptr = strtok(NULL, " \t"); - if (strcmp(ch_ptr, "missing") != 0) { - return false; - } - while (ch_ptr != NULL) { - num_cols++; - ch_ptr = strtok(NULL, " \t"); - } - num_cols--; - } - - vector<map<uint32_t, size_t>> cvt_factor_levels; - - char col_type[num_cols]; - - // Read header line2. - if (!safeGetline(infile, line).eof()) { - ch_ptr = strtok((char *)line.c_str(), " \t"); - if (strcmp(ch_ptr, "0") != 0) { - return false; - } - ch_ptr = strtok(NULL, " \t"); - if (strcmp(ch_ptr, "0") != 0) { - return false; - } - ch_ptr = strtok(NULL, " \t"); - if (strcmp(ch_ptr, "0") != 0) { - return false; - } - size_t it = 0; - ch_ptr = strtok(NULL, " \t"); - if (ch_ptr != NULL) - while (ch_ptr != NULL) { - col_type[it++] = ch_ptr[0]; - if (ch_ptr[0] == 'D') { - cvt_factor_levels.push_back(map<uint32_t, size_t>()); - num_cvt_in_file++; - } - if (ch_ptr[0] == 'C') { - num_cvt_in_file++; - } - if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) { - num_p_in_file++; - } - ch_ptr = strtok(NULL, " \t"); - } - } - - while (!safeGetline(infile, line).eof()) { - - ch_ptr = strtok((char *)line.c_str(), " \t"); - - for (int it = 0; it < 3; it++) { - ch_ptr = strtok(NULL, " \t"); - } - - size_t i = 0; - size_t p_i = 0; - size_t fac_cvt_i = 0; - - while (i < num_cols) { - - if ((col_type[i] == 'P') || (col_type[i] == 'B')) { - if (mapP2c.count(p_i + 1) != 0) { - if (strcmp(ch_ptr, "NA") == 0) { - ind_pheno_row[mapP2c[p_i + 1]] = 0; - pheno_row[mapP2c[p_i + 1]] = -9; - } else { - p = atof(ch_ptr); - ind_pheno_row[mapP2c[p_i + 1]] = 1; - pheno_row[mapP2c[p_i + 1]] = p; - } - } - p_i++; - } - if (col_type[i] == 'D') { - - // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL - // IS INTEGRAL i.e for atoi error. - if (strcmp(ch_ptr, "NA") != 0) { - uint32_t level = atoi(ch_ptr); - if (cvt_factor_levels[fac_cvt_i].count(level) == 0) { - cvt_factor_levels[fac_cvt_i][level] = - cvt_factor_levels[fac_cvt_i].size(); - } - } - fac_cvt_i++; - } - - ch_ptr = strtok(NULL, " \t"); - i++; - } - - indicator_pheno.push_back(ind_pheno_row); - pheno.push_back(pheno_row); - } - - // Close and reopen the file. - infile.close(); - infile.clear(); - - if (num_cvt_in_file > 0) { - igzstream infile2(file_sample.c_str(), igzstream::in); - - if (!infile2) { - cout << "error! fail to open sample file: " << file_sample << endl; - return false; - } - - // Skip header. - safeGetline(infile2, line); - safeGetline(infile2, line); - - // Pull in the covariates now we now the number of - // factor levels. - while (!safeGetline(infile2, line).eof()) { - - vector<double> v_d; - flag_na = 0; - ch_ptr = strtok((char *)line.c_str(), " \t"); - - for (int it = 0; it < 3; it++) { - ch_ptr = strtok(NULL, " \t"); - } - - size_t i = 0; - size_t fac_cvt_i = 0; - size_t num_fac_levels; - while (i < num_cols) { - - if (col_type[i] == 'C') { - if (strcmp(ch_ptr, "NA") == 0) { - flag_na = 1; - d = -9; - } else { - d = atof(ch_ptr); - } - - v_d.push_back(d); - } - - if (col_type[i] == 'D') { - - // NOTE THIS DOES NOT CHECK TO BE SURE - // LEVEL IS INTEGRAL i.e for atoi error. - num_fac_levels = cvt_factor_levels[fac_cvt_i].size(); - if (num_fac_levels > 1) { - if (strcmp(ch_ptr, "NA") == 0) { - flag_na = 1; - for (size_t it = 0; it < num_fac_levels - 1; it++) { - v_d.push_back(-9); - } - } else { - uint32_t level = atoi(ch_ptr); - for (size_t it = 0; it < num_fac_levels - 1; it++) { - cvt_factor_levels[fac_cvt_i][level] == it + 1 - ? v_d.push_back(1.0) - : v_d.push_back(0.0); - } - } - } - fac_cvt_i++; - } - - ch_ptr = strtok(NULL, " \t"); - i++; - } - - if (flag_na == 0) { - indicator_cvt.push_back(1); - } else { - indicator_cvt.push_back(0); - } - cvt.push_back(v_d); - } - - if (indicator_cvt.empty()) { - n_cvt = 0; - } else { - flag_na = 0; - for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) { - if (indicator_cvt[i] == 0) { - continue; - } - - if (flag_na == 0) { - flag_na = 1; - n_cvt = cvt[i].size(); - } - if (flag_na != 0 && n_cvt != cvt[i].size()) { - cout << "error! number of covariates in row " << i - << " do not match other rows." << endl; - return false; - } - } - } - - infile2.close(); - infile2.clear(); - } - return true; -} - -// WJA Added. -// Read bgen file, the first time. -bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test) { - - debug_msg("entered"); - indicator_snp.clear(); - - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return false; - } - - gsl_vector *genotype = gsl_vector_alloc(W->size1); - gsl_vector *genotype_miss = gsl_vector_alloc(W->size1); - gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); - gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); - gsl_vector *Wtx = gsl_vector_alloc(W->size2); - gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2); - gsl_permutation *pmt = gsl_permutation_alloc(W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp(WtW, pmt, &sig); - LUInvert(WtW, pmt, WtWi); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - bool LongIds = bgen_flags & 0x4; - - if (!LongIds) { - return false; - } - - infile.ignore(bgen_snp_block_offset); - - ns_test = 0; - - size_t ns_total = static_cast<size_t>(bgen_nsnps); - - snpInfo.clear(); - string rs; - long int b_pos; - string chr; - string major; - string minor; - string id; - - double v_x, v_w; - int c_idv = 0; - - double maf, geno, geno_old; - size_t n_miss; - size_t n_0, n_1, n_2; - int flag_poly; - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - // Total number of samples in phenotype file. - size_t ni_total = indicator_idv.size(); - - // Number of samples to use in test. - size_t ni_test = 0; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - - for (size_t i = 0; i < ni_total; ++i) { - ni_test += indicator_idv[i]; - } - - for (size_t t = 0; t < ns_total; ++t) { - - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - // Should we switch according to MAF? - minor = bgen_B_allele; - major = bgen_A_allele; - b_pos = static_cast<long int>(bgen_SNP_pos); - - uint16_t unzipped_data[3 * bgen_N]; - - if (setSnps.size() != 0 && setSnps.count(rs) == 0) { - SNPINFO sInfo = { - "-9", rs, -9, -9, minor, major, static_cast<size_t>(-9), - -9, (long int)-9}; - - snpInfo.push_back(sInfo); - indicator_snp.push_back(0); - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - maf = 0; - n_miss = 0; - flag_poly = 0; - geno_old = -9; - n_0 = 0; - n_1 = 0; - n_2 = 0; - c_idv = 0; - gsl_vector_set_zero(genotype_miss); - for (size_t i = 0; i < bgen_N; ++i) { - - // CHECK this set correctly! - if (indicator_idv[i] == 0) { - continue; - } - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - - // CHECK 0.1 OK. - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(genotype_miss, c_idv, 1); - n_miss++; - c_idv++; - continue; - } - - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - if (geno >= 0 && geno <= 0.5) { - n_0++; - } - if (geno > 0.5 && geno < 1.5) { - n_1++; - } - if (geno >= 1.5 && geno <= 2.0) { - n_2++; - } - - gsl_vector_set(genotype, c_idv, geno); - - // CHECK WHAT THIS DOES. - if (flag_poly == 0) { - geno_old = geno; - flag_poly = 2; - } - if (flag_poly == 2 && geno != geno_old) { - flag_poly = 1; - } - - maf += geno; - - c_idv++; - } - - maf /= 2.0 * static_cast<double>(ni_test - n_miss); - - SNPINFO sInfo = {chr, rs, -9, b_pos, - minor, major, n_miss, (double)n_miss / (double)ni_test, - maf}; - snpInfo.push_back(sInfo); - - if ((double)n_miss / (double)ni_test > miss_level) { - indicator_snp.push_back(0); - continue; - } - - if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) { - indicator_snp.push_back(0); - continue; - } - - if (flag_poly != 1) { - indicator_snp.push_back(0); - continue; - } - - if (hwe_level != 0 && maf_level != -1) { - if (CalcHWE(n_0, n_2, n_1) < hwe_level) { - indicator_snp.push_back(0); - continue; - } - } - - // Filter SNP if it is correlated with W - // unless W has only one column, of 1s. - for (size_t i = 0; i < genotype->size; ++i) { - if (gsl_vector_get(genotype_miss, i) == 1) { - geno = maf * 2.0; - gsl_vector_set(genotype, i, geno); - } - } - - gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx); - gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - gsl_blas_ddot(genotype, genotype, &v_x); - gsl_blas_ddot(Wtx, WtWiWtx, &v_w); - - if (W->size2 != 1 && v_w / v_x >= r2_level) { - indicator_snp.push_back(0); - continue; - } - - indicator_snp.push_back(1); - ns_test++; - } - - return true; -} - -// Read oxford genotype file and calculate kinship matrix. -bool bgenKin(const string &file_oxford, vector<int> &indicator_snp, - const int k_mode, const int display_pace, gsl_matrix *matrix_kin) { - debug_msg("entered"); - string file_bgen = file_oxford; - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return false; - } - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - double genotype; - - size_t n_miss; - double d, geno_mean, geno_var; - - size_t ni_total = matrix_kin->size1; - gsl_vector *geno = gsl_vector_alloc(ni_total); - gsl_vector *geno_miss = gsl_vector_alloc(ni_total); - - size_t ns_test = 0; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - - if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) { - ProgressBar("Reading bgen SNPs ", t, indicator_snp.size() - 1); - } - - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3 * bgen_N]; - - if (indicator_snp[t] == 0) { - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - geno_mean = 0.0; - n_miss = 0; - geno_var = 0.0; - gsl_vector_set_all(geno_miss, 0); - - for (size_t i = 0; i < bgen_N; ++i) { - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - // WJA - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(geno_miss, i, 0.0); - n_miss++; - } else { - - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - - gsl_vector_set(geno, i, genotype); - gsl_vector_set(geno_miss, i, 1.0); - geno_mean += genotype; - geno_var += genotype * genotype; - } - } - - geno_mean /= (double)(ni_total - n_miss); - geno_var += geno_mean * geno_mean * (double)n_miss; - geno_var /= (double)ni_total; - geno_var -= geno_mean * geno_mean; - - for (size_t i = 0; i < ni_total; ++i) { - if (gsl_vector_get(geno_miss, i) == 0) { - gsl_vector_set(geno, i, geno_mean); - } - } - - gsl_vector_add_constant(geno, -1.0 * geno_mean); - - if (geno_var != 0) { - if (k_mode == 1) { - gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin); - } else if (k_mode == 2) { - gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin); - } else { - cout << "Unknown kinship mode." << endl; - } - } - - ns_test++; - } - cout << endl; - - gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test); - - for (size_t i = 0; i < ni_total; ++i) { - for (size_t j = 0; j < i; ++j) { - d = gsl_matrix_get(matrix_kin, j, i); - gsl_matrix_set(matrix_kin, i, j, d); - } - } - - gsl_vector_free(geno); - gsl_vector_free(geno_miss); - - infile.close(); - infile.clear(); - - return true; -} - // Read header to determine which column contains which item. bool ReadHeader_io(const string &line, HEADER &header) { debug_msg("entered"); @@ -176,16 +176,6 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni); void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat, gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni); - -// WJA added. -bool bgenKin(const string &file_geno, vector<int> &indicator_snp, - const int k_mode, const int display_pace, gsl_matrix *matrix_kin); -bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, - const gsl_matrix *W, vector<int> &indicator_idv, - vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, - const double &maf_level, const double &miss_level, - const double &hwe_level, const double &r2_level, - size_t &ns_test); bool ReadFile_sample(const string &file_sample, vector<vector<int>> &indicator_pheno, vector<vector<double>> &pheno, @@ -55,8 +55,6 @@ void LM::CopyFromParam(PARAM &cPar) { file_out = cPar.file_out; path_out = cPar.path_out; file_gene = cPar.file_gene; - // WJA added - file_oxford = cPar.file_oxford; time_opt = 0.0; @@ -381,232 +379,6 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) { return; } -// WJA added -void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) { - debug_msg("entering"); - string file_bgen = file_oxford + ".bgen"; - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return; - } - - clock_t time_start = clock(); - - string line; - char *ch_ptr; - - double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate some basic quantities. - double yPwy, xPwy, xPwx; - double df = (double)W->size1 - (double)W->size2 - 1.0; - - gsl_vector *x = gsl_vector_alloc(W->size1); - gsl_vector *x_miss = gsl_vector_alloc(W->size1); - - gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); - gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); - gsl_vector *Wty = gsl_vector_alloc(W->size2); - gsl_vector *Wtx = gsl_vector_alloc(W->size2); - gsl_permutation *pmt = gsl_permutation_alloc(W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp(WtW, pmt, &sig); - LUInvert(WtW, pmt, WtWi); - - gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB; - double bgen_geno_prob_BB, bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness " - << "threshold of 10%" << std::endl; - - // Start reading genotypes and analyze. - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); - } - - // Read SNP header. - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3 * bgen_N]; - - if (indicator_snp[t] == 0) { - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - x_mean = 0.0; - c_phen = 0; - n_miss = 0; - gsl_vector_set_zero(x_miss); - for (size_t i = 0; i < bgen_N; ++i) { - if (indicator_idv[i] == 0) { - continue; - } - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - - // WJA - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean += geno; - } - c_phen++; - } - - x_mean /= static_cast<double>(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - if (gsl_vector_get(x_miss, i) == 0) { - gsl_vector_set(x, i, x_mean); - } - geno = gsl_vector_get(x, i); - } - - // Calculate statistics. - time_start = clock(); - - gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, - p_lrt, p_score); - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0}; - sumStat.push_back(SNPs); - } - cout << endl; - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_vector_free(Wty); - gsl_vector_free(Wtx); - gsl_permutation_free(pmt); - - infile.close(); - infile.clear(); - - return; -} - void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) { debug_msg("entering"); igzstream infile(file_geno.c_str(), igzstream::in); @@ -67,9 +67,6 @@ public: void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x); void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y); void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y); - // WJA added. - void Analyzebgen(const gsl_matrix *W, const gsl_vector *y); - void WriteFiles(); }; diff --git a/src/lmm.cpp b/src/lmm.cpp index 1193700..50fb64c 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -56,9 +56,6 @@ void LMM::CopyFromParam(PARAM &cPar) { path_out = cPar.path_out; file_gene = cPar.file_gene; - // WJA added. - file_oxford = cPar.file_oxford; - l_min = cPar.l_min; l_max = cPar.l_max; n_region = cPar.n_region; @@ -1630,284 +1627,6 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, return; } -// WJA added. -void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y) { - debug_msg("entering"); - string file_bgen = file_oxford + ".bgen"; - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return; - } - - clock_t time_start = clock(); - double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0; - double p_lrt = 0, p_score = 0; - double logl_H1 = 0.0; - int n_miss, c_phen; - double geno, x_mean; - - // Calculate basic quantities. - size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - - gsl_vector *x = gsl_vector_alloc(U->size1); - gsl_vector *x_miss = gsl_vector_alloc(U->size1); - gsl_vector *Utx = gsl_vector_alloc(U->size2); - gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index); - gsl_vector *ab = gsl_vector_alloc(n_index); - - // Create a large matrix. - size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - gsl_matrix_set_zero(Uab); - CalcUab(UtW, Uty, Uab); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; - double bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness " - << "threshold of 10%" << std::endl; - - // Start reading genotypes and analyze. - size_t c = 0, t_last = 0; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (indicator_snp[t] == 0) { - continue; - } - t_last++; - } - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); - } - if (indicator_snp[t] == 0) { - continue; - } - - // Read SNP header. - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3 * bgen_N]; - - if (indicator_snp[t] == 0) { - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - x_mean = 0.0; - c_phen = 0; - n_miss = 0; - gsl_vector_set_zero(x_miss); - for (size_t i = 0; i < bgen_N; ++i) { - if (indicator_idv[i] == 0) { - continue; - } - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - - // WJA. - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean += geno; - } - c_phen++; - } - - x_mean /= static_cast<double>(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - if (gsl_vector_get(x_miss, i) == 0) { - gsl_vector_set(x, i, x_mean); - } - geno = gsl_vector_get(x, i); - } - - gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize); - gsl_vector_memcpy(&Xlarge_col.vector, x); - c++; - - if (c % msize == 0 || c == t_last) { - size_t l = 0; - if (c % msize == 0) { - l = msize; - } else { - l = c % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); - - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(Utx, &UtXlarge_col.vector); - - CalcUab(UtW, Uty, Utx, Uab); - - time_start = clock(); - FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0}; - - // 3 is before 1. - if (a_mode == 3 || a_mode == 4) { - CalcRLScore(l_mle_null, param1, beta, se, p_score); - } - - if (a_mode == 1 || a_mode == 4) { - CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, - logl_H1); - CalcRLWald(lambda_remle, param1, beta, se, p_wald); - } - - if (a_mode == 2 || a_mode == 4) { - CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1); - } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - SUMSTAT SNPs = {beta, se, lambda_remle, lambda_mle, - p_wald, p_lrt, p_score, logl_H1}; - sumStat.push_back(SNPs); - } - } - } - cout << endl; - - gsl_vector_free(x); - gsl_vector_free(x_miss); - gsl_vector_free(Utx); - gsl_matrix_free(Uab); - gsl_vector_free(ab); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - infile.close(); - infile.clear(); - - return; -} - void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, @@ -53,8 +53,6 @@ public: string path_out; string file_gene; - // WJA added - string file_oxford; // LMM related parameters double l_min; @@ -94,10 +92,6 @@ public: void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y); - // WJA added. - void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_vector *Uty, - const gsl_matrix *W, const gsl_vector *y); void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index c5efb6e..88df111 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -54,7 +54,6 @@ void MVLMM::CopyFromParam(PARAM &cPar) { file_bfile = cPar.file_bfile; file_geno = cPar.file_geno; - file_oxford = cPar.file_oxford; file_out = cPar.file_out; path_out = cPar.path_out; @@ -2950,556 +2949,6 @@ double PCRT(const size_t mode, const size_t d_size, const double p_value, return p_crt; } -// WJA added. -void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval, - const gsl_matrix *UtW, const gsl_matrix *UtY) { - debug_msg("entering"); - string file_bgen = file_oxford + ".bgen"; - ifstream infile(file_bgen.c_str(), ios::binary); - if (!infile) { - cout << "error reading bgen file:" << file_bgen << endl; - return; - } - - clock_t time_start = clock(); - time_UtX = 0; - time_opt = 0; - - string line; - - // Create a large matrix. - size_t msize = LMM_BATCH_SIZE; - gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize); - gsl_matrix_set_zero(Xlarge); - - double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0; - double crt_a, crt_b, crt_c; - int n_miss, c_phen; - double geno, x_mean; - size_t c = 0; - size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2; - - size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2; - - // Large matrices for EM. - gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size); - - // Large matrices for NR. Each dxd block is H_k^{-1}. - gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size); - - // Each column is H_k^{-1}y_k. - gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size); - - // Each dcxdc block is x_k\otimes H_k^{-1}. - gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size); - gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2); - gsl_vector *x = gsl_vector_alloc(n_size); - gsl_vector *x_miss = gsl_vector_alloc(n_size); - - gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size); - gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size); - gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size); - gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size); - gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1); - gsl_vector *beta = gsl_vector_alloc(d_size); - gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size); - - // Null estimates for initial values. - gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size); - gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size); - gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1); - gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size); - - gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size); - gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size); - gsl_matrix_view xHi_all_sub = - gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size); - - gsl_matrix_transpose_memcpy(Y, UtY); - - gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW); - - gsl_vector_view X_row = gsl_matrix_row(X, c_size); - gsl_vector_set_zero(&X_row.vector); - gsl_vector_view B_col = gsl_matrix_column(B, c_size); - gsl_vector_set_zero(&B_col.vector); - - MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, - l_max, n_region, V_g, V_e, &B_sub.matrix); - logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, - OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, - V_e, &B_sub.matrix); - logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, - &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, - crt_c); - MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, - se_B_null); - - c = 0; - Vg_remle_null.clear(); - Ve_remle_null.clear(); - for (size_t i = 0; i < d_size; i++) { - for (size_t j = i; j < d_size; j++) { - Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j)); - Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j)); - VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c)); - VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); - c++; - } - } - beta_remle_null.clear(); - se_beta_remle_null.clear(); - for (size_t i = 0; i < se_B_null->size1; i++) { - for (size_t j = 0; j < se_B_null->size2; j++) { - beta_remle_null.push_back(gsl_matrix_get(B, i, j)); - se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j)); - } - } - logl_remle_H0 = logl_H0; - - cout.setf(std::ios_base::fixed, std::ios_base::floatfield); - cout.precision(4); - - cout << "REMLE estimate for Vg in the null model: " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - cout << gsl_matrix_get(V_g, i, j) << "\t"; - } - cout << endl; - } - cout << "se(Vg): " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - c = GetIndex(i, j, d_size); - cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; - } - cout << endl; - } - cout << "REMLE estimate for Ve in the null model: " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - cout << gsl_matrix_get(V_e, i, j) << "\t"; - } - cout << endl; - } - cout << "se(Ve): " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - c = GetIndex(i, j, d_size); - cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; - } - cout << endl; - } - cout << "REMLE likelihood = " << logl_H0 << endl; - - logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, - OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, - V_e, &B_sub.matrix); - logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, - &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, - crt_c); - MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, - se_B_null); - - c = 0; - Vg_mle_null.clear(); - Ve_mle_null.clear(); - for (size_t i = 0; i < d_size; i++) { - for (size_t j = i; j < d_size; j++) { - Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j)); - Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j)); - VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c)); - VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size)); - c++; - } - } - beta_mle_null.clear(); - se_beta_mle_null.clear(); - for (size_t i = 0; i < se_B_null->size1; i++) { - for (size_t j = 0; j < se_B_null->size2; j++) { - beta_mle_null.push_back(gsl_matrix_get(B, i, j)); - se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j)); - } - } - logl_mle_H0 = logl_H0; - - cout << "MLE estimate for Vg in the null model: " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - cout << gsl_matrix_get(V_g, i, j) << "\t"; - } - cout << endl; - } - cout << "se(Vg): " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - c = GetIndex(i, j, d_size); - cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t"; - } - cout << endl; - } - cout << "MLE estimate for Ve in the null model: " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - cout << gsl_matrix_get(V_e, i, j) << "\t"; - } - cout << endl; - } - cout << "se(Ve): " << endl; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = 0; j <= i; j++) { - c = GetIndex(i, j, d_size); - cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t"; - } - cout << endl; - } - cout << "MLE likelihood = " << logl_H0 << endl; - - vector<double> v_beta, v_Vg, v_Ve, v_Vbeta; - for (size_t i = 0; i < d_size; i++) { - v_beta.push_back(0.0); - } - for (size_t i = 0; i < d_size; i++) { - for (size_t j = i; j < d_size; j++) { - v_Vg.push_back(0.0); - v_Ve.push_back(0.0); - v_Vbeta.push_back(0.0); - } - } - - gsl_matrix_memcpy(V_g_null, V_g); - gsl_matrix_memcpy(V_e_null, V_e); - gsl_matrix_memcpy(B_null, B); - - // Read in header. - uint32_t bgen_snp_block_offset; - uint32_t bgen_header_length; - uint32_t bgen_nsamples; - uint32_t bgen_nsnps; - uint32_t bgen_flags; - infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4); - infile.read(reinterpret_cast<char *>(&bgen_header_length), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4); - bgen_snp_block_offset -= 4; - infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4); - bgen_snp_block_offset -= 4; - infile.ignore(4 + bgen_header_length - 20); - bgen_snp_block_offset -= 4 + bgen_header_length - 20; - infile.read(reinterpret_cast<char *>(&bgen_flags), 4); - bgen_snp_block_offset -= 4; - bool CompressedSNPBlocks = bgen_flags & 0x1; - - infile.ignore(bgen_snp_block_offset); - - double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB; - double bgen_geno_prob_non_miss; - - uint32_t bgen_N; - uint16_t bgen_LS; - uint16_t bgen_LR; - uint16_t bgen_LC; - uint32_t bgen_SNP_pos; - uint32_t bgen_LA; - std::string bgen_A_allele; - uint32_t bgen_LB; - std::string bgen_B_allele; - uint32_t bgen_P; - size_t unzipped_data_size; - string id; - string rs; - string chr; - std::cout << "Warning: WJA hard coded SNP missingness threshold " - << "of 10%" << std::endl; - - // Start reading genotypes and analyze. - size_t csnp = 0, t_last = 0; - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (indicator_snp[t] == 0) { - continue; - } - t_last++; - } - for (size_t t = 0; t < indicator_snp.size(); ++t) { - if (t % d_pace == 0 || t == (ns_total - 1)) { - ProgressBar("Reading SNPs ", t, ns_total - 1); - } - if (indicator_snp[t] == 0) { - continue; - } - - // Read SNP header. - id.clear(); - rs.clear(); - chr.clear(); - bgen_A_allele.clear(); - bgen_B_allele.clear(); - - infile.read(reinterpret_cast<char *>(&bgen_N), 4); - infile.read(reinterpret_cast<char *>(&bgen_LS), 2); - - id.resize(bgen_LS); - infile.read(&id[0], bgen_LS); - - infile.read(reinterpret_cast<char *>(&bgen_LR), 2); - rs.resize(bgen_LR); - infile.read(&rs[0], bgen_LR); - - infile.read(reinterpret_cast<char *>(&bgen_LC), 2); - chr.resize(bgen_LC); - infile.read(&chr[0], bgen_LC); - - infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4); - - infile.read(reinterpret_cast<char *>(&bgen_LA), 4); - bgen_A_allele.resize(bgen_LA); - infile.read(&bgen_A_allele[0], bgen_LA); - - infile.read(reinterpret_cast<char *>(&bgen_LB), 4); - bgen_B_allele.resize(bgen_LB); - infile.read(&bgen_B_allele[0], bgen_LB); - - uint16_t unzipped_data[3 * bgen_N]; - - if (indicator_snp[t] == 0) { - if (CompressedSNPBlocks) - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - else - bgen_P = 6 * bgen_N; - - infile.ignore(static_cast<size_t>(bgen_P)); - - continue; - } - - if (CompressedSNPBlocks) { - - infile.read(reinterpret_cast<char *>(&bgen_P), 4); - uint8_t zipped_data[bgen_P]; - - unzipped_data_size = 6 * bgen_N; - - infile.read(reinterpret_cast<char *>(zipped_data), bgen_P); - - int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data), - reinterpret_cast<uLongf *>(&unzipped_data_size), - reinterpret_cast<Bytef *>(zipped_data), - static_cast<uLong>(bgen_P)); - assert(result == Z_OK); - - } else { - - bgen_P = 6 * bgen_N; - infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P); - } - - x_mean = 0.0; - c_phen = 0; - n_miss = 0; - gsl_vector_set_zero(x_miss); - for (size_t i = 0; i < bgen_N; ++i) { - if (indicator_idv[i] == 0) { - continue; - } - - bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0; - bgen_geno_prob_AB = - static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0; - bgen_geno_prob_BB = - static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0; - - // WJA. - bgen_geno_prob_non_miss = - bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB; - if (bgen_geno_prob_non_miss < 0.9) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - - bgen_geno_prob_AA /= bgen_geno_prob_non_miss; - bgen_geno_prob_AB /= bgen_geno_prob_non_miss; - bgen_geno_prob_BB /= bgen_geno_prob_non_miss; - - geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB; - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean += geno; - } - c_phen++; - } - - x_mean /= static_cast<double>(ni_test - n_miss); - - for (size_t i = 0; i < ni_test; ++i) { - if (gsl_vector_get(x_miss, i) == 0) { - gsl_vector_set(x, i, x_mean); - } - } - - gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize); - gsl_vector_memcpy(&Xlarge_col.vector, x); - csnp++; - - if (csnp % msize == 0 || csnp == t_last) { - size_t l = 0; - if (csnp % msize == 0) { - l = msize; - } else { - l = csnp % msize; - } - - gsl_matrix_view Xlarge_sub = - gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l); - gsl_matrix_view UtXlarge_sub = - gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l); - - time_start = clock(); - eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, - &UtXlarge_sub.matrix); - time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - gsl_matrix_set_zero(Xlarge); - - for (size_t i = 0; i < l; i++) { - gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i); - gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector); - - // Initial values. - gsl_matrix_memcpy(V_g, V_g_null); - gsl_matrix_memcpy(V_e, V_e_null); - gsl_matrix_memcpy(B, B_null); - - time_start = clock(); - - // 3 is before 1. - if (a_mode == 3 || a_mode == 4) { - p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, - V_e_null, UltVehiY, beta, Vbeta); - if (p_score < p_nr && crt == 1) { - logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all, - Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c); - } - } - - if (a_mode == 2 || a_mode == 4) { - logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, B); - - // Calculate beta and Vbeta. - p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, - UltVehiY, beta, Vbeta); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); - - if (p_lrt < p_nr) { - logl_H1 = - MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - - // Calculate beta and Vbeta. - p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, - UltVehiY, beta, Vbeta); - p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size); - - if (crt == 1) { - p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c); - } - } - } - - if (a_mode == 1 || a_mode == 4) { - logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat, - E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, - UltVehiE, V_g, V_e, B); - p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, - UltVehiY, beta, Vbeta); - - if (p_wald < p_nr) { - logl_H1 = - MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all, - xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c); - p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, - UltVehiY, beta, Vbeta); - - if (crt == 1) { - p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c); - } - } - } - - time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); - - // Store summary data. - for (size_t i = 0; i < d_size; i++) { - v_beta[i] = gsl_vector_get(beta, i); - } - - c = 0; - for (size_t i = 0; i < d_size; i++) { - for (size_t j = i; j < d_size; j++) { - v_Vg[c] = gsl_matrix_get(V_g, i, j); - v_Ve[c] = gsl_matrix_get(V_e, i, j); - v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j); - c++; - } - } - - MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta}; - sumStat.push_back(SNPs); - } - } - } - cout << endl; - - infile.close(); - infile.clear(); - - gsl_matrix_free(U_hat); - gsl_matrix_free(E_hat); - gsl_matrix_free(OmegaU); - gsl_matrix_free(OmegaE); - gsl_matrix_free(UltVehiY); - gsl_matrix_free(UltVehiBX); - gsl_matrix_free(UltVehiU); - gsl_matrix_free(UltVehiE); - - gsl_matrix_free(Hi_all); - gsl_matrix_free(Hiy_all); - gsl_matrix_free(xHi_all); - gsl_matrix_free(Hessian); - - gsl_vector_free(x); - gsl_vector_free(x_miss); - - gsl_matrix_free(Y); - gsl_matrix_free(X); - gsl_matrix_free(V_g); - gsl_matrix_free(V_e); - gsl_matrix_free(B); - gsl_vector_free(beta); - gsl_matrix_free(Vbeta); - - gsl_matrix_free(V_g_null); - gsl_matrix_free(V_e_null); - gsl_matrix_free(B_null); - gsl_matrix_free(se_B_null); - - gsl_matrix_free(Xlarge); - gsl_matrix_free(UtXlarge); - - return; -} - void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY) { debug_msg("entering"); diff --git a/src/param.cpp b/src/param.cpp index 3b319e9..8452fb8 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -236,37 +236,6 @@ void PARAM::ReadFiles(void) { trim_individuals(indicator_idv, ni_max, mode_debug); - // WJA added. - // Read genotype and phenotype file for bgen format. - if (!file_oxford.empty()) { - file_str = file_oxford + ".sample"; - if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, - indicator_cvt, cvt, n_cvt) == false) { - error = true; - } - if ((indicator_cvt).size() == 0) { - n_cvt = 1; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); - CopyCvt(W); - - file_str = file_oxford + ".bgen"; - if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp, - snpInfo, maf_level, miss_level, hwe_level, r2_level, - ns_test) == false) { - error = true; - } - gsl_matrix_free(W); - - ns_total = indicator_snp.size(); - } - // Read genotype and phenotype file for PLINK format. if (!file_bfile.empty()) { file_str = file_bfile + ".bim"; @@ -741,19 +710,6 @@ void PARAM::CheckParam(void) { } } - if (!file_oxford.empty()) { - str = file_oxford + ".bgen"; - if (stat(str.c_str(), &fileInfo) == -1) { - cout << "error! fail to open .bgen file: " << str << endl; - error = true; - } - str = file_oxford + ".sample"; - if (stat(str.c_str(), &fileInfo) == -1) { - cout << "error! fail to open .sample file: " << str << endl; - error = true; - } - } - if ((!file_geno.empty() || !file_gene.empty())) { str = file_pheno; if (stat(str.c_str(), &fileInfo) == -1) { @@ -864,11 +820,6 @@ void PARAM::CheckParam(void) { flag++; } - // WJA added. - if (!file_oxford.empty()) { - flag++; - } - if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 && a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67) { @@ -949,7 +900,6 @@ void PARAM::CheckParam(void) { enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22, "LOCO only works with LMM and K"); enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)"); - enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)"); enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)"); enforce_msg(!file_anno.empty(), "LOCO requires annotation file (-a switch)"); @@ -1056,14 +1006,6 @@ void PARAM::CheckParam(void) { void PARAM::CheckData(void) { - // WJA NOTE: I added this condition so that covariates can be added - // through sample, probably not exactly what is wanted. - if (file_oxford.empty()) { - if ((file_cvt).empty() || (indicator_cvt).size() == 0) { - n_cvt = 1; - } - } - if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) { cout << "error! the number of pve estimates does not equal to " << "the number of categories in the cat file:" << v_pve.size() << " " @@ -1380,13 +1322,6 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) { false) { error = true; } - } else if (!file_oxford.empty()) { - file_str = file_oxford + ".bgen"; - enforce_msg(loco.empty(), "FIXME: LOCO nyi"); - if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == - false) { - error = true; - } } else { file_str = file_geno; if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, diff --git a/src/param.h b/src/param.h index ff279bd..3976440 100644 --- a/src/param.h +++ b/src/param.h @@ -155,9 +155,6 @@ public: string file_ksnps; // File SNPs for computing K string file_gwasnps; // File SNPs for computing GWAS - // WJA added. - string file_oxford; - // QC-related parameters. double miss_level; double maf_level; |