diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma_io.cpp | 15 | ||||
| -rw-r--r-- | src/param.cpp | 32 | ||||
| -rw-r--r-- | src/param.h | 2 |
3 files changed, 24 insertions, 25 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index ef02429..cfa92d1 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -413,28 +413,28 @@ bool ReadFile_pheno(const string &file_pheno, double p; vector<double> pheno_row; - vector<int> ind_pheno_row; + vector<int> indicator_pheno_row; size_t p_max = *max_element(p_column.begin(), p_column.end()); map<size_t, size_t> mapP2c; for (size_t i = 0; i < p_column.size(); i++) { mapP2c[p_column[i]] = i; pheno_row.push_back(-9); - ind_pheno_row.push_back(0); + indicator_pheno_row.push_back(0); } while (!safeGetline(infile, line).eof()) { ch_ptr = strtok((char *)line.c_str(), " ,\t"); size_t i = 0; while (i < p_max) { - enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match phenotypes in geno file"); + enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match selected columns"); if (mapP2c.count(i + 1) != 0) { if (strcmp(ch_ptr, "NA") == 0) { - ind_pheno_row[mapP2c[i + 1]] = 0; + indicator_pheno_row[mapP2c[i + 1]] = 0; // skip this trait row pheno_row[mapP2c[i + 1]] = -9; } else { p = atof(ch_ptr); - ind_pheno_row[mapP2c[i + 1]] = 1; + indicator_pheno_row[mapP2c[i + 1]] = 1; // use this trait row pheno_row[mapP2c[i + 1]] = p; } } @@ -442,7 +442,7 @@ bool ReadFile_pheno(const string &file_pheno, ch_ptr = strtok(NULL, " ,\t"); } - indicator_pheno.push_back(ind_pheno_row); + indicator_pheno.push_back(indicator_pheno_row); pheno.push_back(pheno_row); } @@ -2422,7 +2422,8 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column, } bool CountFileLines(const string &file_input, size_t &n_lines) { - debug_msg("entered"); + checkpoint("count-lines",file_input); + igzstream infile(file_input.c_str(), igzstream::in); if (!infile) { cout << "error! fail to open file: " << file_input << endl; diff --git a/src/param.cpp b/src/param.cpp index 955da8e..a81d18f 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -163,7 +163,7 @@ void PARAM::ReadFiles(void) { setSnps.clear(); } - // Also read KSNP set. Snps used by GRM. + // Also read KSNP set. Reads all Snps later to be used by GRM. if (!file_ksnps.empty()) { if (ReadFile_snps(file_ksnps, setKSnps) == false) { error = true; @@ -192,10 +192,12 @@ void PARAM::ReadFiles(void) { if (!file_geno.empty()) { + // --- Read (multi-)column phenotypes into pheno and set the NA indicator if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { error = true; } + // --- compute ns_total by readin geno file if (!is_mdb && is_check_mode() && CountFileLines(file_geno, ns_total) == false) { error = true; } @@ -225,7 +227,7 @@ void PARAM::ReadFiles(void) { indicator_idv.push_back(k); } - ns_test = 0; + ns_test = 0; // number of snps to test return; } @@ -283,7 +285,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain // ni_test, save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); // Obtain covariate matrix. auto W1 = gsl_matrix_safe_alloc(ni_test, n_cvt); @@ -316,7 +318,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain // ni_test, save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); // Obtain covariate matrix. auto W2 = gsl_matrix_safe_alloc(ni_test, n_cvt); @@ -335,10 +337,9 @@ void PARAM::ReadFiles(void) { ns_total = indicator_snp.size(); } - - // Check MDB format + // lmdb-type genotype file: if (is_mdb && !file_geno.empty()) { - ns_total = -1; + ns_total = indicator_snp.size(); } // Read genotype file for multiple PLINK files. @@ -376,7 +377,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain // ni_test, save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); // Obtain covariate matrix. W3 = gsl_matrix_safe_alloc(ni_test, n_cvt); @@ -420,7 +421,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain ni_test, // save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); // Obtain covariate matrix. gsl_matrix *W4 = gsl_matrix_safe_alloc(ni_test, n_cvt); @@ -473,7 +474,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain // ni_test, save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); // Obtain covariate matrix. // gsl_matrix *W5 = gsl_matrix_alloc(ni_test, n_cvt); @@ -508,7 +509,7 @@ void PARAM::ReadFiles(void) { // Post-process covariates and phenotypes, obtain // ni_test, save all useful covariates. - ProcessCvtPhen(); + ProcessInclusionIndicators(); } // Compute setKSnps when -loco is passed in @@ -1989,7 +1990,7 @@ void PARAM::CheckCvt() { } // Post-process phenotypes and covariates. -void PARAM::ProcessCvtPhen() { +void PARAM::ProcessInclusionIndicators() { // Convert indicator_pheno to indicator_idv. int k = 1; @@ -2027,11 +2028,8 @@ void PARAM::ProcessCvtPhen() { // Obtain ni_test. ni_test = 0; - for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { - if (indicator_idv[i] == 0) { - continue; - } - ni_test++; + for(auto &i : indicator_idv) { + ni_test += indicator_idv[i]; } // If subsample number is set, perform a random sub-sampling diff --git a/src/param.h b/src/param.h index 7cf4a6c..36c9a9f 100644 --- a/src/param.h +++ b/src/param.h @@ -344,7 +344,7 @@ public: void CopyA(size_t flag, gsl_matrix *A); void CopyGxe(gsl_vector *gxe); void CopyWeight(gsl_vector *w); - void ProcessCvtPhen(); + void ProcessInclusionIndicators(); void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag); void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag); gsl_matrix *MdbCalcKin(); |
