diff options
Diffstat (limited to 'src/gemma_io.cpp')
| -rw-r--r-- | src/gemma_io.cpp | 111 |
1 files changed, 0 insertions, 111 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 3826cea..b1bc368 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1907,117 +1907,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, return true; } -// Compact version of the above function, using uchar instead of -// gsl_matrix. -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test) { - checkpoint("read-file-geno",file_geno); - debug_msg("entered"); - igzstream infile(file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return false; - } - - Xt.clear(); - vector<unsigned char> Xt_row; - for (size_t i = 0; i < ni_test; i++) { - Xt_row.push_back(0); - } - - string line; - char *ch_ptr; - - if (calc_K == true) { - gsl_matrix_set_zero(K); - } - - gsl_vector *genotype = gsl_vector_safe_alloc(ni_test); - gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test); - double geno, geno_mean; - size_t n_miss; - - size_t ni_total = indicator_idv.size(); - size_t ns_total = indicator_snp.size(); - - size_t c_idv = 0, c_snp = 0; - - auto infilen = file_geno.c_str(); - for (size_t i = 0; i < ns_total; ++i) { - safeGetline(infile, line).eof(); - if (indicator_snp[i] == 0) { - continue; - } - - ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - - c_idv = 0; - geno_mean = 0; - n_miss = 0; - gsl_vector_set_zero(genotype_miss); - for (uint j = 0; j < ni_total; ++j) { - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - if (indicator_idv[j] == 0) { - continue; - } - - if (strcmp(ch_ptr, "NA") == 0) { - gsl_vector_set(genotype_miss, c_idv, 1); - n_miss++; - } else { - geno = atof(ch_ptr); - gsl_vector_set(genotype, c_idv, geno); - geno_mean += geno; - } - c_idv++; - } - - geno_mean /= (double)(ni_test - n_miss); - - for (size_t j = 0; j < genotype->size; ++j) { - if (gsl_vector_get(genotype_miss, j) == 1) { - geno = geno_mean; - } else { - geno = gsl_vector_get(genotype, j); - } - - Xt_row[j] = Double02ToUchar(geno); - gsl_vector_set(genotype, j, (geno - geno_mean)); - } - Xt.push_back(Xt_row); - - if (calc_K == true) { - gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K == true) { - gsl_matrix_scale(K, 1.0 / (double)ns_test); - - for (size_t i = 0; i < genotype->size; ++i) { - for (size_t j = 0; j < i; ++j) { - geno = gsl_matrix_get(K, j, i); - gsl_matrix_set(K, i, j, geno); - } - } - } - - gsl_vector_free(genotype); - gsl_vector_free(genotype_miss); - - infile.clear(); - infile.close(); - - return true; -} - // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, |
