diff options
| author | Pjotr Prins | 2025-11-27 10:20:49 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-11-27 10:23:18 +0100 |
| commit | 6bd0ea3e2a245c3f86f66505eec118296ae04d30 (patch) | |
| tree | 04ef903832c459490f508af2dc5ed474c4f7f22a /src | |
| parent | 333bfc9adfff0fcd444a815cfcca27708442fe80 (diff) | |
| download | pangemma-6bd0ea3e2a245c3f86f66505eec118296ae04d30.tar.gz | |
No one uses these uchar versions
Diffstat (limited to 'src')
| -rw-r--r-- | src/gemma_io.cpp | 111 | ||||
| -rw-r--r-- | src/gemma_io.h | 5 | ||||
| -rw-r--r-- | src/param.cpp | 32 | ||||
| -rw-r--r-- | src/param.h | 4 |
4 files changed, 10 insertions, 142 deletions
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 3826cea..b1bc368 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1907,117 +1907,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, return true; } -// Compact version of the above function, using uchar instead of -// gsl_matrix. -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test) { - checkpoint("read-file-geno",file_geno); - debug_msg("entered"); - igzstream infile(file_geno.c_str(), igzstream::in); - if (!infile) { - cout << "error reading genotype file:" << file_geno << endl; - return false; - } - - Xt.clear(); - vector<unsigned char> Xt_row; - for (size_t i = 0; i < ni_test; i++) { - Xt_row.push_back(0); - } - - string line; - char *ch_ptr; - - if (calc_K == true) { - gsl_matrix_set_zero(K); - } - - gsl_vector *genotype = gsl_vector_safe_alloc(ni_test); - gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test); - double geno, geno_mean; - size_t n_miss; - - size_t ni_total = indicator_idv.size(); - size_t ns_total = indicator_snp.size(); - - size_t c_idv = 0, c_snp = 0; - - auto infilen = file_geno.c_str(); - for (size_t i = 0; i < ns_total; ++i) { - safeGetline(infile, line).eof(); - if (indicator_snp[i] == 0) { - continue; - } - - ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - - c_idv = 0; - geno_mean = 0; - n_miss = 0; - gsl_vector_set_zero(genotype_miss); - for (uint j = 0; j < ni_total; ++j) { - ch_ptr = strtok_safe2(NULL, " ,\t",infilen); - if (indicator_idv[j] == 0) { - continue; - } - - if (strcmp(ch_ptr, "NA") == 0) { - gsl_vector_set(genotype_miss, c_idv, 1); - n_miss++; - } else { - geno = atof(ch_ptr); - gsl_vector_set(genotype, c_idv, geno); - geno_mean += geno; - } - c_idv++; - } - - geno_mean /= (double)(ni_test - n_miss); - - for (size_t j = 0; j < genotype->size; ++j) { - if (gsl_vector_get(genotype_miss, j) == 1) { - geno = geno_mean; - } else { - geno = gsl_vector_get(genotype, j); - } - - Xt_row[j] = Double02ToUchar(geno); - gsl_vector_set(genotype, j, (geno - geno_mean)); - } - Xt.push_back(Xt_row); - - if (calc_K == true) { - gsl_blas_dsyr(CblasUpper, 1.0, genotype, K); - } - - c_snp++; - } - - if (calc_K == true) { - gsl_matrix_scale(K, 1.0 / (double)ns_test); - - for (size_t i = 0; i < genotype->size; ++i) { - for (size_t j = 0; j < i; ++j) { - geno = gsl_matrix_get(K, j, i); - gsl_matrix_set(K, i, j, geno); - } - } - } - - gsl_vector_free(genotype); - gsl_vector_free(genotype_miss); - - infile.clear(); - infile.close(); - - return true; -} - // Read bimbam mean genotype file, the second time, recode "mean" // genotype and calculate K. bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, diff --git a/src/gemma_io.h b/src/gemma_io.h index 5c14227..116c1a5 100644 --- a/src/gemma_io.h +++ b/src/gemma_io.h @@ -100,11 +100,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv, bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); -bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv, - vector<int> &indicator_snp, - vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K, const size_t ni_test, - const size_t ns_test); bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, diff --git a/src/param.cpp b/src/param.cpp index f96e9c3..9755579 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -104,7 +104,9 @@ PARAM::PARAM(void) randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200), error(false), ni_subsample(0), n_cvt(1), n_cat(0), n_vc(1), time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), - time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {} + time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) { + is_mdb = false; + } PARAM::~PARAM() { if (gsl_r) @@ -181,12 +183,14 @@ void PARAM::ReadFiles(void) { } if (!file_geno.empty()) { - if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == - false) { + std::string extension = ".mdb"; + if (file_geno.size() >= extension.size() && file_geno.compare(file_geno.size() - extension.size(), extension.size(), extension) == 0) + is_mdb = true; + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { error = true; } - if (CountFileLines(file_geno, ns_total) == false) { + if (!is_mdb && is_check_mode() && CountFileLines(file_geno, ns_total) == false) { error = true; } } @@ -1277,26 +1281,6 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { return; } -void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K) { - string file_str; - - if (!file_bfile.empty()) { - file_str = file_bfile + ".bed"; - if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K, - ni_test, ns_test) == false) { - error = true; - } - } else { - if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K, - ni_test, ns_test) == false) { - error = true; - } - } - - return; -} - void PARAM::CalcKin(gsl_matrix *matrix_kin) { string file_str; diff --git a/src/param.h b/src/param.h index d3ce686..8d2fba3 100644 --- a/src/param.h +++ b/src/param.h @@ -160,6 +160,8 @@ public: string file_ksnps; // File SNPs for computing K string file_gwasnps; // File SNPs for computing GWAS + bool is_mdb; + // QC-related parameters. double miss_level; double maf_level; @@ -337,8 +339,6 @@ public: void CheckData(); void PrintSummary(); void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); - void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, - const bool calc_K); void CheckCvt(); void CopyCvt(gsl_matrix *W); void CopyA(size_t flag, gsl_matrix *A); |
