diff options
| author | Pjotr Prins | 2025-11-30 08:53:10 +0100 |
|---|---|---|
| committer | Pjotr Prins | 2025-11-30 08:53:10 +0100 |
| commit | ab434bbd38a61941bf32afcf91b7cc397462a53b (patch) | |
| tree | 6bddea2b6a706a6f91ec98a654627f3458e84f9f | |
| parent | 254113f4e02d5b423af302e9facb0ed580749ef5 (diff) | |
| download | pangemma-ab434bbd38a61941bf32afcf91b7cc397462a53b.tar.gz | |
Write out K
| -rw-r--r-- | src/gemma.cpp | 11 | ||||
| -rw-r--r-- | src/gemma_io.cpp | 29 | ||||
| -rw-r--r-- | src/gemma_io.h | 2 | ||||
| -rw-r--r-- | src/param.cpp | 5 | ||||
| -rw-r--r-- | src/param.h | 2 |
5 files changed, 17 insertions, 32 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp index 6b42763..1cced77 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -1915,7 +1915,16 @@ void GEMMA::BatchRun(PARAM &cPar) { cout << "Calculating Relatedness Matrix ... " << endl; if (cPar.is_mdb) { - cPar.MdbCalcKin(); + time_start = clock(); + auto K = cPar.MdbCalcKin(); + cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + if (cPar.a_mode == M_KIN) { + cPar.WriteMatrix(K, "cXX"); + } else { // M_KIN2 + cPar.WriteMatrix(K, "sXX"); + } + + gsl_matrix_safe_free(K); return; } diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 8fa0eab..baa44b9 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1477,7 +1477,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) { // Read bimbam mean genotype file and calculate kinship matrix. -int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const vector<int> &indicator_snp) { +gsl_matrix *mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const vector<int> &indicator_snp) { checkpoint("mdb-kin",file_geno); /* Create and open the LMDB environment: */ @@ -1537,20 +1537,10 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v if (t % display_pace == 0) { ProgressBar("Reading SNPs", t, num_markers - 1); } - - // auto tokens = tokenize_whitespace(line,ni_total+3,""); - // auto token_i = tokens.begin(); - // const char *snp = tokens[0]; // first field - // token_i++; // skip snp name - // token_i++; // skip nucleotide field - // token_i++; // skip nucleotide field - string_view key, value; cursor.get(key, value, mdb_fetch); mdb_fetch = MDB_NEXT; - // cout << " key: " << key << endl ; size_t num_floats = value.size() / sizeof(float); - // cout << " num: " << num_floats << endl ; assert(num_floats == ni_total); const float* gs = reinterpret_cast<const float*>(value.data()); size_t n_miss = 0; @@ -1559,23 +1549,11 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v gsl_vector_set_all(geno_miss, 0); for (size_t i = 0; i < ni_total; ++i) { - // enforce_str(token_i != tokens.end(), line + " number of fields"); - // auto field = *token_i; - // auto sfield = std::string(field); - // cout << i << ":" << sfield << "," << endl; - // if (strncmp(field,"NA",2)==0) { // missing value - // gsl_vector_set(geno_miss, i, 0); - // n_miss++; - // } else { double d = gs[i]; - // if (is_strict_mode() && d == 0.0) - // enforce_is_float(std::string(field)); // rule out non NA and non-float fields gsl_vector_set(geno, i, d); gsl_vector_set(geno_miss, i, 1); geno_mean += d; geno_var += d * d; - // } - // token_i++; } geno_mean /= (double)(ni_total - n_miss); @@ -1617,7 +1595,7 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin); write(matrix_kin,"K updated"); } - ProgressBar("Reading SNPs", 100, 100); + ProgressBar("Reading SNPs", num_markers, num_markers); cout << endl; enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test)); @@ -1625,9 +1603,8 @@ int mdb_calc_kin(const string file_geno, bool is_loco, const int k_mode, const v gsl_vector_free(geno); gsl_vector_free(geno_miss); gsl_matrix_free(Xlarge); - gsl_matrix_free(matrix_kin); - return true; + return matrix_kin; } // Read bimbam mean genotype file and calculate kinship matrix. diff --git a/src/gemma_io.h b/src/gemma_io.h index ad13379..de74ada 100644 --- a/src/gemma_io.h +++ b/src/gemma_io.h @@ -87,7 +87,7 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv, void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U); void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval); -int mdb_calc_kin(const string file_geno, const bool is_loco, const int mode, const vector<int> &indicator_snp); +gsl_matrix *mdb_calc_kin(const string file_geno, const bool is_loco, const int mode, const vector<int> &indicator_snp); bool BimbamKin(const string file_geno, const set<string> ksnps, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin, diff --git a/src/param.cpp b/src/param.cpp index 3b74e53..1adee9e 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -1293,9 +1293,8 @@ void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_ return; } -void PARAM::MdbCalcKin() { - error = !mdb_calc_kin(file_geno, is_loco(), a_mode - 20, indicator_snp); - return; +gsl_matrix *PARAM::MdbCalcKin() { + return mdb_calc_kin(file_geno, is_loco(), a_mode - 20, indicator_snp); } void PARAM::CalcKin(gsl_matrix *matrix_kin) { diff --git a/src/param.h b/src/param.h index aa08b5d..7cf4a6c 100644 --- a/src/param.h +++ b/src/param.h @@ -347,7 +347,7 @@ public: void ProcessCvtPhen(); void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag); void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag); - void MdbCalcKin(); + gsl_matrix *MdbCalcKin(); void CalcKin(gsl_matrix *matrix_kin); void CalcS(const map<string, double> &mapRS2wA, const map<string, double> &mapRS2wK, const gsl_matrix *W, |
