From 12dc6b3e92a85f3b50cb35f36058e0c97bd081c7 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 28 Nov 2025 14:42:50 +0100 Subject: working on mdb --- src/param.cpp | 63 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/param.cpp b/src/param.cpp index 82121b9..034b25c 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -40,6 +40,7 @@ #include "mathfunc.h" #include "param.h" #include "fastblas.h" +#include "checkpoint.h" using namespace std; @@ -115,6 +116,7 @@ PARAM::~PARAM() { // Read files: obtain ns_total, ng_total, ns_test, ni_test. void PARAM::ReadFiles(void) { + checkpoint_nofile("PARAM::ReadFiles"); string file_str; // Read cat file. @@ -165,6 +167,10 @@ void PARAM::ReadFiles(void) { setKSnps.clear(); } + if (file_geno.find(".mdb") != string::npos) { + is_mdb = true; + } + // For prediction. if (!file_epm.empty()) { if (ReadFile_est(file_epm, est_column, mapRS2est) == false) { @@ -182,10 +188,8 @@ void PARAM::ReadFiles(void) { } } + if (!file_geno.empty()) { - std::string extension = ".mdb"; - if (file_geno.size() >= extension.size() && file_geno.compare(file_geno.size() - extension.size(), extension.size(), extension) == 0) - is_mdb = true; if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { error = true; } @@ -294,7 +298,7 @@ void PARAM::ReadFiles(void) { } // Read genotype and phenotype file for BIMBAM format. - if (!file_geno.empty()) { + if (!is_mdb && !file_geno.empty()) { // Annotation file before genotype file. if (!file_anno.empty()) { @@ -303,7 +307,7 @@ void PARAM::ReadFiles(void) { } } - // Phenotype file before genotype file. + // Phenotype file before genotype file. Already done this(?!) if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { error = true; } @@ -330,6 +334,11 @@ void PARAM::ReadFiles(void) { ns_total = indicator_snp.size(); } + // Check MDB format + if (is_mdb && !file_geno.empty()) { + ns_total = -1; + } + // Read genotype file for multiple PLINK files. if (!file_mbfile.empty()) { igzstream infile(file_mbfile.c_str(), igzstream::in); @@ -393,7 +402,7 @@ void PARAM::ReadFiles(void) { } // Read genotype and phenotype file for multiple BIMBAM files. Note this goes untested. - if (!file_mgeno.empty()) { + if (!is_mdb && !file_mgeno.empty()) { // Annotation file before genotype file. if (!file_anno.empty()) { @@ -485,7 +494,8 @@ void PARAM::ReadFiles(void) { ni_test += indicator_idv[i]; } - enforce_msg(ni_test,"number of analyzed individuals equals 0."); + if (!is_mdb) + enforce_msg(ni_test,"number of analyzed individuals equals 0."); } // For ridge prediction, read phenotype only. @@ -1083,13 +1093,15 @@ void PARAM::CheckData(void) { } } - enforce_msg(ni_test,"number of analyzed individuals equals 0."); + if (!is_mdb) { + enforce_msg(ni_test,"number of analyzed individuals equals 0."); - if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() && - file_study.empty() && file_beta.empty() && file_bf.empty()) { - error = true; - cout << "error! number of analyzed individuals equals 0. " << endl; - return; + if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() && + file_study.empty() && file_beta.empty() && file_bf.empty()) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } } if (a_mode == 43) { @@ -1283,6 +1295,7 @@ void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_ } void PARAM::CalcKin(gsl_matrix *matrix_kin) { + checkpoint_nofile("PARAM::CalcKin"); string file_str; gsl_matrix_set_zero(matrix_kin); @@ -1296,10 +1309,13 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) { } } else { file_str = file_geno; - if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, - matrix_kin, ni_max == 0) == false) { - error = true; - } + if (is_mdb) + error = !BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, + matrix_kin, ni_max == 0) == false); + else + error = !BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace, + matrix_kin, ni_max == 0) == false); + } return; @@ -2056,13 +2072,14 @@ void PARAM::ProcessCvtPhen() { } // Check ni_test. - if (a_mode != M_BSLMM5) - enforce_msg(ni_test,"number of analyzed individuals equals 0."); - if (ni_test == 0 && a_mode != M_BSLMM5) { - error = true; - cout << "error! number of analyzed individuals equals 0. " << endl; + if (!is_mdb) { + if (a_mode != M_BSLMM5) + enforce_msg(ni_test,"number of analyzed individuals equals 0."); + if (ni_test == 0 && a_mode != M_BSLMM5) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + } } - // Check covariates to see if they are correlated with each // other, and to see if the intercept term is included. // After getting ni_test. -- cgit 1.4.1