diff options
Diffstat (limited to 'src/param.cpp')
-rw-r--r-- | src/param.cpp | 4138 |
1 files changed, 2113 insertions, 2025 deletions
diff --git a/src/param.cpp b/src/param.cpp index 413d517..2572bbb 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -16,1322 +16,1357 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> +#include <algorithm> +#include <cmath> +#include <cstring> #include <fstream> +#include <iostream> #include <string> -#include <cstring> #include <sys/stat.h> -#include <cmath> -#include <algorithm> -#include "gsl/gsl_randist.h" +#include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" -#include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "gsl/gsl_blas.h" +#include "gsl/gsl_randist.h" +#include "gsl/gsl_vector.h" #include "eigenlib.h" +#include "io.h" #include "mathfunc.h" #include "param.h" -#include "io.h" using namespace std; -PARAM::PARAM(void): -mode_silence (false), a_mode (0), k_mode(1), d_pace (100000), -file_out("result"), path_out("./output/"), -miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999), -l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001), -nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0), -pheno_mean(0), noconstrain (false), -h_min(-1), h_max(-1), h_scale(-1), -rho_min(0.0), rho_max(1.0), rho_scale(-1), -logp_min(0.0), logp_max(0.0), logp_scale(-1), -h_ngrid(10), rho_ngrid(10), -s_min(0), s_max(300), -w_step(100000), s_step(1000000), -r_pace(10), w_pace(1000), -n_accept(0), -n_mh(10), -geo_mean(2000.0), -randseed(-1), -window_cm(0), window_bp(0), window_ns(0), n_block(200), -error(false), -ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0), -time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), -time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) -{} +PARAM::PARAM(void) + : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000), + file_out("result"), path_out("./output/"), miss_level(0.05), + maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5), + n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001), + em_iter(10000), nr_iter(100), crt(0), pheno_mean(0), noconstrain(false), + h_min(-1), h_max(-1), h_scale(-1), rho_min(0.0), rho_max(1.0), + rho_scale(-1), logp_min(0.0), logp_max(0.0), logp_scale(-1), h_ngrid(10), + rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000), + r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0), + randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200), + error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0), + time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), + time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {} // Read files: obtain ns_total, ng_total, ns_test, ni_test. -void PARAM::ReadFiles (void) { - string file_str; - - // Read cat file. - if (!file_mcat.empty()) { - if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;} - } else if (!file_cat.empty()) { - if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;} - } - - // Read snp weight files. - if (!file_wcat.empty()) { - if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;} - } - if (!file_wsnp.empty()) { - if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;} - } - - // Count number of kinship files. - if (!file_mk.empty()) { - if (CountFileLines (file_mk, n_vc)==false) {error=true;} - } - - // Read SNP set. - if (!file_snps.empty()) { - if (ReadFile_snps (file_snps, setSnps)==false) {error=true;} - } else { - setSnps.clear(); - } - - // For prediction. - if (!file_epm.empty()) { - if (ReadFile_est (file_epm, est_column, mapRS2est)==false) { - error=true; - } - if (!file_bfile.empty()) { - file_str=file_bfile+".bim"; - if (ReadFile_bim (file_str, snpInfo)==false) { - error=true; - } - file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) { - error=true; - } - } - - if (!file_geno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, - pheno, p_column)==false) { - error=true; - } - - if (CountFileLines (file_geno, ns_total)==false) { - error=true; - } - } - - if (!file_ebv.empty() ) { - if (ReadFile_column (file_ebv, indicator_bv, - vec_bv, 1)==false) { - error=true; - } - } - - if (!file_log.empty() ) { - if (ReadFile_log (file_log, pheno_mean)==false) { - error=true; - } - } - - // Convert indicator_pheno to indicator_idv. - int k=1; - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - ns_test=0; - - return; - } - - // Read covariates before the genotype files. - if (!file_cvt.empty() ) { - if (ReadFile_cvt (file_cvt, indicator_cvt, - cvt, n_cvt)==false) { - error=true; - } - if ((indicator_cvt).size()==0) { - n_cvt=1; - } - } else { - n_cvt=1; - } - - if (!file_gxe.empty() ) { - if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) { - error=true; - } - } - if (!file_weight.empty() ) { - if (ReadFile_column (file_weight, indicator_weight, - weight, 1)==false) { - error=true; - } - } - - // WJA added. - // Read genotype and phenotype file for bgen format. - if (!file_oxford.empty()) { - file_str=file_oxford+".sample"; - if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, - indicator_cvt, cvt, n_cvt)==false) { - error=true; - } - if ((indicator_cvt).size()==0) { - n_cvt=1; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - file_str=file_oxford+".bgen"; - if (ReadFile_bgen (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test)==false) { - error=true; - } - gsl_matrix_free(W); - - ns_total=indicator_snp.size(); - } - - // Read genotype and phenotype file for PLINK format. - if (!file_bfile.empty()) { - file_str=file_bfile+".bim"; - snpInfo.clear(); - if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - - // If both fam file and pheno files are used, use - // phenotypes inside the pheno file. - if (!file_pheno.empty()) { - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) {error=true;} - } else { - file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) {error=true;} - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test) == false) { - error=true; - } - gsl_matrix_free(W); - ns_total=indicator_snp.size(); - } - - // Read genotype and phenotype file for BIMBAM format. - if (!file_geno.empty()) { - - // Annotation file before genotype file. - if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, - mapRS2cM)==false) { - error=true; - } - } - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column) == false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - if (ReadFile_geno (file_geno, setSnps, W, indicator_idv, - indicator_snp, maf_level, miss_level, - hwe_level, r2_level, mapRS2chr, mapRS2bp, - mapRS2cM, snpInfo, ns_test)==false) { - error=true; - } - gsl_matrix_free(W); - ns_total=indicator_snp.size(); - } - - // Read genotype file for multiple PLINK files. - if (!file_mbfile.empty()) { - igzstream infile (file_mbfile.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open mbfile file: " << file_mbfile<<endl; - return; - } - - string file_name; - size_t t=0, ns_test_tmp=0; - gsl_matrix *W; - while (!safeGetline(infile, file_name).eof()) { - file_str=file_name+".bim"; - - if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - - if (t==0) { - - // If both fam file and pheno files are used, use - // phenotypes inside the pheno file. - if (!file_pheno.empty()) { - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - } else { - file_str=file_name+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, - mapID2num, p_column)==false) { - error=true; - } - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - } - - file_str=file_name+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, - indicator_snp, snpInfo, maf_level, - miss_level, hwe_level, r2_level, - ns_test_tmp)==false) { - error=true; - } - mindicator_snp.push_back(indicator_snp); - msnpInfo.push_back(snpInfo); - ns_test+=ns_test_tmp; - ns_total+=indicator_snp.size(); - - t++; - } - - gsl_matrix_free(W); - - infile.close(); - infile.clear(); - } - - // Read genotype and phenotype file for multiple BIMBAM files. - if (!file_mgeno.empty()) { - - // Annotation file before genotype file. - if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, - mapRS2cM)==false) { - error=true; - } - } - - // Phenotype file before genotype file. - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain ni_test, - // save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - igzstream infile (file_mgeno.c_str(), igzstream::in); - if (!infile) { - cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl; - return; - } - - string file_name; - size_t ns_test_tmp; - while (!safeGetline(infile, file_name).eof()) { - if (ReadFile_geno (file_name, setSnps, W, indicator_idv, - indicator_snp, maf_level, miss_level, - hwe_level, r2_level, mapRS2chr, mapRS2bp, - mapRS2cM, snpInfo, ns_test_tmp)==false) { - error=true; - } - - mindicator_snp.push_back(indicator_snp); - msnpInfo.push_back(snpInfo); - ns_test+=ns_test_tmp; - ns_total+=indicator_snp.size(); - } - - gsl_matrix_free(W); - - infile.close(); - infile.clear(); - } - - if (!file_gene.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) {error=true;} - - // Convert indicator_pheno to indicator_idv. - int k=1; - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - - // Obtain covariate matrix. - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - CopyCvt (W); - - if (ReadFile_gene (file_gene, vec_read, snpInfo, - ng_total)==false) { - error=true; - } - } - - // Read is after gene file. - if (!file_read.empty() ) { - if (ReadFile_column (file_read, indicator_read, - vec_read, 1)==false) { - error=true; - } - - ni_test=0; - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_read[i]; - ni_test+=indicator_idv[i]; - } - - if (ni_test==0) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<< - endl; - return; - } - } - - // For ridge prediction, read phenotype only. - if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, - p_column)==false) { - error=true; - } - - // Post-process covariates and phenotypes, obtain - // ni_test, save all useful covariates. - ProcessCvtPhen(); - } - return; +void PARAM::ReadFiles(void) { + string file_str; + + // Read cat file. + if (!file_mcat.empty()) { + if (ReadFile_mcat(file_mcat, mapRS2cat, n_vc) == false) { + error = true; + } + } else if (!file_cat.empty()) { + if (ReadFile_cat(file_cat, mapRS2cat, n_vc) == false) { + error = true; + } + } + + // Read snp weight files. + if (!file_wcat.empty()) { + if (ReadFile_wsnp(file_wcat, n_vc, mapRS2wcat) == false) { + error = true; + } + } + if (!file_wsnp.empty()) { + if (ReadFile_wsnp(file_wsnp, mapRS2wsnp) == false) { + error = true; + } + } + + // Count number of kinship files. + if (!file_mk.empty()) { + if (CountFileLines(file_mk, n_vc) == false) { + error = true; + } + } + + // Read SNP set. + if (!file_snps.empty()) { + if (ReadFile_snps(file_snps, setSnps) == false) { + error = true; + } + } else { + setSnps.clear(); + } + + // For prediction. + if (!file_epm.empty()) { + if (ReadFile_est(file_epm, est_column, mapRS2est) == false) { + error = true; + } + if (!file_bfile.empty()) { + file_str = file_bfile + ".bim"; + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + file_str = file_bfile + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) == + false) { + error = true; + } + } + + if (!file_geno.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + + if (CountFileLines(file_geno, ns_total) == false) { + error = true; + } + } + + if (!file_ebv.empty()) { + if (ReadFile_column(file_ebv, indicator_bv, vec_bv, 1) == false) { + error = true; + } + } + + if (!file_log.empty()) { + if (ReadFile_log(file_log, pheno_mean) == false) { + error = true; + } + } + + // Convert indicator_pheno to indicator_idv. + int k = 1; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + ns_test = 0; + + return; + } + + // Read covariates before the genotype files. + if (!file_cvt.empty()) { + if (ReadFile_cvt(file_cvt, indicator_cvt, cvt, n_cvt) == false) { + error = true; + } + if ((indicator_cvt).size() == 0) { + n_cvt = 1; + } + } else { + n_cvt = 1; + } + + if (!file_gxe.empty()) { + if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) { + error = true; + } + } + if (!file_weight.empty()) { + if (ReadFile_column(file_weight, indicator_weight, weight, 1) == false) { + error = true; + } + } + + // WJA added. + // Read genotype and phenotype file for bgen format. + if (!file_oxford.empty()) { + file_str = file_oxford + ".sample"; + if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, + indicator_cvt, cvt, n_cvt) == false) { + error = true; + } + if ((indicator_cvt).size() == 0) { + n_cvt = 1; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + file_str = file_oxford + ".bgen"; + if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + + ns_total = indicator_snp.size(); + } + + // Read genotype and phenotype file for PLINK format. + if (!file_bfile.empty()) { + file_str = file_bfile + ".bim"; + snpInfo.clear(); + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. + if (!file_pheno.empty()) { + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + } else { + file_str = file_bfile + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, p_column) == + false) { + error = true; + } + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + ns_total = indicator_snp.size(); + } + + // Read genotype and phenotype file for BIMBAM format. + if (!file_geno.empty()) { + + // Annotation file before genotype file. + if (!file_anno.empty()) { + if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) { + error = true; + } + } + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp, + maf_level, miss_level, hwe_level, r2_level, mapRS2chr, + mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { + error = true; + } + gsl_matrix_free(W); + ns_total = indicator_snp.size(); + } + + // Read genotype file for multiple PLINK files. + if (!file_mbfile.empty()) { + igzstream infile(file_mbfile.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open mbfile file: " << file_mbfile << endl; + return; + } + + string file_name; + size_t t = 0, ns_test_tmp = 0; + gsl_matrix *W; + while (!safeGetline(infile, file_name).eof()) { + file_str = file_name + ".bim"; + + if (ReadFile_bim(file_str, snpInfo) == false) { + error = true; + } + + if (t == 0) { + + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. + if (!file_pheno.empty()) { + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == + false) { + error = true; + } + } else { + file_str = file_name + ".fam"; + if (ReadFile_fam(file_str, indicator_pheno, pheno, mapID2num, + p_column) == false) { + error = true; + } + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + } + + file_str = file_name + ".bed"; + if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp, + snpInfo, maf_level, miss_level, hwe_level, r2_level, + ns_test_tmp) == false) { + error = true; + } + mindicator_snp.push_back(indicator_snp); + msnpInfo.push_back(snpInfo); + ns_test += ns_test_tmp; + ns_total += indicator_snp.size(); + + t++; + } + + gsl_matrix_free(W); + + infile.close(); + infile.clear(); + } + + // Read genotype and phenotype file for multiple BIMBAM files. + if (!file_mgeno.empty()) { + + // Annotation file before genotype file. + if (!file_anno.empty()) { + if (ReadFile_anno(file_anno, mapRS2chr, mapRS2bp, mapRS2cM) == false) { + error = true; + } + } + + // Phenotype file before genotype file. + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain ni_test, + // save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + igzstream infile(file_mgeno.c_str(), igzstream::in); + if (!infile) { + cout << "error! fail to open mgeno file: " << file_mgeno << endl; + return; + } + + string file_name; + size_t ns_test_tmp; + while (!safeGetline(infile, file_name).eof()) { + if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp, + maf_level, miss_level, hwe_level, r2_level, mapRS2chr, + mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) { + error = true; + } + + mindicator_snp.push_back(indicator_snp); + msnpInfo.push_back(snpInfo); + ns_test += ns_test_tmp; + ns_total += indicator_snp.size(); + } + + gsl_matrix_free(W); + + infile.close(); + infile.clear(); + } + + if (!file_gene.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Convert indicator_pheno to indicator_idv. + int k = 1; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + + // Obtain covariate matrix. + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + CopyCvt(W); + + if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) { + error = true; + } + } + + // Read is after gene file. + if (!file_read.empty()) { + if (ReadFile_column(file_read, indicator_read, vec_read, 1) == false) { + error = true; + } + + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_read[i]; + ni_test += indicator_idv[i]; + } + + if (ni_test == 0) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + } + + // For ridge prediction, read phenotype only. + if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) { + if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) { + error = true; + } + + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. + ProcessCvtPhen(); + } + return; } -void PARAM::CheckParam (void) { - struct stat fileInfo; - string str; - - // Check parameters. - if (k_mode!=1 && k_mode!=2) { - cout<<"error! unknown kinship/relatedness input mode: "<< - k_mode<<endl; - error=true; - } - if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 - && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && - a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && - a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && - a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && - a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && - a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && - a_mode!=71) { - cout<<"error! unknown analysis mode: "<<a_mode<< - ". make sure -gk or -eigen or -lmm or -bslmm -predict or " << - "-calccov is sepcified correctly."<<endl; - error=true; - } - if (miss_level>1) { - cout<<"error! missing level needs to be between 0 and 1. " << - "current value = "<<miss_level<<endl; - error=true; - } - if (maf_level>0.5) { - cout<<"error! maf level needs to be between 0 and 0.5. " << - "current value = "<<maf_level<<endl; - error=true; - } - if (hwe_level>1) { - cout<<"error! hwe level needs to be between 0 and 1. " << - "current value = "<<hwe_level<<endl; - error=true; - } - if (r2_level>1) { - cout<<"error! r2 level needs to be between 0 and 1. " << - "current value = "<<r2_level<<endl; - error=true; - } - - if (l_max<l_min) { - cout<<"error! maximum lambda value must be larger than the " << - "minimal value. current values = "<<l_max<<" and "<<l_min<<endl; - error=true; - } - if (h_max<h_min) { - cout<<"error! maximum h value must be larger than the minimal "<< - "value. current values = "<<h_max<<" and "<<h_min<<endl; - error=true; - } - if (s_max<s_min) { - cout<<"error! maximum s value must be larger than the minimal "<< - "value. current values = "<<s_max<<" and "<<s_min<<endl; - error=true; - } - if (rho_max<rho_min) { - cout<<"error! maximum rho value must be larger than the"<< - "minimal value. current values = "<<rho_max<<" and "<< - rho_min<<endl; - error=true; - } - if (logp_max<logp_min) { - cout<<"error! maximum logp value must be larger than the "<< - "minimal value. current values = "<<logp_max/log(10)<< - " and "<<logp_min/log(10)<<endl; - error=true; - } - - if (h_max>1) { - cout<<"error! h values must be bewtween 0 and 1. current "<< - "values = "<<h_max<<" and "<<h_min<<endl; - error=true; - } - if (rho_max>1) { - cout<<"error! rho values must be between 0 and 1. current "<< - "values = "<<rho_max<<" and "<<rho_min<<endl; - error=true; - } - if (logp_max>0) { - cout<<"error! maximum logp value must be smaller than 0. "<< - "current values = "<<logp_max/log(10)<<" and "<< - logp_min/log(10)<<endl; - error=true; - } - if (l_max<l_min) { - cout<<"error! maximum lambda value must be larger than the "<< - "minimal value. current values = "<<l_max<<" and "<<l_min<<endl; - error=true; - } - - if (h_scale>1.0) { - cout<<"error! hscale value must be between 0 and 1. "<< - "current value = "<<h_scale<<endl; - error=true; - } - if (rho_scale>1.0) { - cout<<"error! rscale value must be between 0 and 1. "<< - "current value = "<<rho_scale<<endl; - error=true; - } - if (logp_scale>1.0) { - cout<<"error! pscale value must be between 0 and 1. "<< - "current value = "<<logp_scale<<endl; - error=true; - } - - if (rho_max==1 && rho_min==1 && a_mode==12) { - cout<<"error! ridge regression does not support a rho "<< - "parameter. current values = "<<rho_max<<" and "<<rho_min<<endl; - error=true; - } - - if (window_cm<0) { - cout<<"error! windowcm values must be non-negative. "<< - "current values = "<<window_cm<<endl; - error=true; - } - - if (window_cm==0 && window_bp==0 && window_ns==0) { - window_bp=1000000; - } - - // Check p_column, and (no need to) sort p_column into - // ascending order. - if (p_column.size()==0) { - p_column.push_back(1); - } else { - for (size_t i=0; i<p_column.size(); i++) { - for (size_t j=0; j<i; j++) { - if (p_column[i]==p_column[j]) { - cout<<"error! identical phenotype "<< - "columns: "<<p_column[i]<<endl; - error= - true;} - } - } - } - - n_ph=p_column.size(); - - // Only LMM option (and one prediction option) can deal with - // multiple phenotypes and no gene expression files. - if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && - a_mode!=43) { - cout<<"error! the current analysis mode "<<a_mode<< - " can not deal with multiple phenotypes."<<endl; - error=true; - } - if (n_ph>1 && !file_gene.empty() ) { - cout<<"error! multiple phenotype analysis option not "<< - "allowed with gene expression files. "<<endl; - error=true; - } - - if (p_nr>1) { - cout<<"error! pnr value must be between 0 and 1. current value = "<< - p_nr<<endl; - error=true; - } - - //check est_column - if (est_column.size()==0) { - if (file_ebv.empty()) { - est_column.push_back(2); - est_column.push_back(5); - est_column.push_back(6); - est_column.push_back(7); - } else { - est_column.push_back(2); - est_column.push_back(0); - est_column.push_back(6); - est_column.push_back(7); - } - } - - if (est_column.size()!=4) { - cout<<"error! -en not followed by four numbers. current number = "<< - est_column.size()<<endl; - error=true; - } - if (est_column[0]==0) { - cout<<"error! -en rs column can not be zero. current number = "<< - est_column.size()<<endl; - error=true; - } - - // Check if files are compatible with each other, and if files exist. - if (!file_bfile.empty()) { - str=file_bfile+".bim"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .bim file: "<<str<<endl; - error=true; - } - str=file_bfile+".bed"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .bed file: "<<str<<endl; - error=true; - } - str=file_bfile+".fam"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .fam file: "<<str<<endl; - error=true; - } - } - - if (!file_oxford.empty()) { - str=file_oxford+".bgen"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .bgen file: "<<str<<endl; - error=true; - } - str=file_oxford+".sample"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .sample file: "<<str<<endl; - error=true; - } - } - - if ((!file_geno.empty() || !file_gene.empty()) ) { - str=file_pheno; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open phenotype file: "<<str<<endl; - error=true; - } - } - - str=file_geno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mean genotype file: "<<str<<endl; - error=true; - } - - str=file_gene; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open gene expression file: "<<str<<endl; - error=true; - } - - str=file_cat; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open category file: "<<str<<endl; - error=true; - } - - str=file_mcat; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mcategory file: "<<str<<endl; - error=true; - } - - str=file_beta; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open beta file: "<<str<<endl; - error=true; - } - - str=file_cor; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open correlation file: "<<str<<endl; - error=true; - } - - if (!file_study.empty()) { - str=file_study+".Vq.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .Vq.txt file: "<<str<<endl; - error=true; - } - str=file_study+".q.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .q.txt file: "<<str<<endl; - error=true; - } - str=file_study+".size.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .size.txt file: "<<str<<endl; - error=true; - } - } - - if (!file_ref.empty()) { - str=file_ref+".S.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .S.txt file: "<<str<<endl; - error=true; - } - str=file_ref+".size.txt"; - if (stat(str.c_str(),&fileInfo)==-1) { - cout<<"error! fail to open .size.txt file: "<<str<<endl; - error=true; - } - } - - str=file_mstudy; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mstudy file: "<<str<<endl; - error=true; - } - - str=file_mref; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mref file: "<<str<<endl; - error=true; - } - - str=file_mgeno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mgeno file: "<<str<<endl; - error=true; - } - - str=file_mbfile; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open mbfile file: "<<str<<endl; - error=true; - } - - size_t flag=0; - if (!file_bfile.empty()) {flag++;} - if (!file_geno.empty()) {flag++;} - if (!file_gene.empty()) {flag++;} - - // WJA added. - if (!file_oxford.empty()) {flag++;} - - if (flag!=1 && a_mode!=15 && a_mode!=27 && a_mode!=28 && - a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62 && - a_mode!=63 && a_mode!=66 && a_mode!=67) { - cout<<"error! either plink binary files, or bimbam mean"<< - "genotype files, or gene expression files are required."<<endl; - error=true; - } - - if (file_pheno.empty() && (a_mode==43 || a_mode==5) ) { - cout<<"error! phenotype file is required."<<endl; error=true; - } - - if (a_mode==61 || a_mode==62) { - if (!file_beta.empty()) { - if ( file_mbfile.empty() && file_bfile.empty() && - file_mgeno.empty() && file_geno.empty() && - file_mref.empty() && file_ref.empty() ) { - cout<<"error! missing genotype file or ref/mref file."<<endl; - error=true; - } - } else if (!file_pheno.empty()) { - if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && - file_mk.empty() ) { - cout<<"error! missing relatedness file. "<<endl; error=true; - } - } else if ( (file_mstudy.empty() && file_study.empty()) || - (file_mref.empty() && file_ref.empty() ) ) { - cout<<"error! either beta file, or phenotype files or "<< - "study/ref mstudy/mref files are required."<<endl; - error=true; - } - } - - - if (a_mode==63) { - if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && - file_mk.empty() ) { - cout<<"error! missing relatedness file. "<<endl; error=true; - } - if ( file_pheno.empty() ) { - cout<<"error! missing phenotype file."<<endl; error=true; - } - } - - if (a_mode==66 || a_mode==67) { - if (file_beta.empty() || - (file_mbfile.empty() && file_bfile.empty() && - file_mgeno.empty() && file_geno.empty()) ) { - cout<<"error! missing beta file or genotype file."<<endl; - error=true; - } - } - - - if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) { - cout<<"error! estimated parameter file also requires genotype "<< - "file."<<endl; - error=true; - } - if (!file_ebv.empty() && file_kin.empty()) { - cout<<"error! estimated breeding value file also requires "<< - "relatedness file."<<endl; - error=true; - } - - if (!file_log.empty() && pheno_mean!=0) { - cout<<"error! either log file or mu value can be provide."<<endl; - error=true; - } - - str=file_snps; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open snps file: "<<str<<endl; - error=true; - } - - str=file_log; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open log file: "<<str<<endl; - error=true; - } - - str=file_anno; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open annotation file: "<<str<<endl; - error=true; - } - - str=file_kin; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open relatedness matrix file: "<<str<<endl; - error=true; - } - - str=file_mk; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open relatedness matrix file: "<<str<<endl; - error=true; - } - - str=file_cvt; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open covariates file: "<<str<<endl; - error=true; - } - - str=file_gxe; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open environmental covariate file: "<< - str<<endl; - error=true; - } - - str=file_weight; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open the residual weight file: "<<str<<endl; - error=true; - } - - str=file_epm; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open estimated parameter file: "<<str<<endl; - error=true; - } - - str=file_ebv; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open estimated breeding value file: "<< - str<<endl; - error=true; - } - - str=file_read; - if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) { - cout<<"error! fail to open total read file: "<<str<<endl; - error=true; - } - - // Check if files are compatible with analysis mode. - if (k_mode==2 && !file_geno.empty() ) { - cout<<"error! use \"-km 1\" when using bimbam mean genotype "<< - "file. "<<endl; - error=true; - } - - if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 || - a_mode==5 || a_mode==31) && - (file_kin.empty() && (file_ku.empty()||file_kd.empty()))) { - cout<<"error! missing relatedness file. "<<endl; - error=true; - } - - if ((a_mode==43) && file_kin.empty()) { - cout<<"error! missing relatedness file. -predict option requires "<< - "-k option to provide a relatedness file."<<endl; - error=true; - } - - if ((a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14 || - a_mode==16) && !file_cvt.empty()) { - cout<<"error! -bslmm option does not support covariates files."<< - endl; - error=true; - } - - if (a_mode==41 || a_mode==42) { - if (!file_cvt.empty() ) { - cout<<"error! -predict option does not support "<< - "covariates files."<<endl; - error=true; - } - if (file_epm.empty() ) { - cout<<"error! -predict option requires estimated "<< - "parameter files."<<endl; - error=true; - } - } - - if (file_beta.empty() && (a_mode==27 || a_mode==28) ) { - cout<<"error! beta effects file is required."<<endl; - error=true; - } - - return; +void PARAM::CheckParam(void) { + struct stat fileInfo; + string str; + + // Check parameters. + if (k_mode != 1 && k_mode != 2) { + cout << "error! unknown kinship/relatedness input mode: " << k_mode << endl; + error = true; + } + if (a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && a_mode != 5 && + a_mode != 11 && a_mode != 12 && a_mode != 13 && a_mode != 14 && + a_mode != 15 && a_mode != 21 && a_mode != 22 && a_mode != 25 && + a_mode != 26 && a_mode != 27 && a_mode != 28 && a_mode != 31 && + a_mode != 41 && a_mode != 42 && a_mode != 43 && a_mode != 51 && + a_mode != 52 && a_mode != 53 && a_mode != 54 && a_mode != 61 && + a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67 && + a_mode != 71) { + cout << "error! unknown analysis mode: " << a_mode + << ". make sure -gk or -eigen or -lmm or -bslmm -predict or " + << "-calccov is sepcified correctly." << endl; + error = true; + } + if (miss_level > 1) { + cout << "error! missing level needs to be between 0 and 1. " + << "current value = " << miss_level << endl; + error = true; + } + if (maf_level > 0.5) { + cout << "error! maf level needs to be between 0 and 0.5. " + << "current value = " << maf_level << endl; + error = true; + } + if (hwe_level > 1) { + cout << "error! hwe level needs to be between 0 and 1. " + << "current value = " << hwe_level << endl; + error = true; + } + if (r2_level > 1) { + cout << "error! r2 level needs to be between 0 and 1. " + << "current value = " << r2_level << endl; + error = true; + } + + if (l_max < l_min) { + cout << "error! maximum lambda value must be larger than the " + << "minimal value. current values = " << l_max << " and " << l_min + << endl; + error = true; + } + if (h_max < h_min) { + cout << "error! maximum h value must be larger than the minimal " + << "value. current values = " << h_max << " and " << h_min << endl; + error = true; + } + if (s_max < s_min) { + cout << "error! maximum s value must be larger than the minimal " + << "value. current values = " << s_max << " and " << s_min << endl; + error = true; + } + if (rho_max < rho_min) { + cout << "error! maximum rho value must be larger than the" + << "minimal value. current values = " << rho_max << " and " << rho_min + << endl; + error = true; + } + if (logp_max < logp_min) { + cout << "error! maximum logp value must be larger than the " + << "minimal value. current values = " << logp_max / log(10) << " and " + << logp_min / log(10) << endl; + error = true; + } + + if (h_max > 1) { + cout << "error! h values must be bewtween 0 and 1. current " + << "values = " << h_max << " and " << h_min << endl; + error = true; + } + if (rho_max > 1) { + cout << "error! rho values must be between 0 and 1. current " + << "values = " << rho_max << " and " << rho_min << endl; + error = true; + } + if (logp_max > 0) { + cout << "error! maximum logp value must be smaller than 0. " + << "current values = " << logp_max / log(10) << " and " + << logp_min / log(10) << endl; + error = true; + } + if (l_max < l_min) { + cout << "error! maximum lambda value must be larger than the " + << "minimal value. current values = " << l_max << " and " << l_min + << endl; + error = true; + } + + if (h_scale > 1.0) { + cout << "error! hscale value must be between 0 and 1. " + << "current value = " << h_scale << endl; + error = true; + } + if (rho_scale > 1.0) { + cout << "error! rscale value must be between 0 and 1. " + << "current value = " << rho_scale << endl; + error = true; + } + if (logp_scale > 1.0) { + cout << "error! pscale value must be between 0 and 1. " + << "current value = " << logp_scale << endl; + error = true; + } + + if (rho_max == 1 && rho_min == 1 && a_mode == 12) { + cout << "error! ridge regression does not support a rho " + << "parameter. current values = " << rho_max << " and " << rho_min + << endl; + error = true; + } + + if (window_cm < 0) { + cout << "error! windowcm values must be non-negative. " + << "current values = " << window_cm << endl; + error = true; + } + + if (window_cm == 0 && window_bp == 0 && window_ns == 0) { + window_bp = 1000000; + } + + // Check p_column, and (no need to) sort p_column into + // ascending order. + if (p_column.size() == 0) { + p_column.push_back(1); + } else { + for (size_t i = 0; i < p_column.size(); i++) { + for (size_t j = 0; j < i; j++) { + if (p_column[i] == p_column[j]) { + cout << "error! identical phenotype " + << "columns: " << p_column[i] << endl; + error = true; + } + } + } + } + + n_ph = p_column.size(); + + // Only LMM option (and one prediction option) can deal with + // multiple phenotypes and no gene expression files. + if (n_ph > 1 && a_mode != 1 && a_mode != 2 && a_mode != 3 && a_mode != 4 && + a_mode != 43) { + cout << "error! the current analysis mode " << a_mode + << " can not deal with multiple phenotypes." << endl; + error = true; + } + if (n_ph > 1 && !file_gene.empty()) { + cout << "error! multiple phenotype analysis option not " + << "allowed with gene expression files. " << endl; + error = true; + } + + if (p_nr > 1) { + cout << "error! pnr value must be between 0 and 1. current value = " << p_nr + << endl; + error = true; + } + + // check est_column + if (est_column.size() == 0) { + if (file_ebv.empty()) { + est_column.push_back(2); + est_column.push_back(5); + est_column.push_back(6); + est_column.push_back(7); + } else { + est_column.push_back(2); + est_column.push_back(0); + est_column.push_back(6); + est_column.push_back(7); + } + } + + if (est_column.size() != 4) { + cout << "error! -en not followed by four numbers. current number = " + << est_column.size() << endl; + error = true; + } + if (est_column[0] == 0) { + cout << "error! -en rs column can not be zero. current number = " + << est_column.size() << endl; + error = true; + } + + // Check if files are compatible with each other, and if files exist. + if (!file_bfile.empty()) { + str = file_bfile + ".bim"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bim file: " << str << endl; + error = true; + } + str = file_bfile + ".bed"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bed file: " << str << endl; + error = true; + } + str = file_bfile + ".fam"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .fam file: " << str << endl; + error = true; + } + } + + if (!file_oxford.empty()) { + str = file_oxford + ".bgen"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .bgen file: " << str << endl; + error = true; + } + str = file_oxford + ".sample"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .sample file: " << str << endl; + error = true; + } + } + + if ((!file_geno.empty() || !file_gene.empty())) { + str = file_pheno; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open phenotype file: " << str << endl; + error = true; + } + } + + str = file_geno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mean genotype file: " << str << endl; + error = true; + } + + str = file_gene; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open gene expression file: " << str << endl; + error = true; + } + + str = file_cat; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open category file: " << str << endl; + error = true; + } + + str = file_mcat; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mcategory file: " << str << endl; + error = true; + } + + str = file_beta; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open beta file: " << str << endl; + error = true; + } + + str = file_cor; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open correlation file: " << str << endl; + error = true; + } + + if (!file_study.empty()) { + str = file_study + ".Vq.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .Vq.txt file: " << str << endl; + error = true; + } + str = file_study + ".q.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .q.txt file: " << str << endl; + error = true; + } + str = file_study + ".size.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .size.txt file: " << str << endl; + error = true; + } + } + + if (!file_ref.empty()) { + str = file_ref + ".S.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .S.txt file: " << str << endl; + error = true; + } + str = file_ref + ".size.txt"; + if (stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open .size.txt file: " << str << endl; + error = true; + } + } + + str = file_mstudy; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mstudy file: " << str << endl; + error = true; + } + + str = file_mref; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mref file: " << str << endl; + error = true; + } + + str = file_mgeno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mgeno file: " << str << endl; + error = true; + } + + str = file_mbfile; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open mbfile file: " << str << endl; + error = true; + } + + size_t flag = 0; + if (!file_bfile.empty()) { + flag++; + } + if (!file_geno.empty()) { + flag++; + } + if (!file_gene.empty()) { + flag++; + } + + // WJA added. + if (!file_oxford.empty()) { + flag++; + } + + if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 && + a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 && + a_mode != 63 && a_mode != 66 && a_mode != 67) { + cout << "error! either plink binary files, or bimbam mean" + << "genotype files, or gene expression files are required." << endl; + error = true; + } + + if (file_pheno.empty() && (a_mode == 43 || a_mode == 5)) { + cout << "error! phenotype file is required." << endl; + error = true; + } + + if (a_mode == 61 || a_mode == 62) { + if (!file_beta.empty()) { + if (file_mbfile.empty() && file_bfile.empty() && file_mgeno.empty() && + file_geno.empty() && file_mref.empty() && file_ref.empty()) { + cout << "error! missing genotype file or ref/mref file." << endl; + error = true; + } + } else if (!file_pheno.empty()) { + if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) && + file_mk.empty()) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + } else if ((file_mstudy.empty() && file_study.empty()) || + (file_mref.empty() && file_ref.empty())) { + cout << "error! either beta file, or phenotype files or " + << "study/ref mstudy/mref files are required." << endl; + error = true; + } + } + + if (a_mode == 63) { + if (file_kin.empty() && (file_ku.empty() || file_kd.empty()) && + file_mk.empty()) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + if (file_pheno.empty()) { + cout << "error! missing phenotype file." << endl; + error = true; + } + } + + if (a_mode == 66 || a_mode == 67) { + if (file_beta.empty() || (file_mbfile.empty() && file_bfile.empty() && + file_mgeno.empty() && file_geno.empty())) { + cout << "error! missing beta file or genotype file." << endl; + error = true; + } + } + + if (!file_epm.empty() && file_bfile.empty() && file_geno.empty()) { + cout << "error! estimated parameter file also requires genotype " + << "file." << endl; + error = true; + } + if (!file_ebv.empty() && file_kin.empty()) { + cout << "error! estimated breeding value file also requires " + << "relatedness file." << endl; + error = true; + } + + if (!file_log.empty() && pheno_mean != 0) { + cout << "error! either log file or mu value can be provide." << endl; + error = true; + } + + str = file_snps; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open snps file: " << str << endl; + error = true; + } + + str = file_log; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open log file: " << str << endl; + error = true; + } + + str = file_anno; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open annotation file: " << str << endl; + error = true; + } + + str = file_kin; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open relatedness matrix file: " << str << endl; + error = true; + } + + str = file_mk; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open relatedness matrix file: " << str << endl; + error = true; + } + + str = file_cvt; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open covariates file: " << str << endl; + error = true; + } + + str = file_gxe; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open environmental covariate file: " << str << endl; + error = true; + } + + str = file_weight; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open the residual weight file: " << str << endl; + error = true; + } + + str = file_epm; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open estimated parameter file: " << str << endl; + error = true; + } + + str = file_ebv; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open estimated breeding value file: " << str + << endl; + error = true; + } + + str = file_read; + if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) { + cout << "error! fail to open total read file: " << str << endl; + error = true; + } + + // Check if files are compatible with analysis mode. + if (k_mode == 2 && !file_geno.empty()) { + cout << "error! use \"-km 1\" when using bimbam mean genotype " + << "file. " << endl; + error = true; + } + + if ((a_mode == 1 || a_mode == 2 || a_mode == 3 || a_mode == 4 || + a_mode == 5 || a_mode == 31) && + (file_kin.empty() && (file_ku.empty() || file_kd.empty()))) { + cout << "error! missing relatedness file. " << endl; + error = true; + } + + if ((a_mode == 43) && file_kin.empty()) { + cout << "error! missing relatedness file. -predict option requires " + << "-k option to provide a relatedness file." << endl; + error = true; + } + + if ((a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14 || + a_mode == 16) && + !file_cvt.empty()) { + cout << "error! -bslmm option does not support covariates files." << endl; + error = true; + } + + if (a_mode == 41 || a_mode == 42) { + if (!file_cvt.empty()) { + cout << "error! -predict option does not support " + << "covariates files." << endl; + error = true; + } + if (file_epm.empty()) { + cout << "error! -predict option requires estimated " + << "parameter files." << endl; + error = true; + } + } + + if (file_beta.empty() && (a_mode == 27 || a_mode == 28)) { + cout << "error! beta effects file is required." << endl; + error = true; + } + + return; } -void PARAM::CheckData (void) { +void PARAM::CheckData(void) { // WJA NOTE: I added this condition so that covariates can be added // through sample, probably not exactly what is wanted. - if(file_oxford.empty()) - { - if ((file_cvt).empty() || (indicator_cvt).size()==0) { - n_cvt=1; - } - } - - if ( (a_mode==66 || a_mode==67) && (v_pve.size()!=n_vc)) { - cout<<"error! the number of pve estimates does not equal to "<< - "the number of categories in the cat file:"<<v_pve.size()<<" "<< - n_vc<<endl; - error=true; - } - - if ( (indicator_cvt).size()!=0 && - (indicator_cvt).size()!=(indicator_idv).size()) { - error=true; - cout << "error! number of rows in the covariates file do not "<< - "match the number of individuals. "<<endl; + if (file_oxford.empty()) { + if ((file_cvt).empty() || (indicator_cvt).size() == 0) { + n_cvt = 1; + } + } + + if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) { + cout << "error! the number of pve estimates does not equal to " + << "the number of categories in the cat file:" << v_pve.size() << " " + << n_vc << endl; + error = true; + } + + if ((indicator_cvt).size() != 0 && + (indicator_cvt).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the covariates file do not " + << "match the number of individuals. " << endl; return; } - if ( (indicator_gxe).size()!=0 && (indicator_gxe).size() != - (indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the gxe file do not match the number "<< - "of individuals. "<<endl; + if ((indicator_gxe).size() != 0 && + (indicator_gxe).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the gxe file do not match the number " + << "of individuals. " << endl; return; } - if ( (indicator_weight).size()!=0 && - (indicator_weight).size()!=(indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the weight file do not match "<< - "the number of individuals. "<<endl; + if ((indicator_weight).size() != 0 && + (indicator_weight).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the weight file do not match " + << "the number of individuals. " << endl; return; } - if ( (indicator_read).size()!=0 && - (indicator_read).size()!=(indicator_idv).size()) { - error=true; - cout<<"error! number of rows in the total read file do not "<< - "match the number of individuals. "<<endl; + if ((indicator_read).size() != 0 && + (indicator_read).size() != (indicator_idv).size()) { + error = true; + cout << "error! number of rows in the total read file do not " + << "match the number of individuals. " << endl; return; } - // Calculate ni_total and ni_test, and set indicator_idv to 0 - // whenever indicator_cvt=0, and calculate np_obs and np_miss. - ni_total=(indicator_idv).size(); - - ni_test=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ni_test++; - } - - ni_cvt=0; - for (size_t i=0; i<indicator_cvt.size(); i++) { - if (indicator_cvt[i]==0) {continue;} - ni_cvt++; - } - - np_obs=0; np_miss=0; - for (size_t i=0; i<indicator_pheno.size(); i++) { - if (indicator_cvt.size()!=0) { - if (indicator_cvt[i]==0) {continue;} - } - - if (indicator_gxe.size()!=0) { - if (indicator_gxe[i]==0) {continue;} - } - - if (indicator_weight.size()!=0) { - if (indicator_weight[i]==0) {continue;} - } - - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) { - np_miss++; - } else { - np_obs++; - } - } - } - - if (ni_test==0 && file_cor.empty() && file_mstudy.empty() && - file_study.empty() && file_beta.empty() && file_bf.empty() ) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<<endl; - return; - } - - if (a_mode==43) { - if (ni_cvt==ni_test) { - error=true; - cout<<"error! no individual has missing "<< - "phenotypes."<<endl; - return; - } - if ((np_obs+np_miss)!=(ni_cvt*n_ph)) { - error=true; - cout<<"error! number of phenotypes do not match the "<< - "summation of missing and observed phenotypes."<< - endl; - return; - } - } - - // Output some information. - if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && - a_mode!=15 && a_mode!=27 && a_mode!=28) { - cout<<"## number of total individuals = "<<ni_total<<endl; - if (a_mode==43) { - cout<<"## number of analyzed individuals = "<<ni_cvt<<endl; - cout<<"## number of individuals with full phenotypes = "<< - ni_test<<endl; - } else { - cout<<"## number of analyzed individuals = "<<ni_test<<endl; - } - cout<<"## number of covariates = "<<n_cvt<<endl; - cout<<"## number of phenotypes = "<<n_ph<<endl; - if (a_mode==43) { - cout<<"## number of observed data = "<<np_obs<<endl; - cout<<"## number of missing data = "<<np_miss<<endl; - } - if (!file_gene.empty()) { - cout<<"## number of total genes = "<<ng_total<<endl; - } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) { - cout<<"## number of total SNPs = "<<ns_total<<endl; - cout<<"## number of analyzed SNPs = "<<ns_test<<endl; - } else {} - } - - // Set d_pace to 1000 for gene expression. - if (!file_gene.empty() && d_pace==100000) { - d_pace=1000; - } - - // For case-control studies, count # cases and # controls. - int flag_cc=0; - if (a_mode==13) { - ni_case=0; - ni_control=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - if (indicator_idv[i]==0) {continue;} - - if (pheno[i][0]==0) {ni_control++;} - else if (pheno[i][0]==1) {ni_case++;} - else {flag_cc=1;} - } - cout<<"## number of cases = "<<ni_case<<endl; - cout<<"## number of controls = "<<ni_control<<endl; - } - - if (flag_cc==1) {cout<<"Unexpected non-binary phenotypes for "<< - "case/control analysis. Use default (BSLMM) analysis instead."<< - endl; - a_mode=11; - } - - // Set parameters for BSLMM and check for predict. - if (a_mode==11 || a_mode==12 || a_mode==13 || a_mode==14) { - if (a_mode==11) { - n_mh=1; - } - if (logp_min==0) { - logp_min=-1.0*log((double)ns_test); - } - - if (h_scale==-1) { - h_scale=min(1.0, 10.0/sqrt((double)ni_test) ); - } - if (rho_scale==-1) { - rho_scale=min(1.0, 10.0/sqrt((double)ni_test) ); - } - if (logp_scale==-1) { - logp_scale=min(1.0, 5.0/sqrt((double)ni_test) ); - } - - if (h_min==-1) {h_min=0.0;} - if (h_max==-1) {h_max=1.0;} - - if (s_max>ns_test) { - s_max=ns_test; - cout<<"s_max is re-set to the number of analyzed SNPs."<< - endl; - } - if (s_max<s_min) { - cout<<"error! maximum s value must be larger than the "<< - "minimal value. current values = "<<s_max<<" and "<< - s_min<<endl; - error=true; - } - } else if (a_mode==41 || a_mode==42) { - if (indicator_bv.size()!=0) { - if (indicator_idv.size()!=indicator_bv.size()) { - cout<<"error! number of rows in the "<< - "phenotype file does not match that in the "<< - "estimated breeding value file: "<< - indicator_idv.size()<<"\t"<<indicator_bv.size()<< - endl; - error=true; - } else { - size_t flag_bv=0; - for (size_t i=0; i<(indicator_bv).size(); ++i) { - if (indicator_idv[i]!=indicator_bv[i]) {flag_bv++;} - } - if (flag_bv!=0) { - cout<<"error! individuals with missing value in the "<< - "phenotype file does not match that in the "<< - "estimated breeding value file: "<<flag_bv<<endl; - error=true; - } - } - } - } - - if (a_mode==62 && !file_beta.empty() && mapRS2wcat.size()==0) { - cout<<"vc analysis with beta files requires -wcat file."<<endl; - error=true; - } - if (a_mode==67 && mapRS2wcat.size()==0) { - cout<<"ci analysis with beta files requires -wcat file."<<endl; - error=true; - } - - // File_mk needs to contain more than one line. - if (n_vc==1 && !file_mk.empty()) { - cout<<"error! -mk file should contain more than one line."<<endl; - error=true; - } - - return; -} + // Calculate ni_total and ni_test, and set indicator_idv to 0 + // whenever indicator_cvt=0, and calculate np_obs and np_miss. + ni_total = (indicator_idv).size(); + + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ni_test++; + } + + ni_cvt = 0; + for (size_t i = 0; i < indicator_cvt.size(); i++) { + if (indicator_cvt[i] == 0) { + continue; + } + ni_cvt++; + } + + np_obs = 0; + np_miss = 0; + for (size_t i = 0; i < indicator_pheno.size(); i++) { + if (indicator_cvt.size() != 0) { + if (indicator_cvt[i] == 0) { + continue; + } + } + + if (indicator_gxe.size() != 0) { + if (indicator_gxe[i] == 0) { + continue; + } + } + + if (indicator_weight.size() != 0) { + if (indicator_weight[i] == 0) { + continue; + } + } + + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + np_miss++; + } else { + np_obs++; + } + } + } + + if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() && + file_study.empty() && file_beta.empty() && file_bf.empty()) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + + if (a_mode == 43) { + if (ni_cvt == ni_test) { + error = true; + cout << "error! no individual has missing " + << "phenotypes." << endl; + return; + } + if ((np_obs + np_miss) != (ni_cvt * n_ph)) { + error = true; + cout << "error! number of phenotypes do not match the " + << "summation of missing and observed phenotypes." << endl; + return; + } + } + + // Output some information. + if (file_cor.empty() && file_mstudy.empty() && file_study.empty() && + a_mode != 15 && a_mode != 27 && a_mode != 28) { + cout << "## number of total individuals = " << ni_total << endl; + if (a_mode == 43) { + cout << "## number of analyzed individuals = " << ni_cvt << endl; + cout << "## number of individuals with full phenotypes = " << ni_test + << endl; + } else { + cout << "## number of analyzed individuals = " << ni_test << endl; + } + cout << "## number of covariates = " << n_cvt << endl; + cout << "## number of phenotypes = " << n_ph << endl; + if (a_mode == 43) { + cout << "## number of observed data = " << np_obs << endl; + cout << "## number of missing data = " << np_miss << endl; + } + if (!file_gene.empty()) { + cout << "## number of total genes = " << ng_total << endl; + } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) { + cout << "## number of total SNPs = " << ns_total << endl; + cout << "## number of analyzed SNPs = " << ns_test << endl; + } else { + } + } + + // Set d_pace to 1000 for gene expression. + if (!file_gene.empty() && d_pace == 100000) { + d_pace = 1000; + } + + // For case-control studies, count # cases and # controls. + int flag_cc = 0; + if (a_mode == 13) { + ni_case = 0; + ni_control = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + if (indicator_idv[i] == 0) { + continue; + } + + if (pheno[i][0] == 0) { + ni_control++; + } else if (pheno[i][0] == 1) { + ni_case++; + } else { + flag_cc = 1; + } + } + cout << "## number of cases = " << ni_case << endl; + cout << "## number of controls = " << ni_control << endl; + } + + if (flag_cc == 1) { + cout << "Unexpected non-binary phenotypes for " + << "case/control analysis. Use default (BSLMM) analysis instead." + << endl; + a_mode = 11; + } + + // Set parameters for BSLMM and check for predict. + if (a_mode == 11 || a_mode == 12 || a_mode == 13 || a_mode == 14) { + if (a_mode == 11) { + n_mh = 1; + } + if (logp_min == 0) { + logp_min = -1.0 * log((double)ns_test); + } + + if (h_scale == -1) { + h_scale = min(1.0, 10.0 / sqrt((double)ni_test)); + } + if (rho_scale == -1) { + rho_scale = min(1.0, 10.0 / sqrt((double)ni_test)); + } + if (logp_scale == -1) { + logp_scale = min(1.0, 5.0 / sqrt((double)ni_test)); + } + + if (h_min == -1) { + h_min = 0.0; + } + if (h_max == -1) { + h_max = 1.0; + } + + if (s_max > ns_test) { + s_max = ns_test; + cout << "s_max is re-set to the number of analyzed SNPs." << endl; + } + if (s_max < s_min) { + cout << "error! maximum s value must be larger than the " + << "minimal value. current values = " << s_max << " and " << s_min + << endl; + error = true; + } + } else if (a_mode == 41 || a_mode == 42) { + if (indicator_bv.size() != 0) { + if (indicator_idv.size() != indicator_bv.size()) { + cout << "error! number of rows in the " + << "phenotype file does not match that in the " + << "estimated breeding value file: " << indicator_idv.size() + << "\t" << indicator_bv.size() << endl; + error = true; + } else { + size_t flag_bv = 0; + for (size_t i = 0; i < (indicator_bv).size(); ++i) { + if (indicator_idv[i] != indicator_bv[i]) { + flag_bv++; + } + } + if (flag_bv != 0) { + cout << "error! individuals with missing value in the " + << "phenotype file does not match that in the " + << "estimated breeding value file: " << flag_bv << endl; + error = true; + } + } + } + } -void PARAM::PrintSummary () { - if (n_ph==1) { - cout<<"pve estimate ="<<pve_null<<endl; - cout<<"se(pve) ="<<pve_se_null<<endl; - } else { + if (a_mode == 62 && !file_beta.empty() && mapRS2wcat.size() == 0) { + cout << "vc analysis with beta files requires -wcat file." << endl; + error = true; + } + if (a_mode == 67 && mapRS2wcat.size() == 0) { + cout << "ci analysis with beta files requires -wcat file." << endl; + error = true; + } + + // File_mk needs to contain more than one line. + if (n_vc == 1 && !file_mk.empty()) { + cout << "error! -mk file should contain more than one line." << endl; + error = true; + } + + return; +} - } - return; +void PARAM::PrintSummary() { + if (n_ph == 1) { + cout << "pve estimate =" << pve_null << endl; + cout << "se(pve) =" << pve_se_null << endl; + } else { + } + return; } -void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { - string file_str; - - if (!file_bfile.empty()) { - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, indicator_idv, indicator_snp, - UtX, K, calc_K)==false) { - error=true; - } - } - else { - if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, - UtX, K, calc_K)==false) { - error=true; - } - } - - return; +void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) { + string file_str; + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, indicator_idv, indicator_snp, UtX, K, calc_K) == + false) { + error = true; + } + } else { + if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, + calc_K) == false) { + error = true; + } + } + + return; } -void PARAM::ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, - const bool calc_K) { - string file_str; - - if (!file_bfile.empty()) { - file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, indicator_idv, indicator_snp, - Xt, K, calc_K, ni_test, ns_test)==false) { - error=true; - } - } else { - if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, - Xt, K, calc_K, ni_test, ns_test)==false) { - error=true; - } - } - - return; +void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K, + const bool calc_K) { + string file_str; + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K, + ni_test, ns_test) == false) { + error = true; + } + } else { + if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K, + ni_test, ns_test) == false) { + error = true; + } + } + + return; } -void PARAM::CalcKin (gsl_matrix *matrix_kin) { - string file_str; - - gsl_matrix_set_zero (matrix_kin); - - if (!file_bfile.empty() ) { - file_str=file_bfile+".bed"; - if (PlinkKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - else if (!file_oxford.empty() ) { - file_str=file_oxford+".bgen"; - if (bgenKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - else { - file_str=file_geno; - if (BimbamKin (file_str, indicator_snp, a_mode-20, d_pace, - matrix_kin)==false) { - error=true; - } - } - - return; +void PARAM::CalcKin(gsl_matrix *matrix_kin) { + string file_str; + + gsl_matrix_set_zero(matrix_kin); + + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } else if (!file_oxford.empty()) { + file_str = file_oxford + ".bgen"; + if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } else { + file_str = file_geno; + if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) == + false) { + error = true; + } + } + + return; } // From an existing n by nd A and K matrices, compute the d by d S // matrix (which is not necessary symmetric). -void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, - gsl_matrix *S) { - size_t n_vc=S->size1, ni_test=A->size1; +void compAKtoS(const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, + gsl_matrix *S) { + size_t n_vc = S->size1, ni_test = A->size1; double di, dj, tr_AK, sum_A, sum_K, s_A, s_K, sum_AK, tr_A, tr_K, d; - for (size_t i=0; i<n_vc; i++) { - for (size_t j=0; j<n_vc; j++) { - tr_AK=0; sum_A=0; sum_K=0; sum_AK=0; tr_A=0; tr_K=0; - for (size_t l=0; l<ni_test; l++) { - s_A=0; s_K=0; - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*j); - s_A+=di; s_K+=dj; - - tr_AK+=di*dj; sum_A+=di; sum_K+=dj; - if (l==k) {tr_A+=di; tr_K+=dj;} - } - sum_AK+=s_A*s_K; - } - - sum_A/=(double)ni_test; - sum_K/=(double)ni_test; - sum_AK/=(double)ni_test; - tr_A-=sum_A; - tr_K-=sum_K; - d=tr_AK-2*sum_AK+sum_A*sum_K; - - if (tr_A==0 || tr_K==0) { - d=0; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = 0; j < n_vc; j++) { + tr_AK = 0; + sum_A = 0; + sum_K = 0; + sum_AK = 0; + tr_A = 0; + tr_K = 0; + for (size_t l = 0; l < ni_test; l++) { + s_A = 0; + s_K = 0; + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * j); + s_A += di; + s_K += dj; + + tr_AK += di * dj; + sum_A += di; + sum_K += dj; + if (l == k) { + tr_A += di; + tr_K += dj; + } + } + sum_AK += s_A * s_K; + } + + sum_A /= (double)ni_test; + sum_K /= (double)ni_test; + sum_AK /= (double)ni_test; + tr_A -= sum_A; + tr_K -= sum_K; + d = tr_AK - 2 * sum_AK + sum_A * sum_K; + + if (tr_A == 0 || tr_K == 0) { + d = 0; } else { - d=d/(tr_A*tr_K)-1/(double)(ni_test-n_cvt); + d = d / (tr_A * tr_K) - 1 / (double)(ni_test - n_cvt); } - gsl_matrix_set (S, i, j, d); + gsl_matrix_set(S, i, j, d); } } @@ -1340,187 +1375,195 @@ void compAKtoS (const gsl_matrix *A, const gsl_matrix *K, const size_t n_cvt, // Copied from lmm.cpp; is used in the following function compKtoV // map a number 1-(n_cvt+2) to an index between 0 and [(n_c+2)^2+(n_c+2)]/2-1 -size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) { - if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) { - cout<<"error in GetabIndex."<<endl; - return 0; - } - size_t index; - size_t l, h; - if (b>a) {l=a; h=b;} else {l=b; h=a;} - - size_t n=n_cvt+2; - index=(2*n-l+2)*(l-1)/2+h-l; - - return index; +size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt) { + if (a > n_cvt + 2 || b > n_cvt + 2 || a <= 0 || b <= 0) { + cout << "error in GetabIndex." << endl; + return 0; + } + size_t index; + size_t l, h; + if (b > a) { + l = a; + h = b; + } else { + l = b; + h = a; + } + + size_t n = n_cvt + 2; + index = (2 * n - l + 2) * (l - 1) / 2 + h - l; + + return index; } // From an existing n by nd (centered) G matrix, compute the d+1 by // d*(d-1)/2*(d+1) Q matrix where inside i'th d+1 by d+1 matrix, each // element is tr(KiKlKjKm)-r*tr(KmKiKl)-r*tr(KlKjKm)+r^2*tr(KlKm), // where r=n/(n-1) -void compKtoV (const gsl_matrix *G, gsl_matrix *V) { - size_t n_vc=G->size2/G->size1, ni_test=G->size1; +void compKtoV(const gsl_matrix *G, gsl_matrix *V) { + size_t n_vc = G->size2 / G->size1, ni_test = G->size1; - gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, (n_vc*(n_vc+1))/2*ni_test); - gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 ); - gsl_vector *trKi=gsl_vector_alloc(n_vc); + gsl_matrix *KiKj = + gsl_matrix_alloc(ni_test, (n_vc * (n_vc + 1)) / 2 * ni_test); + gsl_vector *trKiKj = gsl_vector_alloc(n_vc * (n_vc + 1) / 2); + gsl_vector *trKi = gsl_vector_alloc(n_vc); - double d, tr, r=(double)ni_test/(double)(ni_test-1); + double d, tr, r = (double)ni_test / (double)(ni_test - 1); size_t t, t_il, t_jm, t_lm, t_im, t_jl, t_ij; // Compute KiKj for all pairs of i and j (not including the identity // matrix). - t=0; - for (size_t i=0; i<n_vc; i++) { - gsl_matrix_const_view Ki= - gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test); - for (size_t j=i; j<n_vc; j++) { - gsl_matrix_const_view Kj= - gsl_matrix_const_submatrix(G, 0, j*ni_test, ni_test, ni_test); - gsl_matrix_view KiKj_sub= - gsl_matrix_submatrix (KiKj, 0, t*ni_test, ni_test, ni_test); - eigenlib_dgemm ("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0, - &KiKj_sub.matrix); + t = 0; + for (size_t i = 0; i < n_vc; i++) { + gsl_matrix_const_view Ki = + gsl_matrix_const_submatrix(G, 0, i * ni_test, ni_test, ni_test); + for (size_t j = i; j < n_vc; j++) { + gsl_matrix_const_view Kj = + gsl_matrix_const_submatrix(G, 0, j * ni_test, ni_test, ni_test); + gsl_matrix_view KiKj_sub = + gsl_matrix_submatrix(KiKj, 0, t * ni_test, ni_test, ni_test); + eigenlib_dgemm("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0, + &KiKj_sub.matrix); t++; } } // Compute trKi, trKiKj. - t=0; - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - tr=0; - for (size_t k=0; k<ni_test; k++) { - tr+=gsl_matrix_get (KiKj, k, t*ni_test+k); + t = 0; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + tr += gsl_matrix_get(KiKj, k, t * ni_test + k); } - gsl_vector_set (trKiKj, t, tr); + gsl_vector_set(trKiKj, t, tr); t++; } - tr=0; - for (size_t k=0; k<ni_test; k++) { - tr+=gsl_matrix_get (G, k, i*ni_test+k); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + tr += gsl_matrix_get(G, k, i * ni_test + k); } - gsl_vector_set (trKi, i, tr); + gsl_vector_set(trKi, i, tr); } // Compute V. - for (size_t i=0; i<n_vc; i++) { - for (size_t j=i; j<n_vc; j++) { - t_ij=GetabIndex (i+1, j+1, n_vc-2); - for (size_t l=0; l<n_vc+1; l++) { - for (size_t m=0; m<n_vc+1; m++) { - if (l!=n_vc && m!=n_vc) { - t_il=GetabIndex (i+1, l+1, n_vc-2); - t_jm=GetabIndex (j+1, m+1, n_vc-2); - t_lm=GetabIndex (l+1, m+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - gsl_vector_const_view KiKl_row= - gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test); - gsl_vector_const_view KiKl_col= - gsl_matrix_const_column (KiKj, t_il*ni_test+k); - gsl_vector_const_view KjKm_row= - gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test); - gsl_vector_const_view KjKm_col= - gsl_matrix_const_column (KiKj, t_jm*ni_test+k); - - gsl_vector_const_view Kl_row= - gsl_matrix_const_subrow (G, k, l*ni_test, ni_test); - gsl_vector_const_view Km_row= - gsl_matrix_const_subrow (G, k, m*ni_test, ni_test); - - if (i<=l && j<=m) { - gsl_blas_ddot (&KiKl_row.vector, &KjKm_col.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d); - tr-=r*d; - } else if (i<=l && j>m) { - gsl_blas_ddot (&KiKl_row.vector, &KjKm_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_col.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); - tr-=r*d; - } else if (i>l && j<=m) { - gsl_blas_ddot (&KiKl_col.vector, &KjKm_col.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_col.vector, &d); - tr-=r*d; - } else { - gsl_blas_ddot (&KiKl_col.vector, &KjKm_row.vector, &d); - tr+=d; - gsl_blas_ddot (&Km_row.vector, &KiKl_row.vector, &d); - tr-=r*d; - gsl_blas_ddot (&Kl_row.vector, &KjKm_row.vector, &d); - tr-=r*d; - } - } - - tr+=r*r*gsl_vector_get (trKiKj, t_lm); - } else if (l!=n_vc && m==n_vc) { - t_il=GetabIndex (i+1, l+1, n_vc-2); - t_jl=GetabIndex (j+1, l+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - gsl_vector_const_view KiKl_row= - gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test); - gsl_vector_const_view KiKl_col= - gsl_matrix_const_column (KiKj, t_il*ni_test+k); - gsl_vector_const_view Kj_row= - gsl_matrix_const_subrow (G, k, j*ni_test, ni_test); - - if (i<=l) { - gsl_blas_ddot (&KiKl_row.vector, &Kj_row.vector, &d); - tr+=d; - } else { - gsl_blas_ddot (&KiKl_col.vector, &Kj_row.vector, &d); - tr+=d; - } - } - tr+=-r*gsl_vector_get (trKiKj, t_il) - - r*gsl_vector_get (trKiKj, t_jl)+r*r*gsl_vector_get (trKi, l); - } else if (l==n_vc && m!=n_vc) { - t_jm=GetabIndex (j+1, m+1, n_vc-2); - t_im=GetabIndex (i+1, m+1, n_vc-2); - tr=0; - for (size_t k=0; k<ni_test; k++) { - gsl_vector_const_view KjKm_row= - gsl_matrix_const_subrow (KiKj, k, t_jm*ni_test, ni_test); - gsl_vector_const_view KjKm_col= - gsl_matrix_const_column (KiKj, t_jm*ni_test+k); - gsl_vector_const_view Ki_row= - gsl_matrix_const_subrow (G, k, i*ni_test, ni_test); - - if (j<=m) { - gsl_blas_ddot (&KjKm_row.vector, &Ki_row.vector, &d); - tr+=d; - } else { - gsl_blas_ddot (&KjKm_col.vector, &Ki_row.vector, &d); - tr+=d; - } - } - tr+=-r*gsl_vector_get (trKiKj, t_im) - - r*gsl_vector_get (trKiKj, t_jm)+r*r*gsl_vector_get (trKi, m); - } else { - tr=gsl_vector_get (trKiKj, t_ij) - - r*gsl_vector_get (trKi, i) - - r*gsl_vector_get (trKi, j)+r*r*(double)(ni_test-1); - } - - gsl_matrix_set (V, l, t_ij*(n_vc+1)+m, tr); - } - } - } - } - - gsl_matrix_scale (V, 1.0/pow((double)ni_test, 2) ); + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = i; j < n_vc; j++) { + t_ij = GetabIndex(i + 1, j + 1, n_vc - 2); + for (size_t l = 0; l < n_vc + 1; l++) { + for (size_t m = 0; m < n_vc + 1; m++) { + if (l != n_vc && m != n_vc) { + t_il = GetabIndex(i + 1, l + 1, n_vc - 2); + t_jm = GetabIndex(j + 1, m + 1, n_vc - 2); + t_lm = GetabIndex(l + 1, m + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KiKl_row = + gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test); + gsl_vector_const_view KiKl_col = + gsl_matrix_const_column(KiKj, t_il * ni_test + k); + gsl_vector_const_view KjKm_row = + gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test); + gsl_vector_const_view KjKm_col = + gsl_matrix_const_column(KiKj, t_jm * ni_test + k); + + gsl_vector_const_view Kl_row = + gsl_matrix_const_subrow(G, k, l * ni_test, ni_test); + gsl_vector_const_view Km_row = + gsl_matrix_const_subrow(G, k, m * ni_test, ni_test); + + if (i <= l && j <= m) { + gsl_blas_ddot(&KiKl_row.vector, &KjKm_col.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d); + tr -= r * d; + } else if (i <= l && j > m) { + gsl_blas_ddot(&KiKl_row.vector, &KjKm_row.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_col.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d); + tr -= r * d; + } else if (i > l && j <= m) { + gsl_blas_ddot(&KiKl_col.vector, &KjKm_col.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_col.vector, &d); + tr -= r * d; + } else { + gsl_blas_ddot(&KiKl_col.vector, &KjKm_row.vector, &d); + tr += d; + gsl_blas_ddot(&Km_row.vector, &KiKl_row.vector, &d); + tr -= r * d; + gsl_blas_ddot(&Kl_row.vector, &KjKm_row.vector, &d); + tr -= r * d; + } + } + + tr += r * r * gsl_vector_get(trKiKj, t_lm); + } else if (l != n_vc && m == n_vc) { + t_il = GetabIndex(i + 1, l + 1, n_vc - 2); + t_jl = GetabIndex(j + 1, l + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KiKl_row = + gsl_matrix_const_subrow(KiKj, k, t_il * ni_test, ni_test); + gsl_vector_const_view KiKl_col = + gsl_matrix_const_column(KiKj, t_il * ni_test + k); + gsl_vector_const_view Kj_row = + gsl_matrix_const_subrow(G, k, j * ni_test, ni_test); + + if (i <= l) { + gsl_blas_ddot(&KiKl_row.vector, &Kj_row.vector, &d); + tr += d; + } else { + gsl_blas_ddot(&KiKl_col.vector, &Kj_row.vector, &d); + tr += d; + } + } + tr += -r * gsl_vector_get(trKiKj, t_il) - + r * gsl_vector_get(trKiKj, t_jl) + + r * r * gsl_vector_get(trKi, l); + } else if (l == n_vc && m != n_vc) { + t_jm = GetabIndex(j + 1, m + 1, n_vc - 2); + t_im = GetabIndex(i + 1, m + 1, n_vc - 2); + tr = 0; + for (size_t k = 0; k < ni_test; k++) { + gsl_vector_const_view KjKm_row = + gsl_matrix_const_subrow(KiKj, k, t_jm * ni_test, ni_test); + gsl_vector_const_view KjKm_col = + gsl_matrix_const_column(KiKj, t_jm * ni_test + k); + gsl_vector_const_view Ki_row = + gsl_matrix_const_subrow(G, k, i * ni_test, ni_test); + + if (j <= m) { + gsl_blas_ddot(&KjKm_row.vector, &Ki_row.vector, &d); + tr += d; + } else { + gsl_blas_ddot(&KjKm_col.vector, &Ki_row.vector, &d); + tr += d; + } + } + tr += -r * gsl_vector_get(trKiKj, t_im) - + r * gsl_vector_get(trKiKj, t_jm) + + r * r * gsl_vector_get(trKi, m); + } else { + tr = gsl_vector_get(trKiKj, t_ij) - r * gsl_vector_get(trKi, i) - + r * gsl_vector_get(trKi, j) + r * r * (double)(ni_test - 1); + } + + gsl_matrix_set(V, l, t_ij * (n_vc + 1) + m, tr); + } + } + } + } + + gsl_matrix_scale(V, 1.0 / pow((double)ni_test, 2)); gsl_matrix_free(KiKj); gsl_vector_free(trKiKj); @@ -1530,21 +1573,21 @@ void compKtoV (const gsl_matrix *G, gsl_matrix *V) { } // Perform Jacknife sampling for variance of S. -void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, - const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) { - size_t n_vc=Svar->size1, ni_test=A->size1, n_cvt=W->size2; +void JackknifeAKtoS(const gsl_matrix *W, const gsl_matrix *A, + const gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar) { + size_t n_vc = Svar->size1, ni_test = A->size1, n_cvt = W->size2; - vector<vector<vector<double> > > trAK, sumAK; - vector<vector<double> > sumA, sumK, trA, trK, sA, sK; + vector<vector<vector<double>>> trAK, sumAK; + vector<vector<double>> sumA, sumK, trA, trK, sA, sK; vector<double> vec_tmp; double di, dj, d, m, v; // Initialize and set all elements to zero. - for (size_t i=0; i<ni_test; i++) { + for (size_t i = 0; i < ni_test; i++) { vec_tmp.push_back(0); } - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { sumA.push_back(vec_tmp); sumK.push_back(vec_tmp); trA.push_back(vec_tmp); @@ -1553,82 +1596,93 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, sK.push_back(vec_tmp); } - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { trAK.push_back(sumK); sumAK.push_back(sumK); } // Run jackknife. - for (size_t i=0; i<n_vc; i++) { - for (size_t l=0; l<ni_test; l++) { - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*i); - - for (size_t t=0; t<ni_test; t++) { - if (t==l || t==k) {continue;} - sumA[i][t]+=di; - sumK[i][t]+=dj; - if (l==k) {trA[i][t]+=di; trK[i][t]+=dj;} - } - sA[i][l]+=di; - sK[i][l]+=dj; + for (size_t i = 0; i < n_vc; i++) { + for (size_t l = 0; l < ni_test; l++) { + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * i); + + for (size_t t = 0; t < ni_test; t++) { + if (t == l || t == k) { + continue; + } + sumA[i][t] += di; + sumK[i][t] += dj; + if (l == k) { + trA[i][t] += di; + trK[i][t] += dj; + } + } + sA[i][l] += di; + sK[i][l] += dj; } } - for (size_t t=0; t<ni_test; t++) { - sumA[i][t]/=(double)(ni_test-1); - sumK[i][t]/=(double)(ni_test-1); + for (size_t t = 0; t < ni_test; t++) { + sumA[i][t] /= (double)(ni_test - 1); + sumK[i][t] /= (double)(ni_test - 1); } } - for (size_t i=0; i<n_vc; i++) { - for (size_t j=0; j<n_vc; j++) { - for (size_t l=0; l<ni_test; l++) { - for (size_t k=0; k<ni_test; k++) { - di=gsl_matrix_get(A, l, k+ni_test*i); - dj=gsl_matrix_get(K, l, k+ni_test*j); - d=di*dj; - - for (size_t t=0; t<ni_test; t++) { - if (t==l || t==k) {continue;} - trAK[i][j][t]+=d; + for (size_t i = 0; i < n_vc; i++) { + for (size_t j = 0; j < n_vc; j++) { + for (size_t l = 0; l < ni_test; l++) { + for (size_t k = 0; k < ni_test; k++) { + di = gsl_matrix_get(A, l, k + ni_test * i); + dj = gsl_matrix_get(K, l, k + ni_test * j); + d = di * dj; + + for (size_t t = 0; t < ni_test; t++) { + if (t == l || t == k) { + continue; + } + trAK[i][j][t] += d; } - } + } - for (size_t t=0; t<ni_test; t++) { - if (t==l) {continue;} - di=gsl_matrix_get(A, l, t+ni_test*i); - dj=gsl_matrix_get(K, l, t+ni_test*j); + for (size_t t = 0; t < ni_test; t++) { + if (t == l) { + continue; + } + di = gsl_matrix_get(A, l, t + ni_test * i); + dj = gsl_matrix_get(K, l, t + ni_test * j); - sumAK[i][j][t]+=(sA[i][l]-di)*(sK[j][l]-dj); - } + sumAK[i][j][t] += (sA[i][l] - di) * (sK[j][l] - dj); + } } - for (size_t t=0; t<ni_test; t++) { - sumAK[i][j][t]/=(double)(ni_test-1); + for (size_t t = 0; t < ni_test; t++) { + sumAK[i][j][t] /= (double)(ni_test - 1); } - m=0; v=0; - for (size_t t=0; t<ni_test; t++) { - d=trAK[i][j][t]-2*sumAK[i][j][t]+sumA[i][t]*sumK[j][t]; - if ( (trA[i][t]-sumA[i][t])==0 || (trK[j][t]-sumK[j][t])==0) { - d=0; - } else { - d/=(trA[i][t]-sumA[i][t])*(trK[j][t]-sumK[j][t]); - d-=1/(double)(ni_test-n_cvt-1); - } - m+=d; v+=d*d; + m = 0; + v = 0; + for (size_t t = 0; t < ni_test; t++) { + d = trAK[i][j][t] - 2 * sumAK[i][j][t] + sumA[i][t] * sumK[j][t]; + if ((trA[i][t] - sumA[i][t]) == 0 || (trK[j][t] - sumK[j][t]) == 0) { + d = 0; + } else { + d /= (trA[i][t] - sumA[i][t]) * (trK[j][t] - sumK[j][t]); + d -= 1 / (double)(ni_test - n_cvt - 1); + } + m += d; + v += d * d; } - m/=(double)ni_test; - v/=(double)ni_test; - v-=m*m; - v*=(double)(ni_test-1); - gsl_matrix_set (Svar, i, j, v); - if (n_cvt==1) { - d=gsl_matrix_get (S, i, j); - d=(double)ni_test*d-(double)(ni_test-1)*m; - gsl_matrix_set (S, i, j, d); + m /= (double)ni_test; + v /= (double)ni_test; + v -= m * m; + v *= (double)(ni_test - 1); + gsl_matrix_set(Svar, i, j, v); + if (n_cvt == 1) { + d = gsl_matrix_get(S, i, j); + d = (double)ni_test * d - (double)(ni_test - 1) * m; + gsl_matrix_set(S, i, j, d); } } } @@ -1638,561 +1692,590 @@ void JackknifeAKtoS (const gsl_matrix *W, const gsl_matrix *A, // Compute the d by d S matrix with its d by d variance matrix of // Svar, and the d+1 by d(d+1) matrix of Q for V(q). -void PARAM::CalcS (const map<string, double> &mapRS2wA, - const map<string, double> &mapRS2wK, - const gsl_matrix *W, gsl_matrix *A, - gsl_matrix *K, gsl_matrix *S, - gsl_matrix *Svar, gsl_vector *ns) { +void PARAM::CalcS(const map<string, double> &mapRS2wA, + const map<string, double> &mapRS2wK, const gsl_matrix *W, + gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, + gsl_vector *ns) { string file_str; - gsl_matrix_set_zero (S); - gsl_matrix_set_zero (Svar); - gsl_vector_set_zero (ns); + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Svar); + gsl_vector_set_zero(ns); // Compute the kinship matrix G for multiple categories; these // matrices are not centered, for convienence of Jacknife sampling. - if (!file_bfile.empty() ) { - file_str=file_bfile+".bed"; - if (mapRS2wA.size()==0) { - if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, - mapRS2cat, snpInfo, W, K, ns)==false) { - error=true; + if (!file_bfile.empty()) { + file_str = file_bfile + ".bed"; + if (mapRS2wA.size() == 0) { + if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, + mapRS2cat, snpInfo, W, K, ns) == false) { + error = true; } } else { - if (PlinkKin (file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, - mapRS2cat, snpInfo, W, A, ns)==false) { - error=true; + if (PlinkKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, + mapRS2cat, snpInfo, W, A, ns) == false) { + error = true; } } } else if (!file_geno.empty()) { - file_str=file_geno; - if (mapRS2wA.size()==0) { - if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, - mapRS2wK, mapRS2cat, snpInfo, W, K, ns)==false) { - error=true; + file_str = file_geno; + if (mapRS2wA.size() == 0) { + if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK, + mapRS2cat, snpInfo, W, K, ns) == false) { + error = true; } } else { - if (BimbamKin (file_str, d_pace, indicator_idv, indicator_snp, - mapRS2wA, mapRS2cat, snpInfo, W, A, ns)==false) { - error=true; + if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA, + mapRS2cat, snpInfo, W, A, ns) == false) { + error = true; } } - } else if (!file_mbfile.empty() ){ - if (mapRS2wA.size()==0) { - if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) { - error=true; + } else if (!file_mbfile.empty()) { + if (mapRS2wA.size() == 0) { + if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, + mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + error = true; } } else { - if (MFILEKin (1, file_mbfile, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) { - error=true; + if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp, + mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + error = true; } } } else if (!file_mgeno.empty()) { - if (mapRS2wA.size()==0) { - if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wK, mapRS2cat, msnpInfo, W, K, ns)==false) { - error=true; + if (mapRS2wA.size() == 0) { + if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, + mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) { + error = true; } } else { - if (MFILEKin (0, file_mgeno, d_pace, indicator_idv, mindicator_snp, - mapRS2wA, mapRS2cat, msnpInfo, W, A, ns)==false) { - error=true; + if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp, + mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) { + error = true; } } } - if (mapRS2wA.size()==0) { - gsl_matrix_memcpy (A, K); + if (mapRS2wA.size() == 0) { + gsl_matrix_memcpy(A, K); } // Center and scale every kinship matrix inside G. - for (size_t i=0; i<n_vc; i++) { - gsl_matrix_view Ksub=gsl_matrix_submatrix(K,0,i*ni_test,ni_test,ni_test); + for (size_t i = 0; i < n_vc; i++) { + gsl_matrix_view Ksub = + gsl_matrix_submatrix(K, 0, i * ni_test, ni_test, ni_test); CenterMatrix(&Ksub.matrix); ScaleMatrix(&Ksub.matrix); - gsl_matrix_view Asub=gsl_matrix_submatrix(A,0,i*ni_test,ni_test,ni_test); + gsl_matrix_view Asub = + gsl_matrix_submatrix(A, 0, i * ni_test, ni_test, ni_test); CenterMatrix(&Asub.matrix); ScaleMatrix(&Asub.matrix); } // Cased on G, compute S. - compAKtoS (A, K, W->size2, S); + compAKtoS(A, K, W->size2, S); // Compute Svar and update S with Jacknife. - JackknifeAKtoS (W, A, K, S, Svar); + JackknifeAKtoS(W, A, K, S, Svar); return; } -void PARAM::WriteVector (const gsl_vector *q, const gsl_vector *s, - const size_t n_total, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } +void PARAM::WriteVector(const gsl_vector *q, const gsl_vector *s, + const size_t n_total, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } - outfile.precision(10); + outfile.precision(10); - for (size_t i=0; i<q->size; ++i) { - outfile<<gsl_vector_get (q, i)<<endl; - } + for (size_t i = 0; i < q->size; ++i) { + outfile << gsl_vector_get(q, i) << endl; + } - for (size_t i=0; i<s->size; ++i) { - outfile<<gsl_vector_get (s, i)<<endl; - } + for (size_t i = 0; i < s->size; ++i) { + outfile << gsl_vector_get(s, i) << endl; + } - outfile<<n_total<<endl; + outfile << n_total << endl; - outfile.close(); - outfile.clear(); - return; + outfile.close(); + outfile.clear(); + return; } -void PARAM::WriteVar (const string suffix) { +void PARAM::WriteVar(const string suffix) { string file_str, rs; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt.gz"; - - ogzstream outfile (file_str.c_str(), ogzstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile.precision(10); - - if (mindicator_snp.size()!=0) { - for (size_t t=0; t<mindicator_snp.size(); t++) { - indicator_snp=mindicator_snp[t]; - for (size_t i=0; i<indicator_snp.size(); i++) { - if (indicator_snp[i]==0) {continue;} - rs=snpInfo[i].rs_number; - outfile<<rs<<endl; - } - } - } else { - for (size_t i=0; i<indicator_snp.size(); i++) { - if (indicator_snp[i]==0) {continue;} - rs=snpInfo[i].rs_number; - outfile<<rs<<endl; - } - } - - outfile.close(); - outfile.clear(); - return; -} + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt.gz"; + + ogzstream outfile(file_str.c_str(), ogzstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile.precision(10); + + if (mindicator_snp.size() != 0) { + for (size_t t = 0; t < mindicator_snp.size(); t++) { + indicator_snp = mindicator_snp[t]; + for (size_t i = 0; i < indicator_snp.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + rs = snpInfo[i].rs_number; + outfile << rs << endl; + } + } + } else { + for (size_t i = 0; i < indicator_snp.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + rs = snpInfo[i].rs_number; + outfile << rs << endl; + } + } -void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - outfile.precision(10); - - for (size_t i=0; i<matrix_U->size1; ++i) { - for (size_t j=0; j<matrix_U->size2; ++j) { - outfile<<gsl_matrix_get (matrix_U, i, j)<<"\t"; - } - outfile<<endl; - } - - outfile.close(); - outfile.clear(); - return; + outfile.close(); + outfile.clear(); + return; } -void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+=suffix; - file_str+=".txt"; +void PARAM::WriteMatrix(const gsl_matrix *matrix_U, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile.precision(10); - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } + for (size_t i = 0; i < matrix_U->size1; ++i) { + for (size_t j = 0; j < matrix_U->size2; ++j) { + outfile << gsl_matrix_get(matrix_U, i, j) << "\t"; + } + outfile << endl; + } + + outfile.close(); + outfile.clear(); + return; +} + +void PARAM::WriteVector(const gsl_vector *vector_D, const string suffix) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += suffix; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } - outfile.precision(10); + outfile.precision(10); - for (size_t i=0; i<vector_D->size; ++i) { - outfile<<gsl_vector_get (vector_D, i)<<endl; - } + for (size_t i = 0; i < vector_D->size; ++i) { + outfile << gsl_vector_get(vector_D, i) << endl; + } - outfile.close(); - outfile.clear(); - return; + outfile.close(); + outfile.clear(); + return; } -void PARAM::CheckCvt () { - if (indicator_cvt.size()==0) {return;} - - size_t ci_test=0; - - gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); - - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;} - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } - - size_t flag_ipt=0; - double v_min, v_max; - set<size_t> set_remove; - - // Check if any columns is an intercept. - for (size_t i=0; i<W->size2; i++) { - gsl_vector_view w_col=gsl_matrix_column (W, i); - gsl_vector_minmax (&w_col.vector, &v_min, &v_max); - if (v_min==v_max) {flag_ipt=1; set_remove.insert (i);} - } - - // Add an intecept term if needed. - if (n_cvt==set_remove.size()) { - indicator_cvt.clear(); - n_cvt=1; - } else if (flag_ipt==0) { - cout<<"no intecept term is found in the cvt file. "<< - "a column of 1s is added."<<endl; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) { - continue; - } - cvt[i].push_back(1.0); - } - - n_cvt++; - } else {} - - gsl_matrix_free(W); - - return; +void PARAM::CheckCvt() { + if (indicator_cvt.size() == 0) { + return; + } + + size_t ci_test = 0; + + gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt); + + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } + + size_t flag_ipt = 0; + double v_min, v_max; + set<size_t> set_remove; + + // Check if any columns is an intercept. + for (size_t i = 0; i < W->size2; i++) { + gsl_vector_view w_col = gsl_matrix_column(W, i); + gsl_vector_minmax(&w_col.vector, &v_min, &v_max); + if (v_min == v_max) { + flag_ipt = 1; + set_remove.insert(i); + } + } + + // Add an intecept term if needed. + if (n_cvt == set_remove.size()) { + indicator_cvt.clear(); + n_cvt = 1; + } else if (flag_ipt == 0) { + cout << "no intecept term is found in the cvt file. " + << "a column of 1s is added." << endl; + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + cvt[i].push_back(1.0); + } + + n_cvt++; + } else { + } + + gsl_matrix_free(W); + + return; } // Post-process phentoypes and covariates. -void PARAM::ProcessCvtPhen () { - - // Convert indicator_pheno to indicator_idv. - int k=1; - indicator_idv.clear(); - for (size_t i=0; i<indicator_pheno.size(); i++) { - k=1; - for (size_t j=0; j<indicator_pheno[i].size(); j++) { - if (indicator_pheno[i][j]==0) {k=0;} - } - indicator_idv.push_back(k); - } - - // Remove individuals with missing covariates. - if ((indicator_cvt).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_cvt[i]; - } - } - - // Remove individuals with missing gxe variables. - if ((indicator_gxe).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_gxe[i]; - } - } - - // Remove individuals with missing residual weights. - if ((indicator_weight).size()!=0) { - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_idv[i]*=indicator_weight[i]; - } - } - - // Obtain ni_test. - ni_test=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - ni_test++; - } - - // If subsample number is set, perform a random sub-sampling - // to determine the subsampled ids. - if (ni_subsample!=0) { - if (ni_test<ni_subsample) { - cout<<"error! number of subsamples is less than number of"<< - "analyzed individuals. "<<endl; - } else { - - // Set up random environment. - gsl_rng_env_setup(); - gsl_rng *gsl_r; - const gsl_rng_type * gslType; - gslType = gsl_rng_default; - if (randseed<0) { - time_t rawtime; - time (&rawtime); - tm * ptm = gmtime (&rawtime); - - randseed = (unsigned) - (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - // From ni_test, sub-sample ni_subsample. - vector<size_t> a, b; - for (size_t i=0; i<ni_subsample; i++) { - a.push_back(0); - } - for (size_t i=0; i<ni_test; i++) { - b.push_back(i); - } - - gsl_ran_choose (gsl_r, static_cast<void*>(&a[0]), ni_subsample, - static_cast<void*>(&b[0]),ni_test,sizeof (size_t)); - - // Re-set indicator_idv and ni_test. - int j=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { - if (indicator_idv[i]==0) {continue;} - if(find(a.begin(), a.end(), j) == a.end()) { - indicator_idv[i]=0; - } - j++; - } - ni_test=ni_subsample; - } - } - - // Check ni_test. - if (ni_test==0 && a_mode!=15) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<<endl; - return; - } - - // Check covariates to see if they are correlated with each - // other, and to see if the intercept term is included. - // After getting ni_test. - // Add or remove covariates. - if (indicator_cvt.size()!=0) { - CheckCvt(); - } else { - vector<double> cvt_row; - cvt_row.push_back(1); - - for (vector<int>::size_type i=0; - i<(indicator_idv).size(); - ++i) { - indicator_cvt.push_back(1); - cvt.push_back(cvt_row); - } - } - - return; +void PARAM::ProcessCvtPhen() { + + // Convert indicator_pheno to indicator_idv. + int k = 1; + indicator_idv.clear(); + for (size_t i = 0; i < indicator_pheno.size(); i++) { + k = 1; + for (size_t j = 0; j < indicator_pheno[i].size(); j++) { + if (indicator_pheno[i][j] == 0) { + k = 0; + } + } + indicator_idv.push_back(k); + } + + // Remove individuals with missing covariates. + if ((indicator_cvt).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_cvt[i]; + } + } + + // Remove individuals with missing gxe variables. + if ((indicator_gxe).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_gxe[i]; + } + } + + // Remove individuals with missing residual weights. + if ((indicator_weight).size() != 0) { + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_idv[i] *= indicator_weight[i]; + } + } + + // Obtain ni_test. + ni_test = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + ni_test++; + } + + // If subsample number is set, perform a random sub-sampling + // to determine the subsampled ids. + if (ni_subsample != 0) { + if (ni_test < ni_subsample) { + cout << "error! number of subsamples is less than number of" + << "analyzed individuals. " << endl; + } else { + + // Set up random environment. + gsl_rng_env_setup(); + gsl_rng *gsl_r; + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + // From ni_test, sub-sample ni_subsample. + vector<size_t> a, b; + for (size_t i = 0; i < ni_subsample; i++) { + a.push_back(0); + } + for (size_t i = 0; i < ni_test; i++) { + b.push_back(i); + } + + gsl_ran_choose(gsl_r, static_cast<void *>(&a[0]), ni_subsample, + static_cast<void *>(&b[0]), ni_test, sizeof(size_t)); + + // Re-set indicator_idv and ni_test. + int j = 0; + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + if (find(a.begin(), a.end(), j) == a.end()) { + indicator_idv[i] = 0; + } + j++; + } + ni_test = ni_subsample; + } + } + + // Check ni_test. + if (ni_test == 0 && a_mode != 15) { + error = true; + cout << "error! number of analyzed individuals equals 0. " << endl; + return; + } + + // Check covariates to see if they are correlated with each + // other, and to see if the intercept term is included. + // After getting ni_test. + // Add or remove covariates. + if (indicator_cvt.size() != 0) { + CheckCvt(); + } else { + vector<double> cvt_row; + cvt_row.push_back(1); + + for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) { + indicator_cvt.push_back(1); + cvt.push_back(cvt_row); + } + } + + return; } -void PARAM::CopyCvt (gsl_matrix *W) { - size_t ci_test=0; +void PARAM::CopyCvt(gsl_matrix *W) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;} - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) { + continue; + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } - return; + return; } -void PARAM::CopyGxe (gsl_vector *env) { - size_t ci_test=0; +void PARAM::CopyGxe(gsl_vector *env) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_gxe[i]==0) {continue;} - gsl_vector_set (env, ci_test, gxe[i]); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_gxe[i] == 0) { + continue; + } + gsl_vector_set(env, ci_test, gxe[i]); + ci_test++; + } - return; + return; } -void PARAM::CopyWeight (gsl_vector *w) { - size_t ci_test=0; +void PARAM::CopyWeight(gsl_vector *w) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0 || indicator_weight[i]==0) {continue;} - gsl_vector_set (w, ci_test, weight[i]); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0 || indicator_weight[i] == 0) { + continue; + } + gsl_vector_set(w, ci_test, weight[i]); + ci_test++; + } - return; + return; } // If flag=0, then use indicator_idv to load W and Y; // else, use indicator_cvt to load them. -void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag) { - size_t ci_test=0; +void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (flag==0) { - if (indicator_idv[i]==0) {continue;} - } else { - if (indicator_cvt[i]==0) {continue;} - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (flag == 0) { + if (indicator_idv[i] == 0) { + continue; + } + } else { + if (indicator_cvt[i] == 0) { + continue; + } + } - gsl_vector_set (y, ci_test, (pheno)[i][0]); + gsl_vector_set(y, ci_test, (pheno)[i][0]); - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - ci_test++; - } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + ci_test++; + } - return; + return; } // If flag=0, then use indicator_idv to load W and Y; // else, use indicator_cvt to load them. -void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag) { - size_t ci_test=0; - - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (flag==0) { - if (indicator_idv[i]==0) {continue;} - } else { - if (indicator_cvt[i]==0) {continue;} - } - - for (size_t j=0; j<n_ph; ++j) { - gsl_matrix_set (Y, ci_test, j, (pheno)[i][j]); - } - for (size_t j=0; j<n_cvt; ++j) { - gsl_matrix_set (W, ci_test, j, (cvt)[i][j]); - } - - ci_test++; - } - - return; +void PARAM::CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag) { + size_t ci_test = 0; + + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (flag == 0) { + if (indicator_idv[i] == 0) { + continue; + } + } else { + if (indicator_cvt[i] == 0) { + continue; + } + } + + for (size_t j = 0; j < n_ph; ++j) { + gsl_matrix_set(Y, ci_test, j, (pheno)[i][j]); + } + for (size_t j = 0; j < n_cvt; ++j) { + gsl_matrix_set(W, ci_test, j, (cvt)[i][j]); + } + + ci_test++; + } + + return; } -void PARAM::CopyRead (gsl_vector *log_N) { - size_t ci_test=0; +void PARAM::CopyRead(gsl_vector *log_N) { + size_t ci_test = 0; - for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) { - if (indicator_idv[i]==0) {continue;} - gsl_vector_set (log_N, ci_test, log(vec_read[i]) ); - ci_test++; - } + for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) { + if (indicator_idv[i] == 0) { + continue; + } + gsl_vector_set(log_N, ci_test, log(vec_read[i])); + ci_test++; + } - return; + return; } -void PARAM::ObtainWeight (const set<string> &setSnps_beta, - map<string, double> &mapRS2wK) { +void PARAM::ObtainWeight(const set<string> &setSnps_beta, + map<string, double> &mapRS2wK) { mapRS2wK.clear(); vector<double> wsum, wcount; - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { wsum.push_back(0.0); wcount.push_back(0.0); } string rs; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; - if ( (setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && - (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && - (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { - if (mapRS2wsnp.size()!=0) { - mapRS2wK[rs]=mapRS2wsnp[rs]; - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wsnp[rs]; - } else { - wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs]; - } - wcount[0]++; - } else { - mapRS2wK[rs]=1; - } + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; } + rs = snpInfo[i].rs_number; + if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) && + (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) && + (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) { + if (mapRS2wsnp.size() != 0) { + mapRS2wK[rs] = mapRS2wsnp[rs]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wsnp[rs]; + } else { + wsum[mapRS2cat[rs]] += mapRS2wsnp[rs]; + } + wcount[0]++; + } else { + mapRS2wK[rs] = 1; + } + } } } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; - indicator_snp=mindicator_snp[t]; - - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; - if ((setSnps_beta.size()==0 || setSnps_beta.count(rs)!=0) && - (mapRS2wsnp.size()==0 || mapRS2wsnp.count(rs)!=0) && - (mapRS2wcat.size()==0 || mapRS2wcat.count(rs)!=0) && - (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) { - if (mapRS2wsnp.size()!=0) { - mapRS2wK[rs]=mapRS2wsnp[rs]; - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wsnp[rs]; - } else { - wsum[mapRS2cat[rs]]+=mapRS2wsnp[rs]; - } - wcount[0]++; - } else { - mapRS2wK[rs]=1; - } - } - } - } - } - - if (mapRS2wsnp.size()!=0) { - for (size_t i=0; i<n_vc; i++) { - wsum[i]/=wcount[i]; - } - - for (map<string, double>::iterator it=mapRS2wK.begin(); - it!=mapRS2wK.end(); - ++it) { - if (mapRS2cat.size()==0) { - it->second/=wsum[0]; + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; + indicator_snp = mindicator_snp[t]; + + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } + + rs = snpInfo[i].rs_number; + if ((setSnps_beta.size() == 0 || setSnps_beta.count(rs) != 0) && + (mapRS2wsnp.size() == 0 || mapRS2wsnp.count(rs) != 0) && + (mapRS2wcat.size() == 0 || mapRS2wcat.count(rs) != 0) && + (mapRS2cat.size() == 0 || mapRS2cat.count(rs) != 0)) { + if (mapRS2wsnp.size() != 0) { + mapRS2wK[rs] = mapRS2wsnp[rs]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wsnp[rs]; + } else { + wsum[mapRS2cat[rs]] += mapRS2wsnp[rs]; + } + wcount[0]++; + } else { + mapRS2wK[rs] = 1; + } + } + } + } + } + + if (mapRS2wsnp.size() != 0) { + for (size_t i = 0; i < n_vc; i++) { + wsum[i] /= wcount[i]; + } + + for (map<string, double>::iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + if (mapRS2cat.size() == 0) { + it->second /= wsum[0]; } else { - it->second/=wsum[mapRS2cat[it->first]]; + it->second /= wsum[mapRS2cat[it->first]]; } } } @@ -2201,54 +2284,52 @@ void PARAM::ObtainWeight (const set<string> &setSnps_beta, // If pve_flag=0 then do not change pve; pve_flag==1, then change pve // to 0 if pve < 0 and pve to 1 if pve > 1. -void PARAM::UpdateWeight (const size_t pve_flag, - const map<string, double> &mapRS2wK, - const size_t ni_test, const gsl_vector *ns, - map<string, double> &mapRS2wA) { +void PARAM::UpdateWeight(const size_t pve_flag, + const map<string, double> &mapRS2wK, + const size_t ni_test, const gsl_vector *ns, + map<string, double> &mapRS2wA) { double d; vector<double> wsum, wcount; - for (size_t i=0; i<n_vc; i++) { + for (size_t i = 0; i < n_vc; i++) { wsum.push_back(0.0); wcount.push_back(0.0); } - for (map<string, double>::const_iterator it=mapRS2wK.begin(); - it!=mapRS2wK.end(); - ++it) { - d=1; - for (size_t i=0; i<n_vc; i++) { - if (v_pve[i]>=1 && pve_flag==1) { - d+=(double)ni_test/gsl_vector_get(ns, i)*mapRS2wcat[it->first][i]; - } else if (v_pve[i]<=0 && pve_flag==1) { - d+=0; + for (map<string, double>::const_iterator it = mapRS2wK.begin(); + it != mapRS2wK.end(); ++it) { + d = 1; + for (size_t i = 0; i < n_vc; i++) { + if (v_pve[i] >= 1 && pve_flag == 1) { + d += (double)ni_test / gsl_vector_get(ns, i) * mapRS2wcat[it->first][i]; + } else if (v_pve[i] <= 0 && pve_flag == 1) { + d += 0; } else { - d+=(double)ni_test/gsl_vector_get(ns, i)* - mapRS2wcat[it->first][i]*v_pve[i]; + d += (double)ni_test / gsl_vector_get(ns, i) * + mapRS2wcat[it->first][i] * v_pve[i]; } } - mapRS2wA[it->first]=1/(d*d); + mapRS2wA[it->first] = 1 / (d * d); - if (mapRS2cat.size()==0) { - wsum[0]+=mapRS2wA[it->first]; + if (mapRS2cat.size() == 0) { + wsum[0] += mapRS2wA[it->first]; wcount[0]++; } else { - wsum[mapRS2cat[it->first]]+=mapRS2wA[it->first]; + wsum[mapRS2cat[it->first]] += mapRS2wA[it->first]; wcount[mapRS2cat[it->first]]++; } } - for (size_t i=0; i<n_vc; i++) { - wsum[i]/=wcount[i]; + for (size_t i = 0; i < n_vc; i++) { + wsum[i] /= wcount[i]; } - for (map<string, double>::iterator it=mapRS2wA.begin(); - it!=mapRS2wA.end(); - ++it) { - if (mapRS2cat.size()==0) { - it->second/=wsum[0]; + for (map<string, double>::iterator it = mapRS2wA.begin(); + it != mapRS2wA.end(); ++it) { + if (mapRS2cat.size() == 0) { + it->second /= wsum[0]; } else { - it->second/=wsum[mapRS2cat[it->first]]; + it->second /= wsum[mapRS2cat[it->first]]; } } return; @@ -2256,61 +2337,64 @@ void PARAM::UpdateWeight (const size_t pve_flag, // This function updates indicator_snp, and save z-scores and other // values into vectors. -void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA, - const map<string, string> &mapRS2A1, - const map<string, double> &mapRS2z, - gsl_vector *w, gsl_vector *z, - vector<size_t> &vec_cat) { - gsl_vector_set_zero (w); - gsl_vector_set_zero (z); +void PARAM::UpdateSNPnZ(const map<string, double> &mapRS2wA, + const map<string, string> &mapRS2A1, + const map<string, double> &mapRS2z, gsl_vector *w, + gsl_vector *z, vector<size_t> &vec_cat) { + gsl_vector_set_zero(w); + gsl_vector_set_zero(z); vec_cat.clear(); string rs, a1; - size_t c=0; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} - - rs=snpInfo[i].rs_number; - a1=snpInfo[i].a_minor; - - if (mapRS2wA.count(rs)!=0) { - if (a1==mapRS2A1.at(rs)) { - gsl_vector_set (z, c, mapRS2z.at(rs) ); - } else { - gsl_vector_set (z, c, -1*mapRS2z.at(rs) ); - } - vec_cat.push_back(mapRS2cat.at(rs) ); - gsl_vector_set (w, c, mapRS2wA.at(rs) ); - - c++; - } else { - indicator_snp[i]=0; + size_t c = 0; + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; } - } - } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; - for (size_t i=0; i<snpInfo.size(); i++) { - if (mindicator_snp[t][i]==0) {continue;} + rs = snpInfo[i].rs_number; + a1 = snpInfo[i].a_minor; - rs=snpInfo[i].rs_number; - a1=snpInfo[i].a_minor; + if (mapRS2wA.count(rs) != 0) { + if (a1 == mapRS2A1.at(rs)) { + gsl_vector_set(z, c, mapRS2z.at(rs)); + } else { + gsl_vector_set(z, c, -1 * mapRS2z.at(rs)); + } + vec_cat.push_back(mapRS2cat.at(rs)); + gsl_vector_set(w, c, mapRS2wA.at(rs)); - if (mapRS2wA.count(rs)!=0) { - if (a1==mapRS2A1.at(rs)) { - gsl_vector_set (z, c, mapRS2z.at(rs) ); - } else { - gsl_vector_set (z, c, -1*mapRS2z.at(rs) ); - } - vec_cat.push_back(mapRS2cat.at(rs) ); - gsl_vector_set (w, c, mapRS2wA.at(rs) ); + c++; + } else { + indicator_snp[i] = 0; + } + } + } else { + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; + + for (size_t i = 0; i < snpInfo.size(); i++) { + if (mindicator_snp[t][i] == 0) { + continue; + } + + rs = snpInfo[i].rs_number; + a1 = snpInfo[i].a_minor; + + if (mapRS2wA.count(rs) != 0) { + if (a1 == mapRS2A1.at(rs)) { + gsl_vector_set(z, c, mapRS2z.at(rs)); + } else { + gsl_vector_set(z, c, -1 * mapRS2z.at(rs)); + } + vec_cat.push_back(mapRS2cat.at(rs)); + gsl_vector_set(w, c, mapRS2wA.at(rs)); - c++; - } else { - mindicator_snp[t][i]=0; - } + c++; + } else { + mindicator_snp[t][i] = 0; + } } } } @@ -2320,30 +2404,34 @@ void PARAM::UpdateSNPnZ (const map<string, double> &mapRS2wA, // This function updates indicator_snp, and save z-scores and other // values into vectors. -void PARAM::UpdateSNP (const map<string, double> &mapRS2wA) { +void PARAM::UpdateSNP(const map<string, double> &mapRS2wA) { string rs; - if (msnpInfo.size()==0) { - for (size_t i=0; i<snpInfo.size(); i++) { - if (indicator_snp[i]==0) {continue;} + if (msnpInfo.size() == 0) { + for (size_t i = 0; i < snpInfo.size(); i++) { + if (indicator_snp[i] == 0) { + continue; + } - rs=snpInfo[i].rs_number; + rs = snpInfo[i].rs_number; - if (mapRS2wA.count(rs)==0) { - indicator_snp[i]=0; + if (mapRS2wA.count(rs) == 0) { + indicator_snp[i] = 0; } } } else { - for (size_t t=0; t<msnpInfo.size(); t++) { - snpInfo=msnpInfo[t]; + for (size_t t = 0; t < msnpInfo.size(); t++) { + snpInfo = msnpInfo[t]; - for (size_t i=0; i<mindicator_snp[t].size(); i++) { - if (mindicator_snp[t][i]==0) {continue;} + for (size_t i = 0; i < mindicator_snp[t].size(); i++) { + if (mindicator_snp[t][i] == 0) { + continue; + } - rs=snpInfo[i].rs_number; + rs = snpInfo[i].rs_number; - if (mapRS2wA.count(rs)==0) { - mindicator_snp[t][i]=0; - } + if (mapRS2wA.count(rs) == 0) { + mindicator_snp[t][i] = 0; + } } } } |