aboutsummaryrefslogtreecommitdiff
path: root/src/io.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/io.cpp')
-rw-r--r--src/io.cpp753
1 files changed, 0 insertions, 753 deletions
diff --git a/src/io.cpp b/src/io.cpp
index 6be01fd..1d75207 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -2274,759 +2274,6 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
return true;
}
-// WJA Added
-// Read Oxford sample file.
-bool ReadFile_sample(const string &file_sample,
- vector<vector<int>> &indicator_pheno,
- vector<vector<double>> &pheno,
- const vector<size_t> &p_column, vector<int> &indicator_cvt,
- vector<vector<double>> &cvt, size_t &n_cvt) {
- debug_msg("entered");
- indicator_pheno.clear();
- pheno.clear();
- indicator_cvt.clear();
-
- igzstream infile(file_sample.c_str(), igzstream::in);
-
- if (!infile) {
- cout << "error! fail to open sample file: " << file_sample << endl;
- return false;
- }
-
- string line;
- char *ch_ptr;
-
- string id;
- double p, d;
-
- vector<double> pheno_row;
- vector<int> ind_pheno_row;
- int flag_na = 0;
-
- size_t num_cols = 0;
- size_t num_p_in_file = 0;
- size_t num_cvt_in_file = 0;
-
- map<size_t, size_t> mapP2c;
- for (size_t i = 0; i < p_column.size(); i++) {
- mapP2c[p_column[i]] = i;
- pheno_row.push_back(-9);
- ind_pheno_row.push_back(0);
- }
-
- // Read header line1.
- if (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
- if (strcmp(ch_ptr, "ID_1") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "ID_2") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "missing") != 0) {
- return false;
- }
- while (ch_ptr != NULL) {
- num_cols++;
- ch_ptr = strtok(NULL, " \t");
- }
- num_cols--;
- }
-
- vector<map<uint32_t, size_t>> cvt_factor_levels;
-
- char col_type[num_cols];
-
- // Read header line2.
- if (!safeGetline(infile, line).eof()) {
- ch_ptr = strtok((char *)line.c_str(), " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- ch_ptr = strtok(NULL, " \t");
- if (strcmp(ch_ptr, "0") != 0) {
- return false;
- }
- size_t it = 0;
- ch_ptr = strtok(NULL, " \t");
- if (ch_ptr != NULL)
- while (ch_ptr != NULL) {
- col_type[it++] = ch_ptr[0];
- if (ch_ptr[0] == 'D') {
- cvt_factor_levels.push_back(map<uint32_t, size_t>());
- num_cvt_in_file++;
- }
- if (ch_ptr[0] == 'C') {
- num_cvt_in_file++;
- }
- if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) {
- num_p_in_file++;
- }
- ch_ptr = strtok(NULL, " \t");
- }
- }
-
- while (!safeGetline(infile, line).eof()) {
-
- ch_ptr = strtok((char *)line.c_str(), " \t");
-
- for (int it = 0; it < 3; it++) {
- ch_ptr = strtok(NULL, " \t");
- }
-
- size_t i = 0;
- size_t p_i = 0;
- size_t fac_cvt_i = 0;
-
- while (i < num_cols) {
-
- if ((col_type[i] == 'P') || (col_type[i] == 'B')) {
- if (mapP2c.count(p_i + 1) != 0) {
- if (strcmp(ch_ptr, "NA") == 0) {
- ind_pheno_row[mapP2c[p_i + 1]] = 0;
- pheno_row[mapP2c[p_i + 1]] = -9;
- } else {
- p = atof(ch_ptr);
- ind_pheno_row[mapP2c[p_i + 1]] = 1;
- pheno_row[mapP2c[p_i + 1]] = p;
- }
- }
- p_i++;
- }
- if (col_type[i] == 'D') {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
- // IS INTEGRAL i.e for atoi error.
- if (strcmp(ch_ptr, "NA") != 0) {
- uint32_t level = atoi(ch_ptr);
- if (cvt_factor_levels[fac_cvt_i].count(level) == 0) {
- cvt_factor_levels[fac_cvt_i][level] =
- cvt_factor_levels[fac_cvt_i].size();
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr = strtok(NULL, " \t");
- i++;
- }
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
- }
-
- // Close and reopen the file.
- infile.close();
- infile.clear();
-
- if (num_cvt_in_file > 0) {
- igzstream infile2(file_sample.c_str(), igzstream::in);
-
- if (!infile2) {
- cout << "error! fail to open sample file: " << file_sample << endl;
- return false;
- }
-
- // Skip header.
- safeGetline(infile2, line);
- safeGetline(infile2, line);
-
- // Pull in the covariates now we now the number of
- // factor levels.
- while (!safeGetline(infile2, line).eof()) {
-
- vector<double> v_d;
- flag_na = 0;
- ch_ptr = strtok((char *)line.c_str(), " \t");
-
- for (int it = 0; it < 3; it++) {
- ch_ptr = strtok(NULL, " \t");
- }
-
- size_t i = 0;
- size_t fac_cvt_i = 0;
- size_t num_fac_levels;
- while (i < num_cols) {
-
- if (col_type[i] == 'C') {
- if (strcmp(ch_ptr, "NA") == 0) {
- flag_na = 1;
- d = -9;
- } else {
- d = atof(ch_ptr);
- }
-
- v_d.push_back(d);
- }
-
- if (col_type[i] == 'D') {
-
- // NOTE THIS DOES NOT CHECK TO BE SURE
- // LEVEL IS INTEGRAL i.e for atoi error.
- num_fac_levels = cvt_factor_levels[fac_cvt_i].size();
- if (num_fac_levels > 1) {
- if (strcmp(ch_ptr, "NA") == 0) {
- flag_na = 1;
- for (size_t it = 0; it < num_fac_levels - 1; it++) {
- v_d.push_back(-9);
- }
- } else {
- uint32_t level = atoi(ch_ptr);
- for (size_t it = 0; it < num_fac_levels - 1; it++) {
- cvt_factor_levels[fac_cvt_i][level] == it + 1
- ? v_d.push_back(1.0)
- : v_d.push_back(0.0);
- }
- }
- }
- fac_cvt_i++;
- }
-
- ch_ptr = strtok(NULL, " \t");
- i++;
- }
-
- if (flag_na == 0) {
- indicator_cvt.push_back(1);
- } else {
- indicator_cvt.push_back(0);
- }
- cvt.push_back(v_d);
- }
-
- if (indicator_cvt.empty()) {
- n_cvt = 0;
- } else {
- flag_na = 0;
- for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
- if (indicator_cvt[i] == 0) {
- continue;
- }
-
- if (flag_na == 0) {
- flag_na = 1;
- n_cvt = cvt[i].size();
- }
- if (flag_na != 0 && n_cvt != cvt[i].size()) {
- cout << "error! number of covariates in row " << i
- << " do not match other rows." << endl;
- return false;
- }
- }
- }
-
- infile2.close();
- infile2.clear();
- }
- return true;
-}
-
-// WJA Added.
-// Read bgen file, the first time.
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
- const gsl_matrix *W, vector<int> &indicator_idv,
- vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
- const double &maf_level, const double &miss_level,
- const double &hwe_level, const double &r2_level,
- size_t &ns_test) {
-
- debug_msg("entered");
- indicator_snp.clear();
-
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return false;
- }
-
- gsl_vector *genotype = gsl_vector_alloc(W->size1);
- gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wtx = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
- gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
-
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
- LUDecomp(WtW, pmt, &sig);
- LUInvert(WtW, pmt, WtWi);
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
- bool LongIds = bgen_flags & 0x4;
-
- if (!LongIds) {
- return false;
- }
-
- infile.ignore(bgen_snp_block_offset);
-
- ns_test = 0;
-
- size_t ns_total = static_cast<size_t>(bgen_nsnps);
-
- snpInfo.clear();
- string rs;
- long int b_pos;
- string chr;
- string major;
- string minor;
- string id;
-
- double v_x, v_w;
- int c_idv = 0;
-
- double maf, geno, geno_old;
- size_t n_miss;
- size_t n_0, n_1, n_2;
- int flag_poly;
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- // Total number of samples in phenotype file.
- size_t ni_total = indicator_idv.size();
-
- // Number of samples to use in test.
- size_t ni_test = 0;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
-
- for (size_t i = 0; i < ni_total; ++i) {
- ni_test += indicator_idv[i];
- }
-
- for (size_t t = 0; t < ns_total; ++t) {
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- // Should we switch according to MAF?
- minor = bgen_B_allele;
- major = bgen_A_allele;
- b_pos = static_cast<long int>(bgen_SNP_pos);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
- SNPINFO sInfo = {
- "-9", rs, -9, -9, minor, major, static_cast<size_t>(-9),
- -9, (long int)-9};
-
- snpInfo.push_back(sInfo);
- indicator_snp.push_back(0);
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- maf = 0;
- n_miss = 0;
- flag_poly = 0;
- geno_old = -9;
- n_0 = 0;
- n_1 = 0;
- n_2 = 0;
- c_idv = 0;
- gsl_vector_set_zero(genotype_miss);
- for (size_t i = 0; i < bgen_N; ++i) {
-
- // CHECK this set correctly!
- if (indicator_idv[i] == 0) {
- continue;
- }
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-
- // CHECK 0.1 OK.
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(genotype_miss, c_idv, 1);
- n_miss++;
- c_idv++;
- continue;
- }
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
- if (geno >= 0 && geno <= 0.5) {
- n_0++;
- }
- if (geno > 0.5 && geno < 1.5) {
- n_1++;
- }
- if (geno >= 1.5 && geno <= 2.0) {
- n_2++;
- }
-
- gsl_vector_set(genotype, c_idv, geno);
-
- // CHECK WHAT THIS DOES.
- if (flag_poly == 0) {
- geno_old = geno;
- flag_poly = 2;
- }
- if (flag_poly == 2 && geno != geno_old) {
- flag_poly = 1;
- }
-
- maf += geno;
-
- c_idv++;
- }
-
- maf /= 2.0 * static_cast<double>(ni_test - n_miss);
-
- SNPINFO sInfo = {chr, rs, -9, b_pos,
- minor, major, n_miss, (double)n_miss / (double)ni_test,
- maf};
- snpInfo.push_back(sInfo);
-
- if ((double)n_miss / (double)ni_test > miss_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (flag_poly != 1) {
- indicator_snp.push_back(0);
- continue;
- }
-
- if (hwe_level != 0 && maf_level != -1) {
- if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
- indicator_snp.push_back(0);
- continue;
- }
- }
-
- // Filter SNP if it is correlated with W
- // unless W has only one column, of 1s.
- for (size_t i = 0; i < genotype->size; ++i) {
- if (gsl_vector_get(genotype_miss, i) == 1) {
- geno = maf * 2.0;
- gsl_vector_set(genotype, i, geno);
- }
- }
-
- gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
- gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
- gsl_blas_ddot(genotype, genotype, &v_x);
- gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
-
- if (W->size2 != 1 && v_w / v_x >= r2_level) {
- indicator_snp.push_back(0);
- continue;
- }
-
- indicator_snp.push_back(1);
- ns_test++;
- }
-
- return true;
-}
-
-// Read oxford genotype file and calculate kinship matrix.
-bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
- const int k_mode, const int display_pace, gsl_matrix *matrix_kin) {
- debug_msg("entered");
- string file_bgen = file_oxford;
- ifstream infile(file_bgen.c_str(), ios::binary);
- if (!infile) {
- cout << "error reading bgen file:" << file_bgen << endl;
- return false;
- }
-
- // Read in header.
- uint32_t bgen_snp_block_offset;
- uint32_t bgen_header_length;
- uint32_t bgen_nsamples;
- uint32_t bgen_nsnps;
- uint32_t bgen_flags;
- infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
- infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
- bgen_snp_block_offset -= 4;
- infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
- bgen_snp_block_offset -= 4;
- infile.ignore(4 + bgen_header_length - 20);
- bgen_snp_block_offset -= 4 + bgen_header_length - 20;
- infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
- bgen_snp_block_offset -= 4;
- bool CompressedSNPBlocks = bgen_flags & 0x1;
-
- infile.ignore(bgen_snp_block_offset);
-
- double bgen_geno_prob_AA, bgen_geno_prob_AB;
- double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
- uint32_t bgen_N;
- uint16_t bgen_LS;
- uint16_t bgen_LR;
- uint16_t bgen_LC;
- uint32_t bgen_SNP_pos;
- uint32_t bgen_LA;
- std::string bgen_A_allele;
- uint32_t bgen_LB;
- std::string bgen_B_allele;
- uint32_t bgen_P;
- size_t unzipped_data_size;
- string id;
- string rs;
- string chr;
- double genotype;
-
- size_t n_miss;
- double d, geno_mean, geno_var;
-
- size_t ni_total = matrix_kin->size1;
- gsl_vector *geno = gsl_vector_alloc(ni_total);
- gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
-
- size_t ns_test = 0;
- for (size_t t = 0; t < indicator_snp.size(); ++t) {
-
- if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
- ProgressBar("Reading bgen SNPs ", t, indicator_snp.size() - 1);
- }
-
- id.clear();
- rs.clear();
- chr.clear();
- bgen_A_allele.clear();
- bgen_B_allele.clear();
-
- infile.read(reinterpret_cast<char *>(&bgen_N), 4);
- infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
- id.resize(bgen_LS);
- infile.read(&id[0], bgen_LS);
-
- infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
- rs.resize(bgen_LR);
- infile.read(&rs[0], bgen_LR);
-
- infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
- chr.resize(bgen_LC);
- infile.read(&chr[0], bgen_LC);
-
- infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
- infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
- bgen_A_allele.resize(bgen_LA);
- infile.read(&bgen_A_allele[0], bgen_LA);
-
- infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
- bgen_B_allele.resize(bgen_LB);
- infile.read(&bgen_B_allele[0], bgen_LB);
-
- uint16_t unzipped_data[3 * bgen_N];
-
- if (indicator_snp[t] == 0) {
- if (CompressedSNPBlocks)
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- else
- bgen_P = 6 * bgen_N;
-
- infile.ignore(static_cast<size_t>(bgen_P));
-
- continue;
- }
-
- if (CompressedSNPBlocks) {
- infile.read(reinterpret_cast<char *>(&bgen_P), 4);
- uint8_t zipped_data[bgen_P];
-
- unzipped_data_size = 6 * bgen_N;
-
- infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
- int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
- reinterpret_cast<uLongf *>(&unzipped_data_size),
- reinterpret_cast<Bytef *>(zipped_data),
- static_cast<uLong>(bgen_P));
- assert(result == Z_OK);
-
- } else {
-
- bgen_P = 6 * bgen_N;
- infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
- }
-
- geno_mean = 0.0;
- n_miss = 0;
- geno_var = 0.0;
- gsl_vector_set_all(geno_miss, 0);
-
- for (size_t i = 0; i < bgen_N; ++i) {
-
- bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
- bgen_geno_prob_AB =
- static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
- bgen_geno_prob_BB =
- static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
- // WJA
- bgen_geno_prob_non_miss =
- bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
- if (bgen_geno_prob_non_miss < 0.9) {
- gsl_vector_set(geno_miss, i, 0.0);
- n_miss++;
- } else {
-
- bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
- bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
- bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
- genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
- gsl_vector_set(geno, i, genotype);
- gsl_vector_set(geno_miss, i, 1.0);
- geno_mean += genotype;
- geno_var += genotype * genotype;
- }
- }
-
- geno_mean /= (double)(ni_total - n_miss);
- geno_var += geno_mean * geno_mean * (double)n_miss;
- geno_var /= (double)ni_total;
- geno_var -= geno_mean * geno_mean;
-
- for (size_t i = 0; i < ni_total; ++i) {
- if (gsl_vector_get(geno_miss, i) == 0) {
- gsl_vector_set(geno, i, geno_mean);
- }
- }
-
- gsl_vector_add_constant(geno, -1.0 * geno_mean);
-
- if (geno_var != 0) {
- if (k_mode == 1) {
- gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin);
- } else if (k_mode == 2) {
- gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin);
- } else {
- cout << "Unknown kinship mode." << endl;
- }
- }
-
- ns_test++;
- }
- cout << endl;
-
- gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
-
- for (size_t i = 0; i < ni_total; ++i) {
- for (size_t j = 0; j < i; ++j) {
- d = gsl_matrix_get(matrix_kin, j, i);
- gsl_matrix_set(matrix_kin, i, j, d);
- }
- }
-
- gsl_vector_free(geno);
- gsl_vector_free(geno_miss);
-
- infile.close();
- infile.clear();
-
- return true;
-}
-
// Read header to determine which column contains which item.
bool ReadHeader_io(const string &line, HEADER &header) {
debug_msg("entered");