diff options
author | Pjotr Prins | 2017-08-02 08:46:58 +0000 |
---|---|---|
committer | Pjotr Prins | 2017-08-02 08:46:58 +0000 |
commit | 3935ba39d30666dd7d4a831155631847c77b70c4 (patch) | |
tree | c45fc682b473618a219e324d5c85b5e1f9361d0c /src/prdt.cpp | |
parent | 84360c191f418bf8682b35e0c8235fcc3bd19a06 (diff) | |
download | pangemma-3935ba39d30666dd7d4a831155631847c77b70c4.tar.gz |
Massive patch using LLVM coding style. It was generated with:
clang-format -style=LLVM -i *.cpp *.h
Please set your editor to replace tabs with spaces and use indentation of 2 spaces.
Diffstat (limited to 'src/prdt.cpp')
-rw-r--r-- | src/prdt.cpp | 988 |
1 files changed, 499 insertions, 489 deletions
diff --git a/src/prdt.cpp b/src/prdt.cpp index b29d150..3e7c004 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -16,527 +16,537 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <iostream> -#include <sstream> +#include "gsl/gsl_blas.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" +#include <bitset> +#include <cmath> #include <fstream> -#include <string> #include <iomanip> -#include <bitset> -#include <vector> +#include <iostream> +#include <sstream> #include <stdio.h> #include <stdlib.h> -#include <cmath> -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "gsl/gsl_blas.h" +#include <string> +#include <vector> -#include "io.h" -#include "lapack.h" #include "gzstream.h" #include "io.h" -#include "prdt.h" +#include "io.h" +#include "lapack.h" #include "mathfunc.h" +#include "prdt.h" using namespace std; -void PRDT::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; +void PRDT::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; - indicator_pheno=cPar.indicator_pheno; - indicator_cvt=cPar.indicator_cvt; - indicator_idv=cPar.indicator_idv; + indicator_pheno = cPar.indicator_pheno; + indicator_cvt = cPar.indicator_cvt; + indicator_idv = cPar.indicator_idv; - snpInfo=cPar.snpInfo; - mapRS2est=cPar.mapRS2est; + snpInfo = cPar.snpInfo; + mapRS2est = cPar.mapRS2est; - time_eigen=0; + time_eigen = 0; - n_ph=cPar.n_ph; - np_obs=cPar.np_obs; - np_miss=cPar.np_miss; - ns_total=cPar.ns_total; - ns_test=0; + n_ph = cPar.n_ph; + np_obs = cPar.np_obs; + np_miss = cPar.np_miss; + ns_total = cPar.ns_total; + ns_test = 0; - return; + return; } -void PRDT::CopyToParam (PARAM &cPar) { - cPar.ns_test=ns_test; - cPar.time_eigen=time_eigen; +void PRDT::CopyToParam(PARAM &cPar) { + cPar.ns_test = ns_test; + cPar.time_eigen = time_eigen; - return; + return; } -void PRDT::WriteFiles (gsl_vector *y_prdt) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+="."; - file_str+="prdt"; - file_str+=".txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - size_t ci_test=0; - for (size_t i=0; i<indicator_idv.size(); i++) { - if (indicator_idv[i]==1) { - outfile<<"NA"<<endl; - } else { - outfile<<gsl_vector_get (y_prdt, ci_test)<<endl; - ci_test++; - } - } - - outfile.close(); - outfile.clear(); - return; +void PRDT::WriteFiles(gsl_vector *y_prdt) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += "."; + file_str += "prdt"; + file_str += ".txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + size_t ci_test = 0; + for (size_t i = 0; i < indicator_idv.size(); i++) { + if (indicator_idv[i] == 1) { + outfile << "NA" << endl; + } else { + outfile << gsl_vector_get(y_prdt, ci_test) << endl; + ci_test++; + } + } + + outfile.close(); + outfile.clear(); + return; } -void PRDT::WriteFiles (gsl_matrix *Y_full) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".prdt.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; - return; - } - - size_t ci_test=0; - for (size_t i=0; i<indicator_cvt.size(); i++) { - if (indicator_cvt[i]==0) { - outfile<<"NA"<<endl; - } else { - for (size_t j=0; j<Y_full->size2; j++) { - outfile << gsl_matrix_get(Y_full,ci_test,j) << - "\t"; - } - outfile<<endl; - ci_test++; - } - } - - outfile.close(); - outfile.clear(); - return; +void PRDT::WriteFiles(gsl_matrix *Y_full) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".prdt.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + size_t ci_test = 0; + for (size_t i = 0; i < indicator_cvt.size(); i++) { + if (indicator_cvt[i] == 0) { + outfile << "NA" << endl; + } else { + for (size_t j = 0; j < Y_full->size2; j++) { + outfile << gsl_matrix_get(Y_full, ci_test, j) << "\t"; + } + outfile << endl; + ci_test++; + } + } + + outfile.close(); + outfile.clear(); + return; } -void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { - size_t ni_test=u_hat->size, ni_total=G->size1; - - gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test); - gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test); - gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test); - gsl_vector *eval=gsl_vector_alloc (ni_test); - gsl_vector *Utu=gsl_vector_alloc (ni_test); - gsl_vector *w=gsl_vector_alloc (ni_total); - gsl_permutation *pmt=gsl_permutation_alloc (ni_test); - - //center matrix G based on indicator_idv - for (size_t i=0; i<ni_total; i++) { - gsl_vector_set(w, i, indicator_idv[i]); - } - CenterMatrix(G, w); - - //obtain Koo and Kfo - size_t o_i=0, o_j=0; - double d; - for (size_t i=0; i<indicator_idv.size(); i++) { - o_j=0; - for (size_t j=0; j<indicator_idv.size(); j++) { - d=gsl_matrix_get(G, i, j); - if (indicator_idv[i]==1 && indicator_idv[j]==1) { - gsl_matrix_set(Goo, o_i, o_j, d); - } - if (indicator_idv[i]==0 && indicator_idv[j]==1) { - gsl_matrix_set(Gfo, i-o_i, o_j, d); - } - if (indicator_idv[j]==1) {o_j++;} - } - if (indicator_idv[i]==1) {o_i++;} - } - - //matrix operations to get u_prdt - cout<<"Start Eigen-Decomposition..."<<endl; - clock_t time_start=clock(); - EigenDecomp (Goo, U, eval, 0); - for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get(eval,i)<1e-10) { - gsl_vector_set(eval, i, 0); - } - } - - time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu); - for (size_t i=0; i<eval->size; i++) { - d=gsl_vector_get(eval, i); - if (d!=0) { - d=gsl_vector_get(Utu, i)/d; - gsl_vector_set(Utu, i, d); - } - } - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval); - gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); - - // Free matrices. - gsl_matrix_free(Goo); - gsl_matrix_free(Gfo); - gsl_matrix_free(U); - gsl_vector_free(eval); - gsl_vector_free(Utu); - gsl_vector_free(w); - gsl_permutation_free(pmt); - - return; +void PRDT::AddBV(gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { + size_t ni_test = u_hat->size, ni_total = G->size1; + + gsl_matrix *Goo = gsl_matrix_alloc(ni_test, ni_test); + gsl_matrix *Gfo = gsl_matrix_alloc(ni_total - ni_test, ni_test); + gsl_matrix *U = gsl_matrix_alloc(ni_test, ni_test); + gsl_vector *eval = gsl_vector_alloc(ni_test); + gsl_vector *Utu = gsl_vector_alloc(ni_test); + gsl_vector *w = gsl_vector_alloc(ni_total); + gsl_permutation *pmt = gsl_permutation_alloc(ni_test); + + // center matrix G based on indicator_idv + for (size_t i = 0; i < ni_total; i++) { + gsl_vector_set(w, i, indicator_idv[i]); + } + CenterMatrix(G, w); + + // obtain Koo and Kfo + size_t o_i = 0, o_j = 0; + double d; + for (size_t i = 0; i < indicator_idv.size(); i++) { + o_j = 0; + for (size_t j = 0; j < indicator_idv.size(); j++) { + d = gsl_matrix_get(G, i, j); + if (indicator_idv[i] == 1 && indicator_idv[j] == 1) { + gsl_matrix_set(Goo, o_i, o_j, d); + } + if (indicator_idv[i] == 0 && indicator_idv[j] == 1) { + gsl_matrix_set(Gfo, i - o_i, o_j, d); + } + if (indicator_idv[j] == 1) { + o_j++; + } + } + if (indicator_idv[i] == 1) { + o_i++; + } + } + + // matrix operations to get u_prdt + cout << "Start Eigen-Decomposition..." << endl; + clock_t time_start = clock(); + EigenDecomp(Goo, U, eval, 0); + for (size_t i = 0; i < eval->size; i++) { + if (gsl_vector_get(eval, i) < 1e-10) { + gsl_vector_set(eval, i, 0); + } + } + + time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + gsl_blas_dgemv(CblasTrans, 1.0, U, u_hat, 0.0, Utu); + for (size_t i = 0; i < eval->size; i++) { + d = gsl_vector_get(eval, i); + if (d != 0) { + d = gsl_vector_get(Utu, i) / d; + gsl_vector_set(Utu, i, d); + } + } + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, eval); + gsl_blas_dgemv(CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); + + // Free matrices. + gsl_matrix_free(Goo); + gsl_matrix_free(Gfo); + gsl_matrix_free(U); + gsl_vector_free(eval); + gsl_vector_free(Utu); + gsl_vector_free(w); + gsl_permutation_free(pmt); + + return; } -void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { - igzstream infile (file_geno.c_str(), igzstream::in); - if (!infile) { - cout<<"error reading genotype file:"<<file_geno<<endl; - return; - } - - string line; - char *ch_ptr; - string rs; - - size_t n_miss, n_train_nomiss, c_phen; - double geno, x_mean, x_train_mean, effect_size; - - gsl_vector *x=gsl_vector_alloc (y_prdt->size); - gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size); - - ns_test=0; - - // Start reading genotypes and analyze. - for (size_t t=0; t<ns_total; ++t) { - !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) { - ProgressBar ("Reading SNPs ", t, ns_total-1); - } - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - rs=ch_ptr; - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - if (mapRS2est.count(rs)==0) { - continue; - } else { - effect_size=mapRS2est[rs]; - } - - x_mean=0.0; - c_phen=0; - n_miss=0; - x_train_mean=0; - n_train_nomiss=0; - - gsl_vector_set_zero(x_miss); - - for (size_t i=0; i<indicator_idv.size(); ++i) { - ch_ptr=strtok (NULL, " , \t"); - if (indicator_idv[i]==1) { - if (strcmp(ch_ptr, "NA")!=0) { - geno=atof(ch_ptr); - x_train_mean+=geno; - n_train_nomiss++; - } - } else { - if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); - n_miss++; - } else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); - x_mean+=geno; - } - c_phen++; - } - } - - if (x->size==n_miss) { - cout << "snp " << rs << " has missing genotype for all " << - "individuals and will be ignored." << endl; - continue;} - - - x_mean/=(double)(x->size-n_miss); - x_train_mean/=(double)(n_train_nomiss); - - - for (size_t i=0; i<x->size; ++i) { - geno=gsl_vector_get(x, i); - if (gsl_vector_get (x_miss, i)==0) { - gsl_vector_set(x, i, x_mean-x_train_mean); - } else { - gsl_vector_set(x, i, geno-x_train_mean); - } - } - - gsl_vector_scale (x, effect_size); - gsl_vector_add (y_prdt, x); - - ns_test++; - } - cout<<endl; - - gsl_vector_free (x); - gsl_vector_free (x_miss); - - infile.close(); - infile.clear(); - - return; +void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) { + igzstream infile(file_geno.c_str(), igzstream::in); + if (!infile) { + cout << "error reading genotype file:" << file_geno << endl; + return; + } + + string line; + char *ch_ptr; + string rs; + + size_t n_miss, n_train_nomiss, c_phen; + double geno, x_mean, x_train_mean, effect_size; + + gsl_vector *x = gsl_vector_alloc(y_prdt->size); + gsl_vector *x_miss = gsl_vector_alloc(y_prdt->size); + + ns_test = 0; + + // Start reading genotypes and analyze. + for (size_t t = 0; t < ns_total; ++t) { + !safeGetline(infile, line).eof(); + if (t % d_pace == 0 || t == (ns_total - 1)) { + ProgressBar("Reading SNPs ", t, ns_total - 1); + } + + ch_ptr = strtok((char *)line.c_str(), " , \t"); + rs = ch_ptr; + ch_ptr = strtok(NULL, " , \t"); + ch_ptr = strtok(NULL, " , \t"); + + if (mapRS2est.count(rs) == 0) { + continue; + } else { + effect_size = mapRS2est[rs]; + } + + x_mean = 0.0; + c_phen = 0; + n_miss = 0; + x_train_mean = 0; + n_train_nomiss = 0; + + gsl_vector_set_zero(x_miss); + + for (size_t i = 0; i < indicator_idv.size(); ++i) { + ch_ptr = strtok(NULL, " , \t"); + if (indicator_idv[i] == 1) { + if (strcmp(ch_ptr, "NA") != 0) { + geno = atof(ch_ptr); + x_train_mean += geno; + n_train_nomiss++; + } + } else { + if (strcmp(ch_ptr, "NA") == 0) { + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; + } else { + geno = atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); + x_mean += geno; + } + c_phen++; + } + } + + if (x->size == n_miss) { + cout << "snp " << rs << " has missing genotype for all " + << "individuals and will be ignored." << endl; + continue; + } + + x_mean /= (double)(x->size - n_miss); + x_train_mean /= (double)(n_train_nomiss); + + for (size_t i = 0; i < x->size; ++i) { + geno = gsl_vector_get(x, i); + if (gsl_vector_get(x_miss, i) == 0) { + gsl_vector_set(x, i, x_mean - x_train_mean); + } else { + gsl_vector_set(x, i, geno - x_train_mean); + } + } + + gsl_vector_scale(x, effect_size); + gsl_vector_add(y_prdt, x); + + ns_test++; + } + cout << endl; + + gsl_vector_free(x); + gsl_vector_free(x_miss); + + infile.close(); + infile.clear(); + + return; } -void PRDT::AnalyzePlink (gsl_vector *y_prdt) { - string file_bed=file_bfile+".bed"; - ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) { - cout<<"error reading bed file:"<<file_bed<<endl; - return; - } - - char ch[1]; - bitset<8> b; - string rs; - - size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss; - double geno, x_mean, x_train_mean, effect_size; - - gsl_vector *x=gsl_vector_alloc (y_prdt->size); - - // Calculate n_bit and c, the number of bit for each SNP. - if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;} - else {n_bit=indicator_idv.size()/4+1; } - - // Print the first 3 magic numbers. - for (size_t i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - ns_test=0; - - for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) { - ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); - } - - rs=snpInfo[t].rs_number; - - if (mapRS2est.count(rs)==0) { - continue; - } else { - effect_size=mapRS2est[rs]; - } - - // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); - - // Read genotypes. - x_mean=0.0; - n_miss=0; - ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0; - for (size_t i=0; i<n_bit; ++i) { - infile.read(ch,1); - b=ch[0]; - - // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { - if ((i==(n_bit-1)) && - ci_total==indicator_idv.size()) { - break; - } - if (indicator_idv[ci_total]==1) { - if (b[2*j]==0) { - if (b[2*j+1]==0) { - x_train_mean+=2.0; - n_train_nomiss++; - } - else { - x_train_mean+=1.0; - n_train_nomiss++; - } - } - else { - if (b[2*j+1]==1) { - n_train_nomiss++; - } - else {} - } - } else { - if (b[2*j]==0) { - if (b[2*j+1]==0) { - gsl_vector_set(x,ci_test,2); - x_mean+=2.0; - } - else { - gsl_vector_set(x,ci_test,1); - x_mean+=1.0; - } - } - else { - if (b[2*j+1]==1) { - gsl_vector_set(x,ci_test,0); - } - else { - gsl_vector_set(x,ci_test,-9); - n_miss++; - } - } - ci_test++; - } - ci_total++; - - } - } - - if (x->size==n_miss) { - cout << "snp " << rs << " has missing genotype for all " << - "individuals and will be ignored."<<endl; - continue; - } - - x_mean/=(double)(x->size-n_miss); - x_train_mean/=(double)(n_train_nomiss); - - for (size_t i=0; i<x->size; ++i) { - geno=gsl_vector_get(x, i); - if (geno==-9) { - gsl_vector_set(x, i, x_mean-x_train_mean); - } else { - gsl_vector_set(x, i, geno-x_train_mean); - } - } - - gsl_vector_scale (x, effect_size); - gsl_vector_add (y_prdt, x); - - ns_test++; - } - cout<<endl; - - gsl_vector_free (x); - - infile.close(); - infile.clear(); - - return; +void PRDT::AnalyzePlink(gsl_vector *y_prdt) { + string file_bed = file_bfile + ".bed"; + ifstream infile(file_bed.c_str(), ios::binary); + if (!infile) { + cout << "error reading bed file:" << file_bed << endl; + return; + } + + char ch[1]; + bitset<8> b; + string rs; + + size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss; + double geno, x_mean, x_train_mean, effect_size; + + gsl_vector *x = gsl_vector_alloc(y_prdt->size); + + // Calculate n_bit and c, the number of bit for each SNP. + if (indicator_idv.size() % 4 == 0) { + n_bit = indicator_idv.size() / 4; + } else { + n_bit = indicator_idv.size() / 4 + 1; + } + + // Print the first 3 magic numbers. + for (size_t i = 0; i < 3; ++i) { + infile.read(ch, 1); + b = ch[0]; + } + + ns_test = 0; + + for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) { + if (t % d_pace == 0 || t == snpInfo.size() - 1) { + ProgressBar("Reading SNPs ", t, snpInfo.size() - 1); + } + + rs = snpInfo[t].rs_number; + + if (mapRS2est.count(rs) == 0) { + continue; + } else { + effect_size = mapRS2est[rs]; + } + + // n_bit, and 3 is the number of magic numbers. + infile.seekg(t * n_bit + 3); + + // Read genotypes. + x_mean = 0.0; + n_miss = 0; + ci_total = 0; + ci_test = 0; + x_train_mean = 0; + n_train_nomiss = 0; + for (size_t i = 0; i < n_bit; ++i) { + infile.read(ch, 1); + b = ch[0]; + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j = 0; j < 4; ++j) { + if ((i == (n_bit - 1)) && ci_total == indicator_idv.size()) { + break; + } + if (indicator_idv[ci_total] == 1) { + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + x_train_mean += 2.0; + n_train_nomiss++; + } else { + x_train_mean += 1.0; + n_train_nomiss++; + } + } else { + if (b[2 * j + 1] == 1) { + n_train_nomiss++; + } else { + } + } + } else { + if (b[2 * j] == 0) { + if (b[2 * j + 1] == 0) { + gsl_vector_set(x, ci_test, 2); + x_mean += 2.0; + } else { + gsl_vector_set(x, ci_test, 1); + x_mean += 1.0; + } + } else { + if (b[2 * j + 1] == 1) { + gsl_vector_set(x, ci_test, 0); + } else { + gsl_vector_set(x, ci_test, -9); + n_miss++; + } + } + ci_test++; + } + ci_total++; + } + } + + if (x->size == n_miss) { + cout << "snp " << rs << " has missing genotype for all " + << "individuals and will be ignored." << endl; + continue; + } + + x_mean /= (double)(x->size - n_miss); + x_train_mean /= (double)(n_train_nomiss); + + for (size_t i = 0; i < x->size; ++i) { + geno = gsl_vector_get(x, i); + if (geno == -9) { + gsl_vector_set(x, i, x_mean - x_train_mean); + } else { + gsl_vector_set(x, i, geno - x_train_mean); + } + } + + gsl_vector_scale(x, effect_size); + gsl_vector_add(y_prdt, x); + + ns_test++; + } + cout << endl; + + gsl_vector_free(x); + + infile.close(); + infile.clear(); + + return; } // Predict missing phenotypes using ridge regression. // Y_hat contains fixed effects -void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, - gsl_matrix *Y_full) { - gsl_vector *y_obs=gsl_vector_alloc (np_obs); - gsl_vector *y_miss=gsl_vector_alloc (np_miss); - gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs); - gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs); - gsl_vector *Hiy=gsl_vector_alloc (np_obs); - - size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0; - - // Obtain H_oo, H_mo. - c_obs1=0; c_miss1=0; - for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) { - if (indicator_cvt[i1]==0) {continue;} - for (vector<int>::size_type j1=0; j1<n_ph; ++j1) { - - c_obs2=0; c_miss2=0; - for (vector<int>::size_type i2=0; - i2<indicator_pheno.size(); ++i2) { - if (indicator_cvt[i2]==0) {continue;} - for (vector<int>::size_type j2=0; - j2<n_ph; j2++) { - - if (indicator_pheno[i2][j2]==1) { - if (indicator_pheno[i1][j1]==1) { - gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); - } else { - gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); - } - c_obs2++; - } else { - c_miss2++; - } - } - } - - if (indicator_pheno[i1][j1]==1) { - c_obs1++; - } else { - c_miss1++; - } - } - - } - - // Do LU decomposition of H_oo. - int sig; - gsl_permutation * pmt=gsl_permutation_alloc (np_obs); - LUDecomp (H_oo, pmt, &sig); - - // Obtain y_obs=y_full-y_hat. - // Add the fixed effects part to y_miss: y_miss=y_hat. - c_obs1=0; c_miss1=0; - for (vector<int>::size_type i=0; - i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<n_ph; ++j) { - if (indicator_pheno[i][j]==1) { - gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) ); - c_obs1++; - } else { - gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) ); - c_miss1++; - } - } - } - - LUSolve (H_oo, pmt, y_obs, Hiy); - - gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); - - // Put back predicted y_miss to Y_full. - c_miss1=0; - for (vector<int>::size_type i=0; - i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<n_ph; ++j) { - if (indicator_pheno[i][j]==0) { - gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) ); - c_miss1++; - } - } - } - - // Free matrices. - gsl_vector_free(y_obs); - gsl_vector_free(y_miss); - gsl_matrix_free(H_oo); - gsl_matrix_free(H_mo); - gsl_vector_free(Hiy); - - return; +void PRDT::MvnormPrdt(const gsl_matrix *Y_hat, const gsl_matrix *H, + gsl_matrix *Y_full) { + gsl_vector *y_obs = gsl_vector_alloc(np_obs); + gsl_vector *y_miss = gsl_vector_alloc(np_miss); + gsl_matrix *H_oo = gsl_matrix_alloc(np_obs, np_obs); + gsl_matrix *H_mo = gsl_matrix_alloc(np_miss, np_obs); + gsl_vector *Hiy = gsl_vector_alloc(np_obs); + + size_t c_obs1 = 0, c_obs2 = 0, c_miss1 = 0, c_miss2 = 0; + + // Obtain H_oo, H_mo. + c_obs1 = 0; + c_miss1 = 0; + for (vector<int>::size_type i1 = 0; i1 < indicator_pheno.size(); ++i1) { + if (indicator_cvt[i1] == 0) { + continue; + } + for (vector<int>::size_type j1 = 0; j1 < n_ph; ++j1) { + + c_obs2 = 0; + c_miss2 = 0; + for (vector<int>::size_type i2 = 0; i2 < indicator_pheno.size(); ++i2) { + if (indicator_cvt[i2] == 0) { + continue; + } + for (vector<int>::size_type j2 = 0; j2 < n_ph; j2++) { + + if (indicator_pheno[i2][j2] == 1) { + if (indicator_pheno[i1][j1] == 1) { + gsl_matrix_set( + H_oo, c_obs1, c_obs2, + gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2)); + } else { + gsl_matrix_set( + H_mo, c_miss1, c_obs2, + gsl_matrix_get(H, c_obs1 + c_miss1, c_obs2 + c_miss2)); + } + c_obs2++; + } else { + c_miss2++; + } + } + } + + if (indicator_pheno[i1][j1] == 1) { + c_obs1++; + } else { + c_miss1++; + } + } + } + + // Do LU decomposition of H_oo. + int sig; + gsl_permutation *pmt = gsl_permutation_alloc(np_obs); + LUDecomp(H_oo, pmt, &sig); + + // Obtain y_obs=y_full-y_hat. + // Add the fixed effects part to y_miss: y_miss=y_hat. + c_obs1 = 0; + c_miss1 = 0; + for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + for (vector<int>::size_type j = 0; j < n_ph; ++j) { + if (indicator_pheno[i][j] == 1) { + gsl_vector_set(y_obs, c_obs1, gsl_matrix_get(Y_full, i, j) - + gsl_matrix_get(Y_hat, i, j)); + c_obs1++; + } else { + gsl_vector_set(y_miss, c_miss1, gsl_matrix_get(Y_hat, i, j)); + c_miss1++; + } + } + } + + LUSolve(H_oo, pmt, y_obs, Hiy); + + gsl_blas_dgemv(CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); + + // Put back predicted y_miss to Y_full. + c_miss1 = 0; + for (vector<int>::size_type i = 0; i < indicator_pheno.size(); ++i) { + if (indicator_cvt[i] == 0) { + continue; + } + + for (vector<int>::size_type j = 0; j < n_ph; ++j) { + if (indicator_pheno[i][j] == 0) { + gsl_matrix_set(Y_full, i, j, gsl_vector_get(y_miss, c_miss1)); + c_miss1++; + } + } + } + + // Free matrices. + gsl_vector_free(y_obs); + gsl_vector_free(y_miss); + gsl_matrix_free(H_oo); + gsl_matrix_free(H_mo); + gsl_vector_free(Hiy); + + return; } - - |