From 0dd4e05fc8babc1517de1d7981a99ad0a5241a5e Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 4 May 2017 14:43:12 -0500 Subject: Added new files shared by Xiang via email on May 4, 2017. --- src/varcov.cpp | 482 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 src/varcov.cpp (limited to 'src/varcov.cpp') diff --git a/src/varcov.cpp b/src/varcov.cpp new file mode 100644 index 0000000..fdc6f10 --- /dev/null +++ b/src/varcov.cpp @@ -0,0 +1,482 @@ +/* + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011 Xiang Zhou + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gsl/gsl_vector.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" + +#include "lapack.h" +#include "gzstream.h" + +#ifdef FORCE_FLOAT +#include "param_float.h" +#include "varcov_float.h" +#include "io_float.h" +#include "mathfunc_float.h" +#else +#include "param.h" +#include "varcov.h" +#include "io.h" +#include "mathfunc.h" +#endif + + +using namespace std; + + + + +void VARCOV::CopyFromParam (PARAM &cPar) +{ + d_pace=cPar.d_pace; + + file_bfile=cPar.file_bfile; + file_geno=cPar.file_geno; + file_out=cPar.file_out; + path_out=cPar.path_out; + + time_opt=0.0; + + window_cm=cPar.window_cm; + window_bp=cPar.window_bp; + window_ns=cPar.window_ns; + + indicator_idv=cPar.indicator_idv; + indicator_snp=cPar.indicator_snp; + snpInfo=cPar.snpInfo; + + return; +} + + +void VARCOV::CopyToParam (PARAM &cPar) +{ + cPar.time_opt=time_opt; + return; +} + + + +//chr rs ps n_idv allele1 allele0 af var window_size r2 (r2_11,n_11,r2_12,n_12...r2_1m,n_1m) +void VARCOV::WriteCov (const int flag, const vector &snpInfo_sub, const vector > &Cov_mat) +{ + string file_cov; + file_cov=path_out+"/"+file_out; + file_cov+=".cor.txt"; + + ofstream outfile; + + if (flag==0) { + outfile.open (file_cov.c_str(), ofstream::out); + if (!outfile) {cout<<"error writing file: "<0) { + return false; + } else { + if (c_bp<0) { + return true; + } else if (c_bp>0) { + return false; + } else { + return true; + } + } +} + + +// do not sort SNPs (because gzip files do not support random access) +// then calculate n_nb, the number of neighbours, for each snp +void VARCOV::CalcNB (vector &snpInfo_sort) +{ + // stable_sort(snpInfo_sort.begin(), snpInfo_sort.end(), CompareSNPinfo); + + size_t t2=0, n_nb=0; + for (size_t t=0; t > &X_mat, vector &cov_vec) +{ + cov_vec.clear(); + + double v1, v2, r; + vector x_vec=X_mat[0]; + + lapack_ddot(x_vec, x_vec, v1); + cov_vec.push_back(v1/(double)x_vec.size() ); + + for (size_t i=1; i > &X_mat, vector &cov_vec) +{ + cov_vec.clear(); + + int d1, d2, m1, m2, n1, n2, n12; + double m1d, m2d, m12d, v1d, v2d, v; + + vector x_vec=X_mat[0]; + + //calculate m2 + m2=0; n2=0; + for (size_t j=0; j snpInfo_sub; + CalcNB(snpInfo); + + size_t ni_test=0; + for (size_t i=0; i x_vec, cov_vec; + vector > X_mat, Cov_mat; + + for (size_t i=0; i=2) {break;} + + if (X_mat.size()==0) { + n_nb=snpInfo[t].n_nb+1; + } else { + n_nb=snpInfo[t].n_nb-n_nb+1; + } + + for (int i=0; isize; j++) { + x_vec[j]=gsl_vector_get(geno, j); + } + X_mat.push_back(x_vec); + + t2++; + } + + n_nb=snpInfo[t].n_nb; + + Calc_Cor(X_mat, cov_vec); + Cov_mat.push_back(cov_vec); + snpInfo_sub.push_back(snpInfo[t]); + + X_mat.erase(X_mat.begin()); + + //write out var/cov values + if (Cov_mat.size()==10000) { + WriteCov (1, snpInfo_sub, Cov_mat); + Cov_mat.clear(); + snpInfo_sub.clear(); + } + } + + if (Cov_mat.size()!=0) { + WriteCov (1, snpInfo_sub, Cov_mat); + Cov_mat.clear(); + snpInfo_sub.clear(); + } + + gsl_vector_free(geno); + + infile.close(); + infile.clear(); + + return; +} + + + + +void VARCOV::AnalyzePlink () +{ + string file_bed=file_bfile+".bed"; + ifstream infile (file_bed.c_str(), ios::binary); + if (!infile) {cout<<"error reading bed file:"< snpInfo_sub; + CalcNB(snpInfo); + + size_t ni_test=0; + for (size_t i=0; i x_vec, cov_vec; + vector > X_mat, Cov_mat; + + for (size_t i=0; i=2) {break;} + + if (X_mat.size()==0) { + n_nb=snpInfo[t].n_nb+1; + } else { + n_nb=snpInfo[t].n_nb-n_nb+1; + } + + for (int i=0; isize; j++) { + x_vec[j]=gsl_vector_get(geno, j); + } + X_mat.push_back(x_vec); + + t2++; + } + + n_nb=snpInfo[t].n_nb; + + Calc_Cor(X_mat, cov_vec); + Cov_mat.push_back(cov_vec); + snpInfo_sub.push_back(snpInfo[t]); + + X_mat.erase(X_mat.begin()); + + //write out var/cov values + if (Cov_mat.size()==10000) { + WriteCov (1, snpInfo_sub, Cov_mat); + Cov_mat.clear(); + snpInfo_sub.clear(); + } + } + + if (Cov_mat.size()!=0) { + WriteCov (1, snpInfo_sub, Cov_mat); + Cov_mat.clear(); + snpInfo_sub.clear(); + } + + gsl_vector_free(geno); + + infile.close(); + infile.clear(); + + return; +} -- cgit v1.2.3