From 9aa9f76cb16a0e71fea19dd125761034aaca4fd4 Mon Sep 17 00:00:00 2001 From: xiangzhou Date: Mon, 22 Sep 2014 11:12:46 -0400 Subject: version 0.95alpha --- lm.cpp | 572 ----------------------------------------------------------------- 1 file changed, 572 deletions(-) delete mode 100644 lm.cpp (limited to 'lm.cpp') diff --git a/lm.cpp b/lm.cpp deleted file mode 100644 index 7577d0a..0000000 --- a/lm.cpp +++ /dev/null @@ -1,572 +0,0 @@ -/* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - */ - - - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" -#include "gsl/gsl_blas.h" - - -#include "gsl/gsl_cdf.h" -#include "gsl/gsl_roots.h" -#include "gsl/gsl_min.h" -#include "gsl/gsl_integration.h" - -#include "gzstream.h" -#include "lapack.h" - -#ifdef FORCE_FLOAT -#include "lm_float.h" -#else -#include "lm.h" -#endif - - -using namespace std; - - - - - -void LM::CopyFromParam (PARAM &cPar) -{ - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; - - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; - file_gene=cPar.file_gene; - - time_opt=0.0; - - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; - - ng_total=cPar.ng_total; - ng_test=0; - - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; - - return; -} - - -void LM::CopyToParam (PARAM &cPar) -{ - cPar.time_opt=time_opt; - - cPar.ng_test=ng_test; - - return; -} - - - -void LM::WriteFiles () -{ - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".assoc.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<::size_type t=0; tsize; - double d; - - gsl_vector *WtWiWtx=gsl_vector_alloc (c_size); - - gsl_blas_ddot (x, x, &xPwx); - gsl_blas_ddot (x, y, &xPwy); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx); - - gsl_blas_ddot (WtWiWtx, Wtx, &d); - xPwx-=d; - - gsl_blas_ddot (WtWiWtx, Wty, &d); - xPwy-=d; - - gsl_vector_free (WtWiWtx); - - return; -} - - -void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y, double &yPwy) -{ - size_t c_size=Wty->size; - double d; - - gsl_vector *WtWiWty=gsl_vector_alloc (c_size); - - gsl_blas_ddot (y, y, &yPwy); - gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty); - - gsl_blas_ddot (WtWiWty, Wty, &d); - yPwy-=d; - - gsl_vector_free (WtWiWty); - - return; -} - - - -//calculate p values and beta/se in a linear model -void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, const double xPwx, const double df, const size_t n_size, double &beta, double &se, double &p_wald, double &p_lrt, double &p_score) -{ - double yPxy=yPwy-xPwy*xPwy/xPwx; - double se_wald, se_score; - - beta=xPwy/xPwx; - se_wald=sqrt(yPxy/(df*xPwx) ); - se_score=sqrt(yPwy/((double)n_size*xPwx) ); - - p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df); - p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df); - p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1); - - if (test_mode==3) {se=se_score;} else {se=se_wald;} - - return; -} - - - - -void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x) -{ - ifstream infile (file_gene.c_str(), ifstream::in); - if (!infile) {cout<<"error reading gene expression file:"<size1-(double)W->size2-1.0; - - gsl_vector *y=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wtx, x, xPwx); - - //header - getline(infile, line); - - for (size_t t=0; tsize1, beta, se, p_wald, p_lrt, p_score); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //store summary data - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout<size1-(double)W->size2-1.0; - - gsl_vector *x=gsl_vector_alloc (W->size1); - gsl_vector *x_miss=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - //start reading genotypes and analyze - for (size_t t=0; t1) {break;} - getline(infile, line); - if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} - if (indicator_snp[t]==0) {continue;} - - ch_ptr=strtok ((char *)line.c_str(), " , \t"); - ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - - x_mean=0.0; c_phen=0; n_miss=0; - gsl_vector_set_zero(x_miss); - for (size_t i=0; i1) { - gsl_vector_set(x, i, 2-geno); - } - } - - //calculate statistics - time_start=clock(); - - gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //store summary data - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout< b; - - double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0; - int n_bit, n_miss, ci_total, ci_test; - double geno, x_mean; - - //calculate some basic quantities - double yPwy, xPwy, xPwx; - double df=(double)W->size1-(double)W->size2-1.0; - - gsl_vector *x=gsl_vector_alloc (W->size1); - - gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2); - gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2); - gsl_vector *Wty=gsl_vector_alloc (W->size2); - gsl_vector *Wtx=gsl_vector_alloc (W->size2); - gsl_permutation * pmt=gsl_permutation_alloc (W->size2); - - gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); - int sig; - LUDecomp (WtW, pmt, &sig); - LUInvert (WtW, pmt, WtWi); - - gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty); - CalcvPv(WtWi, Wty, y, yPwy); - - //calculate n_bit and c, the number of bit for each snp - if (ni_total%4==0) {n_bit=ni_total/4;} - else {n_bit=ni_total/4+1; } - - //print the first three majic numbers - for (int i=0; i<3; ++i) { - infile.read(ch,1); - b=ch[0]; - } - - - for (vector::size_type t=0; t1) { - gsl_vector_set(x, i, 2-geno); - } - } - - //calculate statistics - time_start=clock(); - - gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx); - CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx); - LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score); - - time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //store summary data - SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score}; - sumStat.push_back(SNPs); - } - cout< > &pos_loglr) -{ - double yty, xty, xtx, log_lr; - gsl_blas_ddot(y, y, &yty); - - for (size_t i=0; isize2; ++i) { - gsl_vector_const_view X_col=gsl_matrix_const_column (X, i); - gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx); - gsl_blas_ddot(&X_col.vector, y, &xty); - - log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx)); - pos_loglr.push_back(make_pair(i,log_lr) ); - } - - return; -} -- cgit v1.2.3