From 17deca2d54827a00df3ea4d98df700fc2b8ed777 Mon Sep 17 00:00:00 2001
From: xiangzhou
Date: Sat, 20 Sep 2014 10:17:34 -0400
Subject: initial upload, version 0.95alpha
---
lm.cpp | 571 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 571 insertions(+)
create mode 100644 lm.cpp
(limited to 'lm.cpp')
diff --git a/lm.cpp b/lm.cpp
new file mode 100644
index 0000000..c983253
--- /dev/null
+++ b/lm.cpp
@@ -0,0 +1,571 @@
+/*
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011 Xiang Zhou
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+
+
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_roots.h"
+#include "gsl/gsl_min.h"
+#include "gsl/gsl_integration.h"
+
+#include "gzstream.h"
+#include "lapack.h"
+
+#ifdef FORCE_FLOAT
+#include "lm_float.h"
+#else
+#include "lm.h"
+#endif
+
+
+using namespace std;
+
+
+
+
+
+// Initialise this LM object from the shared PARAM object: copy the settings,
+// file names, size bookkeeping and indicator/annotation data, and reset the
+// quantities that LM accumulates on its own (time_opt, ng_test).
+void LM::CopyFromParam (PARAM &cPar)
+{
+ // analysis mode and progress-bar refresh interval
+ a_mode = cPar.a_mode;
+ d_pace = cPar.d_pace;
+
+ // input / output file names
+ file_bfile = cPar.file_bfile;
+ file_geno = cPar.file_geno;
+ file_gene = cPar.file_gene;
+ file_out = cPar.file_out;
+
+ // size bookkeeping, copied verbatim from the parameter object
+ ni_total = cPar.ni_total;
+ ni_test = cPar.ni_test;
+ ns_total = cPar.ns_total;
+ ns_test = cPar.ns_test;
+ ng_total = cPar.ng_total;
+ n_cvt = cPar.n_cvt;
+
+ // per-individual / per-SNP inclusion indicators and SNP annotation
+ indicator_idv = cPar.indicator_idv;
+ indicator_snp = cPar.indicator_snp;
+ snpInfo = cPar.snpInfo;
+
+ // accumulators owned by LM start from zero; they are handed back to
+ // PARAM by CopyToParam
+ time_opt = 0.0;
+ ng_test = 0;
+}
+
+
+// Hand the quantities accumulated by this LM object back to the shared PARAM
+// object: the tested-gene counter and the time spent computing statistics.
+void LM::CopyToParam (PARAM &cPar)
+{
+ cPar.ng_test = ng_test;
+ cPar.time_opt = time_opt;
+}
+
+
+
+void LM::WriteFiles ()
+{
+ string file_str;
+ file_str="./output/"+file_out;
+ file_str+=".assoc.txt";
+
+ ofstream outfile (file_str.c_str(), ofstream::out);
+ if (!outfile) {cout<<"error writing file: "<::size_type t=0; tsize;
+ double d;
+
+ gsl_vector *WtWiWtx=gsl_vector_alloc (c_size);
+
+ gsl_blas_ddot (x, x, &xPwx);
+ gsl_blas_ddot (x, y, &xPwy);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+
+ gsl_blas_ddot (WtWiWtx, Wtx, &d);
+ xPwx-=d;
+
+ gsl_blas_ddot (WtWiWtx, Wty, &d);
+ xPwy-=d;
+
+ gsl_vector_free (WtWiWtx);
+
+ return;
+}
+
+
+// Computes yPwy = y'y - Wty' * WtWi * Wty.
+//
+// With WtWi=(W'W)^-1 and Wty=W'y, which is how the callers in this file
+// prepare the arguments, this is the sum of squares of y after projecting
+// out the columns of W.
+//
+//   WtWi : square matrix applied to Wty
+//   Wty  : vector whose length sets the size of the work buffer
+//   y    : vector whose squared norm is the leading term
+//   yPwy : output, overwritten with the result
+void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y, double &yPwy)
+{
+ // work buffer for WtWi * Wty
+ gsl_vector *projected=gsl_vector_alloc (Wty->size);
+
+ // leading term y'y goes straight into the output
+ gsl_blas_ddot (y, y, &yPwy);
+
+ // correction term Wty' * (WtWi * Wty)
+ double correction;
+ gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, projected);
+ gsl_blas_ddot (projected, Wty, &correction);
+
+ yPwy-=correction;
+
+ gsl_vector_free (projected);
+}
+
+
+
+//calculate p values and beta/se in a linear model
+void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, const double xPwx, const double df, const size_t n_size, double &beta, double &se, double &p_wald, double &p_lrt, double &p_score)
+{
+ double yPxy=yPwy-xPwy*xPwy/xPwx;
+ double se_wald, se_score;
+
+ beta=xPwy/xPwx;
+ se_wald=sqrt(yPxy/(df*xPwx) );
+ se_score=sqrt(yPwy/((double)n_size*xPwx) );
+
+ p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df);
+ p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df);
+ p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1);
+
+ if (test_mode==3) {se=se_score;} else {se=se_wald;}
+
+ return;
+}
+
+
+
+
+void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
+{
+ ifstream infile (file_gene.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading gene expression file:"<size1-(double)W->size2-1.0;
+
+ gsl_vector *y=gsl_vector_alloc (W->size1);
+
+ gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_vector *Wty=gsl_vector_alloc (W->size2);
+ gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+ gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp (WtW, pmt, &sig);
+ LUInvert (WtW, pmt, WtWi);
+
+ gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wtx, x, xPwx);
+
+ //header
+ getline(infile, line);
+
+ for (size_t t=0; tsize1, beta, se, p_wald, p_lrt, p_score);
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<size1-(double)W->size2-1.0;
+
+ gsl_vector *x=gsl_vector_alloc (W->size1);
+ gsl_vector *x_miss=gsl_vector_alloc (W->size1);
+
+ gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_vector *Wty=gsl_vector_alloc (W->size2);
+ gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+ gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp (WtW, pmt, &sig);
+ LUInvert (WtW, pmt, WtWi);
+
+ gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+ CalcvPv(WtWi, Wty, y, yPwy);
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t1) {break;}
+ getline(infile, line);
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ //calculate statistics
+ time_start=clock();
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout< b;
+
+ double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ //calculate some basic quantities
+ double yPwy, xPwy, xPwx;
+ double df=(double)W->size1-(double)W->size2-1.0;
+
+ gsl_vector *x=gsl_vector_alloc (W->size1);
+
+ gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_vector *Wty=gsl_vector_alloc (W->size2);
+ gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+ gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+ int sig;
+ LUDecomp (WtW, pmt, &sig);
+ LUInvert (WtW, pmt, WtWi);
+
+ gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+ CalcvPv(WtWi, Wty, y, yPwy);
+
+ //calculate n_bit and c, the number of bit for each snp
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //read (and skip past) the first three magic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+
+ for (vector::size_type t=0; t1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ //calculate statistics
+ time_start=clock();
+
+ gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout< > &pos_loglr)
+{
+ double yty, xty, xtx, log_lr;
+ gsl_blas_ddot(y, y, &yty);
+
+ for (size_t i=0; isize2; ++i) {
+ gsl_vector_const_view X_col=gsl_matrix_const_column (X, i);
+ gsl_blas_ddot(&X_col.vector, &X_col.vector, &xtx);
+ gsl_blas_ddot(&X_col.vector, y, &xty);
+
+ log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx));
+ pos_loglr.push_back(make_pair(i,log_lr) );
+ }
+
+ return;
+}
--
cgit v1.2.3