aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/io.cpp2482
-rw-r--r--src/io.h36
-rw-r--r--src/lm.cpp516
-rw-r--r--src/lm.h28
-rw-r--r--src/lmm.cpp1517
-rw-r--r--src/lmm.h30
-rw-r--r--src/mvlmm.cpp3005
-rw-r--r--src/mvlmm.h30
-rw-r--r--src/param.cpp1122
-rw-r--r--src/param.h122
10 files changed, 6790 insertions, 2098 deletions
diff --git a/src/io.cpp b/src/io.cpp
index 7ed95c4..d6a70dd 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -28,7 +28,7 @@
#include <cstring>
#include <cmath>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -39,6 +39,7 @@
#include "lapack.h"
#include "gzstream.h"
#include "mathfunc.h"
+#include "eigenlib.h"
#ifdef FORCE_FLOAT
#include "io_float.h"
@@ -54,10 +55,10 @@ using namespace std;
//Print process bar
void ProgressBar (string str, double p, double total)
{
- double progress = (100.0 * p / total);
- int barsize = (int) (progress / 2.0);
+ double progress = (100.0 * p / total);
+ int barsize = (int) (progress / 2.0);
char bar[51];
-
+
cout<<str;
for (int i = 0; i <50; i++) {
if (i<barsize) {bar[i] = '=';}
@@ -65,7 +66,7 @@ void ProgressBar (string str, double p, double total)
cout<<bar[i];
}
cout<<setprecision(2)<<fixed<<progress<<"%\r"<<flush;
-
+
return;
}
@@ -73,10 +74,10 @@ void ProgressBar (string str, double p, double total)
//Print process bar (with acceptance ratio)
void ProgressBar (string str, double p, double total, double ratio)
{
- double progress = (100.0 * p / total);
- int barsize = (int) (progress / 2.0);
+ double progress = (100.0 * p / total);
+ int barsize = (int) (progress / 2.0);
char bar[51];
-
+
cout<<str;
for (int i = 0; i <50; i++) {
if (i<barsize) {bar[i] = '=';}
@@ -84,8 +85,8 @@ void ProgressBar (string str, double p, double total, double ratio)
cout<<bar[i];
}
cout<<setprecision(2)<<fixed<<progress<<"% "<<ratio<<"\r"<<flush;
-
-
+
+
return;
}
@@ -130,18 +131,18 @@ bool ReadFile_snps (const string &file_snps, set<string> &setSnps)
ifstream infile (file_snps.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open snps file: "<<file_snps<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
while (getline(infile, line)) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
- setSnps.insert(ch_ptr);
+ setSnps.insert(ch_ptr);
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -151,15 +152,15 @@ bool ReadFile_log (const string &file_log, double &pheno_mean)
{
ifstream infile (file_log.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open log file: "<<file_log<<endl; return false;}
-
+
string line;
char *ch_ptr;
size_t flag=0;
-
+
while (getline(infile, line)) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
-
+
if (ch_ptr!=NULL && strcmp(ch_ptr, "estimated")==0) {
ch_ptr=strtok (NULL, " , \t");
if (ch_ptr!=NULL && strcmp(ch_ptr, "mean")==0) {
@@ -171,13 +172,13 @@ bool ReadFile_log (const string &file_log, double &pheno_mean)
}
}
}
-
+
if (flag==1) {break;}
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -187,18 +188,18 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map
{
mapRS2chr.clear();
mapRS2bp.clear();
-
+
ifstream infile (file_anno.c_str(), ifstream::in);
if (!infile) {cout<<"error opening annotation file: "<<file_anno<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
string rs;
long int b_pos;
string chr;
double cM;
-
+
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
@@ -208,15 +209,15 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map
if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {chr="-9";} else {chr=ch_ptr;}
ch_ptr=strtok (NULL, " , \t");
if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {cM=-9;} else {cM=atof(ch_ptr);}
-
+
mapRS2chr[rs]=chr;
mapRS2bp[rs]=b_pos;
mapRS2cM[rs]=cM;
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -225,28 +226,28 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vect
{
indicator_idv.clear();
pheno.clear();
-
+
igzstream infile (file_pheno.c_str(), igzstream::in);
// ifstream infile (file_pheno.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
string id;
double p;
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
for (int i=0; i<(p_column-1); ++i) {
- ch_ptr=strtok (NULL, " , \t");
- }
+ ch_ptr=strtok (NULL, " , \t");
+ }
if (strcmp(ch_ptr, "NA")==0) {indicator_idv.push_back(0); pheno.push_back(-9);} //pheno is different from pimass2
else {p=atof(ch_ptr); indicator_idv.push_back(1); pheno.push_back(p);}
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -257,48 +258,48 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p
{
indicator_pheno.clear();
pheno.clear();
-
+
igzstream infile (file_pheno.c_str(), igzstream::in);
// ifstream infile (file_pheno.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;}
string line;
char *ch_ptr;
-
+
string id;
double p;
-
+
vector<double> pheno_row;
vector<int> ind_pheno_row;
-
+
size_t p_max=*max_element(p_column.begin(), p_column.end() );
map<size_t, size_t> mapP2c;
for (size_t i=0; i<p_column.size(); i++) {
mapP2c[p_column[i]]=i;
pheno_row.push_back(-9);
ind_pheno_row.push_back(0);
- }
-
+ }
+
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
-
+
size_t i=0;
- while (i<p_max ) {
+ while (i<p_max ) {
if (mapP2c.count(i+1)!=0) {
if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;}
else {p=atof(ch_ptr); ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;}
}
i++;
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
}
-
- indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
+
+ indicator_pheno.push_back(ind_pheno_row);
+ pheno.push_back(pheno_row);
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -306,44 +307,44 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p
bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
{
indicator_cvt.clear();
-
+
ifstream infile (file_cvt.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open covariates file: "<<file_cvt<<endl; return false;}
-
+
string line;
char *ch_ptr;
- double d;
-
- int flag_na=0;
-
+ double d;
+
+ int flag_na=0;
+
while (!safeGetline(infile, line).eof()) {
vector<double> v_d; flag_na=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
while (ch_ptr!=NULL) {
if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
else {d=atof(ch_ptr);}
-
+
v_d.push_back(d);
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
}
- if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
+ if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
cvt.push_back(v_d);
}
-
+
if (indicator_cvt.empty()) {n_cvt=0;}
else {
flag_na=0;
for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
if (indicator_cvt[i]==0) {continue;}
-
+
if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
}
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -353,20 +354,20 @@ bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<ve
bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo)
{
snpInfo.clear();
-
+
ifstream infile (file_bim.c_str(), ifstream::in);
if (!infile) {cout<<"error opening .bim file: "<<file_bim<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
string rs;
long int b_pos;
string chr;
double cM;
string major;
string minor;
-
+
while (getline(infile, line)) {
ch_ptr=strtok ((char *)line.c_str(), " \t");
chr=ch_ptr;
@@ -380,13 +381,13 @@ bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo)
minor=ch_ptr;
ch_ptr=strtok (NULL, " \t");
major=ch_ptr;
-
- SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, -9, -9, -9};
+
+ SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
snpInfo.push_back(sInfo);
}
-
+
infile.close();
- infile.clear();
+ infile.clear();
return true;
}
@@ -396,8 +397,8 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
{
indicator_pheno.clear();
pheno.clear();
- mapID2num.clear();
-
+ mapID2num.clear();
+
igzstream infile (file_fam.c_str(), igzstream::in);
//ifstream infile (file_fam.c_str(), ifstream::in);
if (!infile) {cout<<"error opening .fam file: "<<file_fam<<endl; return false;}
@@ -411,15 +412,15 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
vector<double> pheno_row;
vector<int> ind_pheno_row;
-
+
size_t p_max=*max_element(p_column.begin(), p_column.end() );
map<size_t, size_t> mapP2c;
for (size_t i=0; i<p_column.size(); i++) {
mapP2c[p_column[i]]=i;
pheno_row.push_back(-9);
ind_pheno_row.push_back(0);
- }
-
+ }
+
while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " \t");
ch_ptr=strtok (NULL, " \t");
@@ -428,7 +429,7 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
ch_ptr=strtok (NULL, " \t");
ch_ptr=strtok (NULL, " \t");
ch_ptr=strtok (NULL, " \t");
-
+
size_t i=0;
while (i<p_max ) {
if (mapP2c.count(i+1)!=0 ) {
@@ -436,23 +437,23 @@ bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno
ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;
} else {
p=atof(ch_ptr);
-
+
if (p==-9) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;}
else {ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;}
}
}
i++;
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
}
-
+
indicator_pheno.push_back(ind_pheno_row);
- pheno.push_back(pheno_row);
-
+ pheno.push_back(pheno_row);
+
mapID2num[id]=c; c++;
}
-
+
infile.close();
- infile.clear();
+ infile.clear();
return true;
}
@@ -466,7 +467,7 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
{
indicator_snp.clear();
snpInfo.clear();
-
+
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
@@ -478,112 +479,118 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
gsl_vector *Wtx=gsl_vector_alloc (W->size2);
gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
+
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
+ //eigenlib_dgemm("T", "N", 1.0, W, W, 0.0, WtW);
+ int sig;
LUDecomp (WtW, pmt, &sig);
LUInvert (WtW, pmt, WtWi);
-
+
double v_x, v_w;
int c_idv=0;
-
+
string line;
char *ch_ptr;
-
+
string rs;
long int b_pos;
string chr;
string major;
string minor;
double cM;
-
+ size_t file_pos;
+
double maf, geno, geno_old;
size_t n_miss;
size_t n_0, n_1, n_2;
int flag_poly;
-
+
int ni_total=indicator_idv.size();
int ni_test=0;
for (int i=0; i<ni_total; ++i) {
ni_test+=indicator_idv[i];
}
ns_test=0;
-
- while (!safeGetline(infile, line).eof()) {
+
+ file_pos=0;
+ while (!safeGetline(infile, line).eof()) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
ch_ptr=strtok (NULL, " , \t");
minor=ch_ptr;
ch_ptr=strtok (NULL, " , \t");
major=ch_ptr;
-
+
if (setSnps.size()!=0 && setSnps.count(rs)==0) {
- SNPINFO sInfo={"-9", rs, -9, -9, minor, major, -9, -9, -9};
- snpInfo.push_back(sInfo);
- indicator_snp.push_back(0);
- continue;
+ SNPINFO sInfo={"-9", rs, -9, -9, minor, major, 0, -9, -9, 0, 0, file_pos};
+ snpInfo.push_back(sInfo);
+ indicator_snp.push_back(0);
+
+ file_pos++;
+ continue;
}
-
+
if (mapRS2bp.count(rs)==0) {chr="-9"; b_pos=-9;cM=-9;}
- else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}
-
+ else {b_pos=mapRS2bp[rs]; chr=mapRS2chr[rs]; cM=mapRS2cM[rs];}
+
maf=0; n_miss=0; flag_poly=0; geno_old=-9;
n_0=0; n_1=0; n_2=0;
c_idv=0; gsl_vector_set_zero (genotype_miss);
for (int i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[i]==0) {continue;}
+ if (indicator_idv[i]==0) {continue;}
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
-
+
geno=atof(ch_ptr);
if (geno>=0 && geno<=0.5) {n_0++;}
if (geno>0.5 && geno<1.5) {n_1++;}
if (geno>=1.5 && geno<=2.0) {n_2++;}
-
- gsl_vector_set (genotype, c_idv, geno);
-
+
+ gsl_vector_set (genotype, c_idv, geno);
+
// if (geno<0) {n_miss++; continue;}
-
+
if (flag_poly==0) {geno_old=geno; flag_poly=2;}
if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
-
+
maf+=geno;
-
+
c_idv++;
}
- maf/=2.0*(double)(ni_test-n_miss);
-
- SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
+ maf/=2.0*(double)(ni_test-n_miss);
+
+ SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf, ni_test-n_miss, 0, file_pos};
snpInfo.push_back(sInfo);
-
+ file_pos++;
+
if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
-
+
if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
-
+
if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
-
+
if (hwe_level!=0 && maf_level!=-1) {
if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
}
-
+
//filter SNP if it is correlated with W
//unless W has only one column, of 1s
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+ for (size_t i=0; i<genotype->size; ++i) {
+ if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
}
-
+
gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
gsl_blas_ddot (genotype, genotype, &v_x);
gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
+
if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
-
- indicator_snp.push_back(1);
+
+ indicator_snp.push_back(1);
ns_test++;
}
-
+
gsl_vector_free (genotype);
gsl_vector_free (genotype_miss);
gsl_matrix_free (WtW);
@@ -591,10 +598,10 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
gsl_vector_free (Wtx);
gsl_vector_free (WtWiWtx);
gsl_permutation_free (pmt);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -602,13 +609,13 @@ bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const g
-
+
//Read bed file, the first time
bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
{
indicator_snp.clear();
size_t ns_total=snpInfo.size();
-
+
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
@@ -619,25 +626,25 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
gsl_vector *Wtx=gsl_vector_alloc (W->size2);
gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
-
+
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
- int sig;
+ int sig;
LUDecomp (WtW, pmt, &sig);
LUInvert (WtW, pmt, WtWi);
-
+
double v_x, v_w, geno;
size_t c_idv=0;
-
+
char ch[1];
bitset<8> b;
-
+
size_t ni_total=indicator_idv.size();
size_t ni_test=0;
for (size_t i=0; i<ni_total; ++i) {
ni_test+=indicator_idv[i];
}
ns_test=0;
-
+
//calculate n_bit and c, the number of bit for each snp
size_t n_bit;
if (ni_total%4==0) {n_bit=ni_total/4;}
@@ -648,19 +655,20 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
infile.read(ch,1);
b=ch[0];
}
-
+
double maf;
size_t n_miss;
- size_t n_0, n_1, n_2, c;
-
+ size_t n_0, n_1, n_2, c;
+
//start reading snps and doing association test
for (size_t t=0; t<ns_total; ++t) {
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
if (setSnps.size()!=0 && setSnps.count(snpInfo[t].rs_number)==0) {
snpInfo[t].n_miss=-9;
snpInfo[t].missingness=-9;
snpInfo[t].maf=-9;
+ snpInfo[t].file_position=t;
indicator_snp.push_back(0);
continue;
}
@@ -675,52 +683,55 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); maf+=2.0; n_2++;}
else {gsl_vector_set(genotype, c_idv, 1.0); maf+=1.0; n_1++;}
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}
+ if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); maf+=0.0; n_0++;}
else {gsl_vector_set(genotype_miss, c_idv, 1); n_miss++; }
}
c_idv++;
}
}
maf/=2.0*(double)(ni_test-n_miss);
-
+
snpInfo[t].n_miss=n_miss;
snpInfo[t].missingness=(double)n_miss/(double)ni_test;
snpInfo[t].maf=maf;
-
+ snpInfo[t].n_idv=ni_test-n_miss;
+ snpInfo[t].n_nb=0;
+ snpInfo[t].file_position=t;
+
if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
-
+
if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
-
+
if ( (n_0+n_1)==0 || (n_1+n_2)==0 || (n_2+n_0)==0) {indicator_snp.push_back(0); continue;}
-
+
if (hwe_level!=1 && maf_level!=-1) {
if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
}
-
-
+
+
//filter SNP if it is correlated with W
//unless W has only one column, of 1s
- for (size_t i=0; i<genotype->size; ++i) {
- if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+ for (size_t i=0; i<genotype->size; ++i) {
+ if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
}
-
+
gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
gsl_blas_ddot (genotype, genotype, &v_x);
gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
-
+
if (W->size2!=1 && v_w/v_x > r2_level) {indicator_snp.push_back(0); continue;}
-
- indicator_snp.push_back(1);
+
+ indicator_snp.push_back(1);
ns_test++;
}
-
+
gsl_vector_free (genotype);
gsl_vector_free (genotype_miss);
gsl_matrix_free (WtW);
@@ -728,63 +739,177 @@ bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl
gsl_vector_free (Wtx);
gsl_vector_free (WtWiWtx);
gsl_permutation_free (pmt);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
-void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
+
+
+//read the genotype for one SNP; remember to read empty lines
+//geno stores original genotypes without centering
+//missing values are replaced by mean
+bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean)
+{
+ size_t ni_total=indicator_idv.size();
+
+ // if (infile.eof()) {infile.clear();}
+ // infile.seekg(pos);
+
+ string line;
+ char *ch_ptr;
+ bool flag=false;
+
+ for (size_t i=0; i<inc; i++) {
+ !safeGetline(infile, line).eof();
+ }
+
+ if (!safeGetline(infile, line).eof()) {
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ geno_mean=0.0;
+ double d;
+ size_t c_idv=0;
+ vector<size_t> geno_miss;
+
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+
+ if (strcmp(ch_ptr, "NA")==0) {
+ geno_miss.push_back(c_idv);
+ } else {
+ d=atof(ch_ptr);
+ gsl_vector_set (geno, c_idv, d);
+ geno_mean+=d;
+ }
+ c_idv++;
+ }
+
+ geno_mean/=(double)(c_idv-geno_miss.size() );
+
+ for (size_t i=0; i<geno_miss.size(); ++i) {
+ gsl_vector_set(geno, geno_miss[i], geno_mean);
+ }
+ flag=true;
+ }
+
+ return flag;
+}
+
+
+//for plink, store SNPs as double too
+void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean)
+{
+ size_t ni_total=indicator_idv.size(), n_bit;
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1;}
+ infile.seekg(pos*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ char ch[1];
+ bitset<8> b;
+
+ geno_mean=0.0;
+ size_t c=0, c_idv=0;
+ vector<size_t> geno_miss;
+
+ for (size_t i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && c==ni_total) {break;}
+ if (indicator_idv[c]==0) {c++; continue;}
+ c++;
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {
+ gsl_vector_set (geno, c_idv, 2);
+ geno_mean+=2.0;
+ } else {
+ gsl_vector_set (geno, c_idv, 1);
+ geno_mean+=1.0;
+ }
+ } else {
+ if (b[2*j+1]==1) {
+ gsl_vector_set (geno, c_idv, 0);
+ geno_mean+=0.0;
+ } else {
+ geno_miss.push_back(c_idv);
+ }
+ }
+
+ c_idv++;
+ }
+ }
+
+ geno_mean/=(double)(c_idv-geno_miss.size());
+
+ for (size_t i=0; i<geno_miss.size(); ++i) {
+ gsl_vector_set(geno, geno_miss[i], geno_mean);
+ }
+
+ return;
+}
+
+
+
+
+
+void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
{
igzstream infile (file_kin.c_str(), igzstream::in);
// ifstream infile (file_kin.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open kinship file: "<<file_kin<<endl; error=true; return;}
-
+
size_t ni_total=indicator_idv.size();
-
+
gsl_matrix_set_zero (G);
-
+
string line;
- char *ch_ptr;
+ char *ch_ptr;
double d;
-
+
if (k_mode==1) {
size_t i_test=0, i_total=0, j_test=0, j_total=0;
while (getline(infile, line)) {
- if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true;}
-
+ if (i_total==ni_total) {cout<<"error! number of rows in the kinship file is larger than the number of phentypes."<<endl; error=true;}
+
if (indicator_idv[i_total]==0) {i_total++; continue;}
-
+
j_total=0; j_test=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
while (ch_ptr!=NULL) {
if (j_total==ni_total) {cout<<"error! number of columns in the kinship file is larger than the number of phentypes for row = "<<i_total<<endl; error=true;}
-
+
d=atof(ch_ptr);
- if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}
+ if (indicator_idv[j_total]==1) {gsl_matrix_set (G, i_test, j_test, d); j_test++;}
j_total++;
-
+
ch_ptr=strtok (NULL, " , \t");
}
if (j_total!=ni_total) {cout<<"error! number of columns in the kinship file do not match the number of phentypes for row = "<<i_total<<endl; error=true;}
- i_total++; i_test++;
+ i_total++; i_test++;
}
if (i_total!=ni_total) {cout<<"error! number of rows in the kinship file do not match the number of phentypes."<<endl; error=true;}
- }
- else {
+ }
+ else {
map<size_t, size_t> mapID2ID;
size_t c=0;
for (size_t i=0; i<indicator_idv.size(); i++) {
if (indicator_idv[i]==1) {mapID2ID[i]=c; c++;}
}
-
+
string id1, id2;
double Cov_d;
size_t n_id1, n_id2;
-
+
while (getline(infile, line)) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
id1=ch_ptr;
@@ -794,10 +919,10 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
d=atof(ch_ptr);
if (mapID2num.count(id1)==0 || mapID2num.count(id2)==0) {continue;}
if (indicator_idv[mapID2num[id1]]==0 || indicator_idv[mapID2num[id2]]==0) {continue;}
-
+
n_id1=mapID2ID[mapID2num[id1]];
n_id2=mapID2ID[mapID2num[id2]];
-
+
Cov_d=gsl_matrix_get(G, n_id1, n_id2);
if (Cov_d!=0 && Cov_d!=d) {cout<<"error! redundant and unequal terms in the kinship file, for id1 = "<<id1<<" and id2 = "<<id2<<endl;}
else {
@@ -806,15 +931,15 @@ void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<strin
}
}
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
-void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
+void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G)
{
igzstream infile (file_mk.c_str(), igzstream::in);
if (!infile) {cout<<"error! fail to open file: "<<file_mk<<endl; error=true; return;}
@@ -830,101 +955,101 @@ void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string,
}
infile.close();
- infile.clear();
+ infile.clear();
return;
}
-void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
+void ReadFile_eigenU (const string &file_ku, bool &error, gsl_matrix *U)
{
igzstream infile (file_ku.c_str(), igzstream::in);
// ifstream infile (file_ku.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open the U file: "<<file_ku<<endl; error=true; return;}
-
+
size_t n_row=U->size1, n_col=U->size2, i_row=0, i_col=0;
-
+
gsl_matrix_set_zero (U);
-
+
string line;
- char *ch_ptr;
+ char *ch_ptr;
double d;
-
+
while (getline(infile, line)) {
- if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}
-
+ if (i_row==n_row) {cout<<"error! number of rows in the U file is larger than expected."<<endl; error=true;}
+
i_col=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
while (ch_ptr!=NULL) {
if (i_col==n_col) {cout<<"error! number of columns in the U file is larger than expected, for row = "<<i_row<<endl; error=true;}
-
+
d=atof(ch_ptr);
- gsl_matrix_set (U, i_row, i_col, d);
+ gsl_matrix_set (U, i_row, i_col, d);
i_col++;
-
+
ch_ptr=strtok (NULL, " , \t");
}
-
+
i_row++;
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
-void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
+void ReadFile_eigenD (const string &file_kd, bool &error, gsl_vector *eval)
{
igzstream infile (file_kd.c_str(), igzstream::in);
// ifstream infile (file_kd.c_str(), ifstream::in);
if (!infile) {cout<<"error! fail to open the D file: "<<file_kd<<endl; error=true; return;}
-
+
size_t n_row=eval->size, i_row=0;
-
+
gsl_vector_set_zero (eval);
-
+
string line;
- char *ch_ptr;
+ char *ch_ptr;
double d;
-
+
while (getline(infile, line)) {
- if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}
-
+ if (i_row==n_row) {cout<<"error! number of rows in the D file is larger than expected."<<endl; error=true;}
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
d=atof(ch_ptr);
-
+
ch_ptr=strtok (NULL, " , \t");
if (ch_ptr!=NULL) {cout<<"error! number of columns in the D file is larger than expected, for row = "<<i_row<<endl; error=true;}
-
+
gsl_vector_set (eval, i_row, d);
-
+
i_row++;
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
//read bimbam mean genotype file and calculate kinship matrix
-bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
+bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
{
igzstream infile (file_geno.c_str(), igzstream::in);
//ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
size_t n_miss;
double d, geno_mean, geno_var;
-
+
size_t ni_total=matrix_kin->size1;
gsl_vector *geno=gsl_vector_alloc (ni_total);
gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
@@ -934,11 +1059,11 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
!safeGetline(infile, line).eof();
if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
ch_ptr=strtok (NULL, " , \t");
-
+
geno_mean=0.0; n_miss=0; geno_var=0.0;
gsl_vector_set_all(geno_miss, 0);
for (size_t i=0; i<ni_total; ++i) {
@@ -952,44 +1077,49 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
geno_var+=d*d;
}
}
-
+
geno_mean/=(double)(ni_total-n_miss);
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_total;
geno_var-=geno_mean*geno_mean;
// geno_var=geno_mean*(1-geno_mean*0.5);
-
+
for (size_t i=0; i<ni_total; ++i) {
if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
+ }
+
gsl_vector_add_constant (geno, -1.0*geno_mean);
-
+
if (geno_var!=0) {
- if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
- else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
- else {cout<<"Unknown kinship mode."<<endl;}
+ if (k_mode==1) {
+ gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);
+ //eigenlib_dsyr (1.0, geno, matrix_kin);
+ } else if (k_mode==2) {
+ gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);
+ //eigenlib_dsyr (1.0/geno_var, geno, matrix_kin);
+ } else {
+ cout<<"Unknown kinship mode."<<endl;
+ }
}
-
ns_test++;
- }
+ }
cout<<endl;
-
+
gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
+
for (size_t i=0; i<ni_total; ++i) {
for (size_t j=0; j<i; ++j) {
d=gsl_matrix_get (matrix_kin, j, i);
gsl_matrix_set (matrix_kin, i, j, d);
}
}
-
+
gsl_vector_free (geno);
gsl_vector_free (geno_miss);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -999,23 +1129,23 @@ bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k
-bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
+bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
{
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
-
+
char ch[1];
bitset<8> b;
-
+
size_t n_miss, ci_total;
double d, geno_mean, geno_var;
-
+
size_t ni_total=matrix_kin->size1;
gsl_vector *geno=gsl_vector_alloc (ni_total);
size_t ns_test=0;
int n_bit;
-
+
//calculate n_bit and c, the number of bit for each snp
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -1024,14 +1154,14 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
- }
-
+ }
+
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0;
for (int i=0; i<n_bit; ++i) {
@@ -1045,51 +1175,51 @@ bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_m
else {gsl_vector_set(geno, ci_total, 1.0); geno_mean+=1.0; geno_var+=1.0;}
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(geno, ci_total, 0.0); }
+ if (b[2*j+1]==1) {gsl_vector_set(geno, ci_total, 0.0); }
else {gsl_vector_set(geno, ci_total, -9.0); n_miss++; }
}
ci_total++;
}
}
-
+
geno_mean/=(double)(ni_total-n_miss);
geno_var+=geno_mean*geno_mean*(double)n_miss;
geno_var/=(double)ni_total;
geno_var-=geno_mean*geno_mean;
// geno_var=geno_mean*(1-geno_mean*0.5);
-
+
for (size_t i=0; i<ni_total; ++i) {
d=gsl_vector_get(geno,i);
if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
- }
-
+ }
+
gsl_vector_add_constant (geno, -1.0*geno_mean);
-
+
if (geno_var!=0) {
if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
else {cout<<"Unknown kinship mode."<<endl;}
}
-
+
ns_test++;
- }
+ }
cout<<endl;
-
+
gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
-
+
for (size_t i=0; i<ni_total; ++i) {
for (size_t j=0; j<i; ++j) {
d=gsl_matrix_get (matrix_kin, j, i);
gsl_matrix_set (matrix_kin, i, j, d);
}
}
-
+
gsl_vector_free (geno);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
@@ -1103,65 +1233,65 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
if (calc_K==true) {gsl_matrix_set_zero (K);}
-
+
gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
gsl_vector *genotype_miss=gsl_vector_alloc (UtX->size1);
double geno, geno_mean;
size_t n_miss;
-
+
int ni_total=(int)indicator_idv.size();
int ns_total=(int)indicator_snp.size();
int ni_test=UtX->size1;
int ns_test=UtX->size2;
-
+
int c_idv=0, c_snp=0;
-
+
for (int i=0; i<ns_total; ++i) {
!safeGetline(infile, line).eof();
- if (indicator_snp[i]==0) {continue;}
-
+ if (indicator_snp[i]==0) {continue;}
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
ch_ptr=strtok (NULL, " , \t");
-
+
c_idv=0; geno_mean=0; n_miss=0;
gsl_vector_set_zero (genotype_miss);
for (int j=0; j<ni_total; ++j) {
ch_ptr=strtok (NULL, " , \t");
- if (indicator_idv[j]==0) {continue;}
-
+ if (indicator_idv[j]==0) {continue;}
+
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;}
- else {
+ else {
geno=atof(ch_ptr);
- gsl_vector_set (genotype, c_idv, geno);
+ gsl_vector_set (genotype, c_idv, geno);
geno_mean+=geno;
}
c_idv++;
}
-
+
geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<genotype->size; ++i) {
+
+ for (size_t i=0; i<genotype->size; ++i) {
if (gsl_vector_get (genotype_miss, i)==1) {geno=0;}
else {geno=gsl_vector_get (genotype, i); geno-=geno_mean;}
-
+
gsl_vector_set (genotype, i, geno);
gsl_matrix_set (UtX, i, c_snp, geno);
}
-
+
if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
-
+
c_snp++;
- }
-
+ }
+
if (calc_K==true) {
gsl_matrix_scale (K, 1.0/(double)ns_test);
-
+
for (size_t i=0; i<genotype->size; ++i) {
for (size_t j=0; j<i; ++j) {
geno=gsl_matrix_get (K, j, i);
@@ -1169,18 +1299,106 @@ bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<
}
}
}
-
+
gsl_vector_free (genotype);
gsl_vector_free (genotype_miss);
-
+
infile.clear();
infile.close();
-
+
return true;
}
+//Compact version of the above function, using uchar instead of gsl_matrix.
+//
+//Reads a text genotype file line by line (one line per SNP). The first three
+//fields of each line are skipped; the remaining fields are one genotype per
+//individual, with "NA" marking a missing value.
+//
+//  file_geno      genotype file name (opened through igzstream)
+//  indicator_idv  one entry per individual in the file; entries equal to 0 are skipped
+//  indicator_snp  one entry per line in the file; lines whose entry is 0 are skipped
+//  Xt             output; cleared, then one row of ni_test values is appended per kept
+//                 SNP. Each value is the genotype (missing values replaced by the SNP
+//                 mean) passed through Double02ToUchar
+//  K              if calc_K is true: zeroed, then filled with the sum over kept SNPs of
+//                 x*x^T (x = mean-centred genotype vector), divided by ns_test
+//  ni_test        number of kept individuals (length of the work vectors and Xt rows)
+//  ns_test        divisor used when scaling K
+//
+//Returns false if the file cannot be opened, if a kept line has fewer genotype
+//fields than needed, or if more individuals are flagged than ni_test allows.
+//On such a failure Xt and K are left partially filled.
+bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
+{
+    igzstream infile (file_geno.c_str(), igzstream::in);
+    // ifstream infile (file_geno.c_str(), ifstream::in);
+    if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+    const char *delim=" , \t";
+
+    Xt.clear();
+    vector<unsigned char> Xt_row(ni_test, 0);
+
+    string line;
+    char *ch_ptr;
+
+    if (calc_K==true) {gsl_matrix_set_zero (K);}
+
+    gsl_vector *genotype=gsl_vector_alloc (ni_test);
+    gsl_vector *genotype_miss=gsl_vector_alloc (ni_test);
+    double geno, geno_mean;
+    size_t n_miss;
+
+    const size_t ni_total=indicator_idv.size();
+    const size_t ns_total=indicator_snp.size();
+
+    size_t c_idv=0;
+    bool flag_ok=true;
+
+    for (size_t i=0; i<ns_total; ++i) {
+        //every line is consumed, kept or not, so line i always matches indicator_snp[i]
+        safeGetline(infile, line);
+        if (indicator_snp[i]==0) {continue;}
+
+        //skip the three leading fields
+        ch_ptr=strtok ((char *)line.c_str(), delim);
+        ch_ptr=strtok (NULL, delim);
+        ch_ptr=strtok (NULL, delim);
+
+        c_idv=0; geno_mean=0; n_miss=0;
+        gsl_vector_set_zero (genotype_miss);
+        for (size_t j=0; j<ni_total; ++j) {
+            ch_ptr=strtok (NULL, delim);
+            if (indicator_idv[j]==0) {continue;}
+
+            //strtok returns NULL once the line runs out of fields; passing
+            //that to strcmp/atof is undefined behaviour, so stop here
+            if (ch_ptr==NULL) {
+                cout<<"error! too few genotypes in genotype file "<<file_geno<<" at line "<<i+1<<endl;
+                flag_ok=false; break;
+            }
+            //the work vectors only hold ni_test entries
+            if (c_idv>=ni_test) {
+                cout<<"error! more individuals selected than expected in genotype file "<<file_geno<<endl;
+                flag_ok=false; break;
+            }
+
+            if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++;} else {
+                geno=atof(ch_ptr);
+                gsl_vector_set (genotype, c_idv, geno);
+                geno_mean+=geno;
+            }
+            c_idv++;
+        }
+        if (!flag_ok) {break;}
+
+        geno_mean/=(double)(ni_test-n_miss);
+
+        for (size_t j=0; j<genotype->size; ++j) {
+            //missing genotypes are imputed with the SNP mean
+            if (gsl_vector_get (genotype_miss, j)==1) {
+                geno=geno_mean;
+            } else {
+                geno=gsl_vector_get (genotype, j);
+            }
+
+            Xt_row[j]=Double02ToUchar(geno);
+            gsl_vector_set (genotype, j, (geno-geno_mean));
+        }
+        Xt.push_back(Xt_row);
+
+        //rank-one update of the upper triangle only
+        if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+    }
+
+    if (flag_ok && calc_K==true) {
+        gsl_matrix_scale (K, 1.0/(double)ns_test);
+
+        //mirror the upper triangle into the lower one
+        for (size_t i=0; i<genotype->size; ++i) {
+            for (size_t j=0; j<i; ++j) {
+                geno=gsl_matrix_get (K, j, i);
+                gsl_matrix_set (K, i, j, geno);
+            }
+        }
+    }
+
+    gsl_vector_free (genotype);
+    gsl_vector_free (genotype_miss);
+
+    infile.clear();
+    infile.close();
+
+    return flag_ok;
+}
@@ -1190,79 +1408,79 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
{
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
-
+
char ch[1];
bitset<8> b;
-
- int ni_total=(int)indicator_idv.size();
- int ns_total=(int)indicator_snp.size();
- int ni_test=UtX->size1;
- int ns_test=UtX->size2;
+
+ size_t ni_total=indicator_idv.size();
+ size_t ns_total=indicator_snp.size();
+ size_t ni_test=UtX->size1;
+ size_t ns_test=UtX->size2;
int n_bit;
-
+
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1;}
-
+
//print the first three majic numbers
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
-
+
if (calc_K==true) {gsl_matrix_set_zero (K);}
-
- gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
-
+
+ gsl_vector *genotype=gsl_vector_alloc (UtX->size1);
+
double geno, geno_mean;
- size_t n_miss;
- int c_idv=0, c_snp=0, c=0;
-
+ size_t n_miss;
+ size_t c_idv=0, c_snp=0, c=0;
+
//start reading snps and doing association test
- for (int t=0; t<ns_total; ++t) {
- if (indicator_snp[t]==0) {continue;}
+ for (size_t t=0; t<ns_total; ++t) {
+ if (indicator_snp[t]==0) {continue;}
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
c_idv=0; geno_mean=0.0; n_miss=0; c=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
- if ((i==(n_bit-1)) && c==ni_total) {break;}
+ if ((i==(n_bit-1)) && c==ni_total) {break;}
if (indicator_idv[c]==0) {c++; continue;}
c++;
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {gsl_vector_set(genotype, c_idv, 2.0); geno_mean+=2.0;}
else {gsl_vector_set(genotype, c_idv, 1.0); geno_mean+=1.0;}
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
+ if (b[2*j+1]==1) {gsl_vector_set(genotype, c_idv, 0.0); geno_mean+=0.0;}
else {gsl_vector_set(genotype, c_idv, -9.0); n_miss++;}
}
c_idv++;
}
}
-
+
geno_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<genotype->size; ++i) {
+
+ for (size_t i=0; i<genotype->size; ++i) {
geno=gsl_vector_get (genotype, i);
if (geno==-9) {geno=0;}
else {geno-=geno_mean;}
-
+
gsl_vector_set (genotype, i, geno);
gsl_matrix_set (UtX, i, c_snp, geno);
}
-
+
if (calc_K==true) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
-
+
c_snp++;
- }
-
+ }
+
if (calc_K==true) {
gsl_matrix_scale (K, 1.0/(double)ns_test);
-
+
for (size_t i=0; i<genotype->size; ++i) {
for (size_t j=0; j<i; ++j) {
geno=gsl_matrix_get (K, j, i);
@@ -1270,39 +1488,144 @@ bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<in
}
}
}
-
- gsl_vector_free (genotype);
+
+ gsl_vector_free (genotype);
infile.clear();
infile.close();
-
+
return true;
}
+//Compact variant of the ReadFile_bed above: genotypes are stored as unsigned
+//chars in Xt (one row per kept SNP) instead of being written to a gsl_matrix.
+//
+//The file is read in binary mode. Three leading bytes are skipped, then each
+//SNP occupies n_bit bytes holding four two-bit genotypes per byte. Bit pairs
+//decode as 00 -> 2.0, 01 -> 1.0, 11 -> 0.0 and 10 -> missing (reading the low
+//bit of the pair first). Missing values are replaced by the SNP mean before
+//being encoded with Double02ToUchar. When calc_K is true, K is zeroed, the
+//centred genotype outer products are accumulated in its upper triangle, and
+//at the end K is divided by ns_test and mirrored into the lower triangle.
+//
+//Returns false only when the file cannot be opened.
+bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test)
+{
+    ifstream infile (file_bed.c_str(), ios::binary);
+    if (!infile) {
+        cout<<"error reading bed file:"<<file_bed<<endl;
+        return false;
+    }
+
+    Xt.clear();
+    vector<unsigned char> Xt_row(ni_test, 0);
+
+    const size_t ni_total=indicator_idv.size();
+    const size_t ns_total=indicator_snp.size();
+
+    //bytes per SNP: four genotypes per byte, rounded up
+    const int n_bit=(ni_total%4==0) ? ni_total/4 : ni_total/4+1;
+
+    //step over the three leading bytes of the file
+    char byte_buf[1];
+    for (int k=0; k<3; ++k) {
+        infile.read(byte_buf,1);
+    }
+
+    if (calc_K) {gsl_matrix_set_zero (K);}
+
+    gsl_vector *genotype=gsl_vector_alloc (ni_test);
+    bitset<8> bits;
+
+    for (size_t snp=0; snp<ns_total; ++snp) {
+        if (indicator_snp[snp]==0) {continue;}
+
+        //jump to this SNP's record: n_bit bytes per SNP after the 3-byte prefix
+        infile.seekg(snp*n_bit+3);
+
+        size_t kept=0;      //individuals stored so far for this SNP
+        size_t seen=0;      //individuals decoded so far for this SNP
+        size_t n_miss=0;
+        double geno_mean=0.0;
+
+        for (int byte=0; byte<n_bit; ++byte) {
+            infile.read(byte_buf,1);
+            bits=byte_buf[0];
+            const bool last_byte=(byte==(n_bit-1));
+
+            for (size_t pair=0; pair<4; ++pair) {
+                //the last byte may be padded past the final individual
+                if (last_byte && seen==ni_total) {break;}
+
+                const bool use=(indicator_idv[seen]!=0);
+                seen++;
+                if (!use) {continue;}
+
+                const bool lo=bits[2*pair];
+                const bool hi=bits[2*pair+1];
+
+                if (lo && !hi) {
+                    //missing genotype: flagged with -9 and excluded from the mean
+                    gsl_vector_set(genotype, kept, -9.0);
+                    n_miss++;
+                } else {
+                    double value;
+                    if (!lo) {value=hi ? 1.0 : 2.0;}
+                    else {value=0.0;}
+                    gsl_vector_set(genotype, kept, value);
+                    geno_mean+=value;
+                }
+                kept++;
+            }
+        }
+
+        geno_mean/=(double)(ni_test-n_miss);
+
+        for (size_t k=0; k<genotype->size; ++k) {
+            const double raw=gsl_vector_get (genotype, k);
+            const double filled=(raw==-9) ? geno_mean : raw;
+
+            Xt_row[k]=Double02ToUchar(filled);
+            gsl_vector_set (genotype, k, filled-geno_mean);
+        }
+        Xt.push_back(Xt_row);
+
+        if (calc_K) {gsl_blas_dsyr (CblasUpper, 1.0, genotype, K);}
+    }
+
+    if (calc_K) {
+        gsl_matrix_scale (K, 1.0/(double)ns_test);
+
+        //copy the upper triangle into the lower triangle
+        for (size_t row=1; row<genotype->size; ++row) {
+            for (size_t col=0; col<row; ++col) {
+                gsl_matrix_set (K, row, col, gsl_matrix_get (K, col, row));
+            }
+        }
+    }
+
+    gsl_vector_free (genotype);
+    infile.clear();
+    infile.close();
+
+    return true;
+}
+
+
+
+
+
+
bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est)
{
mapRS2est.clear();
-
+
ifstream infile (file_est.c_str(), ifstream::in);
if (!infile) {cout<<"error opening estimated parameter file: "<<file_est<<endl; return false;}
-
+
string line;
char *ch_ptr;
-
+
string rs;
double alpha, beta, gamma, d;
-
+
//header
getline(infile, line);
-
+
size_t n=*max_element(est_column.begin(), est_column.end());
-
+
while (getline(infile, line)) {
- ch_ptr=strtok ((char *)line.c_str(), " \t");
-
+ ch_ptr=strtok ((char *)line.c_str(), " \t");
+
alpha=0.0; beta=0.0; gamma=1.0;
for (size_t i=0; i<n+1; ++i) {
if (i==est_column[0]-1) {rs=ch_ptr;}
@@ -1311,9 +1634,9 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
if (i==est_column[3]-1) {gamma=atof(ch_ptr);}
if (i<n) {ch_ptr=strtok (NULL, " \t");}
}
-
+
d=alpha+beta*gamma;
-
+
if (mapRS2est.count(rs)==0) {
mapRS2est[rs]=d;
}
@@ -1321,7 +1644,7 @@ bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map
cout<<"the same SNP occurs more than once in estimated parameter file: "<<rs<<endl; return false;
}
}
-
+
infile.clear();
infile.close();
return true;
@@ -1337,7 +1660,7 @@ bool CountFileLines (const string &file_input, size_t &n_lines)
n_lines=count(istreambuf_iterator<char>(infile), istreambuf_iterator<char>(), '\n');
infile.seekg (0, ios::beg);
-
+
return true;
}
@@ -1348,25 +1671,25 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
{
vec_read.clear();
ng_total=0;
-
- ifstream infile (file_gene.c_str(), ifstream::in);
+
+ igzstream infile (file_gene.c_str(), igzstream::in);
if (!infile) {cout<<"error! fail to open gene expression file: "<<file_gene<<endl; return false;}
-
+
string line;
char *ch_ptr;
string rs;
-
+
size_t n_idv=0, t=0;
-
+
//header
getline(infile, line);
-
+
while (getline(infile, line)) {
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
-
- ch_ptr=strtok (NULL, " , \t");
-
+
+ ch_ptr=strtok (NULL, " , \t");
+
t=0;
while (ch_ptr!=NULL) {
if (ng_total==0) {
@@ -1374,25 +1697,1482 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SN
t++;
n_idv++;
} else {
- vec_read[t]+=atof(ch_ptr);
+ vec_read[t]+=atof(ch_ptr);
t++;
}
-
- ch_ptr=strtok (NULL, " , \t");
+
+ ch_ptr=strtok (NULL, " , \t");
}
-
+
if (t!=n_idv) {cout<<"error! number of columns doesn't match in row: "<<ng_total<<endl; return false;}
-
- SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", -9, -9, -9};
+
+ SNPINFO sInfo={"-9", rs, -9, -9, "-9", "-9", 0, -9, -9, 0, 0, 0};
snpInfo.push_back(sInfo);
-
+
ng_total++;
}
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return true;
}
+
+
+
+
+
+// WJA Added
+//Read Oxford sample file
+//
+//Layout expected by this reader (space separated):
+//  header line 1: "ID_1 ID_2 missing" followed by one name per extra column
+//  header line 2: "0 0 0" followed by one type code per extra column; only the
+//                 first character of each code is used:
+//                   'P' or 'B'  phenotype column
+//                   'C'         continuous covariate
+//                   'D'         discrete covariate (factor), dummy coded
+//  remaining lines: one individual per line, "NA" marks a missing value
+//
+//  indicator_pheno, pheno  cleared, then one row per individual with one entry per
+//                          element of p_column. p_column holds 1-based indices
+//                          counted over the phenotype columns only
+//  indicator_cvt           cleared, then (only if the file has covariate columns)
+//                          one entry per individual: 1 if no covariate is NA, else 0
+//  cvt                     one row of covariate values appended per individual
+//                          (only if the file has covariate columns)
+//  n_cvt                   set only if the file has covariate columns: the common
+//                          row length of the complete covariate rows
+//
+//A factor with k observed levels contributes k-1 dummy columns; the level seen
+//first in the file is the reference level (all zeros).
+//
+//Returns false if the file cannot be opened, a header line does not start
+//with the expected tokens, header line 2 has more columns than header line 1,
+//a data row is too short, or complete covariate rows differ in length.
+//
+//NOTE(review): cvt is appended to but never cleared here, while indicator_cvt
+//is cleared; if a caller passes a non-empty cvt the two go out of step. Left
+//unchanged because callers may rely on it - confirm against the call sites.
+bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt)
+{
+    indicator_pheno.clear();
+    pheno.clear();
+    indicator_cvt.clear();
+
+    igzstream infile (file_sample.c_str(), igzstream::in);
+
+    if (!infile) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
+
+    string line;
+    char *ch_ptr;
+
+    double p,d;
+
+    vector<double> pheno_row;
+    vector<int> ind_pheno_row;
+    int flag_na=0;
+
+    size_t num_cols=0;
+    size_t num_cvt_in_file=0;
+
+    //map from requested phenotype number (1-based) to its position in the output row
+    map<size_t, size_t> mapP2c;
+    for (size_t i=0; i<p_column.size(); i++) {
+        mapP2c[p_column[i]]=i;
+        pheno_row.push_back(-9);
+        ind_pheno_row.push_back(0);
+    }
+
+    // read header line1
+    if(!safeGetline(infile, line).eof()) {
+        ch_ptr=strtok((char *)line.c_str(), " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "ID_1")!=0) {return false;}
+        ch_ptr=strtok(NULL, " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "ID_2")!=0) {return false;}
+        ch_ptr=strtok(NULL, " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "missing")!=0) {return false;}
+
+        //count the columns that follow "missing"
+        ch_ptr=strtok (NULL, " ");
+        while (ch_ptr!=NULL) {
+            num_cols++;
+            ch_ptr=strtok (NULL, " ");
+        }
+    }
+
+    //one map per 'D' column: observed level -> index in order of first appearance
+    vector<map<uint32_t, size_t> > cvt_factor_levels;
+
+    //type code of each extra column; '\0' (ignored) when header line 2 is shorter
+    vector<char> col_type(num_cols, '\0');
+
+    // read header line2
+    if(!safeGetline(infile, line).eof()) {
+        ch_ptr=strtok ((char *)line.c_str(), " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}
+        ch_ptr=strtok(NULL, " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}
+        ch_ptr=strtok(NULL, " ");
+        if(ch_ptr==NULL || strcmp(ch_ptr, "0")!=0) {return false;}
+
+        size_t it=0;
+        ch_ptr=strtok (NULL, " ");
+        while(ch_ptr!=NULL){
+            if (it>=num_cols) {
+                cout<<"error! second header line has more columns than the first in sample file: "<<file_sample<<endl;
+                return false;
+            }
+            col_type[it++]=ch_ptr[0];
+            if(ch_ptr[0]=='D') {cvt_factor_levels.push_back(map<uint32_t, size_t>());num_cvt_in_file++;}
+            if(ch_ptr[0]=='C') {num_cvt_in_file++;}
+            ch_ptr=strtok(NULL, " ");
+        }
+    }
+
+    //first pass: phenotypes, and the set of levels of every factor column
+    while (!safeGetline(infile, line).eof()) {
+
+        //after these four calls ch_ptr points at the first extra column
+        ch_ptr=strtok ((char *)line.c_str(), " ");
+        for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " ");}
+
+        size_t p_i=0;
+        size_t fac_cvt_i=0;
+
+        for (size_t i=0; i<num_cols; ++i) {
+            const char type=col_type[i];
+
+            if(type=='P' || type=='B')
+            {
+                map<size_t, size_t>::const_iterator pos=mapP2c.find(p_i+1);
+                if (pos!=mapP2c.end()) {
+                    if (ch_ptr==NULL) {
+                        cout<<"error! too few columns in a row of sample file: "<<file_sample<<endl;
+                        return false;
+                    }
+                    if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[pos->second]=0; pheno_row[pos->second]=-9;}
+                    else {p=atof(ch_ptr); ind_pheno_row[pos->second]=1; pheno_row[pos->second]=p;}
+                }
+                p_i++;
+            }
+            if(type=='D')
+            {
+                if (ch_ptr==NULL) {
+                    cout<<"error! too few columns in a row of sample file: "<<file_sample<<endl;
+                    return false;
+                }
+                // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
+                if (strcmp(ch_ptr, "NA")!=0) {
+                    const uint32_t level=atoi(ch_ptr);
+                    map<uint32_t, size_t> &levels=cvt_factor_levels[fac_cvt_i];
+                    if(levels.count(level) == 0) {
+                        //take the size BEFORE inserting: "m[k]=m.size()" has an
+                        //evaluation order that is unspecified before C++17, so
+                        //the stored index would depend on the compiler
+                        const size_t next_index=levels.size();
+                        levels[level]=next_index;
+                    }
+                }
+                fac_cvt_i++;
+            }
+
+            ch_ptr=strtok (NULL, " ");
+        }
+
+        indicator_pheno.push_back(ind_pheno_row);
+        pheno.push_back(pheno_row);
+    }
+    // close and reopen the file
+    infile.close();
+    infile.clear();
+
+    if(num_cvt_in_file>0)
+    {
+        igzstream infile2 (file_sample.c_str(), igzstream::in);
+
+        if (!infile2) {cout<<"error! fail to open sample file: "<<file_sample<<endl; return false;}
+        // skip header
+        safeGetline(infile2, line);
+        safeGetline(infile2, line);
+
+        // pull in the covariates now we know the number of factor levels
+        while (!safeGetline(infile2, line).eof()) {
+
+            vector<double> v_d; flag_na=0;
+            ch_ptr=strtok ((char *)line.c_str(), " ");
+            for(int it=0;it<3;it++){ch_ptr=strtok(NULL, " ");}
+
+            size_t fac_cvt_i=0;
+
+            for (size_t i=0; i<num_cols; ++i) {
+                const char type=col_type[i];
+
+                if(type=='C')
+                {
+                    if (ch_ptr==NULL) {
+                        cout<<"error! too few columns in a row of sample file: "<<file_sample<<endl;
+                        return false;
+                    }
+                    if (strcmp(ch_ptr, "NA")==0) {flag_na=1; d=-9;}
+                    else {d=atof(ch_ptr);}
+
+                    v_d.push_back(d);
+                }
+
+                if(type=='D')
+                {
+                    // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL IS INTEGRAL i.e for atoi error
+                    const size_t num_fac_levels=cvt_factor_levels[fac_cvt_i].size();
+                    //a factor with a single level adds no column
+                    if(num_fac_levels>1)
+                    {
+                        if (ch_ptr==NULL) {
+                            cout<<"error! too few columns in a row of sample file: "<<file_sample<<endl;
+                            return false;
+                        }
+                        if (strcmp(ch_ptr, "NA")==0) {
+                            flag_na=1;
+                            for(size_t it=0;it<num_fac_levels-1; it++) {v_d.push_back(-9);}
+                        } else {
+                            const uint32_t level=atoi(ch_ptr);
+                            const size_t level_index=cvt_factor_levels[fac_cvt_i][level];
+                            //index 0 is the reference level; index k>0 sets dummy k-1
+                            for(size_t it=0;it<num_fac_levels-1;it++) {
+                                v_d.push_back(level_index==it+1 ? 1.0 : 0.0);
+                            }
+                        }
+                    }
+                    fac_cvt_i++;
+                }
+
+                ch_ptr=strtok (NULL, " ");
+            }
+
+            if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);}
+            cvt.push_back(v_d);
+        }
+
+        if (indicator_cvt.empty()) {n_cvt=0;}
+        else {
+            //every complete row must have the same number of covariates
+            flag_na=0;
+            for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) {
+                if (indicator_cvt[i]==0) {continue;}
+
+                if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();}
+                if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;}
+            }
+        }
+
+        infile2.close();
+        infile2.clear();
+    }
+    return true;
+}
+
+
+
+// WJA Added
+//Read bgen file, the first time
+//
+//Scans every SNP block of the file, records one SNPINFO per SNP in snpInfo and
+//one 0/1 flag per SNP in indicator_snp, and counts the SNPs that pass all
+//filters in ns_test. Genotype data are read as three 16-bit values per sample
+//(AA, AB, BB), each divided by 32768 to give a probability. A sample whose
+//three probabilities sum to less than 0.9 is treated as missing; otherwise the
+//probabilities are renormalised and the dosage is 2*P(BB)+P(AB).
+//
+//  setSnps        if non-empty, SNPs whose rs id is not in the set are flagged 0
+//  W              covariate matrix; used for the correlation filter below
+//  indicator_idv  one entry per sample; samples with entry 0 are ignored
+//  miss_level     SNP flagged 0 if its missing fraction exceeds this
+//  maf_level      SNP flagged 0 if maf<maf_level or maf>1-maf_level (-1 disables)
+//  hwe_level      SNP flagged 0 if CalcHWE(...)<hwe_level (skipped when hwe_level
+//                 is 0 or maf_level is -1)
+//  r2_level       SNP flagged 0 if W has more than one column and the ratio
+//                 x'W(W'W)^-1W'x / x'x is >= r2_level
+//
+//SNPs whose kept genotypes are all identical are also flagged 0.
+//
+//Returns false if the file cannot be opened, the flags word lacks the 0x4 bit,
+//the file ends early, a block holds more samples than indicator_idv has
+//entries, or a compressed block fails to decompress. On a failure inside the
+//SNP loop, snpInfo/indicator_snp keep the entries gathered so far.
+#include <cstdint>
+#include <assert.h>
+bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test)
+{
+
+    indicator_snp.clear();
+
+    ifstream infile (file_bgen.c_str(), ios::binary);
+    if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
+
+    // read in header
+    uint32_t bgen_snp_block_offset=0;
+    uint32_t bgen_header_length=0;
+    uint32_t bgen_nsamples=0;
+    uint32_t bgen_nsnps=0;
+    uint32_t bgen_flags=0;
+    infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+    infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+    bgen_snp_block_offset-=4;
+    infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+    bgen_snp_block_offset-=4;
+    infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+    bgen_snp_block_offset-=4;
+    infile.ignore(4+bgen_header_length-20);
+    bgen_snp_block_offset-=4+bgen_header_length-20;
+    infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+    bgen_snp_block_offset-=4;
+    if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; return false;}
+
+    bool CompressedSNPBlocks=bgen_flags&0x1;
+    bool LongIds=bgen_flags&0x4;
+
+    if(!LongIds) {return false;}
+
+    //skip whatever lies between the header and the first SNP block
+    infile.ignore(bgen_snp_block_offset);
+
+    ns_test=0;
+
+    size_t ns_total=static_cast<size_t>(bgen_nsnps);
+
+    snpInfo.clear();
+    string rs;
+    long int b_pos;
+    string chr;
+//  double cM;
+    string major;
+    string minor;
+    string id;
+
+    double v_x, v_w;
+    size_t c_idv=0;
+
+    double maf, geno, geno_old;
+    size_t n_miss;
+    size_t n_0, n_1, n_2;
+    int flag_poly;
+
+    double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+    size_t ni_total=indicator_idv.size();   // total number of samples in phenotype file
+    size_t ni_test=0;                       // number of samples to use in test
+
+    uint32_t bgen_N=0;
+    uint16_t bgen_LS=0;
+    uint16_t bgen_LR=0;
+    uint16_t bgen_LC=0;
+    uint32_t bgen_SNP_pos=0;
+    uint32_t bgen_LA=0;
+    std::string bgen_A_allele;
+    uint32_t bgen_LB=0;
+    std::string bgen_B_allele;
+    uint32_t bgen_P=0;
+
+    for (size_t i=0; i<ni_total; ++i) {
+        ni_test+=indicator_idv[i];
+    }
+
+    //work space; allocated only after the header checks so that the early
+    //returns above cannot leak it, and released once at the end
+    gsl_vector *genotype=gsl_vector_alloc (W->size1);
+    gsl_vector *genotype_miss=gsl_vector_alloc (W->size1);
+    gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+    gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+    gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+    gsl_vector *WtWiWtx=gsl_vector_alloc (W->size2);
+    gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+    gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+    int sig;
+    LUDecomp (WtW, pmt, &sig);
+    LUInvert (WtW, pmt, WtWi);
+
+    //heap buffers reused across SNPs; their sizes come from the file, so
+    //stack arrays could overflow the stack
+    vector<uint16_t> unzipped_data;
+    vector<uint8_t> zipped_data;
+
+    bool flag_ok=true;
+
+//  ns_total=1;
+    for (size_t t=0; t<ns_total; ++t) {
+
+        id.clear();
+        rs.clear();
+        chr.clear();
+        bgen_A_allele.clear();
+        bgen_B_allele.clear();
+
+        infile.read(reinterpret_cast<char*>(&bgen_N),4);
+        infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+        id.resize(bgen_LS);
+        infile.read(&id[0], bgen_LS);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+        rs.resize(bgen_LR);
+        infile.read(&rs[0], bgen_LR);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+        chr.resize(bgen_LC);
+        infile.read(&chr[0], bgen_LC);
+
+        infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+        if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        bgen_A_allele.resize(bgen_LA);
+        infile.read(&bgen_A_allele[0], bgen_LA);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+        if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        bgen_B_allele.resize(bgen_LB);
+        infile.read(&bgen_B_allele[0], bgen_LB);
+
+        if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+
+        // should we switch according to MAF?
+        minor=bgen_B_allele;
+        major=bgen_A_allele;
+        b_pos=static_cast<long int>(bgen_SNP_pos);
+
+        if (setSnps.size()!=0 && setSnps.count(rs)==0) {
+            SNPINFO sInfo={"-9", rs, -9, -9, minor, major, -9, -9, -9};
+            snpInfo.push_back(sInfo);
+            indicator_snp.push_back(0);
+            //step over the genotype data without decoding it
+            if(CompressedSNPBlocks)
+                infile.read(reinterpret_cast<char*>(&bgen_P),4);
+            else
+                bgen_P=6*bgen_N;
+
+            infile.ignore(static_cast<size_t>(bgen_P));
+
+            continue;
+        }
+
+        //indicator_idv is indexed by sample below
+        if (static_cast<size_t>(bgen_N)>ni_total) {
+            cout<<"error! more samples in bgen SNP block than in the sample list:"<<file_bgen<<endl;
+            flag_ok=false; break;
+        }
+
+        //three 16-bit values per sample, i.e. 6*bgen_N bytes
+        unzipped_data.assign(3*static_cast<size_t>(bgen_N), 0);
+
+        if(CompressedSNPBlocks)
+        {
+            infile.read(reinterpret_cast<char*>(&bgen_P),4);
+            if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+            zipped_data.resize(bgen_P);
+
+            infile.read(reinterpret_cast<char*>(zipped_data.data()),bgen_P);
+            if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+
+            //uncompress wants a uLongf for the destination length
+            uLongf unzipped_data_size=6*static_cast<uLongf>(bgen_N);
+            int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data.data()), &unzipped_data_size, reinterpret_cast<Bytef*>(zipped_data.data()), static_cast<uLong> (bgen_P));
+            if (result!=Z_OK) {cout<<"error! fail to decompress SNP block in bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        }
+        else
+        {
+            bgen_P=6*bgen_N;
+            infile.read(reinterpret_cast<char*>(unzipped_data.data()),bgen_P);
+            if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        }
+
+        maf=0; n_miss=0; flag_poly=0; geno_old=-9;
+        n_0=0; n_1=0; n_2=0;
+        c_idv=0;
+        gsl_vector_set_zero (genotype_miss);
+        for (size_t i=0; i<bgen_N; ++i) {
+            // CHECK this set correctly!
+            if (indicator_idv[i]==0) {continue;}
+
+            bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+            bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+            bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+            bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+
+            //CHECK 0.1 OK
+            if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set (genotype_miss, c_idv, 1); n_miss++; c_idv++; continue;}
+
+            bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+            bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+            bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+            //dosage of the B allele; the three bins feed CalcHWE below
+            geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+            if (geno>=0 && geno<=0.5) {n_0++;}
+            if (geno>0.5 && geno<1.5) {n_1++;}
+            if (geno>=1.5 && geno<=2.0) {n_2++;}
+
+            gsl_vector_set (genotype, c_idv, geno);
+
+            //flag_poly: 0 = nothing seen yet, 2 = every value so far equals
+            //the first one, 1 = at least two different values seen
+            if (flag_poly==0) {geno_old=geno; flag_poly=2;}
+            if (flag_poly==2 && geno!=geno_old) {flag_poly=1;}
+
+            maf+=geno;
+
+            c_idv++;
+        }
+
+        maf/=2.0*static_cast<double>(ni_test-n_miss);
+
+        SNPINFO sInfo={chr, rs, -9, b_pos, minor, major, n_miss, (double)n_miss/(double)ni_test, maf};
+        snpInfo.push_back(sInfo);
+
+        if ( (double)n_miss/(double)ni_test > miss_level) {indicator_snp.push_back(0); continue;}
+
+        if ( (maf<maf_level || maf> (1.0-maf_level)) && maf_level!=-1 ) {indicator_snp.push_back(0); continue;}
+
+        if (flag_poly!=1) {indicator_snp.push_back(0); continue;}
+
+        if (hwe_level!=0 && maf_level!=-1) {
+            if (CalcHWE(n_0, n_2, n_1)<hwe_level) {indicator_snp.push_back(0); continue;}
+        }
+
+        //filter SNP if it is correlated with W
+        //unless W has only one column, of 1s
+        for (size_t i=0; i<genotype->size; ++i) {
+            if (gsl_vector_get (genotype_miss, i)==1) {geno=maf*2.0; gsl_vector_set (genotype, i, geno);}
+        }
+
+        gsl_blas_dgemv (CblasTrans, 1.0, W, genotype, 0.0, Wtx);
+        gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+        gsl_blas_ddot (genotype, genotype, &v_x);
+        gsl_blas_ddot (Wtx, WtWiWtx, &v_w);
+
+        if (W->size2!=1 && v_w/v_x >= r2_level) {indicator_snp.push_back(0); continue;}
+
+        indicator_snp.push_back(1);
+        ns_test++;
+
+    }
+
+    gsl_vector_free (genotype);
+    gsl_vector_free (genotype_miss);
+    gsl_matrix_free (WtW);
+    gsl_matrix_free (WtWi);
+    gsl_vector_free (Wtx);
+    gsl_vector_free (WtWiWtx);
+    gsl_permutation_free (pmt);
+
+    infile.close();
+    infile.clear();
+
+    return flag_ok;
+
+}
+
+
+//read oxford genotype file and calculate kinship matrix
+//
+//Walks the SNP blocks of a bgen file in order; block t is used only when
+//indicator_snp[t] is non-zero. For each used SNP the per-sample dosage
+//2*P(BB)+P(AB) is computed from three 16-bit values per sample (each divided
+//by 32768, then renormalised). A sample whose three probabilities sum to less
+//than 0.9 is treated as missing and imputed with the SNP mean. The centred
+//dosage vector x then updates the upper triangle of matrix_kin with x*x^T
+//(k_mode 1) or x*x^T/var (k_mode 2); SNPs with zero variance add nothing.
+//Finally matrix_kin is divided by the number of used SNPs and mirrored into
+//its lower triangle.
+//
+//  file_oxford   bgen file name
+//  indicator_snp one entry per SNP block in the file
+//  k_mode        1 or 2, see above; any other value prints a message per SNP
+//  display_pace  the progress bar is refreshed every display_pace SNPs
+//  matrix_kin    in/out; its size1 is taken as the number of samples. It is
+//                accumulated into, not zeroed, by this function
+//
+//Returns false if the file cannot be opened, ends early, a block holds more
+//samples than matrix_kin has rows, or a compressed block fails to decompress;
+//matrix_kin then holds a partial, unscaled sum.
+//
+//NOTE(review): if no SNP is used, the final scaling divides by zero - confirm
+//that callers never pass an all-zero indicator_snp.
+bool bgenKin (const string &file_oxford, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin)
+{
+    string file_bgen=file_oxford;
+    ifstream infile (file_bgen.c_str(), ios::binary);
+    if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return false;}
+
+    // read in header
+    uint32_t bgen_snp_block_offset=0;
+    uint32_t bgen_header_length=0;
+    uint32_t bgen_nsamples=0;
+    uint32_t bgen_nsnps=0;
+    uint32_t bgen_flags=0;
+    infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+    infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+    bgen_snp_block_offset-=4;
+    infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+    bgen_snp_block_offset-=4;
+    infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+    bgen_snp_block_offset-=4;
+    infile.ignore(4+bgen_header_length-20);
+    bgen_snp_block_offset-=4+bgen_header_length-20;
+    infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+    bgen_snp_block_offset-=4;
+    if (!infile) {cout<<"error! unexpected end of bgen file:"<<file_bgen<<endl; return false;}
+
+    bool CompressedSNPBlocks=bgen_flags&0x1;
+//  bool LongIds=bgen_flags&0x4;
+
+    //skip whatever lies between the header and the first SNP block
+    infile.ignore(bgen_snp_block_offset);
+
+    double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+    uint32_t bgen_N=0;
+    uint16_t bgen_LS=0;
+    uint16_t bgen_LR=0;
+    uint16_t bgen_LC=0;
+    uint32_t bgen_SNP_pos=0;
+    uint32_t bgen_LA=0;
+    std::string bgen_A_allele;
+    uint32_t bgen_LB=0;
+    std::string bgen_B_allele;
+    uint32_t bgen_P=0;
+    string id;
+    string rs;
+    string chr;
+    double genotype;
+
+    size_t n_miss;
+    double d, geno_mean, geno_var;
+
+    size_t ni_total=matrix_kin->size1;
+    gsl_vector *geno=gsl_vector_alloc (ni_total);
+    //NB: despite the name, 1.0 here means "observed" and 0.0 means "missing"
+    gsl_vector *geno_miss=gsl_vector_alloc (ni_total);
+
+    //heap buffers reused across SNPs; their sizes come from the file, so
+    //stack arrays could overflow the stack
+    vector<uint16_t> unzipped_data;
+    vector<uint8_t> zipped_data;
+
+    bool flag_ok=true;
+
+    size_t ns_test=0;
+    for (size_t t=0; t<indicator_snp.size(); ++t) {
+
+        if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+
+        id.clear();
+        rs.clear();
+        chr.clear();
+        bgen_A_allele.clear();
+        bgen_B_allele.clear();
+
+        infile.read(reinterpret_cast<char*>(&bgen_N),4);
+        infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+        id.resize(bgen_LS);
+        infile.read(&id[0], bgen_LS);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+        rs.resize(bgen_LR);
+        infile.read(&rs[0], bgen_LR);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+        chr.resize(bgen_LC);
+        infile.read(&chr[0], bgen_LC);
+
+        infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+        if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        bgen_A_allele.resize(bgen_LA);
+        infile.read(&bgen_A_allele[0], bgen_LA);
+
+        infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+        if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        bgen_B_allele.resize(bgen_LB);
+        infile.read(&bgen_B_allele[0], bgen_LB);
+
+        if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+
+        if (indicator_snp[t]==0) {
+            //step over the genotype data without decoding it
+            if(CompressedSNPBlocks)
+                infile.read(reinterpret_cast<char*>(&bgen_P),4);
+            else
+                bgen_P=6*bgen_N;
+
+            infile.ignore(static_cast<size_t>(bgen_P));
+
+            continue;
+        }
+
+        //geno and geno_miss hold ni_total entries and are indexed by sample below
+        if (static_cast<size_t>(bgen_N)>ni_total) {
+            cout<<endl<<"error! more samples in bgen SNP block than rows in the kinship matrix:"<<file_bgen<<endl;
+            flag_ok=false; break;
+        }
+
+        //three 16-bit values per sample, i.e. 6*bgen_N bytes
+        unzipped_data.assign(3*static_cast<size_t>(bgen_N), 0);
+
+        if(CompressedSNPBlocks)
+        {
+            infile.read(reinterpret_cast<char*>(&bgen_P),4);
+            if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+            zipped_data.resize(bgen_P);
+
+            infile.read(reinterpret_cast<char*>(zipped_data.data()),bgen_P);
+            if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+
+            //uncompress wants a uLongf for the destination length
+            uLongf unzipped_data_size=6*static_cast<uLongf>(bgen_N);
+            int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data.data()), &unzipped_data_size, reinterpret_cast<Bytef*>(zipped_data.data()), static_cast<uLong> (bgen_P));
+            if (result!=Z_OK) {cout<<endl<<"error! fail to decompress SNP block in bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        }
+        else
+        {
+            bgen_P=6*bgen_N;
+            infile.read(reinterpret_cast<char*>(unzipped_data.data()),bgen_P);
+            if (!infile) {cout<<endl<<"error! unexpected end of bgen file:"<<file_bgen<<endl; flag_ok=false; break;}
+        }
+
+        geno_mean=0.0; n_miss=0; geno_var=0.0;
+        gsl_vector_set_all(geno_miss, 0);
+
+        for (size_t i=0; i<bgen_N; ++i) {
+
+            bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+            bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+            bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+            // WJA
+            bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+            if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(geno_miss, i, 0.0); n_miss++;}
+            else {
+
+                bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+                bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+                bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+                genotype=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+                gsl_vector_set(geno, i, genotype);
+                gsl_vector_set(geno_miss, i, 1.0);
+                geno_mean+=genotype;
+                geno_var+=genotype*genotype;
+            }
+
+        }
+
+        //mean over observed samples; the variance treats each missing sample
+        //as if it had the mean value
+        geno_mean/=(double)(ni_total-n_miss);
+        geno_var+=geno_mean*geno_mean*(double)n_miss;
+        geno_var/=(double)ni_total;
+        geno_var-=geno_mean*geno_mean;
+//      geno_var=geno_mean*(1-geno_mean*0.5);
+
+        for (size_t i=0; i<ni_total; ++i) {
+            if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+        }
+
+        gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+        if (geno_var!=0) {
+            if (k_mode==1) {gsl_blas_dsyr (CblasUpper, 1.0, geno, matrix_kin);}
+            else if (k_mode==2) {gsl_blas_dsyr (CblasUpper, 1.0/geno_var, geno, matrix_kin);}
+            else {cout<<"Unknown kinship mode."<<endl;}
+        }
+
+        ns_test++;
+    }
+
+    if (flag_ok) {
+        cout<<endl;
+
+        gsl_matrix_scale (matrix_kin, 1.0/(double)ns_test);
+
+        //mirror the upper triangle into the lower one
+        for (size_t i=0; i<ni_total; ++i) {
+            for (size_t j=0; j<i; ++j) {
+                d=gsl_matrix_get (matrix_kin, j, i);
+                gsl_matrix_set (matrix_kin, i, j, d);
+            }
+        }
+    }
+
+    gsl_vector_free (geno);
+    gsl_vector_free (geno_miss);
+
+    infile.close();
+    infile.clear();
+
+    return flag_ok;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//Helper for ReadHeader: store the 1-based position of a recognized column.
+//col is the HEADER field to fill, idx the 0-based index of the current token
+//and name the label used in the diagnostic. If col is already set, it is left
+//unchanged, an error is printed and n_error is incremented.
+template <typename ColT, typename IdxT>
+static void AssignHeaderCol (ColT &col, const IdxT idx, const char *name, size_t &n_error)
+{
+    if (col==0) {
+        col=idx+1;
+    } else {
+        cout<<"error! more than one "<<name<<" column in the file."<<endl;
+        n_error++;
+    }
+}
+
+//Read a header line to determine which column contains which item.
+//The line is split on spaces, commas and tabs; every token that matches one of
+//the known aliases below sets the corresponding *_col field of header to its
+//1-based position (0 means "column not present"). header.coln receives the
+//total number of tokens, recognized or not.
+//Returns false if a column kind appears more than once, if a cor column is
+//present but is not the last column, or if there is no rs column and chr/pos
+//are not both available to replace it; returns true otherwise.
+bool ReadHeader (const string &line, HEADER &header)
+{
+    string rs_ptr[]={"rs","RS","snp","SNP","snps","SNPS","snpid","SNPID","rsid","RSID"};
+    set<string> rs_set(rs_ptr, rs_ptr+10);
+    string chr_ptr[]={"chr","CHR"};
+    set<string> chr_set(chr_ptr, chr_ptr+2);
+    string pos_ptr[]={"ps","PS","pos","POS","base_position","BASE_POSITION", "bp", "BP"};
+    set<string> pos_set(pos_ptr, pos_ptr+8);
+    string cm_ptr[]={"cm","CM"};
+    set<string> cm_set(cm_ptr, cm_ptr+2);
+    string a1_ptr[]={"a1","A1","allele1","ALLELE1"};
+    set<string> a1_set(a1_ptr, a1_ptr+4);
+    string a0_ptr[]={"a0","A0","allele0","ALLELE0"};
+    set<string> a0_set(a0_ptr, a0_ptr+4);
+
+    string z_ptr[]={"z","Z","z_score","Z_SCORE","zscore","ZSCORE"};
+    set<string> z_set(z_ptr, z_ptr+6);
+    string beta_ptr[]={"beta","BETA","b","B"};
+    set<string> beta_set(beta_ptr, beta_ptr+4);
+    string sebeta_ptr[]={"se_beta","SE_BETA","se","SE"};
+    set<string> sebeta_set(sebeta_ptr, sebeta_ptr+4);
+    string chisq_ptr[]={"chisq","CHISQ","chisquare","CHISQUARE"};
+    set<string> chisq_set(chisq_ptr, chisq_ptr+4);
+    string p_ptr[]={"p","P","pvalue","PVALUE","p-value","P-VALUE"};
+    set<string> p_set(p_ptr, p_ptr+6);
+
+    string n_ptr[]={"n","N","ntotal","NTOTAL","n_total","N_TOTAL"};
+    set<string> n_set(n_ptr, n_ptr+6);
+    string nmis_ptr[]={"nmis","NMIS","n_mis","N_MIS","n_miss","N_MISS"};
+    set<string> nmis_set(nmis_ptr, nmis_ptr+6);
+    string nobs_ptr[]={"nobs","NOBS","n_obs","N_OBS"};
+    set<string> nobs_set(nobs_ptr, nobs_ptr+4);
+
+    string af_ptr[]={"af","AF","maf","MAF","f","F","allele_freq","ALLELE_FREQ","allele_frequency","ALLELE_FREQUENCY"};
+    set<string> af_set(af_ptr, af_ptr+10);
+    string var_ptr[]={"var","VAR"};
+    set<string> var_set(var_ptr, var_ptr+2);
+
+    string ws_ptr[]={"window_size","WINDOW_SIZE","ws","WS"};
+    set<string> ws_set(ws_ptr, ws_ptr+4);
+    string cor_ptr[]={"cor","COR","r","R"};
+    set<string> cor_set(cor_ptr, cor_ptr+4);
+
+    //reset every column index, including cm_col, which is tested below
+    header.rs_col=0; header.chr_col=0; header.pos_col=0; header.cm_col=0;
+    header.a1_col=0; header.a0_col=0;
+    header.z_col=0; header.beta_col=0; header.sebeta_col=0; header.chisq_col=0; header.p_col=0;
+    header.n_col=0; header.nmis_col=0; header.nobs_col=0;
+    header.af_col=0; header.var_col=0;
+    header.ws_col=0; header.cor_col=0;
+    header.coln=0;
+
+    size_t n_error=0;
+
+    //strtok overwrites delimiters with '\0', so tokenize a private copy
+    //instead of writing through the const buffer of line
+    string buf(line);
+
+    char *ch_ptr=strtok (&buf[0], " , \t");
+    while (ch_ptr!=NULL) {
+        const string type(ch_ptr);
+
+        if (rs_set.count(type)!=0) {
+            AssignHeaderCol (header.rs_col, header.coln, "rs", n_error);
+        } else if (chr_set.count(type)!=0) {
+            AssignHeaderCol (header.chr_col, header.coln, "chr", n_error);
+        } else if (pos_set.count(type)!=0) {
+            AssignHeaderCol (header.pos_col, header.coln, "pos", n_error);
+        } else if (cm_set.count(type)!=0) {
+            AssignHeaderCol (header.cm_col, header.coln, "cm", n_error);
+        } else if (a1_set.count(type)!=0) {
+            AssignHeaderCol (header.a1_col, header.coln, "allele1", n_error);
+        } else if (a0_set.count(type)!=0) {
+            AssignHeaderCol (header.a0_col, header.coln, "allele0", n_error);
+        } else if (z_set.count(type)!=0) {
+            AssignHeaderCol (header.z_col, header.coln, "z", n_error);
+        } else if (beta_set.count(type)!=0) {
+            AssignHeaderCol (header.beta_col, header.coln, "beta", n_error);
+        } else if (sebeta_set.count(type)!=0) {
+            AssignHeaderCol (header.sebeta_col, header.coln, "se_beta", n_error);
+        } else if (chisq_set.count(type)!=0) {
+            AssignHeaderCol (header.chisq_col, header.coln, "chisq", n_error);
+        } else if (p_set.count(type)!=0) {
+            AssignHeaderCol (header.p_col, header.coln, "p", n_error);
+        } else if (n_set.count(type)!=0) {
+            AssignHeaderCol (header.n_col, header.coln, "n_total", n_error);
+        } else if (nmis_set.count(type)!=0) {
+            AssignHeaderCol (header.nmis_col, header.coln, "n_mis", n_error);
+        } else if (nobs_set.count(type)!=0) {
+            AssignHeaderCol (header.nobs_col, header.coln, "n_obs", n_error);
+        } else if (ws_set.count(type)!=0) {
+            AssignHeaderCol (header.ws_col, header.coln, "window_size", n_error);
+        } else if (af_set.count(type)!=0) {
+            AssignHeaderCol (header.af_col, header.coln, "af", n_error);
+        } else if (var_set.count(type)!=0) {
+            //the var aliases were declared above but never matched before
+            AssignHeaderCol (header.var_col, header.coln, "var", n_error);
+        } else if (cor_set.count(type)!=0) {
+            AssignHeaderCol (header.cor_col, header.coln, "cor", n_error);
+        } else {}
+
+        ch_ptr=strtok (NULL, " , \t");
+        header.coln++;
+    }
+
+    if (header.cor_col!=0 && header.cor_col!=header.coln) {cout<<"error! the cor column should be the last column."<<endl; n_error++;}
+
+    if (header.rs_col==0) {
+        if (header.chr_col!=0 && header.pos_col!=0) {
+            cout<<"missing an rs column. rs id will be replaced by chr:pos"<<endl;
+        } else {
+            cout<<"error! missing an rs column."<<endl; n_error++;
+        }
+    }
+
+    return n_error==0;
+}
+
+
+
+
+//Read a category file and record mapRS2cat (SNP id -> 0-based category index).
+//The first line is a header parsed by ReadHeader; n_vc is set to the number of
+//header columns that are not rs/chr/pos/cm/allele1/allele0. In every following
+//row, each remaining column whose value parses (atoi) to 0 or 1 is counted as
+//one category, and the SNP is assigned to the first category flagged 1.
+//The category file does not contain a null category, so a SNP that has 0 for
+//all categories is not entered into mapRS2cat.
+//When there is no rs column the id is built as chr:pos, which requires the
+//chr and pos columns to come before the first category column.
+//Returns false if the file cannot be opened or its header is rejected.
+bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc)
+{
+    mapRS2cat.clear();
+
+    igzstream infile (file_cat.c_str(), igzstream::in);
+    if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return false;}
+
+    string line;
+    char *ch_ptr;
+
+    string rs, chr, a1, a0, pos, cm;
+    size_t i_cat;
+
+    //read header
+    HEADER header;
+    header.cm_col=0;	//known starting value; the n_vc count below reads cm_col
+    safeGetline(infile, line);
+    if (!ReadHeader (line, header)) {
+        cout<<"error! invalid header in category file: "<<file_cat<<endl;
+        infile.clear();
+        infile.close();
+        return false;
+    }
+
+    //use the header to count the number of categories
+    n_vc=header.coln;
+    if (header.rs_col!=0) {n_vc--;}
+    if (header.chr_col!=0) {n_vc--;}
+    if (header.pos_col!=0) {n_vc--;}
+    if (header.cm_col!=0) {n_vc--;}
+    if (header.a1_col!=0) {n_vc--;}
+    if (header.a0_col!=0) {n_vc--;}
+
+    //read the following lines to record mapRS2cat
+    while (!safeGetline(infile, line).eof()) {
+        //line is a local buffer, so let strtok split it in place
+        ch_ptr=strtok (&line[0], " , \t");
+
+        i_cat=0;
+        for (size_t i=0; i<header.coln; i++) {
+            //blank line or row shorter than the header: nothing left to read
+            if (ch_ptr==NULL) {break;}
+
+            if (header.rs_col!=0 && header.rs_col==i+1) {
+                rs=ch_ptr;
+            } else if (header.chr_col!=0 && header.chr_col==i+1) {
+                chr=ch_ptr;
+            } else if (header.pos_col!=0 && header.pos_col==i+1) {
+                pos=ch_ptr;
+            } else if (header.cm_col!=0 && header.cm_col==i+1) {
+                cm=ch_ptr;
+            } else if (header.a1_col!=0 && header.a1_col==i+1) {
+                a1=ch_ptr;
+            } else if (header.a0_col!=0 && header.a0_col==i+1) {
+                a0=ch_ptr;
+            } else {
+                const int flag=atoi(ch_ptr);
+                if (flag==1 || flag==0) {
+                    //build the replacement id once, at the first category column
+                    if (i_cat==0 && header.rs_col==0) {
+                        rs=chr+":"+pos;
+                    }
+
+                    //only the first category flagged 1 is kept for a SNP
+                    if (flag==1 && mapRS2cat.count(rs)==0) {mapRS2cat[rs]=i_cat;}
+                    i_cat++;
+                }
+            }
+
+            ch_ptr=strtok (NULL, " , \t");
+        }
+    }
+
+    infile.clear();
+    infile.close();
+
+    return true;
+}
+
+
+
+
+//Read a bimbam mean genotype file and calculate kinship matrices; here the
+//genotypes are not centered, and matrix_kin can hold several K matrices.
+//matrix_kin has ni_test rows and n_vc*ni_test columns: component t occupies
+//the ni_test x ni_test block starting at column ni_test*t.
+//For each SNP with indicator_snp[t]!=0 the genotypes of the individuals with
+//indicator_idv[i]!=0 are read ("NA" entries are replaced by the SNP mean),
+//the SNP variance is stored in mapRS2var under snpInfo[t].rs_number, and
+//x*x' (k_mode 1) or x*x'/var (k_mode 2) is added to the block selected by
+//mapRS2cat (or to matrix_kin itself when there is a single component or
+//mapRS2cat is empty). SNPs with zero variance are skipped.
+//Each block is finally divided by the number of SNPs added to it and
+//mirrored from its upper to its lower triangle.
+//Returns false if the file cannot be opened or a row has too few genotypes.
+bool BimbamKin (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin)
+{
+    igzstream infile (file_geno.c_str(), igzstream::in);
+    //ifstream infile (file_geno.c_str(), ifstream::in);
+    if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return false;}
+
+    string line;
+    char *ch_ptr;
+
+    size_t n_miss;
+    double d, geno_mean, geno_var;
+
+    size_t ni_test=matrix_kin->size1;
+    gsl_vector *geno=gsl_vector_alloc (ni_test);
+    gsl_vector *geno_miss=gsl_vector_alloc (ni_test);
+
+    size_t n_vc=matrix_kin->size2/ni_test, i_vc;
+    string rs;
+    vector<size_t> ns_vec(n_vc, 0);	//number of SNPs added to each component
+
+    for (size_t t=0; t<indicator_snp.size(); ++t) {
+        safeGetline(infile, line);
+        if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+        if (indicator_snp[t]==0) {continue;}
+
+        //skip the three leading columns that precede the genotypes;
+        //line is a local buffer, so strtok may split it in place
+        ch_ptr=strtok (&line[0], " , \t");
+        ch_ptr=strtok (NULL, " , \t");
+        ch_ptr=strtok (NULL, " , \t");
+
+        rs=snpInfo[t].rs_number;
+
+        geno_mean=0.0; n_miss=0; geno_var=0.0;
+        gsl_vector_set_all(geno_miss, 0);
+
+        size_t j=0;	//index among the tested individuals
+        for (size_t i=0; i<indicator_idv.size(); ++i) {
+            //consume one token per individual, tested or not, so that the
+            //genotype columns stay aligned with indicator_idv
+            ch_ptr=strtok (NULL, " , \t");
+            if (indicator_idv[i]==0) {continue;}
+
+            if (ch_ptr==NULL) {
+                cout<<endl<<"error! too few genotypes for snp "<<rs<<" in file: "<<file_geno<<endl;
+                gsl_vector_free (geno);
+                gsl_vector_free (geno_miss);
+                infile.close();
+                infile.clear();
+                return false;
+            }
+
+            if (strcmp(ch_ptr, "NA")==0) {
+                //geno and geno_miss hold tested individuals only: index with j
+                gsl_vector_set(geno_miss, j, 0);
+                n_miss++;
+            } else {
+                d=atof(ch_ptr);
+                gsl_vector_set (geno, j, d);
+                gsl_vector_set (geno_miss, j, 1);
+                geno_mean+=d;
+                geno_var+=d*d;
+            }
+            j++;
+        }
+
+        //mean over observed genotypes; variance with missing values set to the mean
+        geno_mean/=(double)(ni_test-n_miss);
+        geno_var+=geno_mean*geno_mean*(double)n_miss;
+        geno_var/=(double)ni_test;
+        geno_var-=geno_mean*geno_mean;
+//      geno_var=geno_mean*(1-geno_mean*0.5);
+
+        for (size_t i=0; i<ni_test; ++i) {
+            if (gsl_vector_get (geno_miss, i)==0) {gsl_vector_set(geno, i, geno_mean);}
+        }
+
+        //centering is intentionally not applied here
+        //gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+        if (geno_var!=0) {
+            mapRS2var[rs]=geno_var;
+
+            if (k_mode==1 || k_mode==2) {
+                const double weight=(k_mode==1) ? 1.0 : 1.0/geno_var;
+
+                if (n_vc==1 || mapRS2cat.size()==0 ) {
+                    gsl_blas_dsyr (CblasUpper, weight, geno, matrix_kin);
+                    ns_vec[0]++;
+                } else if (mapRS2cat.count(rs)!=0) {
+                    i_vc=mapRS2cat.at(rs);
+                    ns_vec[i_vc]++;
+                    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+                    gsl_blas_dsyr (CblasUpper, weight, geno, &kin_sub.matrix);
+                }
+            } else {
+                cout<<"Unknown kinship mode."<<endl;
+            }
+        }
+    }
+    cout<<endl;
+
+    for (size_t t=0; t<n_vc; t++) {
+        gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*t, ni_test, ni_test);
+
+        //scale only this component's block by its own SNP count
+        if (ns_vec[t]!=0) {gsl_matrix_scale (&kin_sub.matrix, 1.0/(double)ns_vec[t]);}
+
+        //dsyr filled the upper triangle only; mirror it into the lower one
+        for (size_t i=0; i<ni_test; ++i) {
+            for (size_t j=0; j<i; ++j) {
+                d=gsl_matrix_get (&kin_sub.matrix, j, i);
+                gsl_matrix_set (&kin_sub.matrix, i, j, d);
+            }
+        }
+    }
+
+    gsl_vector_free (geno);
+    gsl_vector_free (geno_miss);
+
+    infile.close();
+    infile.clear();
+
+    return true;
+}
+
+
+
+
+
+
+
+//Read a plink bed file and calculate (uncentered) kinship matrices.
+//matrix_kin has ni_test rows and n_vc*ni_test columns: component t occupies
+//the ni_test x ni_test block starting at column ni_test*t.
+//Each SNP with indicator_snp[t]!=0 is located with seekg (3 leading bytes
+//plus t records of n_bit bytes, 4 individuals per byte) and decoded for the
+//individuals with indicator_idv[i]!=0; missing genotypes are replaced by the
+//SNP mean. The SNP variance is stored in mapRS2var under
+//snpInfo[t].rs_number, and x*x' (k_mode 1) or x*x'/var (k_mode 2) is added to
+//the block selected by mapRS2cat (or to matrix_kin itself when there is a
+//single component or mapRS2cat is empty). SNPs with zero variance are skipped.
+//Each block is finally divided by the number of SNPs added to it and
+//mirrored from its upper to its lower triangle.
+//Returns false if the bed file cannot be opened.
+bool PlinkKin (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin)
+{
+    ifstream infile (file_bed.c_str(), ios::binary);
+    if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return false;}
+
+    char ch[1];
+    bitset<8> b;
+
+    size_t n_miss, ci_total, ci_test;
+    double d, geno_mean, geno_var;
+
+    size_t ni_test=matrix_kin->size1;
+    size_t ni_total=indicator_idv.size();
+    gsl_vector *geno=gsl_vector_alloc (ni_test);
+
+    int n_bit;
+
+    size_t n_vc=matrix_kin->size2/ni_test, i_vc;
+    string rs;
+    vector<size_t> ns_vec(n_vc, 0);	//number of SNPs added to each component
+
+    //calculate n_bit, the number of bytes used by each snp
+    if (ni_total%4==0) {n_bit=ni_total/4;}
+    else {n_bit=ni_total/4+1; }
+
+    //read past the first three magic numbers
+    for (int i=0; i<3; ++i) {
+        infile.read(ch,1);
+        b=ch[0];
+    }
+
+    for (size_t t=0; t<indicator_snp.size(); ++t) {
+        if (t%display_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs  ", t, indicator_snp.size()-1);}
+        if (indicator_snp[t]==0) {continue;}
+
+        infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
+
+        rs=snpInfo[t].rs_number;
+
+        //read genotypes
+        geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
+        for (int i=0; i<n_bit; ++i) {
+            infile.read(ch,1);
+            b=ch[0];
+            for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
+                //the last byte may be only partly used
+                if ((i==(n_bit-1)) && ci_total==ni_total) {break;}
+                if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+                if (b[2*j]==0) {
+                    if (b[2*j+1]==0) {gsl_vector_set(geno, ci_test, 2.0); geno_mean+=2.0; geno_var+=4.0; }
+                    else {gsl_vector_set(geno, ci_test, 1.0); geno_mean+=1.0; geno_var+=1.0;}
+                }
+                else {
+                    if (b[2*j+1]==1) {gsl_vector_set(geno, ci_test, 0.0); }
+                    else {gsl_vector_set(geno, ci_test, -9.0); n_miss++; }	//-9 marks a missing genotype
+                }
+
+                ci_test++;
+                ci_total++;
+            }
+        }
+
+        //mean over observed genotypes; variance with missing values set to the mean
+        geno_mean/=(double)(ni_test-n_miss);
+        geno_var+=geno_mean*geno_mean*(double)n_miss;
+        geno_var/=(double)ni_test;
+        geno_var-=geno_mean*geno_mean;
+//      geno_var=geno_mean*(1-geno_mean*0.5);
+
+        for (size_t i=0; i<ni_test; ++i) {
+            d=gsl_vector_get(geno,i);
+            if (d==-9.0) {gsl_vector_set(geno, i, geno_mean);}
+        }
+
+        //centering is intentionally not applied here
+        //gsl_vector_add_constant (geno, -1.0*geno_mean);
+
+        if (geno_var!=0) {
+            mapRS2var[rs]=geno_var;
+
+            if (k_mode==1 || k_mode==2) {
+                const double weight=(k_mode==1) ? 1.0 : 1.0/geno_var;
+
+                if (n_vc==1 || mapRS2cat.size()==0 ) {
+                    gsl_blas_dsyr (CblasUpper, weight, geno, matrix_kin);
+                    ns_vec[0]++;
+                } else if (mapRS2cat.count(rs)!=0) {
+                    i_vc=mapRS2cat.at(rs);
+                    ns_vec[i_vc]++;
+                    gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*i_vc, ni_test, ni_test);
+                    gsl_blas_dsyr (CblasUpper, weight, geno, &kin_sub.matrix);
+                }
+            } else {
+                cout<<"Unknown kinship mode."<<endl;
+            }
+        }
+    }
+    cout<<endl;
+
+    for (size_t t=0; t<n_vc; t++) {
+        gsl_matrix_view kin_sub=gsl_matrix_submatrix(matrix_kin, 0, ni_test*t, ni_test, ni_test);
+
+        //scale only this component's block by its own SNP count
+        if (ns_vec[t]!=0) {gsl_matrix_scale (&kin_sub.matrix, 1.0/(double)ns_vec[t]);}
+
+        //dsyr filled the upper triangle only; mirror it into the lower one
+        for (size_t i=0; i<ni_test; ++i) {
+            for (size_t j=0; j<i; ++j) {
+                d=gsl_matrix_get (&kin_sub.matrix, j, i);
+                gsl_matrix_set (&kin_sub.matrix, i, j, d);
+            }
+        }
+    }
+
+    gsl_vector_free (geno);
+
+    infile.close();
+    infile.clear();
+
+    return true;
+}
+
+
+
+//Read a var file and store mapRS2var.
+//Each line is split on spaces, commas and tabs: the first token is the SNP id
+//and the second its variance (parsed with atof). Blank lines are skipped, and
+//a line with an id but no value is reported and skipped.
+//Returns false if the file cannot be opened.
+bool ReadFile_var (const string &file_var, map<string, double> &mapRS2var)
+{
+    mapRS2var.clear();
+
+    igzstream infile (file_var.c_str(), igzstream::in);
+    if (!infile) {cout<<"error! fail to open var file: "<<file_var<<endl; return false;}
+
+    string line, rs;
+
+    while (!safeGetline(infile, line).eof()) {
+        //line is a local buffer, so let strtok split it in place
+        char *ch_ptr=strtok (&line[0], " , \t");
+        if (ch_ptr==NULL) {continue;}	//blank line
+        rs=ch_ptr;
+
+        ch_ptr=strtok (NULL, " , \t");
+        if (ch_ptr==NULL) {
+            cout<<"warning! no variance value for snp "<<rs<<" in var file: "<<file_var<<endl;
+            continue;
+        }
+
+        mapRS2var[rs]=atof(ch_ptr);
+    }
+
+    infile.clear();
+    infile.close();
+
+    return true;
+}
+
+
+//Read a beta file, use mapRS2var to select snps (and to provide var if
+//maf/var is not provided in the beta file), and calculate q.
+//The first line is a header parsed by ReadHeader. For every later row whose
+//SNP id is in mapRS2var (and in mapRS2cat when that map is not empty):
+//  zsquare comes from z, else beta/se_beta, else chisq, else the p value;
+//  var_x is 1 unless k_mode==1, where it is the var column, else
+//  2*af*(1-af), else mapRS2var;
+//  (zsquare-1)*var_x/n_total is added to the q sum and var_x to the s sum of
+//  the SNP's category (category 0 when mapRS2cat is empty).
+//On return q[i] holds the q sum divided by the s sum (left at 0 when the s
+//sum is 0), s[i] the s sum, ni_total the largest n_total seen among used
+//SNPs, ns_total the number of data rows read and ns_test the number used.
+//Blank rows are skipped; columns missing from a short row keep their default.
+void ReadFile_beta (const string &file_beta, const int k_mode, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2var, gsl_vector *q, gsl_vector *s, size_t &ni_total, size_t &ns_total, size_t &ns_test)
+{
+    gsl_vector_set_zero(q);
+    ni_total=0; ns_total=0; ns_test=0;
+
+    igzstream infile (file_beta.c_str(), igzstream::in);
+    if (!infile) {cout<<"error! fail to open beta file: "<<file_beta<<endl; return;}
+
+    string line;
+    char *ch_ptr;
+
+    string rs, chr, a1, a0, pos, cm;
+    double z=0, beta=0, se_beta=0, chisq=0, pvalue=0, zsquare=0, af=0, var_x=0;
+    size_t n_total=0, n_mis=0, n_obs=0;
+
+    //per-category accumulators
+    vector<double> vec_q(q->size, 0.0), vec_s(q->size, 0.0);
+
+    //read header
+    HEADER header;
+    safeGetline(infile, line);
+    ReadHeader (line, header);
+
+    if (header.n_col==0 ) {
+        if (header.nobs_col==0 && header.nmis_col==0) {
+            cout<<"error! missing sample size in the beta file."<<endl;
+        } else {
+            cout<<"total sample size will be replaced by obs/mis sample size."<<endl;
+        }
+    }
+
+    if (header.z_col==0 && (header.beta_col==0 || header.sebeta_col==0) && header.chisq_col==0 && header.p_col==0) {
+        cout<<"error! missing z scores in the beta file."<<endl;
+    }
+
+    if (header.af_col==0 && header.var_col==0 && mapRS2var.size()==0) {
+        cout<<"error! missing allele frequency in the beta file."<<endl;
+    }
+
+    while (!safeGetline(infile, line).eof()) {
+        //line is a local buffer, so let strtok split it in place
+        ch_ptr=strtok (&line[0], " , \t");
+        if (ch_ptr==NULL) {continue;}	//blank line
+
+        //reset per-row values so nothing carries over from the previous row
+        rs.clear(); chr.clear(); pos.clear();
+        z=0; beta=0; se_beta=0; chisq=0; pvalue=0;
+        n_total=0; n_mis=0; n_obs=0; af=0; var_x=0;
+
+        //stop early if the row has fewer tokens than the header
+        for (size_t i=0; i<header.coln && ch_ptr!=NULL; i++) {
+            if (header.rs_col!=0 && header.rs_col==i+1) {rs=ch_ptr;}
+            if (header.chr_col!=0 && header.chr_col==i+1) {chr=ch_ptr;}
+            if (header.pos_col!=0 && header.pos_col==i+1) {pos=ch_ptr;}
+            if (header.cm_col!=0 && header.cm_col==i+1) {cm=ch_ptr;}
+            if (header.a1_col!=0 && header.a1_col==i+1) {a1=ch_ptr;}
+            if (header.a0_col!=0 && header.a0_col==i+1) {a0=ch_ptr;}
+
+            if (header.z_col!=0 && header.z_col==i+1) {z=atof(ch_ptr);}
+            if (header.beta_col!=0 && header.beta_col==i+1) {beta=atof(ch_ptr);}
+            if (header.sebeta_col!=0 && header.sebeta_col==i+1) {se_beta=atof(ch_ptr);}
+            if (header.chisq_col!=0 && header.chisq_col==i+1) {chisq=atof(ch_ptr);}
+            if (header.p_col!=0 && header.p_col==i+1) {pvalue=atof(ch_ptr);}
+
+            if (header.n_col!=0 && header.n_col==i+1) {n_total=atoi(ch_ptr);}
+            if (header.nmis_col!=0 && header.nmis_col==i+1) {n_mis=atoi(ch_ptr);}
+            if (header.nobs_col!=0 && header.nobs_col==i+1) {n_obs=atoi(ch_ptr);}
+
+            if (header.af_col!=0 && header.af_col==i+1) {af=atof(ch_ptr);}
+            if (header.var_col!=0 && header.var_col==i+1) {var_x=atof(ch_ptr);}
+
+            ch_ptr=strtok (NULL, " , \t");
+        }
+
+        if (header.rs_col==0) {
+            rs=chr+":"+pos;
+        }
+
+        if (header.n_col==0) {
+            n_total=n_mis+n_obs;
+        }
+
+        //both z values and beta/se_beta have directions, while chisq/pvalue do not
+        if (header.z_col!=0) {
+            zsquare=z*z;
+        } else if (header.beta_col!=0 && header.sebeta_col!=0) {
+            z=beta/se_beta;
+            zsquare=z*z;
+        } else if (header.chisq_col!=0) {
+            zsquare=chisq;
+        } else if (header.p_col!=0) {
+            zsquare=gsl_cdf_chisq_Qinv (pvalue, 1);
+        } else {zsquare=0;}
+
+        //if the snp is also present in mapRS2var (and has a category when categories are used), then do calculations
+        if (mapRS2var.count(rs)!=0 && (mapRS2cat.size()==0 || mapRS2cat.count(rs)!=0) ) {
+            //obtain var_x
+            if (k_mode==1) {
+                if (header.var_col==0) {
+                    if (header.af_col!=0) {
+                        var_x=2.0*af*(1.0-af);
+                    } else {
+                        var_x=mapRS2var.at(rs);
+                    }
+                }
+            } else {
+                var_x=1.0;
+            }
+
+            //compute q
+            const size_t i_cat=(mapRS2cat.size()!=0) ? mapRS2cat.at(rs) : 0;
+            vec_q[i_cat]+=(zsquare-1.0)*var_x/(double)n_total;
+            vec_s[i_cat]+=var_x;
+
+            ni_total=max(ni_total, n_total);
+            ns_test++;
+        }
+
+        ns_total++;
+    }
+
+    //save q
+    for (size_t i=0; i<q->size; i++) {
+        if (vec_s[i]!=0) {
+            gsl_vector_set(q, i, vec_q[i]/vec_s[i]);
+        }
+        gsl_vector_set(s, i, vec_s[i]);
+    }
+
+    infile.clear();
+    infile.close();
+
+    return;
+}
+
+
+
+
+//Read an S file: S followed by Svar.
+//The file holds S->size1 rows of S->size2 values and then Svar->size1 rows of
+//Svar->size2 values, separated by spaces, commas or tabs. Every value is
+//ADDED to the matching entry of S / Svar, so the caller decides whether the
+//matrices start from zero.
+//If the file cannot be opened, or a row has too few values, an error is
+//printed and the function returns; entries added before the error are kept.
+void ReadFile_s (const string &file_s, gsl_matrix *S, gsl_matrix *Svar)
+{
+    igzstream infile (file_s.c_str(), igzstream::in);
+    if (!infile) {cout<<"error! fail to open s file: "<<file_s<<endl; return;}
+
+    string line;
+    char *ch_ptr;
+    double d;
+
+    //the two matrices are stored one after the other in the same layout
+    gsl_matrix *mat_ptr[2]={S, Svar};
+
+    for (size_t m=0; m<2; m++) {
+        gsl_matrix *M=mat_ptr[m];
+
+        for (size_t i=0; i<M->size1; i++) {
+            safeGetline(infile, line);
+            //line is a local buffer, so let strtok split it in place
+            ch_ptr=strtok (&line[0], " , \t");
+            for (size_t j=0; j<M->size2; j++) {
+                if (ch_ptr==NULL) {
+                    cout<<"error! too few values in s file: "<<file_s<<endl;
+                    infile.clear();
+                    infile.close();
+                    return;
+                }
+                d=gsl_matrix_get(M, i, j)+atof(ch_ptr);
+                gsl_matrix_set(M, i, j, d);
+                ch_ptr=strtok (NULL, " , \t");
+            }
+        }
+    }
+
+    infile.clear();
+    infile.close();
+
+    return;
+}
+
+
+
+
+//Read a list of S files: S and Svar are zeroed, then every line of file_ms is
+//taken as a file name and passed to ReadFile_s together with S and Svar.
+void ReadFile_ms (const string &file_ms, gsl_matrix *S, gsl_matrix *Svar)
+{
+    gsl_matrix_set_zero(S);
+    gsl_matrix_set_zero(Svar);
+
+    igzstream list_stream (file_ms.c_str(), igzstream::in);
+
+    if (list_stream) {
+        //one file name per line
+        for (string entry; !safeGetline(list_stream, entry).eof(); ) {
+            ReadFile_s(entry, S, Svar);
+        }
+
+        list_stream.clear();
+        list_stream.close();
+    } else {
+        cout<<"error! fail to open ms file: "<<file_ms<<endl;
+    }
+}
+
+
+
+
+//Read a V file: V (i.e. Q).
+//The file holds V->size1 rows of V->size2 values separated by spaces, commas
+//or tabs. Every value is ADDED to the matching entry of V, so the caller
+//decides whether V starts from zero.
+//If the file cannot be opened, or a row has too few values, an error is
+//printed and the function returns; entries added before the error are kept.
+void ReadFile_v (const string &file_v, gsl_matrix *V)
+{
+    igzstream infile (file_v.c_str(), igzstream::in);
+    if (!infile) {cout<<"error! fail to open v file: "<<file_v<<endl; return;}
+
+    string line;
+    char *ch_ptr;
+    double d;
+
+    for (size_t i=0; i<V->size1; i++) {
+        safeGetline(infile, line);
+        //line is a local buffer, so let strtok split it in place
+        ch_ptr=strtok (&line[0], " , \t");
+        for (size_t j=0; j<V->size2; j++) {
+            if (ch_ptr==NULL) {
+                cout<<"error! too few values in v file: "<<file_v<<endl;
+                infile.clear();
+                infile.close();
+                return;
+            }
+            d=gsl_matrix_get(V, i, j)+atof(ch_ptr);
+            gsl_matrix_set(V, i, j, d);
+            ch_ptr=strtok (NULL, " , \t");
+        }
+    }
+
+    infile.clear();
+    infile.close();
+
+    return;
+}
+
+
+//Read a list of V files: V is zeroed, then every line of file_mv is taken as
+//a file name and passed to ReadFile_v together with V.
+void ReadFile_mv (const string &file_mv, gsl_matrix *V)
+{
+    gsl_matrix_set_zero(V);
+
+    string file_name;
+
+    igzstream infile (file_mv.c_str(), igzstream::in);
+    //report the right kind of file (the message used to say "ms file")
+    if (!infile) {cout<<"error! fail to open mv file: "<<file_mv<<endl; return;}
+
+    while (!safeGetline(infile, file_name).eof()) {
+        ReadFile_v(file_name, V);
+    }
+
+    infile.clear();
+    infile.close();
+
+    return;
+}
+
+
+//Read a q file: q, s and df.
+//The file holds q_vec->size lines with one q value each, then s_vec->size
+//lines with one s value each, then one line with df. The q and s values are
+//ADDED to the matching entries of q_vec / s_vec; df is overwritten.
+//If the file cannot be opened, or an expected value is missing, an error is
+//printed and the function returns; values added before the error are kept
+//and df is left unchanged.
+void ReadFile_q (const string &file_s, gsl_vector *q_vec, gsl_vector *s_vec, double &df)
+{
+    igzstream infile (file_s.c_str(), igzstream::in);
+    //report the right kind of file (the message used to say "s file")
+    if (!infile) {cout<<"error! fail to open q file: "<<file_s<<endl; return;}
+
+    string line;
+    char *ch_ptr;
+    double d;
+
+    //the two vectors are stored one after the other, one value per line
+    gsl_vector *vec_ptr[2]={q_vec, s_vec};
+
+    for (size_t m=0; m<2; m++) {
+        gsl_vector *v=vec_ptr[m];
+
+        for (size_t i=0; i<v->size; i++) {
+            safeGetline(infile, line);
+            //line is a local buffer, so let strtok split it in place
+            ch_ptr=strtok (&line[0], " , \t");
+            if (ch_ptr==NULL) {
+                cout<<"error! too few values in q file: "<<file_s<<endl;
+                infile.clear();
+                infile.close();
+                return;
+            }
+            d=gsl_vector_get(v, i)+atof(ch_ptr);
+            gsl_vector_set(v, i, d);
+        }
+    }
+
+    safeGetline(infile, line);
+    ch_ptr=strtok (&line[0], " , \t");
+    if (ch_ptr==NULL) {
+        cout<<"error! missing df value in q file: "<<file_s<<endl;
+    } else {
+        df=atof(ch_ptr);
+    }
+
+    infile.clear();
+    infile.close();
+
+    return;
+}
+
+
+
+//Read a list of q files: q_vec and s_vec are zeroed, then every line of
+//file_mq is taken as a file name and passed to ReadFile_q together with
+//q_vec, s_vec and df.
+void ReadFile_mq (const string &file_mq, gsl_vector *q_vec, gsl_vector *s_vec, double &df)
+{
+    gsl_vector_set_zero(q_vec);
+    gsl_vector_set_zero(s_vec);
+
+    igzstream list_stream (file_mq.c_str(), igzstream::in);
+
+    if (list_stream) {
+        //one file name per line
+        for (string entry; !safeGetline(list_stream, entry).eof(); ) {
+            ReadFile_q(entry, q_vec, s_vec, df);
+        }
+
+        list_stream.clear();
+        list_stream.close();
+    } else {
+        cout<<"error! fail to open mq file: "<<file_mq<<endl;
+    }
+}
diff --git a/src/io.h b/src/io.h
index 13e3e47..6787176 100644
--- a/src/io.h
+++ b/src/io.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __IO_H__
+#ifndef __IO_H__
#define __IO_H__
@@ -26,6 +26,8 @@
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
+#include "gzstream.h"
+
#ifdef FORCE_FLOAT
#include "param_float.h"
#else
@@ -34,6 +36,9 @@
using namespace std;
+
+
+
void ProgressBar (string str, double p, double total);
void ProgressBar (string str, double p, double total, double ratio);
std::istream& safeGetline(std::istream& is, std::string& t);
@@ -51,17 +56,21 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vect
bool ReadFile_geno (const string &file_geno, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo, size_t &ns_test);
bool ReadFile_bed (const string &file_bed, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test);
+bool Bimbam_ReadOneSNP (const size_t inc, const vector<int> &indicator_idv, igzstream &infile, gsl_vector *geno, double &geno_mean);
+void Plink_ReadOneSNP (const int pos, const vector<int> &indicator_idv, ifstream &infile, gsl_vector *geno, double &geno_mean);
void ReadFile_kin (const string &file_kin, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G);
void ReadFile_mk (const string &file_mk, vector<int> &indicator_idv, map<string, int> &mapID2num, const size_t k_mode, bool &error, gsl_matrix *G);
void ReadFile_eigenU (const string &file_u, bool &error, gsl_matrix *U);
-void ReadFile_eigenD (const string &file_d, bool &error, gsl_vector *eval);
+void ReadFile_eigenD (const string &file_d, bool &error, gsl_vector *eval);
bool BimbamKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
bool PlinkKin (const string &file_bed, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+bool ReadFile_geno (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test);
+bool ReadFile_bed (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K, const size_t ni_test, const size_t ns_test);
bool ReadFile_est (const string &file_est, const vector<size_t> &est_column, map<string, double> &mapRS2est);
@@ -69,6 +78,29 @@ bool CountFileLines (const string &file_input, size_t &n_lines);
bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total);
+bool ReadHeader (const string &line, HEADER &header);
+bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc);
+
+bool BimbamKin (const string &file_geno, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin);
+bool PlinkKin (const string &file_bed, vector<int> &indicator_idv, vector<int> &indicator_snp, const int k_mode, const int display_pace, const map<string, size_t> &mapRS2cat, map<string, double> &mapRS2var, vector<SNPINFO> &snpInfo, gsl_matrix *matrix_kin);
+
+bool ReadFile_var (const string &file_var, map<string, double> &mapRS2var);
+void ReadFile_beta (const string &file_beta, const int k_mode, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2var, gsl_vector *q, gsl_vector *s, size_t &ni_total, size_t &ns_total, size_t &ns_test);
+
+
+void ReadFile_s (const string &file_s, gsl_matrix *S, gsl_matrix *Svar);
+void ReadFile_ms (const string &file_ms, gsl_matrix *S, gsl_matrix *Svar);
+void ReadFile_v (const string &file_v, gsl_matrix *V);
+void ReadFile_mv (const string &file_mq, gsl_matrix *V);
+void ReadFile_q (const string &file_s, gsl_vector *q_vec, gsl_vector *s_vec, double &df);
+void ReadFile_mq (const string &file_mq, gsl_vector *q_vec, gsl_vector *s_vec, double &df);
+
+// WJA added
+bool bgenKin (const string &file_geno, vector<int> &indicator_snp, const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
+bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps, const gsl_matrix *W, vector<int> &indicator_idv, vector<int> &indicator_snp, vector<SNPINFO> &snpInfo, const double &maf_level, const double &miss_level, const double &hwe_level, const double &r2_level, size_t &ns_test);
+bool ReadFile_sample(const string &file_sample, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt);
+
+
#endif
diff --git a/src/lm.cpp b/src/lm.cpp
index 7577d0a..b4bc010 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011 Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -26,7 +26,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <bitset>
#include <cstring>
@@ -57,48 +57,50 @@ using namespace std;
-void LM::CopyFromParam (PARAM &cPar)
+void LM::CopyFromParam (PARAM &cPar)
{
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
-
+ // WJA added
+ file_oxford=cPar.file_oxford;
+
time_opt=0.0;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
ng_total=cPar.ng_total;
ng_test=0;
-
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
+
+ indicator_idv=cPar.indicator_idv;
+ indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
-void LM::CopyToParam (PARAM &cPar)
+void LM::CopyToParam (PARAM &cPar)
{
- cPar.time_opt=time_opt;
-
+ cPar.time_opt=time_opt;
+
cPar.ng_test=ng_test;
-
+
return;
}
-void LM::WriteFiles ()
+void LM::WriteFiles ()
{
string file_str;
file_str=path_out+"/"+file_out;
@@ -109,7 +111,7 @@ void LM::WriteFiles ()
if (!file_gene.empty()) {
outfile<<"geneID"<<"\t";
-
+
if (a_mode==51) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==52) {
@@ -119,10 +121,10 @@ void LM::WriteFiles ()
} else if (a_mode==54) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
- for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
+
+ for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
outfile<<snpInfo[t].rs_number<<"\t";
-
+
if (a_mode==51) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==52) {
@@ -132,10 +134,10 @@ void LM::WriteFiles ()
} else if (a_mode==54) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
} else {}
- }
+ }
} else {
- outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
+ outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"<<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
+
if (a_mode==51) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==52) {
@@ -145,13 +147,13 @@ void LM::WriteFiles ()
} else if (a_mode==54) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
+
size_t t=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
-
- outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
+
+ outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<ni_test-snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
+
if (a_mode==51) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==52) {
@@ -164,8 +166,8 @@ void LM::WriteFiles ()
t++;
}
}
-
-
+
+
outfile.close();
outfile.clear();
return;
@@ -179,21 +181,21 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *Wt
{
size_t c_size=Wty->size;
double d;
-
+
gsl_vector *WtWiWtx=gsl_vector_alloc (c_size);
-
+
gsl_blas_ddot (x, x, &xPwx);
gsl_blas_ddot (x, y, &xPwy);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-
- gsl_blas_ddot (WtWiWtx, Wtx, &d);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
+
+ gsl_blas_ddot (WtWiWtx, Wtx, &d);
xPwx-=d;
-
- gsl_blas_ddot (WtWiWtx, Wty, &d);
+
+ gsl_blas_ddot (WtWiWtx, Wty, &d);
xPwy-=d;
-
+
gsl_vector_free (WtWiWtx);
-
+
return;
}
@@ -202,17 +204,17 @@ void CalcvPv(const gsl_matrix *WtWi, const gsl_vector *Wty, const gsl_vector *y,
{
size_t c_size=Wty->size;
double d;
-
+
gsl_vector *WtWiWty=gsl_vector_alloc (c_size);
-
+
gsl_blas_ddot (y, y, &yPwy);
- gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
-
- gsl_blas_ddot (WtWiWty, Wty, &d);
+ gsl_blas_dgemv (CblasNoTrans, 1.0, WtWi, Wty, 0.0, WtWiWty);
+
+ gsl_blas_ddot (WtWiWty, Wty, &d);
yPwy-=d;
-
+
gsl_vector_free (WtWiWty);
-
+
return;
}
@@ -223,38 +225,38 @@ void LmCalcP (const size_t test_mode, const double yPwy, const double xPwy, cons
{
double yPxy=yPwy-xPwy*xPwy/xPwx;
double se_wald, se_score;
-
+
beta=xPwy/xPwx;
se_wald=sqrt(yPxy/(df*xPwx) );
se_score=sqrt(yPwy/((double)n_size*xPwx) );
-
+
p_wald=gsl_cdf_fdist_Q (beta*beta/(se_wald*se_wald), 1.0, df);
p_score=gsl_cdf_fdist_Q (beta*beta/(se_score*se_score), 1.0, df);
p_lrt=gsl_cdf_chisq_Q ((double)n_size*(log(yPwy)-log(yPxy)), 1);
-
+
if (test_mode==3) {se=se_score;} else {se=se_wald;}
-
+
return;
}
-void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
+void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
{
ifstream infile (file_gene.c_str(), ifstream::in);
if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
-
+
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
int c_phen;
string rs; //gene id
double d;
-
+
//calculate some basic quantities
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -262,7 +264,7 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
gsl_vector *y=gsl_vector_alloc (W->size1);
gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
gsl_vector *Wty=gsl_vector_alloc (W->size2);
gsl_vector *Wtx=gsl_vector_alloc (W->size2);
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -274,42 +276,42 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wtx, x, xPwx);
-
+
//header
getline(infile, line);
-
+
for (size_t t=0; t<ng_total; t++) {
getline(infile, line);
if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
-
- c_phen=0;
+
+ c_phen=0;
for (size_t i=0; i<indicator_idv.size(); ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
- d=atof(ch_ptr);
+
+ d=atof(ch_ptr);
gsl_vector_set(y, c_phen, d);
-
+
c_phen++;
}
-
- //calculate statistics
- time_start=clock();
-
+
+ //calculate statistics
+ time_start=clock();
+
gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wtx, Wty, x, y, xPwy, yPwy);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
-
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
cout<<endl;
-
+
gsl_vector_free(y);
gsl_matrix_free(WtW);
@@ -317,31 +319,259 @@ void LM::AnalyzeGene (const gsl_matrix *W, const gsl_vector *x)
gsl_vector_free(Wty);
gsl_vector_free(Wtx);
gsl_permutation_free(pmt);
-
+
infile.close();
infile.clear();
-
+
return;
}
+// WJA added
+#include <assert.h>
+void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y)
+{
+	// Single-SNP linear-model scan over the genotype probabilities stored in
+	// file_oxford+".bgen". One SUMSTAT is appended to sumStat for every SNP
+	// whose indicator_snp entry is non-zero; other SNP blocks are skipped.
+	//   W: covariate matrix; x is sized W->size1, one entry per kept individual
+	//   y: phenotype vector, used together with W to form Wty and yPwy
+	// On a read or decompression error a message is printed and the scan stops
+	// early, so sumStat may then hold fewer entries than there are kept SNPs.
+	// NOTE(review): multi-byte fields are read raw, which presumes a
+	// little-endian host -- confirm before building for other targets.
+	string file_bgen=file_oxford+".bgen";
+	ifstream infile (file_bgen.c_str(), ios::binary);
+	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
+
+	clock_t time_start=clock();
+
+	double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+	int n_miss, c_phen;
+	double geno, x_mean;
+
+	//calculate some basic quantities
+	double yPwy, xPwy, xPwx;
+	double df=(double)W->size1-(double)W->size2-1.0;
+
+	gsl_vector *x=gsl_vector_alloc (W->size1);
+	gsl_vector *x_miss=gsl_vector_alloc (W->size1);
+
+	gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+	gsl_vector *Wty=gsl_vector_alloc (W->size2);
+	gsl_vector *Wtx=gsl_vector_alloc (W->size2);
+	gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
+
+	gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
+	int sig;
+	LUDecomp (WtW, pmt, &sig);
+	LUInvert (WtW, pmt, WtWi);
+
+	gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
+	CalcvPv(WtWi, Wty, y, yPwy);
+
+	// read in header (fields zero-initialised so a failed read is harmless)
+	uint32_t bgen_snp_block_offset=0;
+	uint32_t bgen_header_length=0;
+	uint32_t bgen_nsamples=0;
+	uint32_t bgen_nsnps=0;
+	uint32_t bgen_flags=0;
+	infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+	infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+	bgen_snp_block_offset-=4;
+	infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+	bgen_snp_block_offset-=4;
+	infile.ignore(4+bgen_header_length-20);
+	bgen_snp_block_offset-=4+bgen_header_length-20;
+	infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+	bgen_snp_block_offset-=4;
+	bool CompressedSNPBlocks=bgen_flags&0x1;
+//	bool LongIds=bgen_flags&0x4;
+
+	// stop before the SNP loop if the header could not be read in full
+	bool stream_ok=!infile.fail();
+	if (!stream_ok) {cout<<"error reading bgen header:"<<file_bgen<<endl;}
+	else {infile.ignore(bgen_snp_block_offset);}
+
+	double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+	uint32_t bgen_N=0;
+	uint16_t bgen_LS=0;
+	uint16_t bgen_LR=0;
+	uint16_t bgen_LC=0;
+	uint32_t bgen_SNP_pos=0;
+	uint32_t bgen_LA=0;
+	std::string bgen_A_allele;
+	uint32_t bgen_LB=0;
+	std::string bgen_B_allele;
+	uint32_t bgen_P=0;
+	string id;
+	string rs;
+	string chr;
+
+	// Heap buffers reused for every SNP: 3*bgen_N values per block can be
+	// too much for the stack, and runtime-sized arrays are not standard C++.
+	vector<Bytef> zipped_data;
+	vector<uint16_t> unzipped_data;
+
+	// genotypes whose three probabilities sum below this count as missing
+	const double min_call_prob=0.9;
+	std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+
+	//start reading genotypes and analyze
+	for (size_t t=0; stream_ok && t<indicator_snp.size(); ++t)
+	{
+		if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+		// read SNP header
+		infile.read(reinterpret_cast<char*>(&bgen_N),4);
+		infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+		id.resize(bgen_LS);
+		infile.read(&id[0], bgen_LS);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+		rs.resize(bgen_LR);
+		infile.read(&rs[0], bgen_LR);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+		chr.resize(bgen_LC);
+		infile.read(&chr[0], bgen_LC);
+
+		infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+		infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+		bgen_A_allele.resize(bgen_LA);
+		infile.read(&bgen_A_allele[0], bgen_LA);
+
+		infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+		bgen_B_allele.resize(bgen_LB);
+		infile.read(&bgen_B_allele[0], bgen_LB);
+
+		if (!infile) {cout<<"error reading bgen SNP header:"<<file_bgen<<endl; break;}
+
+		if (indicator_snp[t]==0) {
+			// not analysed: step over the genotype block without decoding it
+			if (CompressedSNPBlocks) {infile.read(reinterpret_cast<char*>(&bgen_P),4);}
+			else {bgen_P=6*bgen_N;}
+			infile.ignore(static_cast<size_t>(bgen_P));
+			continue;
+		}
+
+		// every sample of the block is looked up in indicator_idv below
+		if (bgen_N==0 || bgen_N>indicator_idv.size()) {
+			cout<<"error! unexpected number of samples in bgen SNP block:"<<file_bgen<<endl;
+			break;
+		}
+		const size_t n_bytes=6*static_cast<size_t>(bgen_N);	// three 16-bit values per sample
+		unzipped_data.resize(n_bytes/2);
+
+		if (CompressedSNPBlocks) {
+			infile.read(reinterpret_cast<char*>(&bgen_P),4);
+			if (!infile || bgen_P==0) {cout<<"error reading bgen genotype block:"<<file_bgen<<endl; break;}
+			zipped_data.resize(bgen_P);
+			infile.read(reinterpret_cast<char*>(&zipped_data[0]),bgen_P);
+
+			// zlib takes the in/out length as its own uLongf; aliasing a
+			// size_t through a cast is wrong where the two widths differ
+			uLongf unzipped_data_size=static_cast<uLongf>(n_bytes);
+			int result=Z_DATA_ERROR;
+			if (infile) {result=uncompress(reinterpret_cast<Bytef*>(&unzipped_data[0]), &unzipped_data_size, &zipped_data[0], static_cast<uLong>(bgen_P));}
+			// checked at run time: an assert would vanish under NDEBUG
+			if (result!=Z_OK || unzipped_data_size!=n_bytes) {
+				cout<<"error decompressing bgen genotype block:"<<file_bgen<<endl;
+				break;
+			}
+		} else {
+			bgen_P=6*bgen_N;
+			infile.read(reinterpret_cast<char*>(&unzipped_data[0]),bgen_P);
+			if (!infile) {cout<<"error reading bgen genotype block:"<<file_bgen<<endl; break;}
+		}
+
+		x_mean=0.0; c_phen=0; n_miss=0;
+		gsl_vector_set_zero(x_miss);
+		for (size_t i=0; i<bgen_N; ++i) {
+			if (indicator_idv[i]==0) {continue;}
+
+			bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+			bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+			bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+			// WJA
+			bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+			if (bgen_geno_prob_non_miss<min_call_prob) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+			else {
+				// renormalise, then use the expected B-allele count as the dosage
+				bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+				bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+				bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+				geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+				gsl_vector_set(x, c_phen, geno);
+				gsl_vector_set(x_miss, c_phen, 1.0);
+				x_mean+=geno;
+			}
+			c_phen++;
+		}
+
+		x_mean/=static_cast<double>(ni_test-n_miss);
+
+		// mean-impute missing calls; flip the coding when the mean exceeds 1
+		for (size_t i=0; i<ni_test; ++i) {
+			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+			geno=gsl_vector_get(x, i);
+			if (x_mean>1) {
+				gsl_vector_set(x, i, 2-geno);
+			}
+		}
+
+		//calculate statistics
+		time_start=clock();
+
+		gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+		CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+		LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+
+		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		//store summary data
+		SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
+		sumStat.push_back(SNPs);
+	}
+	cout<<endl;
+
+	gsl_vector_free(x);
+	gsl_vector_free(x_miss);
+	gsl_matrix_free(WtW);
+	gsl_matrix_free(WtWi);
+	gsl_vector_free(Wty);
+	gsl_vector_free(Wtx);
+	gsl_permutation_free(pmt);
+
+	infile.close();
+	infile.clear();
+	return;
+}
+
+
+
void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
{
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
-
+
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
int n_miss, c_phen;
double geno, x_mean;
-
+
//calculate some basic quantities
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -350,7 +580,7 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
gsl_vector *x_miss=gsl_vector_alloc (W->size1);
gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
gsl_vector *Wty=gsl_vector_alloc (W->size2);
gsl_vector *Wtx=gsl_vector_alloc (W->size2);
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -362,58 +592,58 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wty, y, yPwy);
-
- //start reading genotypes and analyze
+
+ //start reading genotypes and analyze
for (size_t t=0; t<indicator_snp.size(); ++t) {
//if (t>1) {break;}
getline(infile, line);
if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
if (indicator_snp[t]==0) {continue;}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
ch_ptr=strtok (NULL, " , \t");
-
+
x_mean=0.0; c_phen=0; n_miss=0;
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
+
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
}
c_phen++;
- }
-
+ }
+
x_mean/=(double)(ni_test-n_miss);
-
+
for (size_t i=0; i<ni_test; ++i) {
if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
geno=gsl_vector_get(x, i);
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
- }
-
- //calculate statistics
- time_start=clock();
+ }
- gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
+ //calculate statistics
+ time_start=clock();
+
+ gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
gsl_vector_free(x);
@@ -424,10 +654,10 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
gsl_vector_free(Wty);
gsl_vector_free(Wtx);
gsl_permutation_free(pmt);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -437,21 +667,21 @@ void LM::AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y)
-void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
+void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
{
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
+
clock_t time_start=clock();
-
+
char ch[1];
- bitset<8> b;
-
+ bitset<8> b;
+
double beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
-
+
//calculate some basic quantities
double yPwy, xPwy, xPwx;
double df=(double)W->size1-(double)W->size2-1.0;
@@ -459,7 +689,7 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
gsl_vector *x=gsl_vector_alloc (W->size1);
gsl_matrix *WtW=gsl_matrix_alloc (W->size2, W->size2);
- gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
+ gsl_matrix *WtWi=gsl_matrix_alloc (W->size2, W->size2);
gsl_vector *Wty=gsl_vector_alloc (W->size2);
gsl_vector *Wtx=gsl_vector_alloc (W->size2);
gsl_permutation * pmt=gsl_permutation_alloc (W->size2);
@@ -471,90 +701,104 @@ void LM::AnalyzePlink (const gsl_matrix *W, const gsl_vector *y)
gsl_blas_dgemv (CblasTrans, 1.0, W, y, 0.0, Wty);
CalcvPv(WtWi, Wty, y, yPwy);
-
+
//calculate n_bit and c, the number of bit for each snp
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
-
+
//print the first three majic numbers
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
-
-
+
+
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
-
+
ci_total++;
ci_test++;
}
}
-
+
x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
+
+ for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
}
-
- //calculate statistics
- time_start=clock();
-
+
+ //calculate statistics
+ time_start=clock();
+
gsl_blas_dgemv (CblasTrans, 1.0, W, x, 0.0, Wtx);
- CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
- LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
+ CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
+ LmCalcP (a_mode-50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald, p_lrt, p_score);
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, 0.0, 0.0, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
+
gsl_vector_free(x);
gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
+ gsl_matrix_free(WtWi);
gsl_vector_free(Wty);
gsl_vector_free(Wtx);
gsl_permutation_free(pmt);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
//make sure that both y and X are centered already
-void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr)
+void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr)
{
double yty, xty, xtx, log_lr;
gsl_blas_ddot(y, y, &yty);
@@ -567,6 +811,6 @@ void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_
log_lr=0.5*(double)y->size*(log(yty)-log(yty-xty*xty/xtx));
pos_loglr.push_back(make_pair(i,log_lr) );
}
-
+
return;
}
diff --git a/src/lm.h b/src/lm.h
index ceec060..656dd52 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -1,22 +1,22 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011 Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __LM_H__
+#ifndef __LM_H__
#define __LM_H__
#include "gsl/gsl_vector.h"
@@ -35,40 +35,44 @@ using namespace std;
class LM {
-
+
public:
// IO related parameters
int a_mode; //analysis mode, 50+1/2/3/4 for Frequentist tests
size_t d_pace; //display pace
-
+
string file_bfile;
string file_geno;
+ string file_oxford;
string file_out;
string path_out;
-
+
string file_gene;
-
+
// Summary statistics
size_t ni_total, ni_test; //number of individuals
size_t ns_total, ns_test; //number of snps
size_t ng_total, ng_test; //number of genes
size_t n_cvt;
double time_opt; //time spent
-
+
vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
-
+
vector<SNPINFO> snpInfo; //record SNP information
-
+
// Not included in PARAM
vector<SUMSTAT> sumStat; //Output SNPSummary Data
-
+
// Main functions
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
void AnalyzeGene (const gsl_matrix *W, const gsl_vector *x);
void AnalyzePlink (const gsl_matrix *W, const gsl_vector *y);
void AnalyzeBimbam (const gsl_matrix *W, const gsl_vector *y);
+ // WJA added
+ void Analyzebgen (const gsl_matrix *W, const gsl_vector *y);
+
void WriteFiles ();
};
void MatrixCalcLmLR (const gsl_matrix *X, const gsl_vector *y, vector<pair<size_t, double> > &pos_loglr);
diff --git a/src/lmm.cpp b/src/lmm.cpp
index e0b4160..7bcf89a 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -26,7 +26,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <bitset>
#include <cstring>
@@ -58,56 +58,58 @@ using namespace std;
-void LMM::CopyFromParam (PARAM &cPar)
+void LMM::CopyFromParam (PARAM &cPar)
{
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
-
+ // WJA added
+ file_oxford=cPar.file_oxford;
+
l_min=cPar.l_min;
l_max=cPar.l_max;
- n_region=cPar.n_region;
+ n_region=cPar.n_region;
l_mle_null=cPar.l_mle_null;
logl_mle_H0=cPar.logl_mle_H0;
-
+
time_UtX=0.0;
time_opt=0.0;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
ng_total=cPar.ng_total;
ng_test=0;
-
- indicator_idv=cPar.indicator_idv;
- indicator_snp=cPar.indicator_snp;
+
+ indicator_idv=cPar.indicator_idv;
+ indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
-void LMM::CopyToParam (PARAM &cPar)
+void LMM::CopyToParam (PARAM &cPar)
{
cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
-
+ cPar.time_opt=time_opt;
+
cPar.ng_test=ng_test;
-
+
return;
}
-void LMM::WriteFiles ()
+void LMM::WriteFiles ()
{
string file_str;
file_str=path_out+"/"+file_out;
@@ -118,7 +120,7 @@ void LMM::WriteFiles ()
if (!file_gene.empty()) {
outfile<<"geneID"<<"\t";
-
+
if (a_mode==1) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==2) {
@@ -128,10 +130,10 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
- for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
+
+ for (vector<SUMSTAT>::size_type t=0; t<sumStat.size(); ++t) {
outfile<<snpInfo[t].rs_number<<"\t";
-
+
if (a_mode==1) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
@@ -141,10 +143,10 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].lambda_mle<<"\t"<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
} else {}
- }
+ }
} else {
outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
+
if (a_mode==1) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"p_wald"<<endl;
} else if (a_mode==2) {
@@ -154,13 +156,13 @@ void LMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<"beta"<<"\t"<<"se"<<"\t"<<"l_remle"<<"\t"<<"l_mle"<<"\t"<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
+
size_t t=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
-
+
outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
+
if (a_mode==1) {
outfile<<scientific<<setprecision(6)<<sumStat[t].beta<<"\t"<<sumStat[t].se<<"\t"<<sumStat[t].lambda_remle<<"\t"<<sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
@@ -173,8 +175,8 @@ void LMM::WriteFiles ()
t++;
}
}
-
-
+
+
outfile.close();
outfile.clear();
return;
@@ -196,10 +198,10 @@ size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
size_t index;
size_t l, h;
if (b>a) {l=a; h=b;} else {l=b; h=a;}
-
+
size_t n=n_cvt+2;
- index=(2*n-l+2)*(l-1)/2+h-l;
-
+ index=(2*n-l+2)*(l-1)/2+h-l;
+
return index;
}
@@ -209,12 +211,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
size_t index_ab, index_aw, index_bw, index_ww;
double p_ab;
double ps_ab, ps_aw, ps_bw, ps_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (Hi_eval, &Uab_col.vector, &p_ab);
if (e_mode!=0) {p_ab=gsl_vector_get (ab, index_ab)-p_ab;}
@@ -224,12 +226,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
+
p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
gsl_matrix_set (Pab, p, index_ab, p_ab);
}
@@ -245,12 +247,12 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
size_t index_ab, index_aw, index_bw, index_ww;
double p2_ab;
double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (HiHi_eval, &Uab_col.vector, &p2_ab);
if (e_mode!=0) {p2_ab=p2_ab-gsl_vector_get (ab, index_ab)+2.0*gsl_matrix_get (Pab, 0, index_ab);}
@@ -260,7 +262,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -268,11 +270,11 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHi_e
ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
+
p2_ab=ps2_ab+ps_aw*ps_bw*ps2_ww/(ps_ww*ps_ww);
p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
gsl_matrix_set (PPab, p, index_ab, p2_ab);
-
+
}
}
}
@@ -286,12 +288,12 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
size_t index_ab, index_aw, index_bw, index_ww;
double p3_ab;
double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, ps3_ww;
-
+
for (size_t p=0; p<=n_cvt+1; ++p) {
for (size_t a=p+1; a<=n_cvt+2; ++a) {
for (size_t b=a; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (a, b, n_cvt);
- if (p==0) {
+ if (p==0) {
gsl_vector_const_view Uab_col=gsl_matrix_const_column (Uab, index_ab);
gsl_blas_ddot (HiHiHi_eval, &Uab_col.vector, &p3_ab);
if (e_mode!=0) {p3_ab=gsl_vector_get (ab, index_ab)-p3_ab+3.0*gsl_matrix_get (PPab, 0, index_ab)-3.0*gsl_matrix_get (Pab, 0, index_ab);}
@@ -301,7 +303,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -312,11 +314,11 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, const gsl_vector *HiHiH
ps3_aw=gsl_matrix_get (PPPab, p-1, index_aw);
ps3_bw=gsl_matrix_get (PPPab, p-1, index_bw);
ps3_ww=gsl_matrix_get (PPPab, p-1, index_ww);
-
+
p3_ab=ps3_ab-ps_aw*ps_bw*ps2_ww*ps2_ww/(ps_ww*ps_ww*ps_ww);
p3_ab-=(ps_aw*ps3_bw+ps_bw*ps3_aw+ps2_aw*ps2_bw)/ps_ww;
p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/(ps_ww*ps_ww);
-
+
gsl_matrix_set (PPPab, p, index_ab, p3_ab);
}
}
@@ -331,119 +333,119 @@ double LogL_f (double l, void *params)
{
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double f=0.0, logdet_h=0.0, d;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
+ gsl_vector_div (Hi_eval, v_temp);
+
for (size_t i=0; i<(p->eval)->size; ++i) {
d=gsl_vector_get (v_temp, i);
logdet_h+=log(fabs(d));
- }
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
-
+ }
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
double c=0.5*(double)ni_test*(log((double)ni_test)-log(2*M_PI)-1.0);
-
- index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+
+ index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
f=c-0.5*logdet_h-0.5*(double)ni_test*log(P_yy);
-
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
return f;
}
-
-
+
+
double LogL_dev1 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double dev1=0.0, trace_Hi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
-
+ gsl_vector_mul (HiHi_eval, Hi_eval);
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
+
if (p->e_mode!=0) {trace_Hi=(double)ni_test-trace_Hi;}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
- double trace_HiK=((double)ni_test-trace_Hi)/l;
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
+ double trace_HiK=((double)ni_test-trace_Hi)/l;
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
+
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double yPKPy=(P_yy-PP_yy)/l;
+ double yPKPy=(P_yy-PP_yy)/l;
dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev1;
}
-
-
+
+
double LogL_dev2 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -451,71 +453,71 @@ double LogL_dev2 (double l, void *params)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
+
double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev2;
}
-
-
-
-
-
+
+
+
+
+
void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
{
FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt;} else {nc_total=n_cvt+1;}
-
+
double trace_Hi=0.0, trace_HiHi=0.0;
size_t index_yy;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -523,54 +525,54 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
double trace_HiK=((double)ni_test-trace_Hi)/l;
double trace_HiKHiK=((double)ni_test+trace_HiHi-2*trace_Hi)/(l*l);
-
+
index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
-
+
double P_yy=gsl_matrix_get (Pab, nc_total, index_yy);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_yy);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
-
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_yy);
+
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
*dev1=-0.5*trace_HiK+0.5*(double)ni_test*yPKPy/P_yy;
*dev2=0.5*trace_HiKHiK-0.5*(double)ni_test*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return;
}
@@ -578,39 +580,39 @@ void LogL_dev12 (double l, void *params, double *dev1, double *dev2)
double LogRL_f (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double f=0.0, logdet_h=0.0, logdet_hiw=0.0, d;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *Iab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
+ gsl_vector_div (Hi_eval, v_temp);
+
for (size_t i=0; i<(p->eval)->size; ++i) {
d=gsl_vector_get (v_temp, i);
logdet_h+=log(fabs(d));
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
gsl_vector_set_all (v_temp, 1.0);
- CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
-
+ CalcPab (n_cvt, p->e_mode, v_temp, p->Uab, p->ab, Iab);
+
//calculate |WHiW|-|WW|
logdet_hiw=0.0;
for (size_t i=0; i<nc_total; ++i) {
@@ -620,12 +622,12 @@ double LogRL_f (double l, void *params)
d=gsl_matrix_get (Iab, i, index_ww);
logdet_hiw-=log(d);
}
- index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
-
- double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
+
+ double c=0.5*df*(log(df)-log(2*M_PI)-1.0);
f=c-0.5*logdet_h-0.5*logdet_hiw-0.5*df*log(P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (Iab);
gsl_vector_free (Hi_eval);
@@ -637,44 +639,44 @@ double LogRL_f (double l, void *params)
double LogRL_dev1 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double dev1=0.0, trace_Hi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
-
+ gsl_vector_mul (HiHi_eval, Hi_eval);
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi;
double ps_ww, ps2_ww;
@@ -685,21 +687,21 @@ double LogRL_dev1 (double l, void *params)
trace_P-=ps2_ww/ps_ww;
}
double trace_PK=(df-trace_P)/l;
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
- double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
-
- dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
-
+ double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
+
+ dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev1;
}
@@ -708,19 +710,19 @@ double LogRL_dev1 (double l, void *params)
double LogRL_dev2 (double l, void *params)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double dev2=0.0, trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -728,31 +730,31 @@ double LogRL_dev2 (double l, void *params)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
@@ -765,46 +767,46 @@ double LogRL_dev2 (double l, void *params)
trace_PP+=ps2_ww*ps2_ww/(ps_ww*ps_ww)-2.0*ps3_ww/ps_ww;
}
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return dev2;
}
-
+
void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
{
- FUNC_PARAM *p=(FUNC_PARAM *) params;
+ FUNC_PARAM *p=(FUNC_PARAM *) params;
size_t n_cvt=p->n_cvt;
- size_t ni_test=p->ni_test;
+ size_t ni_test=p->ni_test;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
double df;
size_t nc_total;
if (p->calc_null==true) {nc_total=n_cvt; df=(double)ni_test-(double)n_cvt; }
else {nc_total=n_cvt+1; df=(double)ni_test-(double)n_cvt-1.0;}
-
+
double trace_Hi=0.0, trace_HiHi=0.0;
size_t index_ww;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_matrix *PPPab=gsl_matrix_alloc (n_cvt+2, n_index);
@@ -812,31 +814,31 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
gsl_vector *HiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *HiHiHi_eval=gsl_vector_alloc((p->eval)->size);
gsl_vector *v_temp=gsl_vector_alloc((p->eval)->size);
-
+
gsl_vector_memcpy (v_temp, p->eval);
gsl_vector_scale (v_temp, l);
if (p->e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
gsl_vector_memcpy (HiHi_eval, Hi_eval);
- gsl_vector_mul (HiHi_eval, Hi_eval);
+ gsl_vector_mul (HiHi_eval, Hi_eval);
gsl_vector_memcpy (HiHiHi_eval, HiHi_eval);
gsl_vector_mul (HiHiHi_eval, Hi_eval);
-
+
gsl_vector_set_all (v_temp, 1.0);
gsl_blas_ddot (Hi_eval, v_temp, &trace_Hi);
gsl_blas_ddot (HiHi_eval, v_temp, &trace_HiHi);
-
- if (p->e_mode!=0) {
+
+ if (p->e_mode!=0) {
trace_Hi=(double)ni_test-trace_Hi;
trace_HiHi=2*trace_Hi+trace_HiHi-(double)ni_test;
}
-
- CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
- CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
- CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
-
+
+ CalcPab (n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab);
+ CalcPPab (n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab);
+ CalcPPPab (n_cvt, p->e_mode, HiHiHi_eval, p->Uab, p->ab, Pab, PPab, PPPab);
+
//calculate tracePK and trace PKPK
double trace_P=trace_Hi, trace_PP=trace_HiHi;
double ps_ww, ps2_ww, ps3_ww;
@@ -850,29 +852,29 @@ void LogRL_dev12 (double l, void *params, double *dev1, double *dev2)
}
double trace_PK=(df-trace_P)/l;
double trace_PKPK=(df+trace_PP-2.0*trace_P)/(l*l);
-
+
//calculate yPKPy, yPKPKPy
index_ww=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
double P_yy=gsl_matrix_get (Pab, nc_total, index_ww);
double PP_yy=gsl_matrix_get (PPab, nc_total, index_ww);
- double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
- double yPKPy=(P_yy-PP_yy)/l;
+ double PPP_yy=gsl_matrix_get (PPPab, nc_total, index_ww);
+ double yPKPy=(P_yy-PP_yy)/l;
double yPKPKPy=(P_yy+PPP_yy-2.0*PP_yy)/(l*l);
-
+
*dev1=-0.5*trace_PK+0.5*df*yPKPy/P_yy;
*dev2=0.5*trace_PKPK-0.5*df*(2.0*yPKPKPy*P_yy-yPKPy*yPKPy)/(P_yy*P_yy);
-
+
gsl_matrix_free (Pab);
gsl_matrix_free (PPab);
gsl_matrix_free (PPPab);
gsl_vector_free (Hi_eval);
gsl_vector_free (HiHi_eval);
gsl_vector_free (HiHiHi_eval);
- gsl_vector_free (v_temp);
-
+ gsl_vector_free (v_temp);
+
return ;
}
-
+
@@ -884,35 +886,35 @@ void LMM::CalcRLWald (const double &l, const FUNC_PARAM &params, double &beta, d
{
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
int df=(int)ni_test-(int)n_cvt-1;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
+
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ gsl_vector_div (Hi_eval, v_temp);
+
+ CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
+ double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
+ double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
+
beta=P_xy/P_xx;
double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
- p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
-// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1);
-
+ se=sqrt(1.0/(tau*P_xx));
+ p_wald=gsl_cdf_fdist_Q ((P_yy-Px_yy)*tau, 1.0, df);
+// p_wald=gsl_cdf_chisq_Q ((P_yy-Px_yy)*tau, 1);
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
@@ -924,36 +926,36 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
{
size_t n_cvt=params.n_cvt;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
int df=(int)ni_test-(int)n_cvt-1;
-
+
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(params.eval->size);
gsl_vector *v_temp=gsl_vector_alloc(params.eval->size);
-
+
gsl_vector_memcpy (v_temp, params.eval);
gsl_vector_scale (v_temp, l);
if (params.e_mode==0) {gsl_vector_set_all (Hi_eval, 1.0);} else {gsl_vector_memcpy (Hi_eval, v_temp);}
gsl_vector_add_constant (v_temp, 1.0);
- gsl_vector_div (Hi_eval, v_temp);
-
- CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ gsl_vector_div (Hi_eval, v_temp);
+
+ CalcPab (n_cvt, params.e_mode, Hi_eval, params.Uab, params.ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
size_t index_xx=GetabIndex (n_cvt+1, n_cvt+1, n_cvt);
size_t index_xy=GetabIndex (n_cvt+2, n_cvt+1, n_cvt);
double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
double P_xx=gsl_matrix_get (Pab, n_cvt, index_xx);
- double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
- double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
-
+ double P_xy=gsl_matrix_get (Pab, n_cvt, index_xy);
+ double Px_yy=gsl_matrix_get (Pab, n_cvt+1, index_yy);
+
beta=P_xy/P_xx;
double tau=(double)df/Px_yy;
- se=sqrt(1.0/(tau*P_xx));
-
+ se=sqrt(1.0/(tau*P_xx));
+
p_score=gsl_cdf_fdist_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1.0, df);
-// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1);
-
+// p_score=gsl_cdf_chisq_Q ((double)ni_test*P_xy*P_xy/(P_yy*P_xx), 1);
+
gsl_matrix_free (Pab);
gsl_vector_free (Hi_eval);
gsl_vector_free (v_temp);
@@ -967,131 +969,131 @@ void LMM::CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta,
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab)
{
size_t index_ab;
size_t n_cvt=UtW->size2;
-
+
gsl_vector *u_a=gsl_vector_alloc (Uty->size);
-
+
for (size_t a=1; a<=n_cvt+2; ++a) {
if (a==n_cvt+1) {continue;}
-
+
if (a==n_cvt+2) {gsl_vector_memcpy (u_a, Uty);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, a-1);
gsl_vector_memcpy (u_a, &UtW_col.vector);
}
-
- for (size_t b=a; b>=1; --b) {
+
+ for (size_t b=a; b>=1; --b) {
if (b==n_cvt+1) {continue;}
-
+
index_ab=GetabIndex (a, b, n_cvt);
gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
- }
-
+ }
+
gsl_vector_mul(&Uab_col.vector, u_a);
}
}
-
+
gsl_vector_free (u_a);
return;
}
-void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab)
-{
+void CalcUab (const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_vector *Utx, gsl_matrix *Uab)
+{
size_t index_ab;
size_t n_cvt=UtW->size2;
-
- for (size_t b=1; b<=n_cvt+2; ++b) {
+
+ for (size_t b=1; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (n_cvt+1, b, n_cvt);
gsl_vector_view Uab_col=gsl_matrix_column (Uab, index_ab);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (&Uab_col.vector, Uty);}
else if (b==n_cvt+1) {gsl_vector_memcpy (&Uab_col.vector, Utx);}
else {
gsl_vector_const_view UtW_col=gsl_matrix_const_column (UtW, b-1);
gsl_vector_memcpy (&Uab_col.vector, &UtW_col.vector);
}
-
+
gsl_vector_mul(&Uab_col.vector, Utx);
}
-
+
return;
}
-void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
+void Calcab (const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab)
{
size_t index_ab;
size_t n_cvt=W->size2;
-
+
double d;
gsl_vector *v_a=gsl_vector_alloc (y->size);
gsl_vector *v_b=gsl_vector_alloc (y->size);
-
+
for (size_t a=1; a<=n_cvt+2; ++a) {
if (a==n_cvt+1) {continue;}
-
+
if (a==n_cvt+2) {gsl_vector_memcpy (v_a, y);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, a-1);
gsl_vector_memcpy (v_a, &W_col.vector);
}
-
- for (size_t b=a; b>=1; --b) {
+
+ for (size_t b=a; b>=1; --b) {
if (b==n_cvt+1) {continue;}
-
+
index_ab=GetabIndex (a, b, n_cvt);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
gsl_vector_memcpy (v_b, &W_col.vector);
- }
-
+ }
+
gsl_blas_ddot (v_a, v_b, &d);
gsl_vector_set(ab, index_ab, d);
}
}
-
+
gsl_vector_free (v_a);
gsl_vector_free (v_b);
return;
}
-void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab)
-{
+void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_vector *ab)
+{
size_t index_ab;
size_t n_cvt=W->size2;
-
+
double d;
gsl_vector *v_b=gsl_vector_alloc (y->size);
-
- for (size_t b=1; b<=n_cvt+2; ++b) {
+
+ for (size_t b=1; b<=n_cvt+2; ++b) {
index_ab=GetabIndex (n_cvt+1, b, n_cvt);
-
+
if (b==n_cvt+2) {gsl_vector_memcpy (v_b, y);}
else if (b==n_cvt+1) {gsl_vector_memcpy (v_b, x);}
else {
gsl_vector_const_view W_col=gsl_matrix_const_column (W, b-1);
gsl_vector_memcpy (v_b, &W_col.vector);
}
-
+
gsl_blas_ddot (x, v_b, &d);
gsl_vector_set(ab, index_ab, d);
}
-
+
gsl_vector_free (v_b);
-
+
return;
}
@@ -1099,101 +1101,101 @@ void Calcab (const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x, gsl_
-void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x)
+void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x)
{
- ifstream infile (file_gene.c_str(), ifstream::in);
+ igzstream infile (file_gene.c_str(), igzstream::in);
if (!infile) {cout<<"error reading gene expression file:"<<file_gene<<endl; return;}
-
+
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0, logl_H0=0.0, l_H0;
int c_phen;
string rs; //gene id
double d;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
+
gsl_vector *y=gsl_vector_alloc (U->size1);
gsl_vector *Uty=gsl_vector_alloc (U->size2);
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
//header
getline(infile, line);
-
+
for (size_t t=0; t<ng_total; t++) {
!safeGetline(infile, line).eof();
if (t%d_pace==0 || t==ng_total-1) {ProgressBar ("Performing Analysis ", t, ng_total-1);}
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
-
- c_phen=0;
+
+ c_phen=0;
for (size_t i=0; i<indicator_idv.size(); ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
- d=atof(ch_ptr);
+
+ d=atof(ch_ptr);
gsl_vector_set(y, c_phen, d);
-
+
c_phen++;
}
-
+
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, y, 0.0, Uty);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//calculate null
time_start=clock();
-
+
gsl_matrix_set_zero (Uab);
-
+
CalcUab (UtW, Uty, Uab);
FUNC_PARAM param0={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
if (a_mode==2 || a_mode==3 || a_mode==4) {
CalcLambda('L', param0, l_min, l_max, n_region, l_H0, logl_H0);
}
-
+
//calculate alternative
CalcUab(UtW, Uty, Utx, Uab);
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_H0, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
}
cout<<endl;
-
+
gsl_vector_free (y);
gsl_vector_free (Uty);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -1201,22 +1203,22 @@ void LMM::AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
-void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
{
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
clock_t time_start=clock();
-
+
string line;
char *ch_ptr;
-
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_miss, c_phen;
double geno, x_mean;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
@@ -1224,45 +1226,45 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_vector *x_miss=gsl_vector_alloc (U->size1);
gsl_vector *Utx=gsl_vector_alloc (U->size2);
gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
-// }
-
- //start reading genotypes and analyze
+// }
+
+ //start reading genotypes and analyze
for (size_t t=0; t<indicator_snp.size(); ++t) {
// if (t>1) {break;}
!safeGetline(infile, line).eof();
if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
if (indicator_snp[t]==0) {continue;}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
+ ch_ptr=strtok (NULL, " , \t");
+
x_mean=0.0; c_phen=0; n_miss=0;
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
+
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
}
c_phen++;
- }
-
+ }
+
x_mean/=(double)(ni_test-n_miss);
-
+
for (size_t i=0; i<ni_test; ++i) {
if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
geno=gsl_vector_get(x, i);
@@ -1270,55 +1272,55 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
gsl_vector_set(x, i, 2-geno);
}
}
-
-
+
+
//calculate statistics
time_start=clock();
- gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
CalcUab(UtW, Uty, Utx, Uab);
// if (e_mode!=0) {
// Calcab (W, y, x, ab);
// }
-
+
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
if (x_mean>1) {beta*=-1;}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
gsl_vector_free (x_miss);
gsl_vector_free (Utx);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -1328,37 +1330,37 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
-void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
{
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
+
clock_t time_start=clock();
-
+
char ch[1];
- bitset<8> b;
-
+ bitset<8> b;
+
double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
double logl_H1=0.0;
int n_bit, n_miss, ci_total, ci_test;
double geno, x_mean;
-
+
//Calculate basic quantities
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
gsl_vector *x=gsl_vector_alloc (U->size1);
gsl_vector *Utx=gsl_vector_alloc (U->size2);
- gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
//calculate n_bit and c, the number of bit for each snp
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -1368,16 +1370,16 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
infile.read(ch,1);
b=ch[0];
}
-
-
+
+
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
@@ -1390,7 +1392,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
@@ -1398,105 +1400,345 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
ci_test++;
}
}
-
+
x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
+
+ for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
}
-
+
//calculate statistics
time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
CalcUab(UtW, Uty, Utx, Uab);
// if (e_mode!=0) {
// Calcab (W, y, x, ab);
// }
-
+
time_start=clock();
FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
//3 is before 1, for beta
if (a_mode==3 || a_mode==4) {
CalcRLScore (l_mle_null, param1, beta, se, p_score);
}
-
+
if (a_mode==1 || a_mode==4) {
- CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
CalcRLWald (lambda_remle, param1, beta, se, p_wald);
}
-
+
if (a_mode==2 || a_mode==4) {
CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
- }
-
- if (x_mean>1) {beta*=-1;}
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
gsl_vector_free (Utx);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
+// WJA added
+#include <assert.h>
+void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y)
+{
+ string file_bgen=file_oxford+".bgen";
+ ifstream infile (file_bgen.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
+
+
+ clock_t time_start=clock();
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *x_miss=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ // read in header
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+ infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+ bgen_snp_block_offset-=4;
+ infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+ bgen_snp_block_offset-=4;
+ infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+ bgen_snp_block_offset-=4;
+ infile.ignore(4+bgen_header_length-20);
+ bgen_snp_block_offset-=4+bgen_header_length-20;
+ infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+ bgen_snp_block_offset-=4;
+ bool CompressedSNPBlocks=bgen_flags&0x1;
+// bool LongIds=bgen_flags&0x4;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N;
+ uint16_t bgen_LS;
+ uint16_t bgen_LR;
+ uint16_t bgen_LC;
+ uint32_t bgen_SNP_pos;
+ uint32_t bgen_LA;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB;
+ std::string bgen_B_allele;
+ uint32_t bgen_P;
+ size_t unzipped_data_size;
+ string id;
+ string rs;
+ string chr;
+ std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+
+
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t)
+ {
+
+// if (t>1) {break;}
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ // read SNP header
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
-void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr)
+ infile.read(reinterpret_cast<char*>(&bgen_N),4);
+ infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+
+
+
+
+ uint16_t unzipped_data[3*bgen_N];
+
+ if (indicator_snp[t]==0) {
+ if(CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ else
+ bgen_P=6*bgen_N;
+
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+
+
+ if(CompressedSNPBlocks)
+ {
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ uint8_t zipped_data[bgen_P];
+
+ unzipped_data_size=6*bgen_N;
+
+ infile.read(reinterpret_cast<char*>(zipped_data),bgen_P);
+
+ int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data), reinterpret_cast<uLongf*>(&unzipped_data_size), reinterpret_cast<Bytef*>(zipped_data), static_cast<uLong> (bgen_P));
+ assert(result == Z_OK);
+
+ }
+ else
+ {
+
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data),bgen_P);
+ }
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<bgen_N; ++i) {
+ if (indicator_idv[i]==0) {continue;}
+
+
+ bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0;
+ bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ // WJA
+ bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ else {
+
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=static_cast<double>(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ CalcUab(UtW, Uty, Utx, Uab);
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (x_miss);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ infile.close();
+ infile.clear();
+
+ return;
+
+}
+
+
+
+void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr)
{
double logl_H0, logl_H1, log_lr, lambda0, lambda1;
-
+
gsl_vector *w=gsl_vector_alloc (Uty->size);
- gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
+ gsl_matrix *Utw=gsl_matrix_alloc (Uty->size, 1);
gsl_matrix *Uab=gsl_matrix_alloc (Uty->size, 6);
- gsl_vector *ab=gsl_vector_alloc (6);
-
+ gsl_vector *ab=gsl_vector_alloc (6);
+
gsl_vector_set_zero(ab);
gsl_vector_set_all (w, 1.0);
- gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
- gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
-
- CalcUab (Utw, Uty, Uab) ;
- FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
-
+ gsl_vector_view Utw_col=gsl_matrix_column (Utw, 0);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, w, 0.0, &Utw_col.vector);
+
+ CalcUab (Utw, Uty, Uab) ;
+ FUNC_PARAM param0={true, Uty->size, 1, K_eval, Uab, ab, 0};
+
CalcLambda('L', param0, l_min, l_max, n_region, lambda0, logl_H0);
-
+
for (size_t i=0; i<UtX->size2; ++i) {
gsl_vector_const_view UtX_col=gsl_matrix_const_column (UtX, i);
CalcUab(Utw, Uty, &UtX_col.vector, Uab);
FUNC_PARAM param1={false, UtX->size1, 1, K_eval, Uab, ab, 0};
-
+
CalcLambda ('L', param1, l_min, l_max, n_region, lambda1, logl_H1);
- log_lr=logl_H1-logl_H0;
-
+ log_lr=logl_H1-logl_H0;
+
pos_loglr.push_back(make_pair(i,log_lr) );
}
-
+
gsl_vector_free (w);
gsl_matrix_free (Utw);
gsl_matrix_free (Uab);
gsl_vector_free (ab);
-
+
return;
}
@@ -1506,17 +1748,17 @@ void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector
void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf)
{
if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
-
+
vector<pair<double, double> > lambda_lh;
-
+
//evaluate first order derivates in different intervals
double lambda_l, lambda_h, lambda_interval=log(l_max/l_min)/(double)n_region;
double dev1_l, dev1_h, logf_l, logf_h;
-
+
for (size_t i=0; i<n_region; ++i) {
lambda_l=l_min*exp(lambda_interval*i);
lambda_h=l_min*exp(lambda_interval*(i+1.0));
-
+
if (func_name=='R' || func_name=='r') {
dev1_l=LogRL_dev1 (lambda_l, &params);
dev1_h=LogRL_dev1 (lambda_h, &params);
@@ -1525,12 +1767,12 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
dev1_l=LogL_dev1 (lambda_l, &params);
dev1_h=LogL_dev1 (lambda_h, &params);
}
-
+
if (dev1_l*dev1_h<=0) {
lambda_lh.push_back(make_pair(lambda_l, lambda_h));
}
}
-
+
//if derivates do not change signs in any interval
if (lambda_lh.empty()) {
if (func_name=='R' || func_name=='r') {
@@ -1541,21 +1783,21 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
logf_l=LogL_f (l_min, &params);
logf_h=LogL_f (l_max, &params);
}
-
+
if (logf_l>=logf_h) {lambda=l_min; logf=logf_l;} else {lambda=l_max; logf=logf_h;}
}
else {
//if derivates change signs
int status;
int iter=0, max_iter=100;
- double l, l_temp;
-
+ double l, l_temp;
+
gsl_function F;
gsl_function_fdf FDF;
-
+
F.params=&params;
FDF.params=&params;
-
+
if (func_name=='R' || func_name=='r') {
F.function=&LogRL_dev1;
FDF.f=&LogRL_dev1;
@@ -1568,57 +1810,57 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
FDF.df=&LogL_dev2;
FDF.fdf=&LogL_dev12;
}
-
+
const gsl_root_fsolver_type *T_f;
gsl_root_fsolver *s_f;
T_f=gsl_root_fsolver_brent;
s_f=gsl_root_fsolver_alloc (T_f);
-
+
const gsl_root_fdfsolver_type *T_fdf;
gsl_root_fdfsolver *s_fdf;
T_fdf=gsl_root_fdfsolver_newton;
- s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
-
+ s_fdf=gsl_root_fdfsolver_alloc(T_fdf);
+
for (vector<double>::size_type i=0; i<lambda_lh.size(); ++i) {
lambda_l=lambda_lh[i].first; lambda_h=lambda_lh[i].second;
-
+
gsl_root_fsolver_set (s_f, &F, lambda_l, lambda_h);
-
+
do {
iter++;
status=gsl_root_fsolver_iterate (s_f);
l=gsl_root_fsolver_root (s_f);
lambda_l=gsl_root_fsolver_x_lower (s_f);
lambda_h=gsl_root_fsolver_x_upper (s_f);
- status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1);
+ status=gsl_root_test_interval (lambda_l, lambda_h, 0, 1e-1);
}
- while (status==GSL_CONTINUE && iter<max_iter);
-
+ while (status==GSL_CONTINUE && iter<max_iter);
+
iter=0;
-
- gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
+
+ gsl_root_fdfsolver_set (s_fdf, &FDF, l);
+
do {
iter++;
status=gsl_root_fdfsolver_iterate (s_fdf);
l_temp=l;
l=gsl_root_fdfsolver_root (s_fdf);
- status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
+ status=gsl_root_test_delta (l, l_temp, 0, 1e-5);
}
- while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max);
-
+ while (status==GSL_CONTINUE && iter<max_iter && l>l_min && l<l_max);
+
l=l_temp;
if (l<l_min) {l=l_min;}
if (l>l_max) {l=l_max;}
- if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, &params);} else {logf_l=LogL_f (l, &params);}
-
+ if (func_name=='R' || func_name=='r') {logf_l=LogRL_f (l, &params);} else {logf_l=LogL_f (l, &params);}
+
if (i==0) {logf=logf_l; lambda=l;}
else if (logf<logf_l) {logf=logf_l; lambda=l;}
else {}
}
- gsl_root_fsolver_free (s_f);
- gsl_root_fdfsolver_free (s_fdf);
-
+ gsl_root_fsolver_free (s_f);
+ gsl_root_fdfsolver_free (s_fdf);
+
if (func_name=='R' || func_name=='r') {
logf_l=LogRL_f (l_min, &params);
logf_h=LogRL_f (l_max, &params);
@@ -1627,11 +1869,11 @@ void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, c
logf_l=LogL_f (l_min, &params);
logf_h=LogL_f (l_max, &params);
}
-
- if (logf_l>logf) {lambda=l_min; logf=logf_l;}
+
+ if (logf_l>logf) {lambda=l_min; logf=logf_l;}
if (logf_h>logf) {lambda=l_max; logf=logf_h;}
}
-
+
return;
}
@@ -1646,53 +1888,53 @@ void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
-
- gsl_matrix_free(Uab);
- gsl_vector_free(ab);
-
+
+ gsl_matrix_free(Uab);
+ gsl_vector_free(ab);
+
return;
}
-
-
+
+
//obtain REMLE estimate for PVE using lambda_remle
void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se)
{
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
-
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
gsl_matrix_set_zero (Uab);
CalcUab (UtW, Uty, Uab);
// if (e_mode!=0) {
// gsl_vector_set_zero (ab);
// Calcab (W, y, ab);
// }
-
+
FUNC_PARAM param0={true, ni_test, n_cvt, eval, Uab, ab, 0};
-
+
double se=sqrt(-1.0/LogRL_dev2 (lambda, &param0));
-
+
pve=trace_G*lambda/(trace_G*lambda+1.0);
pve_se=trace_G/((trace_G*lambda+1.0)*(trace_G*lambda+1.0))*se;
-
+
gsl_matrix_free (Uab);
- gsl_vector_free (ab);
+ gsl_vector_free (ab);
return;
}
@@ -1703,9 +1945,9 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
{
size_t n_cvt=UtW->size2, ni_test=UtW->size1;
size_t n_index=(n_cvt+2+1)*(n_cvt+2)/2;
-
- gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
- gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *Uab=gsl_matrix_alloc (ni_test, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
gsl_matrix *Pab=gsl_matrix_alloc (n_cvt+2, n_index);
gsl_vector *Hi_eval=gsl_vector_alloc(eval->size);
gsl_vector *v_temp=gsl_vector_alloc(eval->size);
@@ -1713,16 +1955,16 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
gsl_matrix *WHiW=gsl_matrix_alloc(UtW->size2, UtW->size2);
gsl_vector *WHiy=gsl_vector_alloc(UtW->size2);
gsl_matrix *Vbeta=gsl_matrix_alloc(UtW->size2, UtW->size2);
-
+
gsl_matrix_set_zero (Uab);
- CalcUab (UtW, Uty, Uab);
-
+ CalcUab (UtW, Uty, Uab);
+
gsl_vector_memcpy (v_temp, eval);
gsl_vector_scale (v_temp, lambda);
gsl_vector_set_all (Hi_eval, 1.0);
gsl_vector_add_constant (v_temp, 1.0);
gsl_vector_div (Hi_eval, v_temp);
-
+
//calculate beta
gsl_matrix_memcpy (HiW, UtW);
for (size_t i=0; i<UtW->size2; i++) {
@@ -1731,30 +1973,30 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
}
gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, HiW, UtW, 0.0, WHiW);
gsl_blas_dgemv (CblasTrans, 1.0, HiW, Uty, 0.0, WHiy);
-
+
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (UtW->size2);
LUDecomp (WHiW, pmt, &sig);
LUSolve (WHiW, pmt, WHiy, beta);
LUInvert (WHiW, pmt, Vbeta);
-
+
//calculate vg and ve
- CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
-
- size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
- double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
-
+ CalcPab (n_cvt, 0, Hi_eval, Uab, ab, Pab);
+
+ size_t index_yy=GetabIndex (n_cvt+2, n_cvt+2, n_cvt);
+ double P_yy=gsl_matrix_get (Pab, n_cvt, index_yy);
+
ve=P_yy/(double)(ni_test-n_cvt);
vg=ve*lambda;
-
+
//with ve, calculate se(beta)
gsl_matrix_scale(Vbeta, ve);
-
+
//obtain se_beta
for (size_t i=0; i<Vbeta->size1; i++) {
gsl_vector_set (se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i) ) );
}
-
+
gsl_matrix_free(Uab);
gsl_matrix_free(Pab);
gsl_vector_free(ab);
@@ -1764,8 +2006,309 @@ void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_v
gsl_matrix_free(WHiW);
gsl_vector_free(WHiy);
gsl_matrix_free(Vbeta);
-
+
gsl_permutation_free(pmt);
return;
}
+
+
+
+
+
+
+void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
+{
+ igzstream infile (file_geno.c_str(), igzstream::in);
+// ifstream infile (file_geno.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+
+ clock_t time_start=clock();
+
+ string line;
+ char *ch_ptr;
+
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0, logl_H0=0.0;
+ int n_miss, c_phen;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *x_miss=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
+ gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
+
+ //gsl_matrix_set_zero (Uab);
+ // CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+// if (t>1) {break;}
+ !safeGetline(infile, line).eof();
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+
+ if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ else {
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW_expand, Uty, Uab);
+
+ if (a_mode==2 || a_mode==4) {
+ FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
+ CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (x_miss);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ gsl_matrix_free (UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
+}
+
+
+
+
+
+
+
+void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env)
+{
+ string file_bed=file_bfile+".bed";
+ ifstream infile (file_bed.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+
+ clock_t time_start=clock();
+
+ char ch[1];
+ bitset<8> b;
+
+ double lambda_mle=0, lambda_remle=0, beta=0, se=0, p_wald=0, p_lrt=0, p_score=0;
+ double logl_H1=0.0, logl_H0=0.0;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+
+ //Calculate basic quantities
+ size_t n_index=(n_cvt+2+2+1)*(n_cvt+2+2)/2;
+
+ gsl_vector *x=gsl_vector_alloc (U->size1);
+ gsl_vector *Utx=gsl_vector_alloc (U->size2);
+ gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
+ gsl_vector *ab=gsl_vector_alloc (n_index);
+
+ gsl_matrix *UtW_expand=gsl_matrix_alloc (U->size1, UtW->size2+2);
+ gsl_matrix_view UtW_expand_mat=gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
+ gsl_matrix_memcpy (&UtW_expand_mat.matrix, UtW);
+ gsl_vector_view UtW_expand_env=gsl_matrix_column(UtW_expand, UtW->size2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
+ gsl_vector_view UtW_expand_x=gsl_matrix_column(UtW_expand, UtW->size2+1);
+
+ //gsl_matrix_set_zero (Uab);
+ //CalcUab (UtW, Uty, Uab);
+// if (e_mode!=0) {
+// gsl_vector_set_zero (ab);
+// Calcab (W, y, ab);
+// }
+
+ //calculate n_bit and c, the number of bit for each snp
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //print the first three majic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+
+ for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
+ if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ geno=gsl_vector_get(x,i);
+ if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &UtW_expand_x.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ gsl_matrix_set_zero (Uab);
+ CalcUab (UtW_expand, Uty, Uab);
+
+ if (a_mode==2 || a_mode==4) {
+ FUNC_PARAM param0={true, ni_test, n_cvt+2, eval, Uab, ab, 0};
+ CalcLambda ('L', param0, l_min, l_max, n_region, lambda_mle, logl_H0);
+ }
+
+ CalcUab(UtW_expand, Uty, Utx, Uab);
+
+// if (e_mode!=0) {
+// Calcab (W, y, x, ab);
+// }
+
+ time_start=clock();
+ FUNC_PARAM param1={false, ni_test, n_cvt+2, eval, Uab, ab, 0};
+
+ //3 is before 1, for beta
+ if (a_mode==3 || a_mode==4) {
+ CalcRLScore (l_mle_null, param1, beta, se, p_score);
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+ CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), 1);
+ }
+
+ if (x_mean>1) {beta*=-1;}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ gsl_vector_free (x);
+ gsl_vector_free (Utx);
+ gsl_matrix_free (Uab);
+ gsl_vector_free (ab);
+
+ gsl_matrix_free (UtW_expand);
+
+ infile.close();
+ infile.clear();
+
+ return;
+}
diff --git a/src/lmm.h b/src/lmm.h
index 45f9b72..efbda4d 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __LMM_H__
+#ifndef __LMM_H__
#define __LMM_H__
#include "gsl/gsl_vector.h"
@@ -57,21 +57,23 @@ public:
// IO related parameters
int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests
size_t d_pace; //display pace
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
string file_gene;
-
+ // WJA added
+ string file_oxford;
+
// LMM related parameters
double l_min;
double l_max;
size_t n_region;
double l_mle_null;
- double logl_mle_H0;
-
+ double logl_mle_H0;
+
// Summary statistics
size_t ni_total, ni_test; //number of individuals
size_t ns_total, ns_test; //number of snps
@@ -79,25 +81,29 @@ public:
size_t n_cvt;
double time_UtX; //time spent on optimization iterations
double time_opt; //time spent on optimization iterations
-
+
vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
-
+
vector<SNPINFO> snpInfo; //record SNP information
-
+
// Not included in PARAM
vector<SUMSTAT> sumStat; //Output SNPSummary Data
-
+
// Main functions
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x);
void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y);
+ // WJA added
+ void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y);
void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y);
+ void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env);
+ void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env);
void WriteFiles ();
-
+
void CalcRLWald (const double &lambda, const FUNC_PARAM &params, double &beta, double &se, double &p_wald);
- void CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta, double &se, double &p_score);
+ void CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta, double &se, double &p_score);
};
void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr);
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 4b910ee..5826a1f 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011 Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -26,7 +26,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <bitset>
#include <cstring>
@@ -60,16 +60,17 @@ using namespace std;
//in this file, X, Y are already transformed (i.e. UtX and UtY)
-void MVLMM::CopyFromParam (PARAM &cPar)
+void MVLMM::CopyFromParam (PARAM &cPar)
{
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
+ file_oxford=cPar.file_oxford;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
+
l_min=cPar.l_min;
l_max=cPar.l_max;
n_region=cPar.n_region;
@@ -79,68 +80,68 @@ void MVLMM::CopyFromParam (PARAM &cPar)
em_prec=cPar.em_prec;
nr_prec=cPar.nr_prec;
crt=cPar.crt;
-
+
Vg_remle_null=cPar.Vg_remle_null;
Ve_remle_null=cPar.Ve_remle_null;
Vg_mle_null=cPar.Vg_mle_null;
Ve_mle_null=cPar.Ve_mle_null;
-
+
time_UtX=0.0;
time_opt=0.0;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
n_ph=cPar.n_ph;
-
- indicator_idv=cPar.indicator_idv;
+
+ indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
-void MVLMM::CopyToParam (PARAM &cPar)
+void MVLMM::CopyToParam (PARAM &cPar)
{
cPar.time_UtX=time_UtX;
- cPar.time_opt=time_opt;
-
+ cPar.time_opt=time_opt;
+
cPar.Vg_remle_null=Vg_remle_null;
cPar.Ve_remle_null=Ve_remle_null;
cPar.Vg_mle_null=Vg_mle_null;
cPar.Ve_mle_null=Ve_mle_null;
-
+
cPar.VVg_remle_null=VVg_remle_null;
cPar.VVe_remle_null=VVe_remle_null;
cPar.VVg_mle_null=VVg_mle_null;
cPar.VVe_mle_null=VVe_mle_null;
-
+
cPar.beta_remle_null=beta_remle_null;
cPar.se_beta_remle_null=se_beta_remle_null;
cPar.beta_mle_null=beta_mle_null;
cPar.se_beta_mle_null=se_beta_mle_null;
-
+
cPar.logl_remle_H0=logl_remle_H0;
- cPar.logl_mle_H0=logl_mle_H0;
+ cPar.logl_mle_H0=logl_mle_H0;
return;
}
-void MVLMM::WriteFiles ()
+void MVLMM::WriteFiles ()
{
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".assoc.txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
-
+
outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_miss"<<"\t"<<"allele1"<<"\t"<<"allele0"<<"\t"<<"af"<<"\t";
-
+
for (size_t i=0; i<n_ph; i++) {
outfile<<"beta_"<<i+1<<"\t";
}
@@ -149,7 +150,7 @@ void MVLMM::WriteFiles ()
outfile<<"Vbeta_"<<i+1<<"_"<<j+1<<"\t";
}
}
-
+
if (a_mode==1) {
outfile<<"p_wald"<<endl;
} else if (a_mode==2) {
@@ -159,20 +160,20 @@ void MVLMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<"p_wald"<<"\t"<<"p_lrt"<<"\t"<<"p_score"<<endl;
} else {}
-
-
+
+
size_t t=0, c=0;
for (size_t i=0; i<snpInfo.size(); ++i) {
if (indicator_snp[i]==0) {continue;}
-
+
outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"<<snpInfo[i].a_minor<<"\t"<<snpInfo[i].a_major<<"\t"<<fixed<<setprecision(3)<<snpInfo[i].maf<<"\t";
-
+
outfile<<scientific<<setprecision(6);
-
+
for (size_t i=0; i<n_ph; i++) {
outfile<<sumStat[t].v_beta[i]<<"\t";
}
-
+
c=0;
for (size_t i=0; i<n_ph; i++) {
for (size_t j=i; j<n_ph; j++) {
@@ -180,7 +181,7 @@ void MVLMM::WriteFiles ()
c++;
}
}
-
+
if (a_mode==1) {
outfile<<sumStat[t].p_wald <<endl;
} else if (a_mode==2) {
@@ -190,11 +191,11 @@ void MVLMM::WriteFiles ()
} else if (a_mode==4) {
outfile<<sumStat[t].p_wald <<"\t"<<sumStat[t].p_lrt<<"\t"<<sumStat[t].p_score<<endl;
} else {}
-
+
t++;
}
-
-
+
+
outfile.close();
outfile.clear();
return;
@@ -208,24 +209,24 @@ void MVLMM::WriteFiles ()
-
+
double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l, gsl_matrix *UltVeh, gsl_matrix *UltVehi)
{
size_t d_size=V_g->size1;
- double d, logdet_Ve=0.0;
-
+ double d, logdet_Ve=0.0;
+
//eigen decomposition of V_e
gsl_matrix *Lambda=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_temp=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_h=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_hi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size);
-
+ gsl_matrix *VgVehi=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *U_l=gsl_matrix_alloc (d_size, d_size);
+
gsl_matrix_memcpy(V_e_temp, V_e);
EigenDecomp(V_e_temp, U_l, D_l, 0);
-
+
//calculate V_e_h and V_e_hi
gsl_matrix_set_zero(V_e_h);
gsl_matrix_set_zero(V_e_hi);
@@ -233,14 +234,14 @@ double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
d=gsl_vector_get (D_l, i);
if (d<=0) {continue;}
logdet_Ve+=log(d);
-
+
gsl_vector_view U_col=gsl_matrix_column(U_l, i);
d=sqrt(d);
gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_h);
d=1.0/d;
gsl_blas_dsyr (CblasUpper, d, &U_col.vector, V_e_hi);
}
-
+
//copy the upper part to lower part
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<i; j++) {
@@ -248,19 +249,19 @@ double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
gsl_matrix_set (V_e_hi, i, j, gsl_matrix_get(V_e_hi, j, i));
}
}
-
+
//calculate Lambda=V_ehi V_g V_ehi
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_g, V_e_hi, 0.0, VgVehi);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
-
+
//eigen decomposition of Lambda
EigenDecomp(Lambda, U_l, D_l, 0);
-
+
for (size_t i=0; i<d_size; i++) {
d=gsl_vector_get (D_l, i);
if (d<0) {gsl_vector_set (D_l, i, 0);}
}
-
+
//calculate UltVeh and UltVehi
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_hi, 0.0, UltVehi);
@@ -279,7 +280,7 @@ double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
}
cout<<endl;
}
-
+
cout<<"Dl: "<<endl;
for (size_t i=0; i<d_size; i++) {
cout<<gsl_vector_get (D_l, i)<<endl;
@@ -292,7 +293,7 @@ double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
cout<<endl;
}
*/
-
+
//free memory
gsl_matrix_free (Lambda);
gsl_matrix_free (V_e_temp);
@@ -300,54 +301,54 @@ double EigenProc (const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
gsl_matrix_free (V_e_hi);
gsl_matrix_free (VgVehi);
gsl_matrix_free (U_l);
-
+
return logdet_Ve;
}
-
+
//Qi=(\sum_{k=1}^n x_kx_k^T\otimes(delta_k*Dl+I)^{-1} )^{-1}
double CalcQi (const gsl_vector *eval, const gsl_vector *D_l, const gsl_matrix *X, gsl_matrix *Qi)
{
size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
size_t c_size=dc_size/d_size;
-
+
double delta, dl, d1, d2, d, logdet_Q;
-
+
gsl_matrix *Q=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix_set_zero (Q);
-
- for (size_t i=0; i<c_size; i++) {
- for (size_t j=0; j<c_size; j++) {
+
+ for (size_t i=0; i<c_size; i++) {
+ for (size_t j=0; j<c_size; j++) {
for (size_t l=0; l<d_size; l++) {
dl=gsl_vector_get(D_l, l);
-
+
if (j<i) {
- d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l);
+ d=gsl_matrix_get (Q, j*d_size+l, i*d_size+l);
} else {
d=0.0;
for (size_t k=0; k<n_size; k++) {
d1=gsl_matrix_get(X, i, k);
d2=gsl_matrix_get(X, j, k);
delta=gsl_vector_get(eval, k);
- d+=d1*d2/(dl*delta+1.0);
+ d+=d1*d2/(dl*delta+1.0);
}
}
-
+
gsl_matrix_set (Q, i*d_size+l, j*d_size+l, d);
}
}
}
-
- //calculate LU decomposition of Q, and invert Q and calculate |Q|
+
+ //calculate LU decomposition of Q, and invert Q and calculate |Q|
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (dc_size);
- LUDecomp (Q, pmt, &sig);
+ LUDecomp (Q, pmt, &sig);
LUInvert (Q, pmt, Qi);
-
+
logdet_Q=LULndet (Q);
-
+
gsl_matrix_free (Q);
gsl_permutation_free (pmt);
-
+
return logdet_Q;
}
@@ -355,13 +356,13 @@ double CalcQi (const gsl_vector *eval, const gsl_vector *D_l, const gsl_matrix *
void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l, const gsl_matrix *X, const gsl_matrix *UltVehiY, gsl_vector *xHiy)
{
size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size;
-
+
gsl_vector_set_zero (xHiy);
-
+
double x, delta, dl, y, d;
- for (size_t i=0; i<d_size; i++) {
+ for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
- for (size_t j=0; j<c_size; j++) {
+ for (size_t j=0; j<c_size; j++) {
d=0.0;
for (size_t k=0; k<n_size; k++) {
x=gsl_matrix_get(X, j, k);
@@ -388,20 +389,20 @@ void CalcOmega (const gsl_vector *eval, const gsl_vector *D_l, gsl_matrix *Omega
{
size_t n_size=eval->size, d_size=D_l->size;
double delta, dl, d_u, d_e;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
-
+
d_u=dl/(delta*dl+1.0);
d_e=delta*d_u;
-
+
gsl_matrix_set(OmegaU, i, k, d_u);
gsl_matrix_set(OmegaE, i, k, d_e);
}
}
-
+
return;
}
@@ -410,8 +411,8 @@ void UpdateU (const gsl_matrix *OmegaE, const gsl_matrix *UltVehiY, const gsl_ma
{
gsl_matrix_memcpy (UltVehiU, UltVehiY);
gsl_matrix_sub (UltVehiU, UltVehiBX);
-
- gsl_matrix_mul_elements (UltVehiU, OmegaE);
+
+ gsl_matrix_mul_elements (UltVehiU, OmegaE);
return;
}
@@ -421,7 +422,7 @@ void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, const gsl
gsl_matrix_memcpy (UltVehiE, UltVehiY);
gsl_matrix_sub (UltVehiE, UltVehiBX);
gsl_matrix_sub (UltVehiE, UltVehiU);
-
+
return;
}
@@ -430,38 +431,38 @@ void UpdateE (const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiBX, const gsl
void UpdateL_B (const gsl_matrix *X, const gsl_matrix *XXti, const gsl_matrix *UltVehiY, const gsl_matrix *UltVehiU, gsl_matrix *UltVehiBX, gsl_matrix *UltVehiB)
{
size_t c_size=X->size1, d_size=UltVehiY->size1;
-
+
gsl_matrix *YUX=gsl_matrix_alloc (d_size, c_size);
-
+
gsl_matrix_memcpy (UltVehiBX, UltVehiY);
gsl_matrix_sub (UltVehiBX, UltVehiU);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasTrans, 1.0, UltVehiBX, X, 0.0, YUX);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, YUX, XXti, 0.0, UltVehiB);
-
- gsl_matrix_free(YUX);
-
+
+ gsl_matrix_free(YUX);
+
return;
}
void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, gsl_matrix *UltVehiB)
{
size_t d_size=UltVehiB->size1, c_size=UltVehiB->size2, dc_size=Qi->size1;
-
+
gsl_vector *b=gsl_vector_alloc (dc_size);
-
+
//calculate b=Qiv
gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, b);
-
+
//copy b to UltVehiB
for (size_t i=0; i<c_size; i++) {
gsl_vector_view UltVehiB_col=gsl_matrix_column (UltVehiB, i);
gsl_vector_const_view b_subcol=gsl_vector_const_subvector (b, i*d_size, d_size);
gsl_vector_memcpy (&UltVehiB_col.vector, &b_subcol.vector);
- }
-
+ }
+
gsl_vector_free(b);
-
+
return;
}
@@ -470,23 +471,23 @@ void UpdateRL_B (const gsl_vector *xHiy, const gsl_matrix *Qi, gsl_matrix *UltVe
void UpdateV (const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E, const gsl_matrix *Sigma_uu, const gsl_matrix *Sigma_ee, gsl_matrix *V_g, gsl_matrix *V_e)
{
size_t n_size=eval->size, d_size=U->size1;
-
+
gsl_matrix_set_zero (V_g);
gsl_matrix_set_zero (V_e);
-
+
double delta;
-
- //calculate the first part: UD^{-1}U^T and EE^T
+
+ //calculate the first part: UD^{-1}U^T and EE^T
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
if (delta==0) {continue;}
-
- gsl_vector_const_view U_col=gsl_matrix_const_column (U, k);
+
+ gsl_vector_const_view U_col=gsl_matrix_const_column (U, k);
gsl_blas_dsyr (CblasUpper, 1.0/delta, &U_col.vector, V_g);
}
-
- gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
-
+
+ gsl_blas_dsyrk(CblasUpper, CblasNoTrans, 1.0, E, 0.0, V_e);
+
//copy the upper part to lower part
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<i; j++) {
@@ -494,84 +495,84 @@ void UpdateV (const gsl_vector *eval, const gsl_matrix *U, const gsl_matrix *E,
gsl_matrix_set (V_e, i, j, gsl_matrix_get(V_e, j, i));
}
}
-
+
//add Sigma
gsl_matrix_add (V_g, Sigma_uu);
gsl_matrix_add (V_e, Sigma_ee);
-
+
//scale by 1/n
gsl_matrix_scale (V_g, 1.0/(double)n_size);
gsl_matrix_scale (V_e, 1.0/(double)n_size);
-
+
return;
}
void CalcSigma (const char func_name, const gsl_vector *eval, const gsl_vector *D_l, const gsl_matrix *X, const gsl_matrix *OmegaU, const gsl_matrix *OmegaE, const gsl_matrix *UltVeh, const gsl_matrix *Qi, gsl_matrix *Sigma_uu, gsl_matrix *Sigma_ee)
-{
+{
if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
size_t n_size=eval->size, c_size=X->size1, d_size=D_l->size, dc_size=Qi->size1;
-
+
gsl_matrix_set_zero(Sigma_uu);
gsl_matrix_set_zero(Sigma_ee);
-
- double delta, dl, x, d;
-
+
+ double delta, dl, x, d;
+
//calculate the first diagonal term
gsl_vector_view Suu_diag=gsl_matrix_diagonal (Sigma_uu);
gsl_vector_view See_diag=gsl_matrix_diagonal (Sigma_ee);
-
+
for (size_t k=0; k<n_size; k++) {
gsl_vector_const_view OmegaU_col=gsl_matrix_const_column (OmegaU, k);
gsl_vector_const_view OmegaE_col=gsl_matrix_const_column (OmegaE, k);
-
+
gsl_vector_add (&Suu_diag.vector, &OmegaU_col.vector);
gsl_vector_add (&See_diag.vector, &OmegaE_col.vector);
- }
-
+ }
+
//calculate the second term for reml
- if (func_name=='R' || func_name=='r') {
+ if (func_name=='R' || func_name=='r') {
gsl_matrix *M_u=gsl_matrix_alloc(dc_size, d_size);
gsl_matrix *M_e=gsl_matrix_alloc(dc_size, d_size);
- gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size);
-
+ gsl_matrix *QiM=gsl_matrix_alloc(dc_size, d_size);
+
gsl_matrix_set_zero(M_u);
gsl_matrix_set_zero(M_e);
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
//if (delta==0) {continue;}
-
+
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
- for (size_t j=0; j<c_size; j++) {
+ for (size_t j=0; j<c_size; j++) {
x=gsl_matrix_get(X, j, k);
d=x/(delta*dl+1.0);
gsl_matrix_set(M_e, j*d_size+i, i, d);
- gsl_matrix_set(M_u, j*d_size+i, i, d*dl);
+ gsl_matrix_set(M_u, j*d_size+i, i, d*dl);
}
- }
+ }
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_u, 0.0, QiM);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, delta, M_u, QiM, 1.0, Sigma_uu);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, M_e, 0.0, QiM);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, M_e, QiM, 1.0, Sigma_ee);
- }
-
+ }
+
gsl_matrix_free(M_u);
gsl_matrix_free(M_e);
- gsl_matrix_free(QiM);
+ gsl_matrix_free(QiM);
}
-
+
//multiply both sides by VehUl
gsl_matrix *M=gsl_matrix_alloc (d_size, d_size);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_uu, UltVeh, 0.0, M);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_uu);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Sigma_ee, UltVeh, 0.0, M);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, M, 0.0, Sigma_ee);
-
+
gsl_matrix_free(M);
return;
}
@@ -580,33 +581,33 @@ void CalcSigma (const char func_name, const gsl_vector *eval, const gsl_vector *
//'R' for restricted likelihood and 'L' for likelihood
//'R' update B and 'L' don't
//only calculate -0.5*\sum_{k=1}^n|H_k|-0.5yPxy
-double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, const gsl_vector *D_l, const gsl_matrix *UltVehiY, const gsl_matrix *Qi)
+double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, const gsl_vector *D_l, const gsl_matrix *UltVehiY, const gsl_matrix *Qi)
{
size_t n_size=eval->size, d_size=D_l->size, dc_size=Qi->size1;
double logl=0.0, delta, dl, y, d;
-
+
//calculate yHiy+log|H_k|
- for (size_t k=0; k<n_size; k++) {
+ for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
for (size_t i=0; i<d_size; i++) {
y=gsl_matrix_get(UltVehiY, i, k);
dl=gsl_vector_get(D_l, i);
d=delta*dl+1.0;
-
+
logl+=y*y/d+log(d);
}
}
-
+
//calculate the rest of yPxy
gsl_vector *Qiv=gsl_vector_alloc(dc_size);
-
+
gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, xHiy, 0.0, Qiv);
gsl_blas_ddot(xHiy, Qiv, &d);
-
+
logl-=d;
-
+
gsl_vector_free(Qiv);
-
+
return -0.5*logl;
}
@@ -619,10 +620,10 @@ double MphCalcLogL (const gsl_vector *eval, const gsl_vector *xHiy, const gsl_ve
double MphEM (const char func_name, const size_t max_iter, const double max_prec, const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *Y, gsl_matrix *U_hat, gsl_matrix *E_hat, gsl_matrix *OmegaU, gsl_matrix *OmegaE, gsl_matrix *UltVehiY, gsl_matrix *UltVehiBX, gsl_matrix *UltVehiU, gsl_matrix *UltVehiE, gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B)
{
if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return 0.0;}
-
+
size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
- size_t dc_size=d_size*c_size;
-
+ size_t dc_size=d_size*c_size;
+
gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
gsl_matrix *XXti=gsl_matrix_alloc (c_size, c_size);
gsl_vector *D_l=gsl_vector_alloc (d_size);
@@ -633,11 +634,11 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
gsl_matrix *Sigma_uu=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *Sigma_ee=gsl_matrix_alloc (d_size, d_size);
gsl_vector *xHiy=gsl_vector_alloc (dc_size);
- gsl_permutation * pmt=gsl_permutation_alloc (c_size);
-
+ gsl_permutation * pmt=gsl_permutation_alloc (c_size);
+
double logl_const=0.0, logl_old=0.0, logl_new=0.0, logdet_Q, logdet_Ve;
int sig;
-
+
//calculate |XXt| and (XXt)^{-1}
gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
for (size_t i=0; i<c_size; ++i) {
@@ -645,17 +646,17 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
gsl_matrix_set (XXt, i, j, gsl_matrix_get (XXt, j, i));
}
}
-
+
LUDecomp (XXt, pmt, &sig);
LUInvert (XXt, pmt, XXti);
-
- //calculate the constant for logl
- if (func_name=='R' || func_name=='r') {
+
+ //calculate the constant for logl
+ if (func_name=='R' || func_name=='r') {
logl_const=-0.5*(double)(n_size-c_size)*(double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt);
} else {
logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
- }
-
+ }
+
//start EM
for (size_t t=0; t<max_iter; t++) {
logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
@@ -665,17 +666,17 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
CalcXHiY(eval, D_l, X, UltVehiY, xHiy);
- //calculate log likelihood/restricted likelihood value, and terminate if change is small
+ //calculate log likelihood/restricted likelihood value, and terminate if change is small
logl_new=logl_const+MphCalcLogL (eval, xHiy, D_l, UltVehiY, Qi)-0.5*(double)n_size*logdet_Ve;
- if (func_name=='R' || func_name=='r') {
+ if (func_name=='R' || func_name=='r') {
logl_new+=-0.5*(logdet_Q-(double)c_size*logdet_Ve);
- }
+ }
if (t!=0 && abs(logl_new-logl_old)<max_prec) {break;}
logl_old=logl_new;
-
+
/*
cout<<"iteration = "<<t<<" log-likelihood = "<<logl_old<<"\t"<<logl_new<<endl;
-
+
cout<<"Vg: "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
@@ -691,28 +692,28 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
cout<<endl;
}
*/
-
+
CalcOmega (eval, D_l, OmegaU, OmegaE);
//Update UltVehiB, UltVehiU
- if (func_name=='R' || func_name=='r') {
+ if (func_name=='R' || func_name=='r') {
UpdateRL_B(xHiy, Qi, UltVehiB);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, UltVehiBX);
} else if (t==0) {
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, B, 0.0, UltVehiB);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, UltVehiBX);
}
-
+
UpdateU(OmegaE, UltVehiY, UltVehiBX, UltVehiU);
-
- if (func_name=='L' || func_name=='l') {
+
+ if (func_name=='L' || func_name=='l') {
//UltVehiBX is destroyed here
UpdateL_B(X, XXti, UltVehiY, UltVehiU, UltVehiBX, UltVehiB);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehiB, X, 0.0, UltVehiBX);
}
UpdateE(UltVehiY, UltVehiBX, UltVehiU, UltVehiE);
-
+
//calculate U_hat, E_hat and B
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiU, 0.0, U_hat);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, UltVehiE, 0.0, E_hat);
@@ -720,11 +721,11 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
//calculate Sigma_uu and Sigma_ee
CalcSigma (func_name, eval, D_l, X, OmegaU, OmegaE, UltVeh, Qi, Sigma_uu, Sigma_ee);
-
+
//update V_g and V_e
- UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
+ UpdateV (eval, U_hat, E_hat, Sigma_uu, Sigma_ee, V_g, V_e);
}
-
+
gsl_matrix_free(XXt);
gsl_matrix_free(XXti);
gsl_vector_free(D_l);
@@ -736,7 +737,7 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
gsl_matrix_free(Sigma_ee);
gsl_vector_free(xHiy);
gsl_permutation_free(pmt);
-
+
return logl_new;
}
@@ -747,63 +748,63 @@ double MphEM (const char func_name, const size_t max_iter, const double max_prec
//calculate p-value, beta (d by 1 vector) and V(beta)
-double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta)
+double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_vector *beta, gsl_matrix *Vbeta)
{
size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
size_t dc_size=d_size*c_size;
double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
-
+
gsl_vector *D_l=gsl_vector_alloc (d_size);
gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
- gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size);
+ gsl_matrix *WHix=gsl_matrix_alloc (dc_size, d_size);
gsl_matrix *QiWHix=gsl_matrix_alloc(dc_size, d_size);
-
- gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size);
+
+ gsl_matrix *xPx=gsl_matrix_alloc (d_size, d_size);
gsl_vector *xPy=gsl_vector_alloc (d_size);
//gsl_vector *UltVehiy=gsl_vector_alloc (d_size);
gsl_vector *WHiy=gsl_vector_alloc (dc_size);
-
+
gsl_matrix_set_zero (xPx);
gsl_matrix_set_zero (WHix);
gsl_vector_set_zero (xPy);
gsl_vector_set_zero (WHiy);
-
+
//eigen decomposition and calculate log|Ve|
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
+ logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
+
//calculate Qi and log|Q|
- logdet_Q=CalcQi (eval, D_l, W, Qi);
-
+ logdet_Q=CalcQi (eval, D_l, W, Qi);
+
//calculate UltVehiY
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
-
+
//calculate WHix, WHiy, xHiy, xHix
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
-
+
d1=0.0; d2=0.0;
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
dx=gsl_vector_get(x_vec, k);
dy=gsl_matrix_get(UltVehiY, i, k);
-
+
d1+=dx*dy/(delta*dl+1.0);
d2+=dx*dx/(delta*dl+1.0);
}
gsl_vector_set (xPy, i, d1);
gsl_matrix_set (xPx, i, i, d2);
-
- for (size_t j=0; j<c_size; j++) {
+
+ for (size_t j=0; j<c_size; j++) {
d1=0.0; d2=0.0;
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
dx=gsl_vector_get(x_vec, k);
dw=gsl_matrix_get(W, j, k);
dy=gsl_matrix_get(UltVehiY, i, k);
-
- //if (delta==0) {continue;}
+
+ //if (delta==0) {continue;}
d1+=dx*dw/(delta*dl+1.0);
d2+=dy*dw/(delta*dl+1.0);
}
@@ -811,11 +812,11 @@ double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, const gsl_matr
gsl_vector_set(WHiy, j*d_size+i, d2);
}
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, WHix, 0.0, QiWHix);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, -1.0, WHix, QiWHix, 1.0, xPx);
gsl_blas_dgemv(CblasTrans, -1.0, QiWHix, WHiy, 1.0, xPy);
-
+
//calculate V(beta) and beta
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (d_size);
@@ -826,40 +827,40 @@ double MphCalcP (const gsl_vector *eval, const gsl_vector *x_vec, const gsl_matr
//need to multiply UltVehi on both sides or one side
gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, D_l, 0.0, beta);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Vbeta, UltVeh, 0.0, xPx);
- gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
+ gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, xPx, 0.0, Vbeta);
- //calculate test statistic and p value
+ //calculate test statistic and p value
gsl_blas_ddot(D_l, xPy, &d);
-
+
p_value=gsl_cdf_chisq_Q (d, (double)d_size);
//d*=(double)(n_size-c_size-d_size)/((double)d_size*(double)(n_size-c_size-1));
- //p_value=gsl_cdf_fdist_Q (d, (double)d_size, (double)(n_size-c_size-d_size));
-
+ //p_value=gsl_cdf_fdist_Q (d, (double)d_size, (double)(n_size-c_size-d_size));
+
gsl_vector_free(D_l);
gsl_matrix_free(UltVeh);
gsl_matrix_free(UltVehi);
gsl_matrix_free(Qi);
- gsl_matrix_free(WHix);
+ gsl_matrix_free(WHix);
gsl_matrix_free(QiWHix);
-
- gsl_matrix_free(xPx);
+
+ gsl_matrix_free(xPx);
gsl_vector_free(xPy);
gsl_vector_free(WHiy);
-
+
gsl_permutation_free(pmt);
-
+
return p_value;
}
//calculate B and its standard error (which is a matrix of the same dimension as B)
-void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B, gsl_matrix *se_B)
+void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, const gsl_matrix *Y, const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_matrix *UltVehiY, gsl_matrix *B, gsl_matrix *se_B)
{
size_t n_size=eval->size, c_size=W->size1, d_size=V_g->size1;
size_t dc_size=d_size*c_size;
double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
-
+
gsl_vector *D_l=gsl_vector_alloc (d_size);
gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
@@ -870,67 +871,67 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, const gsl_matrix
gsl_vector *QiWHiy=gsl_vector_alloc (dc_size);
gsl_vector *beta=gsl_vector_alloc (dc_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (dc_size, dc_size);
-
+
gsl_vector_set_zero (WHiy);
-
+
//eigen decomposition and calculate log|Ve|
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
+ logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
+
//calculate Qi and log|Q|
- logdet_Q=CalcQi (eval, D_l, W, Qi);
-
+ logdet_Q=CalcQi (eval, D_l, W, Qi);
+
//calculate UltVehiY
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
-
+
//calculate WHiy
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
-
- for (size_t j=0; j<c_size; j++) {
+
+ for (size_t j=0; j<c_size; j++) {
d=0.0;
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
dw=gsl_matrix_get(W, j, k);
dy=gsl_matrix_get(UltVehiY, i, k);
-
- //if (delta==0) {continue;}
+
+ //if (delta==0) {continue;}
d+=dy*dw/(delta*dl+1.0);
}
gsl_vector_set(WHiy, j*d_size+i, d);
}
}
-
+
gsl_blas_dgemv(CblasNoTrans, 1.0, Qi, WHiy, 0.0, QiWHiy);
-
+
//need to multiply I_c\otimes UltVehi on both sides or one side
for (size_t i=0; i<c_size; i++) {
gsl_vector_view QiWHiy_sub=gsl_vector_subvector(QiWHiy, i*d_size, d_size);
- gsl_vector_view beta_sub=gsl_vector_subvector(beta, i*d_size, d_size);
+ gsl_vector_view beta_sub=gsl_vector_subvector(beta, i*d_size, d_size);
gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &QiWHiy_sub.vector, 0.0, &beta_sub.vector);
-
+
for (size_t j=0; j<c_size; j++) {
gsl_matrix_view Qi_sub=gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size);
gsl_matrix_view Qitemp_sub=gsl_matrix_submatrix (Qi_temp, i*d_size, j*d_size, d_size, d_size);
gsl_matrix_view Vbeta_sub=gsl_matrix_submatrix (Vbeta, i*d_size, j*d_size, d_size, d_size);
-
+
if (j<i) {
gsl_matrix_view Vbeta_sym=gsl_matrix_submatrix (Vbeta, j*d_size, i*d_size, d_size, d_size);
gsl_matrix_transpose_memcpy (&Vbeta_sub.matrix, &Vbeta_sym.matrix);
} else {
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &Qi_sub.matrix, UltVeh, 0.0, &Qitemp_sub.matrix);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVeh, &Qitemp_sub.matrix, 0.0, &Vbeta_sub.matrix);
- }
+ }
}
}
-
+
//copy beta to B, and Vbeta to se_B
for (size_t j=0; j<B->size2; j++) {
for (size_t i=0; i<B->size1; i++) {
gsl_matrix_set(B, i, j, gsl_vector_get(beta, j*d_size+i));
gsl_matrix_set(se_B, i, j, sqrt(gsl_matrix_get(Vbeta, j*d_size+i, j*d_size+i)));
}
- }
-
+ }
+
//free matrices
gsl_vector_free(D_l);
gsl_matrix_free(UltVeh);
@@ -941,7 +942,7 @@ void MphCalcBeta (const gsl_vector *eval, const gsl_matrix *W, const gsl_matrix
gsl_vector_free(QiWHiy);
gsl_vector_free(beta);
gsl_matrix_free(Vbeta);
-
+
return;
}
@@ -961,42 +962,42 @@ void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *V_
gsl_matrix_set_zero (Hi_all);
gsl_matrix_set_zero (Qi);
logdet_H=0.0; logdet_Q=0.0;
-
+
size_t n_size=eval->size, c_size=X->size1, d_size=V_g->size1;
- double logdet_Ve=0.0, delta, dl, d;
-
+ double logdet_Ve=0.0, delta, dl, d;
+
gsl_matrix *mat_dd=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
gsl_vector *D_l=gsl_vector_alloc (d_size);
-
+
//calculate D_l, UltVeh and UltVehi
logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
+
//calculate each Hi and log|H_k|
logdet_H=(double)n_size*logdet_Ve;
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
gsl_matrix_memcpy (mat_dd, UltVehi);
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
d=delta*dl+1.0;
-
+
gsl_vector_view mat_row=gsl_matrix_row (mat_dd, i);
gsl_vector_scale (&mat_row.vector, 1.0/d);
-
+
logdet_H+=log(d);
}
-
+
gsl_matrix_view Hi_k=gsl_matrix_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, UltVehi, mat_dd, 0.0, &Hi_k.matrix);
- }
-
+ }
+
//calculate Qi, and multiply I\otimes UtVeh on both side
//and calculate logdet_Q, don't forget to substract c_size*logdet_Ve
logdet_Q=CalcQi (eval, D_l, X, Qi)-(double)c_size*logdet_Ve;
-
+
for (size_t i=0; i<c_size; i++) {
for (size_t j=0; j<c_size; j++) {
gsl_matrix_view Qi_sub=gsl_matrix_submatrix (Qi, i*d_size, j*d_size, d_size, d_size);
@@ -1015,7 +1016,7 @@ void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *V_
gsl_matrix_free(UltVeh);
gsl_matrix_free(UltVehi);
gsl_vector_free(D_l);
-
+
return;
}
@@ -1026,17 +1027,17 @@ void CalcHiQi (const gsl_vector *eval, const gsl_matrix *X, const gsl_matrix *V_
void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all, gsl_matrix *Hiy_all)
{
gsl_matrix_set_zero (Hiy_all);
-
+
size_t n_size=Y->size2, d_size=Y->size1;
-
+
for (size_t k=0; k<n_size; k++) {
gsl_matrix_const_view Hi_k=gsl_matrix_const_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
gsl_vector_view Hiy_k=gsl_matrix_column(Hiy_all, k);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, &Hi_k.matrix, &y_k.vector, 0.0, &Hiy_k.vector);
}
-
+
return;
}
@@ -1045,11 +1046,11 @@ void Calc_Hiy_all (const gsl_matrix *Y, const gsl_matrix *Hi_all, gsl_matrix *Hi
void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all, gsl_matrix *xHi_all)
{
gsl_matrix_set_zero (xHi_all);
-
+
size_t n_size=X->size2, c_size=X->size1, d_size=Hi_all->size1;
double d;
-
+
for (size_t k=0; k<n_size; k++) {
gsl_matrix_const_view Hi_k=gsl_matrix_const_submatrix(Hi_all, 0, k*d_size, d_size, d_size);
@@ -1060,7 +1061,7 @@ void Calc_xHi_all (const gsl_matrix *X, const gsl_matrix *Hi_all, gsl_matrix *xH
gsl_matrix_scale(&xHi_sub.matrix, d);
}
}
-
+
return;
}
@@ -1070,15 +1071,15 @@ double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all)
{
double yHiy=0.0, d;
size_t n_size=Y->size2;
-
+
for (size_t k=0; k<n_size; k++) {
gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
gsl_vector_const_view Hiy_k=gsl_matrix_const_column(Hiy_all, k);
-
+
gsl_blas_ddot (&Hiy_k.vector, &y_k.vector, &d);
yHiy+=d;
}
-
+
return yHiy;
}
@@ -1087,16 +1088,16 @@ double Calc_yHiy (const gsl_matrix *Y, const gsl_matrix *Hiy_all)
void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy)
{
gsl_vector_set_zero (xHiy);
-
+
size_t n_size=Y->size2, d_size=Y->size1, dc_size=xHi->size1;
-
+
for (size_t k=0; k<n_size; k++) {
gsl_matrix_const_view xHi_k=gsl_matrix_const_submatrix(xHi, 0, k*d_size, dc_size, d_size);
gsl_vector_const_view y_k=gsl_matrix_const_column(Y, k);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, &xHi_k.matrix, &y_k.vector, 1.0, xHiy);
}
-
+
return;
}
@@ -1107,10 +1108,10 @@ void Calc_xHiy (const gsl_matrix *Y, const gsl_matrix *xHi, gsl_vector *xHiy)
size_t GetIndex (const size_t i, const size_t j, const size_t d_size)
{
if (i>=d_size || j>=d_size) {cout<<"error in GetIndex."<<endl; return 0;}
-
+
size_t s, l;
if (j<i) {s=j; l=i;} else {s=i; l=j;}
-
+
return (2*d_size-s+1)*s/2+l-s;
}
@@ -1120,16 +1121,16 @@ void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i
{
yHiDHiy_g=0.0;
yHiDHiy_e=0.0;
-
+
size_t n_size=eval->size;
-
+
double delta, d1, d2;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
d1=gsl_matrix_get (Hiy, i, k);
d2=gsl_matrix_get (Hiy, j, k);
-
+
if (i==j) {
yHiDHiy_g+=delta*d1*d2;
yHiDHiy_e+=d1*d2;
@@ -1137,8 +1138,8 @@ void Calc_yHiDHiy (const gsl_vector *eval, const gsl_matrix *Hiy, const size_t i
yHiDHiy_g+=delta*d1*d2*2.0;
yHiDHiy_e+=d1*d2*2.0;
}
- }
-
+ }
+
return;
}
@@ -1148,29 +1149,29 @@ void Calc_xHiDHiy (const gsl_vector *eval, const gsl_matrix *xHi, const gsl_matr
{
gsl_vector_set_zero(xHiDHiy_g);
gsl_vector_set_zero(xHiDHiy_e);
-
+
size_t n_size=eval->size, d_size=Hiy->size1;
-
+
double delta, d;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
gsl_vector_const_view xHi_col_i=gsl_matrix_const_column (xHi, k*d_size+i);
d=gsl_matrix_get (Hiy, j, k);
-
+
gsl_blas_daxpy (d*delta, &xHi_col_i.vector, xHiDHiy_g);
- gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e);
-
+ gsl_blas_daxpy (d, &xHi_col_i.vector, xHiDHiy_e);
+
if (i!=j) {
gsl_vector_const_view xHi_col_j=gsl_matrix_const_column (xHi, k*d_size+j);
d=gsl_matrix_get (Hiy, i, k);
-
+
gsl_blas_daxpy (d*delta, &xHi_col_j.vector, xHiDHiy_g);
- gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e);
+ gsl_blas_daxpy (d, &xHi_col_j.vector, xHiDHiy_e);
}
}
-
+
return;
}
@@ -1179,42 +1180,42 @@ void Calc_xHiDHix (const gsl_vector *eval, const gsl_matrix *xHi, const size_t i
{
gsl_matrix_set_zero(xHiDHix_g);
gsl_matrix_set_zero(xHiDHix_e);
-
+
size_t n_size=eval->size, dc_size=xHi->size1;
size_t d_size=xHi->size2/n_size;
-
+
double delta;
-
+
gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *mat_dcdc_t=gsl_matrix_alloc (dc_size, dc_size);
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
gsl_vector_const_view xHi_col_i=gsl_matrix_const_column (xHi, k*d_size+i);
gsl_vector_const_view xHi_col_j=gsl_matrix_const_column (xHi, k*d_size+j);
-
+
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
-
+
gsl_matrix_transpose_memcpy (mat_dcdc_t, mat_dcdc);
-
+
gsl_matrix_add (xHiDHix_e, mat_dcdc);
-
+
gsl_matrix_scale (mat_dcdc, delta);
gsl_matrix_add (xHiDHix_g, mat_dcdc);
-
+
if (i!=j) {
- gsl_matrix_add (xHiDHix_e, mat_dcdc_t);
-
+ gsl_matrix_add (xHiDHix_e, mat_dcdc_t);
+
gsl_matrix_scale (mat_dcdc_t, delta);
gsl_matrix_add (xHiDHix_g, mat_dcdc_t);
}
}
-
+
gsl_matrix_free(mat_dcdc);
gsl_matrix_free(mat_dcdc_t);
-
+
return;
}
@@ -1225,30 +1226,30 @@ void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_ma
yHiDHiDHiy_gg=0.0;
yHiDHiDHiy_ee=0.0;
yHiDHiDHiy_ge=0.0;
-
+
size_t n_size=eval->size, d_size=Hiy->size1;
-
+
double delta, d_Hiy_i1, d_Hiy_j1, d_Hiy_i2, d_Hiy_j2, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
d_Hiy_i1=gsl_matrix_get (Hiy, i1, k);
d_Hiy_j1=gsl_matrix_get (Hiy, j1, k);
d_Hiy_i2=gsl_matrix_get (Hiy, i2, k);
d_Hiy_j2=gsl_matrix_get (Hiy, j2, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
- if (i1==j1) {
+
+ d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
+ d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
+ d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
+ d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
+
+ if (i1==j1) {
yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2);
-
- if (i2!=j2) {
+
+ if (i2!=j2) {
yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2);
@@ -1257,7 +1258,7 @@ void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_ma
yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
yHiDHiDHiy_ge+=delta*(d_Hiy_i1*d_Hi_j1i2*d_Hiy_j2+d_Hiy_j1*d_Hi_i1i2*d_Hiy_j2);
-
+
if (i2!=j2) {
yHiDHiDHiy_gg+=delta*delta*(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2+d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
yHiDHiDHiy_ee+=(d_Hiy_i1*d_Hi_j1j2*d_Hiy_i2+d_Hiy_j1*d_Hi_i1j2*d_Hiy_i2);
@@ -1265,7 +1266,7 @@ void Calc_yHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_ma
}
}
}
-
+
return;
}
@@ -1275,56 +1276,56 @@ void Calc_xHiDHiDHiy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_ma
gsl_vector_set_zero(xHiDHiDHiy_gg);
gsl_vector_set_zero(xHiDHiDHiy_ee);
gsl_vector_set_zero(xHiDHiDHiy_ge);
-
+
size_t n_size=eval->size, d_size=Hiy->size1;
-
+
double delta, d_Hiy_i, d_Hiy_j, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
gsl_vector_const_view xHi_col_i=gsl_matrix_const_column (xHi, k*d_size+i1);
gsl_vector_const_view xHi_col_j=gsl_matrix_const_column (xHi, k*d_size+j1);
-
+
d_Hiy_i=gsl_matrix_get (Hiy, i2, k);
d_Hiy_j=gsl_matrix_get (Hiy, j2, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
+
+ d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
+ d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
+ d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
+ d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
+
if (i1==j1) {
gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ge);
-
+
if (i2!=j2) {
gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ge);
}
- } else {
+ } else {
gsl_blas_daxpy (delta*delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_j1i2*d_Hiy_j, &xHi_col_i.vector, xHiDHiDHiy_ge);
-
+
gsl_blas_daxpy (delta*delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_i1i2*d_Hiy_j, &xHi_col_j.vector, xHiDHiDHiy_ge);
-
+
if (i2!=j2) {
gsl_blas_daxpy (delta*delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_j1j2*d_Hiy_i, &xHi_col_i.vector, xHiDHiDHiy_ge);
-
+
gsl_blas_daxpy (delta*delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_gg);
gsl_blas_daxpy (d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ee);
gsl_blas_daxpy (delta*d_Hi_i1j2*d_Hiy_i, &xHi_col_j.vector, xHiDHiDHiy_ge);
}
}
}
-
+
return;
}
@@ -1334,106 +1335,106 @@ void Calc_xHiDHiDHix (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_ma
gsl_matrix_set_zero(xHiDHiDHix_gg);
gsl_matrix_set_zero(xHiDHiDHix_ee);
gsl_matrix_set_zero(xHiDHiDHix_ge);
-
+
size_t n_size=eval->size, d_size=Hi->size1, dc_size=xHi->size1;
-
+
double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
+
gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
gsl_vector_const_view xHi_col_i1=gsl_matrix_const_column (xHi, k*d_size+i1);
gsl_vector_const_view xHi_col_j1=gsl_matrix_const_column (xHi, k*d_size+j1);
gsl_vector_const_view xHi_col_i2=gsl_matrix_const_column (xHi, k*d_size+i2);
- gsl_vector_const_view xHi_col_j2=gsl_matrix_const_column (xHi, k*d_size+j2);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
- d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
+ gsl_vector_const_view xHi_col_j2=gsl_matrix_const_column (xHi, k*d_size+j2);
+
+ d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
+ d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
+ d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
+ d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
+
if (i1==j1) {
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
if (i2!=j2) {
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
}
} else {
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_j1i2, &xHi_col_i1.vector, &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
+
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_i1i2, &xHi_col_j1.vector, &xHi_col_j2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
+
if (i2!=j2) {
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_j1j2, &xHi_col_i1.vector, &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
-
+
gsl_matrix_set_zero (mat_dcdc);
gsl_blas_dger (d_Hi_i1j2, &xHi_col_j1.vector, &xHi_col_i2.vector, mat_dcdc);
-
- gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
+
+ gsl_matrix_add(xHiDHiDHix_ee, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
- gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
+ gsl_matrix_add(xHiDHiDHix_ge, mat_dcdc);
gsl_matrix_scale(mat_dcdc, delta);
gsl_matrix_add(xHiDHiDHix_gg, mat_dcdc);
}
}
}
-
+
gsl_matrix_free(mat_dcdc);
-
+
return;
}
-void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_t i, const size_t j, double &tHiD_g, double &tHiD_e)
+void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_t i, const size_t j, double &tHiD_g, double &tHiD_e)
{
tHiD_g=0.0;
tHiD_e=0.0;
-
+
size_t n_size=eval->size, d_size=Hi->size1;
double delta, d;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
d=gsl_matrix_get (Hi, j, k*d_size+i);
-
+
if (i==j) {
tHiD_g+=delta*d;
tHiD_e+=d;
@@ -1442,33 +1443,33 @@ void Calc_traceHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_t i
tHiD_e+=d*2.0;
}
}
-
+
return;
}
-void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee, double &tHiDHiD_ge)
+void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &tHiDHiD_gg, double &tHiDHiD_ee, double &tHiDHiD_ge)
{
tHiDHiD_gg=0.0;
tHiDHiD_ee=0.0;
tHiDHiD_ge=0.0;
-
+
size_t n_size=eval->size, d_size=Hi->size1;
double delta, d_Hi_i1i2, d_Hi_i1j2, d_Hi_j1i2, d_Hi_j1j2;
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
- d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
- d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
- d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
+
+ d_Hi_i1i2=gsl_matrix_get (Hi, i1, k*d_size+i2);
+ d_Hi_i1j2=gsl_matrix_get (Hi, i1, k*d_size+j2);
+ d_Hi_j1i2=gsl_matrix_get (Hi, j1, k*d_size+i2);
d_Hi_j1j2=gsl_matrix_get (Hi, j1, k*d_size+j2);
-
+
if (i1==j1) {
tHiDHiD_gg+=delta*delta*d_Hi_i1j2*d_Hi_j1i2;
tHiDHiD_ee+=d_Hi_i1j2*d_Hi_j1i2;
tHiDHiD_ge+=delta*d_Hi_i1j2*d_Hi_j1i2;
-
+
if (i2!=j2) {
tHiDHiD_gg+=delta*delta*d_Hi_i1i2*d_Hi_j1j2;
tHiDHiD_ee+=d_Hi_i1i2*d_Hi_j1j2;
@@ -1478,7 +1479,7 @@ void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_
tHiDHiD_gg+=delta*delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
tHiDHiD_ee+=(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
tHiDHiD_ge+=delta*(d_Hi_i1j2*d_Hi_j1i2+d_Hi_j1j2*d_Hi_i1i2);
-
+
if (i2!=j2) {
tHiDHiD_gg+=delta*delta*(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2);
tHiDHiD_ee+=(d_Hi_i1i2*d_Hi_j1j2+d_Hi_j1i2*d_Hi_i1j2);
@@ -1486,34 +1487,34 @@ void Calc_traceHiDHiD (const gsl_vector *eval, const gsl_matrix *Hi, const size_
}
}
}
-
+
return;
}
//trace(PD)=trace((Hi-HixQixHi)D)=trace(HiD)-trace(HixQixHiD)
-void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g, const gsl_matrix *xHiDHix_all_e, const size_t i, const size_t j, double &tPD_g, double &tPD_e)
+void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matrix *Hi, const gsl_matrix *xHiDHix_all_g, const gsl_matrix *xHiDHix_all_e, const size_t i, const size_t j, double &tPD_g, double &tPD_e)
{
size_t dc_size=Qi->size1, d_size=Hi->size1;
size_t v=GetIndex(i, j, d_size);
-
+
double d;
-
+
//calculate the first part: trace(HiD)
Calc_traceHiD (eval, Hi, i, j, tPD_g, tPD_e);
-
+
//calculate the second part: -trace(HixQixHiD)
for (size_t k=0; k<dc_size; k++) {
gsl_vector_const_view Qi_row=gsl_matrix_const_row (Qi, k);
gsl_vector_const_view xHiDHix_g_col=gsl_matrix_const_column (xHiDHix_all_g, v*dc_size+k);
gsl_vector_const_view xHiDHix_e_col=gsl_matrix_const_column (xHiDHix_all_e, v*dc_size+k);
-
+
gsl_blas_ddot(&Qi_row.vector, &xHiDHix_g_col.vector, &d);
tPD_g-=d;
gsl_blas_ddot(&Qi_row.vector, &xHiDHix_e_col.vector, &d);
tPD_e-=d;
}
-
+
return;
}
@@ -1521,14 +1522,14 @@ void Calc_tracePD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matri
//trace(PDPD)=trace((Hi-HixQixHi)D(Hi-HixQixHi)D)
//=trace(HiDHiD)-trace(HixQixHiDHiD)-trace(HiDHixQixHiD)+trace(HixQixHiDHixQixHiD)
-void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *QixHiDHix_all_g, const gsl_matrix *QixHiDHix_all_e, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge)
+void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *QixHiDHix_all_g, const gsl_matrix *QixHiDHix_all_e, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &tPDPD_gg, double &tPDPD_ee, double &tPDPD_ge)
{
size_t dc_size=Qi->size1, d_size=Hi->size1;
size_t v_size=d_size*(d_size+1)/2;
size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
-
+
double d;
-
+
//calculate the first part: trace(HiDHiD)
Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
@@ -1549,7 +1550,7 @@ void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_mat
gsl_vector_const_view xHiDHiDHix_gg_row=gsl_matrix_const_row (xHiDHiDHix_gg, i);
gsl_vector_const_view xHiDHiDHix_ee_row=gsl_matrix_const_row (xHiDHiDHix_ee, i);
gsl_vector_const_view xHiDHiDHix_ge_row=gsl_matrix_const_row (xHiDHiDHix_ge, i);
-
+
gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_gg_row.vector, &d);
tPDPD_gg-=d;
gsl_blas_ddot(&Qi_row.vector, &xHiDHiDHix_ee_row.vector, &d);
@@ -1560,7 +1561,7 @@ void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_mat
}
//calculate the fourth part: trace(HixQixHiDHixQixHiD)
- for (size_t i=0; i<dc_size; i++) {
+ for (size_t i=0; i<dc_size; i++) {
//gsl_vector_const_view QixHiDHix_g_row1=gsl_matrix_const_subrow (QixHiDHix_all_g, i, v1*dc_size, dc_size);
//gsl_vector_const_view QixHiDHix_e_row1=gsl_matrix_const_subrow (QixHiDHix_all_e, i, v1*dc_size, dc_size);
@@ -1578,7 +1579,7 @@ void Calc_tracePDPD (const gsl_vector *eval, const gsl_matrix *Qi, const gsl_mat
tPDPD_ee+=d;
gsl_blas_ddot(&QixHiDHix_g_row1.vector, &QixHiDHix_e_col2.vector, &d);
tPDPD_ge+=d;
- }
+ }
return;
}
@@ -1590,18 +1591,18 @@ void Calc_xHiDHiy_all (const gsl_vector *eval, const gsl_matrix *xHi, const gsl_
{
gsl_matrix_set_zero(xHiDHiy_all_g);
gsl_matrix_set_zero(xHiDHiy_all_e);
-
+
size_t d_size=Hiy->size1;
size_t v;
-
+
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
if (j<i) {continue;}
v=GetIndex(i, j, d_size);
-
+
gsl_vector_view xHiDHiy_g=gsl_matrix_column (xHiDHiy_all_g, v);
gsl_vector_view xHiDHiy_e=gsl_matrix_column (xHiDHiy_all_e, v);
-
+
Calc_xHiDHiy (eval, xHi, Hiy, i, j, &xHiDHiy_g.vector, &xHiDHiy_e.vector);
}
}
@@ -1614,18 +1615,18 @@ void Calc_xHiDHix_all (const gsl_vector *eval, const gsl_matrix *xHi, gsl_matrix
{
gsl_matrix_set_zero(xHiDHix_all_g);
gsl_matrix_set_zero(xHiDHix_all_e);
-
+
size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
size_t v;
-
+
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
if (j<i) {continue;}
v=GetIndex(i, j, d_size);
-
+
gsl_matrix_view xHiDHix_g=gsl_matrix_submatrix (xHiDHix_all_g, 0, v*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHix_e=gsl_matrix_submatrix (xHiDHix_all_e, 0, v*dc_size, dc_size, dc_size);
-
+
Calc_xHiDHix (eval, xHi, i, j, &xHiDHix_g.matrix, &xHiDHix_e.matrix);
}
}
@@ -1640,24 +1641,24 @@ void Calc_xHiDHiDHiy_all (const size_t v_size, const gsl_vector *eval, const gsl
gsl_matrix_set_zero(xHiDHiDHiy_all_gg);
gsl_matrix_set_zero(xHiDHiDHiy_all_ee);
gsl_matrix_set_zero(xHiDHiDHiy_all_ge);
-
+
size_t d_size=Hiy->size1;
size_t v1, v2;
-
+
for (size_t i1=0; i1<d_size; i1++) {
for (size_t j1=0; j1<d_size; j1++) {
if (j1<i1) {continue;}
v1=GetIndex(i1, j1, d_size);
-
+
for (size_t i2=0; i2<d_size; i2++) {
for (size_t j2=0; j2<d_size; j2++) {
if (j2<i2) {continue;}
v2=GetIndex(i2, j2, d_size);
-
+
gsl_vector_view xHiDHiDHiy_gg=gsl_matrix_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
gsl_vector_view xHiDHiDHiy_ee=gsl_matrix_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
gsl_vector_view xHiDHiDHiy_ge=gsl_matrix_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
+
Calc_xHiDHiDHiy (eval, Hi, xHi, Hiy, i1, j1, i2, j2, &xHiDHiDHiy_gg.vector, &xHiDHiDHiy_ee.vector, &xHiDHiDHiy_ge.vector);
}
}
@@ -1673,33 +1674,33 @@ void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval, const gsl
gsl_matrix_set_zero(xHiDHiDHix_all_gg);
gsl_matrix_set_zero(xHiDHiDHix_all_ee);
gsl_matrix_set_zero(xHiDHiDHix_all_ge);
-
+
size_t d_size=xHi->size2/eval->size, dc_size=xHi->size1;
- size_t v1, v2;
-
+ size_t v1, v2;
+
for (size_t i1=0; i1<d_size; i1++) {
for (size_t j1=0; j1<d_size; j1++) {
if (j1<i1) {continue;}
v1=GetIndex(i1, j1, d_size);
-
+
for (size_t i2=0; i2<d_size; i2++) {
for (size_t j2=0; j2<d_size; j2++) {
if (j2<i2) {continue;}
v2=GetIndex(i2, j2, d_size);
-
+
if (v2<v1) {continue;}
-
+
gsl_matrix_view xHiDHiDHix_gg1=gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ee1=gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ge1=gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
-
+
Calc_xHiDHiDHix (eval, Hi, xHi, i1, j1, i2, j2, &xHiDHiDHix_gg1.matrix, &xHiDHiDHix_ee1.matrix, &xHiDHiDHix_ge1.matrix);
-
+
if (v2!=v1) {
gsl_matrix_view xHiDHiDHix_gg2=gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, (v2*v_size+v1)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ee2=gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, (v2*v_size+v1)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ge2=gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, (v2*v_size+v1)*dc_size, dc_size, dc_size);
-
+
gsl_matrix_memcpy (&xHiDHiDHix_gg2.matrix, &xHiDHiDHix_gg1.matrix);
gsl_matrix_memcpy (&xHiDHiDHix_ee2.matrix, &xHiDHiDHix_ee1.matrix);
gsl_matrix_memcpy (&xHiDHiDHix_ge2.matrix, &xHiDHiDHix_ge1.matrix);
@@ -1708,39 +1709,39 @@ void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval, const gsl
}
}
}
-
-
+
+
/*
size_t n_size=eval->size;
double delta, d_Hi_ij;
-
+
gsl_matrix *mat_dcdc=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *mat_dcdc_temp=gsl_matrix_alloc (dc_size, dc_size);
-
+
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get (eval, k);
-
+
for (size_t i1=0; i1<d_size; i1++) {
- for (size_t j2=0; j2<d_size; j2++) {
+ for (size_t j2=0; j2<d_size; j2++) {
gsl_vector_const_view xHi_col_i=gsl_matrix_const_column (xHi, k*d_size+i1);
gsl_vector_const_view xHi_col_j=gsl_matrix_const_column (xHi, k*d_size+j2);
-
+
gsl_matrix_set_zero (mat_dcdc);
- gsl_blas_dger (1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
-
+ gsl_blas_dger (1.0, &xHi_col_i.vector, &xHi_col_j.vector, mat_dcdc);
+
for (size_t j1=0; j1<d_size; j1++) {
for (size_t i2=0; i2<d_size; i2++) {
- d_Hi_ij=gsl_matrix_get (Hi, j1, k*d_size+i2);
-
+ d_Hi_ij=gsl_matrix_get (Hi, j1, k*d_size+i2);
+
v1=GetIndex(i1, j1, d_size);
- v2=GetIndex(i2, j2, d_size);
-
+ v2=GetIndex(i2, j2, d_size);
+
gsl_matrix_view xHiDHiDHix_gg=gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ee=gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ge=gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
-
+
gsl_matrix_memcpy (mat_dcdc_temp, mat_dcdc);
-
+
gsl_matrix_scale (mat_dcdc_temp, d_Hi_ij);
gsl_matrix_add(&xHiDHiDHix_ee.matrix, mat_dcdc_temp);
gsl_matrix_scale(mat_dcdc_temp, delta);
@@ -1752,21 +1753,21 @@ void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval, const gsl
}
}
}
-
+
for (size_t i1=0; i1<d_size; i1++) {
for (size_t j1=0; j1<d_size; j1++) {
v1=GetIndex(i1, j1, d_size);
-
+
for (size_t i2=0; i2<d_size; i2++) {
for (size_t j2=0; j2<d_size; j2++) {
v2=GetIndex(i2, j2, d_size);
-
+
if (i1!=j1 && i2!=j2) {continue;}
-
+
gsl_matrix_view xHiDHiDHix_gg=gsl_matrix_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ee=gsl_matrix_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_view xHiDHiDHix_ge=gsl_matrix_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
-
+
if ( (i1==j1 && i2!=j2) || (i1!=j1 && i2==j2) ) {
gsl_matrix_scale (&xHiDHiDHix_gg.matrix, 0.5);
gsl_matrix_scale (&xHiDHiDHix_ee.matrix, 0.5);
@@ -1780,11 +1781,11 @@ void Calc_xHiDHiDHix_all (const size_t v_size, const gsl_vector *eval, const gsl
}
}
}
-
+
gsl_matrix_free (mat_dcdc);
- gsl_matrix_free (mat_dcdc_temp);
+ gsl_matrix_free (mat_dcdc_temp);
*/
-
+
return;
}
@@ -1795,18 +1796,18 @@ void Calc_xHiDHixQixHiy_all (const gsl_matrix *xHiDHix_all_g, const gsl_matrix *
{
size_t dc_size=xHiDHix_all_g->size1;
size_t v_size=xHiDHix_all_g->size2/dc_size;
-
- for (size_t i=0; i<v_size; i++) {
+
+ for (size_t i=0; i<v_size; i++) {
gsl_matrix_const_view xHiDHix_g=gsl_matrix_const_submatrix (xHiDHix_all_g, 0, i*dc_size, dc_size, dc_size);
gsl_matrix_const_view xHiDHix_e=gsl_matrix_const_submatrix (xHiDHix_all_e, 0, i*dc_size, dc_size, dc_size);
-
+
gsl_vector_view xHiDHixQixHiy_g=gsl_matrix_column (xHiDHixQixHiy_all_g, i);
gsl_vector_view xHiDHixQixHiy_e=gsl_matrix_column (xHiDHixQixHiy_all_e, i);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_g.matrix, QixHiy, 0.0, &xHiDHixQixHiy_g.vector);
gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHix_e.matrix, QixHiy, 0.0, &xHiDHixQixHiy_e.vector);
}
-
+
return;
}
@@ -1816,14 +1817,14 @@ void Calc_QiVec_all (const gsl_matrix *Qi, const gsl_matrix *vec_all_g, const gs
for (size_t i=0; i<vec_all_g->size2; i++) {
gsl_vector_const_view vec_g=gsl_matrix_const_column (vec_all_g, i);
gsl_vector_const_view vec_e=gsl_matrix_const_column (vec_all_e, i);
-
+
gsl_vector_view Qivec_g=gsl_matrix_column (Qivec_all_g, i);
gsl_vector_view Qivec_e=gsl_matrix_column (Qivec_all_e, i);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, &vec_g.vector, 0.0, &Qivec_g.vector);
gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, &vec_e.vector, 0.0, &Qivec_e.vector);
}
-
+
return;
}
@@ -1833,18 +1834,18 @@ void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g, const gs
{
size_t dc_size=Qi->size1;
size_t v_size=mat_all_g->size2/mat_all_g->size1;
-
+
for (size_t i=0; i<v_size; i++) {
gsl_matrix_const_view mat_g=gsl_matrix_const_submatrix (mat_all_g, 0, i*dc_size, dc_size, dc_size);
gsl_matrix_const_view mat_e=gsl_matrix_const_submatrix (mat_all_e, 0, i*dc_size, dc_size, dc_size);
-
+
gsl_matrix_view Qimat_g=gsl_matrix_submatrix (Qimat_all_g, 0, i*dc_size, dc_size, dc_size);
gsl_matrix_view Qimat_e=gsl_matrix_submatrix (Qimat_all_e, 0, i*dc_size, dc_size, dc_size);
-
+
gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_g.matrix, 0.0, &Qimat_g.matrix);
gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, Qi, &mat_e.matrix, 0.0, &Qimat_e.matrix);
}
-
+
return;
}
@@ -1856,28 +1857,28 @@ void Calc_QiMat_all (const gsl_matrix *Qi, const gsl_matrix *mat_all_g, const gs
//-(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
//+(yHix)Qi(xHiDHix)Qi(xtHiy)
void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy, const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e, const gsl_matrix *xHiDHixQixHiy_all_g, const gsl_matrix *xHiDHixQixHiy_all_e, const size_t i, const size_t j, double &yPDPy_g, double &yPDPy_e)
-{
+{
size_t d_size=Hiy->size1;
size_t v=GetIndex(i, j, d_size);
-
- double d;
-
+
+ double d;
+
//first part: ytHiDHiy
Calc_yHiDHiy (eval, Hiy, i, j, yPDPy_g, yPDPy_e);
-
+
//second and third parts: -(yHix)Qi(xHiDHiy)-(yHiDHix)Qi(xHiy)
gsl_vector_const_view xHiDHiy_g=gsl_matrix_const_column (xHiDHiy_all_g, v);
gsl_vector_const_view xHiDHiy_e=gsl_matrix_const_column (xHiDHiy_all_e, v);
-
+
gsl_blas_ddot(QixHiy, &xHiDHiy_g.vector, &d);
yPDPy_g-=d*2.0;
gsl_blas_ddot(QixHiy, &xHiDHiy_e.vector, &d);
- yPDPy_e-=d*2.0;
-
+ yPDPy_e-=d*2.0;
+
//fourth part: +(yHix)Qi(xHiDHix)Qi(xHiy)
gsl_vector_const_view xHiDHixQixHiy_g=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v);
gsl_vector_const_view xHiDHixQixHiy_e=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v);
-
+
gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_g.vector, &d);
yPDPy_g+=d;
gsl_blas_ddot(QixHiy, &xHiDHixQixHiy_e.vector, &d);
@@ -1894,73 +1895,73 @@ void Calc_yPDPy (const gsl_vector *eval, const gsl_matrix *Hiy, const gsl_vector
//+(yHix)Qi(xHiDHiDHix)Qi(xHiy)
//-(yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy, const gsl_vector *QixHiy, const gsl_matrix *xHiDHiy_all_g, const gsl_matrix *xHiDHiy_all_e, const gsl_matrix *QixHiDHiy_all_g, const gsl_matrix *QixHiDHiy_all_e, const gsl_matrix *xHiDHixQixHiy_all_g, const gsl_matrix *xHiDHixQixHiy_all_e, const gsl_matrix *QixHiDHixQixHiy_all_g, const gsl_matrix *QixHiDHixQixHiy_all_e, const gsl_matrix *xHiDHiDHiy_all_gg, const gsl_matrix *xHiDHiDHiy_all_ee, const gsl_matrix *xHiDHiDHiy_all_ge, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t i1, const size_t j1, const size_t i2, const size_t j2, double &yPDPDPy_gg, double &yPDPDPy_ee, double &yPDPDPy_ge)
-{
+{
size_t d_size=Hi->size1, dc_size=xHi->size1;
size_t v1=GetIndex(i1, j1, d_size), v2=GetIndex(i2, j2, d_size);
- size_t v_size=d_size*(d_size+1)/2;
-
+ size_t v_size=d_size*(d_size+1)/2;
+
double d;
-
+
gsl_vector *xHiDHiDHixQixHiy=gsl_vector_alloc (dc_size);
-
+
//first part: yHiDHiDHiy
- Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
-
- //second and third parts: -(yHix)Qi(xHiDHiDHiy)-(yHiDHiDHix)Qi(xHiy)
+ Calc_yHiDHiDHiy (eval, Hi, Hiy, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
+
+ //second and third parts: -(yHix)Qi(xHiDHiDHiy)-(yHiDHiDHix)Qi(xHiy)
gsl_vector_const_view xHiDHiDHiy_gg1=gsl_matrix_const_column (xHiDHiDHiy_all_gg, v1*v_size+v2);
gsl_vector_const_view xHiDHiDHiy_ee1=gsl_matrix_const_column (xHiDHiDHiy_all_ee, v1*v_size+v2);
gsl_vector_const_view xHiDHiDHiy_ge1=gsl_matrix_const_column (xHiDHiDHiy_all_ge, v1*v_size+v2);
-
+
gsl_vector_const_view xHiDHiDHiy_gg2=gsl_matrix_const_column (xHiDHiDHiy_all_gg, v2*v_size+v1);
gsl_vector_const_view xHiDHiDHiy_ee2=gsl_matrix_const_column (xHiDHiDHiy_all_ee, v2*v_size+v1);
gsl_vector_const_view xHiDHiDHiy_ge2=gsl_matrix_const_column (xHiDHiDHiy_all_ge, v2*v_size+v1);
-
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
+
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg1.vector, &d);
yPDPDPy_gg-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee1.vector, &d);
yPDPDPy_ee-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge1.vector, &d);
yPDPDPy_ge-=d;
-
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
+
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_gg2.vector, &d);
yPDPDPy_gg-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ee2.vector, &d);
yPDPDPy_ee-=d;
- gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
+ gsl_blas_ddot(QixHiy, &xHiDHiDHiy_ge2.vector, &d);
yPDPDPy_ge-=d;
-
+
//fourth part: -(yHiDHix)Qi(xHiDHiy)
gsl_vector_const_view xHiDHiy_g1=gsl_matrix_const_column (xHiDHiy_all_g, v1);
gsl_vector_const_view xHiDHiy_e1=gsl_matrix_const_column (xHiDHiy_all_e, v1);
gsl_vector_const_view QixHiDHiy_g2=gsl_matrix_const_column (QixHiDHiy_all_g, v2);
gsl_vector_const_view QixHiDHiy_e2=gsl_matrix_const_column (QixHiDHiy_all_e, v2);
-
+
gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
yPDPDPy_gg-=d;
gsl_blas_ddot(&xHiDHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
yPDPDPy_ee-=d;
gsl_blas_ddot(&xHiDHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
yPDPDPy_ge-=d;
-
+
//fifth and sixth parts: +(yHix)Qi(xHiDHix)Qi(xHiDHiy)+(yHiDHix)Qi(xHiDHix)Qi(xHiy)
gsl_vector_const_view QixHiDHiy_g1=gsl_matrix_const_column (QixHiDHiy_all_g, v1);
gsl_vector_const_view QixHiDHiy_e1=gsl_matrix_const_column (QixHiDHiy_all_e, v1);
-
+
gsl_vector_const_view xHiDHixQixHiy_g1=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v1);
gsl_vector_const_view xHiDHixQixHiy_e1=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v1);
gsl_vector_const_view xHiDHixQixHiy_g2=gsl_matrix_const_column (xHiDHixQixHiy_all_g, v2);
gsl_vector_const_view xHiDHixQixHiy_e2=gsl_matrix_const_column (xHiDHixQixHiy_all_e, v2);
-
+
gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_g2.vector, &d);
yPDPDPy_gg+=d;
gsl_blas_ddot(&xHiDHixQixHiy_g2.vector, &QixHiDHiy_g1.vector, &d);
yPDPDPy_gg+=d;
-
+
gsl_blas_ddot(&xHiDHixQixHiy_e1.vector, &QixHiDHiy_e2.vector, &d);
yPDPDPy_ee+=d;
gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_e1.vector, &d);
yPDPDPy_ee+=d;
-
+
gsl_blas_ddot(&xHiDHixQixHiy_g1.vector, &QixHiDHiy_e2.vector, &d);
yPDPDPy_ge+=d;
gsl_blas_ddot(&xHiDHixQixHiy_e2.vector, &QixHiDHiy_g1.vector, &d);
@@ -1970,7 +1971,7 @@ void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matri
gsl_matrix_const_view xHiDHiDHix_gg=gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_const_view xHiDHiDHix_ee=gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_const_view xHiDHiDHix_ge=gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_gg.matrix, QixHiy, 0.0, xHiDHiDHixQixHiy);
gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
yPDPDPy_gg+=d;
@@ -1980,21 +1981,21 @@ void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matri
gsl_blas_dgemv (CblasNoTrans, 1.0, &xHiDHiDHix_ge.matrix, QixHiy, 0.0, xHiDHiDHixQixHiy);
gsl_blas_ddot(xHiDHiDHixQixHiy, QixHiy, &d);
yPDPDPy_ge+=d;
-
+
//eighth part: -(yHix)Qi(xHiDHix)Qi(xHiDHix)Qi(xHiy)
gsl_vector_const_view QixHiDHixQixHiy_g1=gsl_matrix_const_column (QixHiDHixQixHiy_all_g, v1);
gsl_vector_const_view QixHiDHixQixHiy_e1=gsl_matrix_const_column (QixHiDHixQixHiy_all_e, v1);
-
+
gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_g2.vector, &d);
yPDPDPy_gg-=d;
gsl_blas_ddot(&QixHiDHixQixHiy_e1.vector, &xHiDHixQixHiy_e2.vector, &d);
yPDPDPy_ee-=d;
gsl_blas_ddot(&QixHiDHixQixHiy_g1.vector, &xHiDHixQixHiy_e2.vector, &d);
yPDPDPy_ge-=d;
-
- //free memory
- gsl_vector_free(xHiDHiDHixQixHiy);
-
+
+ //free memory
+ gsl_vector_free(xHiDHiDHixQixHiy);
+
return;
}
@@ -2005,62 +2006,62 @@ void Calc_yPDPDPy (const gsl_vector *eval, const gsl_matrix *Hi, const gsl_matri
void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_matrix *QixHiDHix_all_g, const gsl_matrix *QixHiDHix_all_e, const gsl_matrix *xHiDHiDHix_all_gg, const gsl_matrix *xHiDHiDHix_all_ee, const gsl_matrix *xHiDHiDHix_all_ge, const size_t d_size, double &crt_a, double &crt_b, double &crt_c)
{
crt_a=0.0; crt_b=0.0; crt_c=0.0;
-
+
size_t dc_size=Qi->size1, v_size=Hessian_inv->size1/2;
size_t c_size=dc_size/d_size;
double h_gg, h_ge, h_ee, d, B=0.0, C=0.0, D=0.0;
double trCg1, trCe1, trCg2, trCe2, trB_gg, trB_ge, trB_ee, trCC_gg, trCC_ge, trCC_ee, trD_gg=0.0, trD_ge=0.0, trD_ee=0.0;
-
+
gsl_matrix *QiMQi_g1=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMQi_e1=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMQi_g2=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMQi_e2=gsl_matrix_alloc (dc_size, dc_size);
-
+
gsl_matrix *QiMQisQisi_g1=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *QiMQisQisi_e1=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *QiMQisQisi_g2=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *QiMQisQisi_e2=gsl_matrix_alloc (d_size, d_size);
-
+
gsl_matrix *QiMQiMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMQiMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMQiMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
+
gsl_matrix *QiMMQi_gg=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMMQi_ge=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *QiMMQi_ee=gsl_matrix_alloc (dc_size, dc_size);
-
- gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size);
-
+
+ gsl_matrix *Qi_si=gsl_matrix_alloc (d_size, d_size);
+
gsl_matrix *M_dd=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *M_dcdc=gsl_matrix_alloc (dc_size, dc_size);
-
+
//invert Qi_sub to Qi_si
gsl_matrix *Qi_sub=gsl_matrix_alloc (d_size, d_size);
-
+
gsl_matrix_const_view Qi_s=gsl_matrix_const_submatrix (Qi, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
-
+
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (d_size);
-
+
gsl_matrix_memcpy (Qi_sub, &Qi_s.matrix);
LUDecomp (Qi_sub, pmt, &sig);
LUInvert (Qi_sub, pmt, Qi_si);
-
+
gsl_permutation_free(pmt);
gsl_matrix_free(Qi_sub);
-
+
//calculate correction factors
for (size_t v1=0; v1<v_size; v1++) {
//calculate Qi(xHiDHix)Qi, and subpart of it
gsl_matrix_const_view QiM_g1=gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v1*dc_size, dc_size, dc_size);
gsl_matrix_const_view QiM_e1=gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v1*dc_size, dc_size, dc_size);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, Qi, 0.0, QiMQi_g1);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, Qi, 0.0, QiMQi_e1);
-
+
gsl_matrix_view QiMQi_g1_s=gsl_matrix_submatrix (QiMQi_g1, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMQi_e1_s=gsl_matrix_submatrix (QiMQi_e1, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
-
+
/*
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
@@ -2075,7 +2076,7 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
for (size_t k=0; k<d_size; k++) {
trCg1-=gsl_matrix_get (QiMQisQisi_g1, k, k);
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e1_s.matrix, Qi_si, 0.0, QiMQisQisi_e1);
trCe1=0.0;
for (size_t k=0; k<d_size; k++) {
@@ -2083,64 +2084,64 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
}
/*
cout<<v1<<endl;
- cout<<"trCg1 = "<<trCg1<<", trCe1 = "<<trCe1<<endl;
+ cout<<"trCg1 = "<<trCg1<<", trCe1 = "<<trCe1<<endl;
*/
for (size_t v2=0; v2<v_size; v2++) {
if (v2<v1) {continue;}
-
+
//calculate Qi(xHiDHix)Qi, and subpart of it
gsl_matrix_const_view QiM_g2=gsl_matrix_const_submatrix (QixHiDHix_all_g, 0, v2*dc_size, dc_size, dc_size);
gsl_matrix_const_view QiM_e2=gsl_matrix_const_submatrix (QixHiDHix_all_e, 0, v2*dc_size, dc_size, dc_size);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g2.matrix, Qi, 0.0, QiMQi_g2);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e2.matrix, Qi, 0.0, QiMQi_e2);
-
+
gsl_matrix_view QiMQi_g2_s=gsl_matrix_submatrix (QiMQi_g2, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMQi_e2_s=gsl_matrix_submatrix (QiMQi_e2, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
-
+
//calculate trCg2 and trCe2
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_g2_s.matrix, Qi_si, 0.0, QiMQisQisi_g2);
trCg2=0.0;
for (size_t k=0; k<d_size; k++) {
trCg2-=gsl_matrix_get (QiMQisQisi_g2, k, k);
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQi_e2_s.matrix, Qi_si, 0.0, QiMQisQisi_e2);
trCe2=0.0;
for (size_t k=0; k<d_size; k++) {
trCe2-=gsl_matrix_get (QiMQisQisi_e2, k, k);
}
-
+
//calculate trCC_gg, trCC_ge, trCC_ee
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1, QiMQisQisi_g2, 0.0, M_dd);
trCC_gg=0.0;
for (size_t k=0; k<d_size; k++) {
trCC_gg+=gsl_matrix_get (M_dd, k, k);
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_g1, QiMQisQisi_e2, 0.0, M_dd);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, QiMQisQisi_g2, 1.0, M_dd);
trCC_ge=0.0;
for (size_t k=0; k<d_size; k++) {
trCC_ge+=gsl_matrix_get (M_dd, k, k);
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, QiMQisQisi_e1, QiMQisQisi_e2, 0.0, M_dd);
trCC_ee=0.0;
for (size_t k=0; k<d_size; k++) {
trCC_ee+=gsl_matrix_get (M_dd, k, k);
}
-
- //calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it
+
+ //calculate Qi(xHiDHix)Qi(xHiDHix)Qi, and subpart of it
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_g2, 0.0, QiMQiMQi_gg);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_g1.matrix, QiMQi_e2, 0.0, QiMQiMQi_ge);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_g2, 1.0, QiMQiMQi_ge);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiM_e1.matrix, QiMQi_e2, 0.0, QiMQiMQi_ee);
-
+
gsl_matrix_view QiMQiMQi_gg_s=gsl_matrix_submatrix (QiMQiMQi_gg, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMQiMQi_ge_s=gsl_matrix_submatrix (QiMQiMQi_ge, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMQiMQi_ee_s=gsl_matrix_submatrix (QiMQiMQi_ee, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
-
+
//and part of trB_gg, trB_ge, trB_ee
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
trB_gg=0.0;
@@ -2148,37 +2149,37 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
d=gsl_matrix_get (M_dd, k, k);
trB_gg-=d;
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ge_s.matrix, Qi_si, 0.0, M_dd);
trB_ge=0.0;
for (size_t k=0; k<d_size; k++) {
d=gsl_matrix_get (M_dd, k, k);
trB_ge-=d;
}
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMQiMQi_ee_s.matrix, Qi_si, 0.0, M_dd);
trB_ee=0.0;
for (size_t k=0; k<d_size; k++) {
d=gsl_matrix_get (M_dd, k, k);
trB_ee-=d;
}
-
- //calculate Qi(xHiDHiDHix)Qi, and subpart of it
+
+ //calculate Qi(xHiDHiDHix)Qi, and subpart of it
gsl_matrix_const_view MM_gg=gsl_matrix_const_submatrix (xHiDHiDHix_all_gg, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_const_view MM_ge=gsl_matrix_const_submatrix (xHiDHiDHix_all_ge, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
gsl_matrix_const_view MM_ee=gsl_matrix_const_submatrix (xHiDHiDHix_all_ee, 0, (v1*v_size+v2)*dc_size, dc_size, dc_size);
-
+
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_gg.matrix, 0.0, M_dcdc);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, QiMMQi_gg);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ge.matrix, 0.0, M_dcdc);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, QiMMQi_ge);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, Qi, &MM_ee.matrix, 0.0, M_dcdc);
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, M_dcdc, Qi, 0.0, QiMMQi_ee);
-
+
gsl_matrix_view QiMMQi_gg_s=gsl_matrix_submatrix (QiMMQi_gg, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMMQi_ge_s=gsl_matrix_submatrix (QiMMQi_ge, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
gsl_matrix_view QiMMQi_ee_s=gsl_matrix_submatrix (QiMMQi_ee, (c_size-1)*d_size, (c_size-1)*d_size, d_size, d_size);
-
+
//calculate the other part of trB_gg, trB_ge, trB_ee
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, &QiMMQi_gg_s.matrix, Qi_si, 0.0, M_dd);
for (size_t k=0; k<d_size; k++) {
@@ -2192,28 +2193,28 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
for (size_t k=0; k<d_size; k++) {
trB_ee+=gsl_matrix_get (M_dd, k, k);
}
-
-
+
+
//calculate trD_gg, trD_ge, trD_ee
trD_gg=2.0*trB_gg;
trD_ge=2.0*trB_ge;
trD_ee=2.0*trB_ee;
-
+
//calculate B, C and D
h_gg=-1.0*gsl_matrix_get (Hessian_inv, v1, v2);
h_ge=-1.0*gsl_matrix_get (Hessian_inv, v1, v2+v_size);
h_ee=-1.0*gsl_matrix_get (Hessian_inv, v1+v_size, v2+v_size);
-
+
B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
C+=h_gg*(trCC_gg+0.5*trCg1*trCg2)+h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2)+h_ee*(trCC_ee+0.5*trCe1*trCe2);
D+=h_gg*(trCC_gg+0.5*trD_gg)+h_ge*(trCC_ge+0.5*trD_ge)+h_ee*(trCC_ee+0.5*trD_ee);
-
+
if (v1!=v2) {
B+=h_gg*trB_gg+h_ge*trB_ge+h_ee*trB_ee;
C+=h_gg*(trCC_gg+0.5*trCg1*trCg2)+h_ge*(trCC_ge+0.5*trCg1*trCe2+0.5*trCe1*trCg2)+h_ee*(trCC_ee+0.5*trCe1*trCe2);
D+=h_gg*(trCC_gg+0.5*trD_gg)+h_ge*(trCC_ge+0.5*trD_ge)+h_ee*(trCC_ee+0.5*trD_ee);
}
-
+
/*
cout<<v1<<"\t"<<v2<<endl;
cout<<h_gg<<"\t"<<h_ge<<"\t"<<h_ee<<endl;
@@ -2224,7 +2225,7 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
*/
}
}
-
+
//calculate a, b, c from B C D
crt_a=2.0*D-C;
crt_b=2.0*B;
@@ -2238,25 +2239,25 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
gsl_matrix_free(QiMQi_e1);
gsl_matrix_free(QiMQi_g2);
gsl_matrix_free(QiMQi_e2);
-
+
gsl_matrix_free(QiMQisQisi_g1);
gsl_matrix_free(QiMQisQisi_e1);
gsl_matrix_free(QiMQisQisi_g2);
gsl_matrix_free(QiMQisQisi_e2);
-
+
gsl_matrix_free(QiMQiMQi_gg);
gsl_matrix_free(QiMQiMQi_ge);
gsl_matrix_free(QiMQiMQi_ee);
-
+
gsl_matrix_free(QiMMQi_gg);
gsl_matrix_free(QiMMQi_ge);
gsl_matrix_free(QiMMQi_ee);
-
+
gsl_matrix_free(Qi_si);
-
+
gsl_matrix_free(M_dd);
gsl_matrix_free(M_dcdc);
-
+
return;
}
@@ -2266,7 +2267,7 @@ void CalcCRT (const gsl_matrix *Hessian_inv, const gsl_matrix *Qi, const gsl_mat
//calculate first-order and second-order derivatives
void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi, const gsl_matrix *Hi, const gsl_matrix *xHi, const gsl_matrix *Hiy, const gsl_vector *QixHiy, gsl_vector *gradient, gsl_matrix *Hessian_inv, double &crt_a, double &crt_b, double &crt_c)
-{
+{
if (func_name!='R' && func_name!='L' && func_name!='r' && func_name!='l') {cout<<"func_name only takes 'R' or 'L': 'R' for log-restricted likelihood, 'L' for log-likelihood."<<endl; return;}
size_t dc_size=Qi->size1, d_size=Hi->size1;
@@ -2276,73 +2277,73 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
double dev1_g, dev1_e, dev2_gg, dev2_ee, dev2_ge;
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
+
gsl_matrix *xHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *xHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *xHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
+ gsl_matrix *xHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
gsl_matrix *xHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *xHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
+
gsl_matrix *QixHiDHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *QixHiDHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *QixHiDHix_all_g=gsl_matrix_alloc (dc_size, v_size*dc_size);
- gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
+ gsl_matrix *QixHiDHix_all_e=gsl_matrix_alloc (dc_size, v_size*dc_size);
gsl_matrix *QixHiDHixQixHiy_all_g=gsl_matrix_alloc (dc_size, v_size);
gsl_matrix *QixHiDHixQixHiy_all_e=gsl_matrix_alloc (dc_size, v_size);
-
+
gsl_matrix *xHiDHiDHiy_all_gg=gsl_matrix_alloc (dc_size, v_size*v_size);
gsl_matrix *xHiDHiDHiy_all_ee=gsl_matrix_alloc (dc_size, v_size*v_size);
gsl_matrix *xHiDHiDHiy_all_ge=gsl_matrix_alloc (dc_size, v_size*v_size);
gsl_matrix *xHiDHiDHix_all_gg=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
gsl_matrix *xHiDHiDHix_all_ee=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
gsl_matrix *xHiDHiDHix_all_ge=gsl_matrix_alloc (dc_size, v_size*v_size*dc_size);
-
+
//calculate xHiDHiy_all, xHiDHix_all and xHiDHixQixHiy_all
- Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
+ Calc_xHiDHiy_all (eval, xHi, Hiy, xHiDHiy_all_g, xHiDHiy_all_e);
Calc_xHiDHix_all (eval, xHi, xHiDHix_all_g, xHiDHix_all_e);
Calc_xHiDHixQixHiy_all (xHiDHix_all_g, xHiDHix_all_e, QixHiy, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e);
-
+
Calc_xHiDHiDHiy_all (v_size, eval, Hi, xHi, Hiy, xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge);
Calc_xHiDHiDHix_all (v_size, eval, Hi, xHi, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge);
-
+
//calculate QixHiDHiy_all, QixHiDHix_all and QixHiDHixQixHiy_all
Calc_QiVec_all (Qi, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e);
Calc_QiVec_all (Qi, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e);
Calc_QiMat_all (Qi, xHiDHix_all_g, xHiDHix_all_e, QixHiDHix_all_g, QixHiDHix_all_e);
-
+
double tHiD_g, tHiD_e, tPD_g, tPD_e, tHiDHiD_gg, tHiDHiD_ee, tHiDHiD_ge, tPDPD_gg, tPDPD_ee, tPDPD_ge;
double yPDPy_g, yPDPy_e, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge;
- //calculate gradient and Hessian for Vg
+ //calculate gradient and Hessian for Vg
for (size_t i1=0; i1<d_size; i1++) {
for (size_t j1=0; j1<d_size; j1++) {
if (j1<i1) {continue;}
v1=GetIndex (i1, j1, d_size);
Calc_yPDPy (eval, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, i1, j1, yPDPy_g, yPDPy_e);
-
- if (func_name=='R' || func_name=='r') {
- Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g, tPD_e);
+
+ if (func_name=='R' || func_name=='r') {
+ Calc_tracePD (eval, Qi, Hi, xHiDHix_all_g, xHiDHix_all_e, i1, j1, tPD_g, tPD_e);
//cout<<i1<<" "<<j1<<" "<<yPDPy_g<<" "<<yPDPy_e<<" "<<tPD_g<<" "<<tPD_e<<endl;
-
+
dev1_g=-0.5*tPD_g+0.5*yPDPy_g;
dev1_e=-0.5*tPD_e+0.5*yPDPy_e;
} else {
Calc_traceHiD (eval, Hi, i1, j1, tHiD_g, tHiD_e);
-
+
dev1_g=-0.5*tHiD_g+0.5*yPDPy_g;
dev1_e=-0.5*tHiD_e+0.5*yPDPy_e;
}
gsl_vector_set (gradient, v1, dev1_g);
gsl_vector_set (gradient, v1+v_size, dev1_e);
-
+
for (size_t i2=0; i2<d_size; i2++) {
for (size_t j2=0; j2<d_size; j2++) {
if (j2<i2) {continue;}
v2=GetIndex (i2, j2, d_size);
-
+
if (v2<v1) {continue;}
Calc_yPDPDPy (eval, Hi, xHi, Hiy, QixHiy, xHiDHiy_all_g, xHiDHiy_all_e, QixHiDHiy_all_g, QixHiDHiy_all_e, xHiDHixQixHiy_all_g, xHiDHixQixHiy_all_e, QixHiDHixQixHiy_all_g, QixHiDHixQixHiy_all_e, xHiDHiDHiy_all_gg, xHiDHiDHiy_all_ee, xHiDHiDHiy_all_ge, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2, yPDPDPy_gg, yPDPDPy_ee, yPDPDPy_ge);
@@ -2351,21 +2352,21 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
//AI for reml
if (func_name=='R' || func_name=='r') {
Calc_tracePDPD (eval, Qi, Hi, xHi, QixHiDHix_all_g, QixHiDHix_all_e, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, i1, j1, i2, j2, tPDPD_gg, tPDPD_ee, tPDPD_ge);
-
- dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg;
- dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee;
- dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge;
+
+ dev2_gg=0.5*tPDPD_gg-yPDPDPy_gg;
+ dev2_ee=0.5*tPDPD_ee-yPDPDPy_ee;
+ dev2_ge=0.5*tPDPD_ge-yPDPDPy_ge;
/*
- dev2_gg=-0.5*yPDPDPy_gg;
- dev2_ee=-0.5*yPDPDPy_ee;
- dev2_ge=-0.5*yPDPDPy_ge;
+ dev2_gg=-0.5*yPDPDPy_gg;
+ dev2_ee=-0.5*yPDPDPy_ee;
+ dev2_ge=-0.5*yPDPDPy_ge;
*/
} else {
Calc_traceHiDHiD (eval, Hi, i1, j1, i2, j2, tHiDHiD_gg, tHiDHiD_ee, tHiDHiD_ge);
-
- dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg;
- dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee;
- dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge;
+
+ dev2_gg=0.5*tHiDHiD_gg-yPDPDPy_gg;
+ dev2_ee=0.5*tHiDHiD_ee-yPDPDPy_ee;
+ dev2_ge=0.5*tHiDHiD_ge-yPDPDPy_ge;
}
//set up Hessian
@@ -2373,7 +2374,7 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
gsl_matrix_set (Hessian, v1+v_size, v2+v_size, dev2_ee);
gsl_matrix_set (Hessian, v1, v2+v_size, dev2_ge);
gsl_matrix_set (Hessian, v2+v_size, v1, dev2_ge);
-
+
if (v1!=v2) {
gsl_matrix_set (Hessian, v2, v1, dev2_gg);
gsl_matrix_set (Hessian, v2+v_size, v1+v_size, dev2_ee);
@@ -2384,7 +2385,7 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
}
}
}
-
+
/*
cout<<"Hessian: "<<endl;
for (size_t i=0; i<2*v_size; i++) {
@@ -2394,12 +2395,12 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
cout<<endl;
}
*/
-
-
+
+
//Invert Hessian
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (v_size*2);
-
+
LUDecomp (Hessian, pmt, &sig);
LUInvert (Hessian, pmt, Hessian_inv);
/*
@@ -2411,38 +2412,38 @@ void CalcDev (const char func_name, const gsl_vector *eval, const gsl_matrix *Qi
cout<<endl;
}
*/
- gsl_permutation_free(pmt);
+ gsl_permutation_free(pmt);
gsl_matrix_free(Hessian);
-
+
//calculate Edgeworth correction factors
//after inverting Hessian
if (c_size>1) {
CalcCRT (Hessian_inv, Qi, QixHiDHix_all_g, QixHiDHix_all_e, xHiDHiDHix_all_gg, xHiDHiDHix_all_ee, xHiDHiDHix_all_ge, d_size, crt_a, crt_b, crt_c);
} else {
- crt_a=0.0; crt_b=0.0; crt_c=0.0;
- }
-
+ crt_a=0.0; crt_b=0.0; crt_c=0.0;
+ }
+
gsl_matrix_free(xHiDHiy_all_g);
gsl_matrix_free(xHiDHiy_all_e);
gsl_matrix_free(xHiDHix_all_g);
- gsl_matrix_free(xHiDHix_all_e);
+ gsl_matrix_free(xHiDHix_all_e);
gsl_matrix_free(xHiDHixQixHiy_all_g);
gsl_matrix_free(xHiDHixQixHiy_all_e);
-
+
gsl_matrix_free(QixHiDHiy_all_g);
gsl_matrix_free(QixHiDHiy_all_e);
gsl_matrix_free(QixHiDHix_all_g);
- gsl_matrix_free(QixHiDHix_all_e);
+ gsl_matrix_free(QixHiDHix_all_e);
gsl_matrix_free(QixHiDHixQixHiy_all_g);
gsl_matrix_free(QixHiDHixQixHiy_all_e);
-
+
gsl_matrix_free(xHiDHiDHiy_all_gg);
gsl_matrix_free(xHiDHiDHiy_all_ee);
gsl_matrix_free(xHiDHiDHiy_all_ge);
gsl_matrix_free(xHiDHiDHix_all_gg);
gsl_matrix_free(xHiDHiDHix_all_ee);
gsl_matrix_free(xHiDHiDHix_all_ge);
-
+
return;
}
@@ -2452,25 +2453,25 @@ void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient, cons
{
size_t v_size=gradient->size/2, d_size=V_g->size1;
size_t v;
-
+
gsl_vector *vec_v=gsl_vector_alloc (v_size*2);
-
+
double d;
-
+
//vectorize Vg and Ve
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
if (j<i) {continue;}
v=GetIndex(i, j, d_size);
-
+
d=gsl_matrix_get (V_g, i, j);
gsl_vector_set (vec_v, v, d);
-
+
d=gsl_matrix_get (V_e, i, j);
gsl_vector_set (vec_v, v+v_size, d);
}
- }
-
+ }
+
gsl_blas_dgemv (CblasNoTrans, -1.0*step_scale, Hessian_inv, gradient, 1.0, vec_v);
//save Vg and Ve
@@ -2478,19 +2479,19 @@ void UpdateVgVe (const gsl_matrix *Hessian_inv, const gsl_vector *gradient, cons
for (size_t j=0; j<d_size; j++) {
if (j<i) {continue;}
v=GetIndex(i, j, d_size);
-
+
d=gsl_vector_get (vec_v, v);
gsl_matrix_set (V_g, i, j, d);
gsl_matrix_set (V_g, j, i, d);
-
+
d=gsl_vector_get (vec_v, v+v_size);
gsl_matrix_set (V_e, i, j, d);
gsl_matrix_set (V_e, j, i, d);
}
- }
-
+ }
+
gsl_vector_free(vec_v);
-
+
return;
}
@@ -2505,23 +2506,23 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
size_t dc_size=d_size*c_size;
size_t v_size=d_size*(d_size+1)/2;
-
+
double logdet_H, logdet_Q, yPy, logl_const, logl_old=0.0, logl_new=0.0, step_scale;
int sig;
size_t step_iter, flag_pd;
-
+
gsl_matrix *Vg_save=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *Ve_save=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_temp=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *U_temp=gsl_matrix_alloc (d_size, d_size);
gsl_vector *D_temp=gsl_vector_alloc (d_size);
gsl_vector *xHiy=gsl_vector_alloc (dc_size);
- gsl_vector *QixHiy=gsl_vector_alloc (dc_size);
+ gsl_vector *QixHiy=gsl_vector_alloc (dc_size);
gsl_matrix *Qi=gsl_matrix_alloc (dc_size, dc_size);
gsl_matrix *XXt=gsl_matrix_alloc (c_size, c_size);
-
- gsl_vector *gradient=gsl_vector_alloc (v_size*2);
-
+
+ gsl_vector *gradient=gsl_vector_alloc (v_size*2);
+
//calculate |XXt| and (XXt)^{-1}
gsl_blas_dsyrk (CblasUpper, CblasNoTrans, 1.0, X, 0.0, XXt);
for (size_t i=0; i<c_size; ++i) {
@@ -2533,17 +2534,17 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
gsl_permutation * pmt=gsl_permutation_alloc (c_size);
LUDecomp (XXt, pmt, &sig);
gsl_permutation_free (pmt);
-// LUInvert (XXt, pmt, XXti);
-
- //calculate the constant for logl
- if (func_name=='R' || func_name=='r') {
+// LUInvert (XXt, pmt, XXti);
+
+ //calculate the constant for logl
+ if (func_name=='R' || func_name=='r') {
logl_const=-0.5*(double)(n_size-c_size)*(double)d_size*log(2.0*M_PI)+0.5*(double)d_size*LULndet (XXt);
} else {
logl_const=-0.5*(double)n_size*(double)d_size*log(2.0*M_PI);
}
//optimization iterations
-
- for (size_t t=0; t<max_iter; t++) {
+
+ for (size_t t=0; t<max_iter; t++) {
gsl_matrix_memcpy (Vg_save, V_g);
gsl_matrix_memcpy (Ve_save, V_e);
@@ -2551,10 +2552,10 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
do {
gsl_matrix_memcpy (V_g, Vg_save);
gsl_matrix_memcpy (V_e, Ve_save);
-
+
//update Vg, Ve, and invert Hessian
if (t!=0) {UpdateVgVe (Hessian_inv, gradient, step_scale, V_g, V_e);}
-
+
//check if both Vg and Ve are positive definite
flag_pd=1;
gsl_matrix_memcpy (V_temp, V_e);
@@ -2565,33 +2566,33 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
gsl_matrix_memcpy (V_temp, V_g);
EigenDecomp(V_temp, U_temp, D_temp, 0);
for (size_t i=0; i<d_size; i++) {
- if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
+ if (gsl_vector_get (D_temp, i)<=0) {flag_pd=0;}
}
//if flag_pd==1 continue to calculate quantities and logl
- if (flag_pd==1) {
+ if (flag_pd==1) {
CalcHiQi (eval, X, V_g, V_e, Hi_all, Qi, logdet_H, logdet_Q);
Calc_Hiy_all (Y, Hi_all, Hiy_all);
Calc_xHi_all (X, Hi_all, xHi_all);
-
+
//calculate QixHiy and yPy
Calc_xHiy (Y, xHi_all, xHiy);
gsl_blas_dgemv (CblasNoTrans, 1.0, Qi, xHiy, 0.0, QixHiy);
-
+
gsl_blas_ddot (QixHiy, xHiy, &yPy);
yPy=Calc_yHiy (Y, Hiy_all)-yPy;
-
+
//calculate log likelihood/restricted likelihood value
- if (func_name=='R' || func_name=='r') {
+ if (func_name=='R' || func_name=='r') {
logl_new=logl_const-0.5*logdet_H-0.5*logdet_Q-0.5*yPy;
} else {
logl_new=logl_const-0.5*logdet_H-0.5*yPy;
- }
+ }
}
- step_scale/=2.0;
+ step_scale/=2.0;
step_iter++;
-
+
//cout<<t<<"\t"<<step_iter<<"\t"<<logl_old<<"\t"<<logl_new<<"\t"<<flag_pd<<endl;
} while ( (flag_pd==0 || logl_new<logl_old || logl_new-logl_old>10 ) && step_iter<10 && t!=0);
@@ -2602,21 +2603,21 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
gsl_matrix_memcpy (V_e, Ve_save);
break;
}
-
+
if (logl_new-logl_old<max_prec) {
break;
}
}
logl_old=logl_new;
-
+
CalcDev (func_name, eval, Qi, Hi_all, xHi_all, Hiy_all, QixHiy, gradient, Hessian_inv, crt_a, crt_b, crt_c);
-
-
+
+
//output estimates in each iteration
/*
cout<<func_name<<" iteration = "<<t<<" log-likelihood = "<<logl_old<<"\t"<<logl_new<<endl;
-
+
cout<<"Vg: "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<d_size; j++) {
@@ -2640,24 +2641,24 @@ double MphNR (const char func_name, const size_t max_iter, const double max_prec
}
*/
}
-
+
//multiply Hessian_inv by -1.0
//now Hessian_inv is the variance matrix
gsl_matrix_scale (Hessian_inv, -1.0);
-
+
gsl_matrix_free(Vg_save);
gsl_matrix_free(Ve_save);
gsl_matrix_free(V_temp);
gsl_matrix_free(U_temp);
gsl_vector_free(D_temp);
gsl_vector_free(xHiy);
- gsl_vector_free(QixHiy);
-
+ gsl_vector_free(QixHiy);
+
gsl_matrix_free(Qi);
gsl_matrix_free(XXt);
-
+
gsl_vector_free(gradient);
-
+
return logl_new;
}
@@ -2671,23 +2672,23 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix_set_zero (V_g);
gsl_matrix_set_zero (V_e);
gsl_matrix_set_zero (B);
-
- size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
+
+ size_t n_size=eval->size, c_size=X->size1, d_size=Y->size1;
double a, b, c;
double lambda, logl, vg, ve;
-
+
//Initialize the diagonal elements of Vg and Ve using univariate LMM and REML estimates
- gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size);
+ gsl_matrix *Xt=gsl_matrix_alloc (n_size, c_size);
gsl_vector *beta_temp=gsl_vector_alloc(c_size);
gsl_vector *se_beta_temp=gsl_vector_alloc(c_size);
-
- gsl_matrix_transpose_memcpy (Xt, X);
-
+
+ gsl_matrix_transpose_memcpy (Xt, X);
+
for (size_t i=0; i<d_size; i++) {
gsl_vector_const_view Y_row=gsl_matrix_const_row (Y, i);
CalcLambda ('R', eval, Xt, &Y_row.vector, l_min, l_max, n_region, lambda, logl);
CalcLmmVgVeBeta (eval, Xt, &Y_row.vector, lambda, vg, ve, beta_temp, se_beta_temp);
-
+
gsl_matrix_set(V_g, i, i, vg);
gsl_matrix_set(V_e, i, i, ve);
}
@@ -2695,7 +2696,7 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix_free (Xt);
gsl_vector_free (beta_temp);
gsl_vector_free (se_beta_temp);
-
+
//if number of phenotypes is above four, then obtain the off diagonal elements with two trait models
if (d_size>4) {
//first obtain good initial values
@@ -2707,48 +2708,48 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix *UltVehiY=gsl_matrix_alloc (2, n_size);
gsl_matrix *UltVehiBX=gsl_matrix_alloc (2, n_size);
gsl_matrix *UltVehiU=gsl_matrix_alloc (2, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size);
-
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (2, n_size);
+
//large matrices for NR
gsl_matrix *Hi_all=gsl_matrix_alloc (2, 2*n_size); //each dxd block is H_k^{-1}
gsl_matrix *Hiy_all=gsl_matrix_alloc (2, n_size); //each column is H_k^{-1}y_k
gsl_matrix *xHi_all=gsl_matrix_alloc (2*c_size, 2*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
gsl_matrix *Hessian=gsl_matrix_alloc (6, 6);
-
+
//2 by n matrix of Y
gsl_matrix *Y_sub=gsl_matrix_alloc (2, n_size);
gsl_matrix *Vg_sub=gsl_matrix_alloc (2, 2);
gsl_matrix *Ve_sub=gsl_matrix_alloc (2, 2);
gsl_matrix *B_sub=gsl_matrix_alloc (2, c_size);
-
+
for (size_t i=0; i<d_size; i++) {
gsl_vector_view Y_sub1=gsl_matrix_row (Y_sub, 0);
gsl_vector_const_view Y_1=gsl_matrix_const_row (Y, i);
gsl_vector_memcpy (&Y_sub1.vector, &Y_1.vector);
-
+
for (size_t j=i+1; j<d_size; j++) {
gsl_vector_view Y_sub2=gsl_matrix_row (Y_sub, 1);
gsl_vector_const_view Y_2=gsl_matrix_const_row (Y, j);
gsl_vector_memcpy (&Y_sub2.vector, &Y_2.vector);
-
+
gsl_matrix_set_zero (Vg_sub);
gsl_matrix_set_zero (Ve_sub);
gsl_matrix_set (Vg_sub, 0, 0, gsl_matrix_get (V_g, i, i));
gsl_matrix_set (Ve_sub, 0, 0, gsl_matrix_get (V_e, i, i));
gsl_matrix_set (Vg_sub, 1, 1, gsl_matrix_get (V_g, j, j));
gsl_matrix_set (Ve_sub, 1, 1, gsl_matrix_get (V_e, j, j));
-
- logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
+
+ logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, a, b, c);
-
+
gsl_matrix_set(V_g, i, j, gsl_matrix_get (Vg_sub, 0, 1));
gsl_matrix_set(V_g, j, i, gsl_matrix_get (Vg_sub, 0, 1));
-
+
gsl_matrix_set(V_e, i, j, ve=gsl_matrix_get (Ve_sub, 0, 1));
gsl_matrix_set(V_e, j, i, ve=gsl_matrix_get (Ve_sub, 0, 1));
}
}
-
+
//free matrices
gsl_matrix_free(U_hat);
gsl_matrix_free(E_hat);
@@ -2757,21 +2758,21 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix_free(UltVehiY);
gsl_matrix_free(UltVehiBX);
gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
+ gsl_matrix_free(UltVehiE);
+
gsl_matrix_free(Hi_all);
gsl_matrix_free(Hiy_all);
gsl_matrix_free(xHi_all);
gsl_matrix_free(Hessian);
-
+
gsl_matrix_free(Y_sub);
gsl_matrix_free(Vg_sub);
gsl_matrix_free(Ve_sub);
gsl_matrix_free(B_sub);
-
+
/*
//second, maximize an increasingly large matrix
- for (size_t i=1; i<d_size; i++) {
+ for (size_t i=1; i<d_size; i++) {
//large matrices for EM
gsl_matrix *U_hat=gsl_matrix_alloc (i+1, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (i+1, n_size);
@@ -2780,34 +2781,34 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix *UltVehiY=gsl_matrix_alloc (i+1, n_size);
gsl_matrix *UltVehiBX=gsl_matrix_alloc (i+1, n_size);
gsl_matrix *UltVehiU=gsl_matrix_alloc (i+1, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (i+1, n_size);
-
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (i+1, n_size);
+
//large matrices for NR
gsl_matrix *Hi_all=gsl_matrix_alloc (i+1, (i+1)*n_size); //each dxd block is H_k^{-1}
gsl_matrix *Hiy_all=gsl_matrix_alloc (i+1, n_size); //each column is H_k^{-1}y_k
gsl_matrix *xHi_all=gsl_matrix_alloc ((i+1)*c_size, (i+1)*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
gsl_matrix *Hessian=gsl_matrix_alloc ((i+1)*(i+2), (i+1)*(i+2));
-
+
//(i+1) by n matrix of Y
gsl_matrix *Y_sub=gsl_matrix_alloc (i+1, n_size);
gsl_matrix *Vg_sub=gsl_matrix_alloc (i+1, i+1);
gsl_matrix *Ve_sub=gsl_matrix_alloc (i+1, i+1);
gsl_matrix *B_sub=gsl_matrix_alloc (i+1, c_size);
-
+
gsl_matrix_const_view Y_sub_view=gsl_matrix_const_submatrix (Y, 0, 0, i+1, n_size);
gsl_matrix_view Vg_sub_view=gsl_matrix_submatrix (V_g, 0, 0, i+1, i+1);
gsl_matrix_view Ve_sub_view=gsl_matrix_submatrix (V_e, 0, 0, i+1, i+1);
-
+
gsl_matrix_memcpy (Y_sub, &Y_sub_view.matrix);
gsl_matrix_memcpy (Vg_sub, &Vg_sub_view.matrix);
gsl_matrix_memcpy (Ve_sub, &Ve_sub_view.matrix);
-
- logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
+
+ logl=MphEM ('R', em_iter, em_prec, eval, X, Y_sub, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, Vg_sub, Ve_sub, B_sub);
logl=MphNR ('R', nr_iter, nr_prec, eval, X, Y_sub, Hi_all, xHi_all, Hiy_all, Vg_sub, Ve_sub, Hessian, crt_a, crt_b, crt_c);
-
+
gsl_matrix_memcpy (&Vg_sub_view.matrix, Vg_sub);
gsl_matrix_memcpy (&Ve_sub_view.matrix, Ve_sub);
-
+
//free matrices
gsl_matrix_free(U_hat);
gsl_matrix_free(E_hat);
@@ -2816,13 +2817,13 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
gsl_matrix_free(UltVehiY);
gsl_matrix_free(UltVehiBX);
gsl_matrix_free(UltVehiU);
- gsl_matrix_free(UltVehiE);
-
+ gsl_matrix_free(UltVehiE);
+
gsl_matrix_free(Hi_all);
gsl_matrix_free(Hiy_all);
gsl_matrix_free(xHi_all);
gsl_matrix_free(Hessian);
-
+
gsl_matrix_free(Y_sub);
gsl_matrix_free(Vg_sub);
gsl_matrix_free(Ve_sub);
@@ -2830,42 +2831,42 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
}
*/
}
-
+
//calculate B hat using GLS estimate
gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
-
+
gsl_vector *D_l=gsl_vector_alloc (d_size);
gsl_matrix *UltVeh=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *UltVehi=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *Qi=gsl_matrix_alloc (d_size*c_size, d_size*c_size);
gsl_vector *XHiy=gsl_vector_alloc (d_size*c_size);
gsl_vector *beta=gsl_vector_alloc (d_size*c_size);
-
+
gsl_vector_set_zero (XHiy);
-
+
double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
-
+
//eigen decomposition and calculate log|Ve|
- logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
-
+ logdet_Ve=EigenProc (V_g, V_e, D_l, UltVeh, UltVehi);
+
//calculate Qi and log|Q|
- logdet_Q=CalcQi (eval, D_l, X, Qi);
-
+ logdet_Q=CalcQi (eval, D_l, X, Qi);
+
//calculate UltVehiY
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
//calculate XHiy
for (size_t i=0; i<d_size; i++) {
dl=gsl_vector_get(D_l, i);
-
- for (size_t j=0; j<c_size; j++) {
+
+ for (size_t j=0; j<c_size; j++) {
d=0.0;
for (size_t k=0; k<n_size; k++) {
delta=gsl_vector_get(eval, k);
dx=gsl_matrix_get(X, j, k);
dy=gsl_matrix_get(UltVehiY, i, k);
-
- //if (delta==0) {continue;}
+
+ //if (delta==0) {continue;}
d+=dy*dx/(delta*dl+1.0);
}
gsl_vector_set(XHiy, j*d_size+i, d);
@@ -2877,20 +2878,20 @@ void MphInitial(const size_t em_iter, const double em_prec, const size_t nr_iter
//multiply beta by UltVeh and save to B
for (size_t i=0; i<c_size; i++) {
gsl_vector_view B_col=gsl_matrix_column (B, i);
- gsl_vector_view beta_sub=gsl_vector_subvector (beta, i*d_size, d_size);
+ gsl_vector_view beta_sub=gsl_vector_subvector (beta, i*d_size, d_size);
gsl_blas_dgemv(CblasTrans, 1.0, UltVeh, &beta_sub.vector, 0.0, &B_col.vector);
}
//free memory
gsl_matrix_free(UltVehiY);
-
+
gsl_vector_free(D_l);
gsl_matrix_free(UltVeh);
gsl_matrix_free(UltVehi);
gsl_matrix_free(Qi);
gsl_vector_free(XHiy);
gsl_vector_free(beta);
-
+
return;
}
@@ -2902,33 +2903,511 @@ double PCRT (const size_t mode, const size_t d_size, const double p_value, const
{
double p_crt=0.0, chisq_crt=0.0, q=(double)d_size;
double chisq=gsl_cdf_chisq_Qinv(p_value, (double)d_size );
-
- if (mode==1) {
+
+ if (mode==1) {
double a=crt_c/(2.0*q*(q+2.0));
- double b=1.0+(crt_a+crt_b)/(2.0*q);
- chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
+ double b=1.0+(crt_a+crt_b)/(2.0*q);
+ chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
} else if (mode==2) {
- chisq_crt=chisq/(1.0+crt_a/(2.0*q) );
+ chisq_crt=chisq/(1.0+crt_a/(2.0*q) );
} else {
/*
double a=-1.0*crt_c/(2.0*q*(q+2.0));
- double b=1.0+(crt_a-crt_b)/(2.0*q);
+ double b=1.0+(crt_a-crt_b)/(2.0*q);
chisq_crt=(-1.0*b+sqrt(b*b+4.0*a*chisq))/(2.0*a);
*/
chisq_crt=chisq;
}
-
- p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size );
-
+
+ p_crt=gsl_cdf_chisq_Q (chisq_crt, (double)d_size );
+
//cout<<crt_a<<"\t"<<crt_b<<"\t"<<crt_c<<endl;
//cout<<setprecision(10)<<p_value<<"\t"<<p_crt<<endl;
-
+
return p_crt;
}
+// WJA added: fits the null model (REMLE, then MLE), then runs the per-SNP multivariate LMM tests on genotypes read from file_oxford+".bgen"; one MPHSUMSTAT per analyzed SNP is appended to sumStat.
+#include <assert.h>
+void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
+{
+ string file_bgen=file_oxford+".bgen";
+ ifstream infile (file_bgen.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
+
+
+ clock_t time_start=clock();
+ time_UtX=0; time_opt=0;
+
+ // (the unused local 'string line' was removed; this reader never parses text lines)
+
+ // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
+ double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
+ double crt_a, crt_b, crt_c;
+ int n_miss, c_phen;
+ double geno, x_mean;
+ size_t c=0;
+ // double s=0.0;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
+
+ size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
+
+ //large matrices for EM
+ gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
+ //large matrices for NR
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each (dc_size x d_size) block is x_k\otimes H_k^{-1}
+ gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
+
+ gsl_vector *x=gsl_vector_alloc (n_size); //genotype vector of the current SNP
+ gsl_vector *x_miss=gsl_vector_alloc (n_size); //1.0 where the genotype is used, 0.0 where it is treated as missing
+
+ gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size); //covariate rows plus one extra row for the SNP
+ gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_vector *beta=gsl_vector_alloc (d_size);
+ gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
+
+ //null estimates for initial values
+ gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
+
+ gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size); //covariate-only views used for the null model
+ gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+
+ gsl_matrix_transpose_memcpy (Y, UtY);
+
+ gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
+
+ gsl_vector_view X_row=gsl_matrix_row(X, c_size); //last row of X: filled with U^T x for each SNP
+ gsl_vector_set_zero(&X_row.vector);
+ gsl_vector_view B_col=gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col.vector);
+
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
+ //store the REMLE null estimates in the class members
+ c=0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) ); //NOTE(review): VVg/VVe vectors are appended to without a prior clear() - confirm intended
+ VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i=0; i<se_B_null->size1; i++) {
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
+ }
+ logl_remle_H0=logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+
+ cout<<"REMLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE likelihood = "<<logl_H0<<endl;
+
+ //repeat the null fit with mode 'L' and store it as the MLE null estimates
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
+
+ c=0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i=0; i<se_B_null->size1; i++) {
+ for (size_t j=0; j<se_B_null->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j) );
+ }
+ }
+ logl_mle_H0=logl_H0;
+
+ cout<<"MLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE likelihood = "<<logl_H0<<endl;
+
+ //per-SNP result buffers: sized once here, overwritten for every SNP
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i=0; i<d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+ //keep the null estimates; they re-seed V_g, V_e and B before each SNP is tested
+ gsl_matrix_memcpy (V_g_null, V_g);
+ gsl_matrix_memcpy (V_e_null, V_e);
+ gsl_matrix_memcpy (B_null, B);
+
+ // read in header
+ uint32_t bgen_snp_block_offset;
+ uint32_t bgen_header_length;
+ uint32_t bgen_nsamples;
+ uint32_t bgen_nsnps;
+ uint32_t bgen_flags;
+ infile.read(reinterpret_cast<char*>(&bgen_snp_block_offset),4);
+ infile.read(reinterpret_cast<char*>(&bgen_header_length),4);
+ bgen_snp_block_offset-=4; //from here on the offset counts the bytes still to skip before the first SNP block
+ infile.read(reinterpret_cast<char*>(&bgen_nsnps),4);
+ bgen_snp_block_offset-=4;
+ infile.read(reinterpret_cast<char*>(&bgen_nsamples),4);
+ bgen_snp_block_offset-=4;
+ infile.ignore(4+bgen_header_length-20); //skip the header bytes ahead of the 4-byte flags field (presumably magic number + free data - confirm against the BGEN spec)
+ bgen_snp_block_offset-=4+bgen_header_length-20;
+ infile.read(reinterpret_cast<char*>(&bgen_flags),4);
+ bgen_snp_block_offset-=4;
+ bool CompressedSNPBlocks=bgen_flags&0x1; //bit 0 selects the zlib-compressed genotype block path below
+// bool LongIds=bgen_flags&0x4;
+
+ infile.ignore(bgen_snp_block_offset);
+
+ double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB, bgen_geno_prob_non_miss;
+
+ uint32_t bgen_N=0; //length fields start at 0 so a failed first read cannot drive a garbage-sized resize
+ uint16_t bgen_LS=0;
+ uint16_t bgen_LR=0;
+ uint16_t bgen_LC=0;
+ uint32_t bgen_SNP_pos=0;
+ uint32_t bgen_LA=0;
+ std::string bgen_A_allele;
+ uint32_t bgen_LB=0;
+ std::string bgen_B_allele;
+ uint32_t bgen_P=0;
+ uLongf unzipped_data_size=0; //zlib's own length type, so uncompress() needs no pointer cast
+ string id;
+ string rs;
+ string chr;
+ std::cout<<"Warning: WJA hard coded SNP missingness threshold of 10%"<<std::endl;
+ //heap buffers reused across SNPs; they replace variable-length stack arrays (non-standard C++, ~6 bytes per sample on the stack)
+ std::vector<uint16_t> unzipped_data;
+ std::vector<uint8_t> zipped_data;
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+
+
+// if (t>1) {break;}
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ // read SNP header
+ id.clear();
+ rs.clear();
+ chr.clear();
+ bgen_A_allele.clear();
+ bgen_B_allele.clear();
+
+ infile.read(reinterpret_cast<char*>(&bgen_N),4);
+ infile.read(reinterpret_cast<char*>(&bgen_LS),2);
+
+ id.resize(bgen_LS);
+ infile.read(&id[0], bgen_LS);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LR),2);
+ rs.resize(bgen_LR);
+ infile.read(&rs[0], bgen_LR);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LC),2);
+ chr.resize(bgen_LC);
+ infile.read(&chr[0], bgen_LC);
+
+ infile.read(reinterpret_cast<char*>(&bgen_SNP_pos),4);
+
+ infile.read(reinterpret_cast<char*>(&bgen_LA),4);
+ bgen_A_allele.resize(bgen_LA);
+ infile.read(&bgen_A_allele[0], bgen_LA);
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_LB),4);
+ bgen_B_allele.resize(bgen_LB);
+ infile.read(&bgen_B_allele[0], bgen_LB);
+ if (!infile) {cout<<"error: truncated or unreadable bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);} //stop instead of analysing garbage
+
+
+
+ //SNPs that are filtered out only have their genotype block skipped
+
+ if (indicator_snp[t]==0) {
+ if(CompressedSNPBlocks)
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ else
+ bgen_P=6*bgen_N;
+ infile.ignore(static_cast<size_t>(bgen_P));
+
+ continue;
+ }
+ unzipped_data.resize(3*static_cast<size_t>(bgen_N)); //three 16-bit probabilities per sample
+
+ if(CompressedSNPBlocks)
+ {
+
+
+ infile.read(reinterpret_cast<char*>(&bgen_P),4);
+ zipped_data.resize(bgen_P);
+
+ unzipped_data_size=6*static_cast<uLongf>(bgen_N);
+
+ infile.read(reinterpret_cast<char*>(zipped_data.data()),bgen_P);
+
+ int result=uncompress(reinterpret_cast<Bytef*>(unzipped_data.data()), &unzipped_data_size, reinterpret_cast<const Bytef*>(zipped_data.data()), static_cast<uLong> (bgen_P));
+ if (result!=Z_OK || unzipped_data_size!=6*static_cast<uLongf>(bgen_N)) {cout<<"error decompressing bgen SNP block:"<<rs<<endl; exit(EXIT_FAILURE);} //was assert(): a no-op under NDEBUG
+
+ }
+ else
+ {
+
+ bgen_P=6*bgen_N;
+ infile.read(reinterpret_cast<char*>(unzipped_data.data()),bgen_P);
+ }
+ if (!infile) {cout<<"error: truncated or unreadable bgen file:"<<file_bgen<<endl; exit(EXIT_FAILURE);} //a short genotype block would leave stale probabilities
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<bgen_N; ++i) { //NOTE(review): assumes indicator_idv has at least bgen_N entries - confirm against the sample file
+ if (indicator_idv[i]==0) {continue;}
+
+
+ bgen_geno_prob_AA=static_cast<double>(unzipped_data[i*3])/32768.0; //stored as 16-bit integers scaled by 32768
+ bgen_geno_prob_AB=static_cast<double>(unzipped_data[i*3+1])/32768.0;
+ bgen_geno_prob_BB=static_cast<double>(unzipped_data[i*3+2])/32768.0;
+ // WJA
+ bgen_geno_prob_non_miss=bgen_geno_prob_AA+bgen_geno_prob_AB+bgen_geno_prob_BB;
+ if (bgen_geno_prob_non_miss<0.9) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;} //hard-coded threshold announced in the warning above
+ else {
+
+ bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
+ bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
+
+ geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; //dosage: 2*P(BB)+P(AB) after renormalising
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+
+ x_mean/=static_cast<double>(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);} //missing genotypes take the SNP mean
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno); //recode as 2-geno; beta's sign is flipped back after testing
+ }
+ }
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //initial values
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+
+ time_start=clock();
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
-void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null);
+
+ return;
+}
+
+void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
{
igzstream infile (file_geno.c_str(), igzstream::in);
// ifstream infile (file_geno.c_str(), ifstream::in);
@@ -2936,10 +3415,10 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
clock_t time_start=clock();
time_UtX=0; time_opt=0;
-
+
string line;
char *ch_ptr;
-
+
// double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
double crt_a, crt_b, crt_c;
@@ -2947,10 +3426,10 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
double geno, x_mean;
size_t c=0;
// double s=0.0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
+
//large matrices for EM
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
@@ -2959,17 +3438,17 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
//large matrices for NR
gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
+
gsl_vector *x=gsl_vector_alloc (n_size);
gsl_vector *x_miss=gsl_vector_alloc (n_size);
-
+
gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
@@ -2977,31 +3456,31 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
+
//null estimates for initial values
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
+
+ gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
+
gsl_matrix_transpose_memcpy (Y, UtY);
gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
+
gsl_vector_view X_row=gsl_matrix_row(X, c_size);
gsl_vector_set_zero(&X_row.vector);
gsl_vector_view B_col=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
+ gsl_vector_set_zero(&B_col.vector);
MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
- logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
-
+
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
@@ -3014,7 +3493,7 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
c++;
}
}
- beta_remle_null.clear();
+ beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
for (size_t j=0; j<se_B_null->size2; j++) {
@@ -3023,10 +3502,10 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
}
logl_remle_H0=logl_H0;
-
+
cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
cout.precision(4);
-
+
cout<<"REMLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
@@ -3034,13 +3513,13 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
cout<<endl;
}
- cout<<"se(Vg): "<<endl;
+ cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
@@ -3049,21 +3528,21 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
cout<<endl;
}
- cout<<"se(Ve): "<<endl;
+ cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
-
+
+
logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
-
+
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
@@ -3076,7 +3555,7 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
c++;
}
}
- beta_mle_null.clear();
+ beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
for (size_t j=0; j<se_B_null->size2; j++) {
@@ -3085,7 +3564,7 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
}
logl_mle_H0=logl_H0;
-
+
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
@@ -3093,13 +3572,13 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
cout<<endl;
}
- cout<<"se(Vg): "<<endl;
+ cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
@@ -3108,17 +3587,17 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
cout<<endl;
}
- cout<<"se(Ve): "<<endl;
+ cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
-
+
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
v_beta.push_back(0.0);
@@ -3130,41 +3609,41 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
v_Vbeta.push_back(0.0);
}
}
-
+
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
gsl_matrix_memcpy (B_null, B);
-
- //start reading genotypes and analyze
+
+ //start reading genotypes and analyze
for (size_t t=0; t<indicator_snp.size(); ++t) {
//if (t>=1) {break;}
!safeGetline(infile, line).eof();
if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
if (indicator_snp[t]==0) {continue;}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
x_mean=0.0; c_phen=0; n_miss=0;
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<ni_total; ++i) {
ch_ptr=strtok (NULL, " , \t");
if (indicator_idv[i]==0) {continue;}
-
+
if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
}
c_phen++;
}
x_mean/=(double)(ni_test-n_miss);
-
+
for (size_t i=0; i<ni_test; ++i) {
if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
geno=gsl_vector_get(x, i);
@@ -3174,68 +3653,68 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
}
//calculate statistics
- time_start=clock();
+ time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//initial values
gsl_matrix_memcpy (V_g, V_g_null);
gsl_matrix_memcpy (V_e, V_e_null);
gsl_matrix_memcpy (B, B_null);
-
+
time_start=clock();
-
+
//3 is before 1
- if (a_mode==3 || a_mode==4) {
+ if (a_mode==3 || a_mode==4) {
p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
if (p_score<p_nr && crt==1) {
logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
}
- }
+ }
if (a_mode==2 || a_mode==4) {
logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
//calculate beta and Vbeta
p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
if (p_lrt<p_nr) {
logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
//calculate beta and Vbeta
p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
if (crt==1) {
p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
}
- }
- }
+ }
+ }
if (a_mode==1 || a_mode==4) {
logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
+
if (p_wald<p_nr) {
logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
+
if (crt==1) {
p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
}
- }
- }
+ }
+ }
if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
//SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
+ v_beta[i]=gsl_vector_get (beta, i);
}
-
+
c=0;
for (size_t i=0; i<d_size; i++) {
for (size_t j=i; j<d_size; j++) {
@@ -3245,16 +3724,16 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
c++;
}
}
-
+
MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
sumStat.push_back(SNPs);
- }
+ }
cout<<endl;
-
-
+
+
infile.close();
infile.clear();
-
+
gsl_matrix_free(U_hat);
gsl_matrix_free(E_hat);
gsl_matrix_free(OmegaU);
@@ -3263,28 +3742,28 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
gsl_matrix_free(UltVehiBX);
gsl_matrix_free(UltVehiU);
gsl_matrix_free(UltVehiE);
-
+
gsl_matrix_free(Hi_all);
gsl_matrix_free(Hiy_all);
gsl_matrix_free(xHi_all);
gsl_matrix_free(Hessian);
-
+
gsl_vector_free(x);
gsl_vector_free(x_miss);
-
+
gsl_matrix_free(Y);
- gsl_matrix_free(X);
+ gsl_matrix_free(X);
gsl_matrix_free(V_g);
gsl_matrix_free(V_e);
gsl_matrix_free(B);
gsl_vector_free(beta);
gsl_matrix_free(Vbeta);
-
+
gsl_matrix_free(V_g_null);
gsl_matrix_free(V_e_null);
- gsl_matrix_free(B_null);
+ gsl_matrix_free(B_null);
gsl_matrix_free(se_B_null);
-
+
return;
}
@@ -3294,18 +3773,18 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
-void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
+void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY)
{
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
-
+
clock_t time_start=clock();
time_UtX=0; time_opt=0;
-
+
char ch[1];
bitset<8> b;
-
+
// double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
double crt_a, crt_b, crt_c;
@@ -3313,9 +3792,9 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
double geno, x_mean;
size_t c=0;
// double s=0.0;
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
-
+
//large matrices for EM
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
@@ -3324,50 +3803,50 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
//large matrices for NR
gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
+
gsl_vector *x=gsl_vector_alloc (n_size);
-
+
gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
+ gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
gsl_vector *beta=gsl_vector_alloc (d_size);
gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
-
+
//null estimates for initial values
gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
- gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
gsl_matrix *se_B_null=gsl_matrix_alloc (d_size, c_size);
-
- gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
+
+ gsl_matrix_view X_sub=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
gsl_matrix_view B_sub=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
gsl_matrix_view xHi_all_sub=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
-
+
gsl_matrix_transpose_memcpy (Y, UtY);
gsl_matrix_transpose_memcpy (&X_sub.matrix, UtW);
-
+
gsl_vector_view X_row=gsl_matrix_row(X, c_size);
gsl_vector_set_zero(&X_row.vector);
gsl_vector_view B_col=gsl_matrix_column(B, c_size);
- gsl_vector_set_zero(&B_col.vector);
-
- //time_start=clock();
+ gsl_vector_set_zero(&B_col.vector);
+
+ //time_start=clock();
MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub.matrix);
-
+
logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
//cout<<"time for REML in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
-
+
c=0;
Vg_remle_null.clear();
Ve_remle_null.clear();
@@ -3380,7 +3859,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
c++;
}
}
- beta_remle_null.clear();
+ beta_remle_null.clear();
se_beta_remle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
for (size_t j=0; j<se_B_null->size2; j++) {
@@ -3389,7 +3868,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
}
logl_remle_H0=logl_H0;
-
+
cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
cout.precision(4);
cout<<"REMLE estimate for Vg in the null model: "<<endl;
@@ -3399,13 +3878,13 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
cout<<endl;
}
- cout<<"se(Vg): "<<endl;
+ cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"REMLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
@@ -3414,22 +3893,22 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
cout<<endl;
}
- cout<<"se(Ve): "<<endl;
+ cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"REMLE likelihood = "<<logl_H0<<endl;
-
- //time_start=clock();
+
+ //time_start=clock();
logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub.matrix);
logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all, &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix, se_B_null);
//cout<<"time for MLE in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
-
+
c=0;
Vg_mle_null.clear();
Ve_mle_null.clear();
@@ -3442,7 +3921,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
c++;
}
}
- beta_mle_null.clear();
+ beta_mle_null.clear();
se_beta_mle_null.clear();
for (size_t i=0; i<se_B_null->size1; i++) {
for (size_t j=0; j<se_B_null->size2; j++) {
@@ -3451,7 +3930,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
}
logl_mle_H0=logl_H0;
-
+
cout<<"MLE estimate for Vg in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
@@ -3459,13 +3938,13 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
cout<<endl;
}
- cout<<"se(Vg): "<<endl;
+ cout<<"se(Vg): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"MLE estimate for Ve in the null model: "<<endl;
for (size_t i=0; i<d_size; i++) {
@@ -3474,16 +3953,16 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
}
cout<<endl;
}
- cout<<"se(Ve): "<<endl;
+ cout<<"se(Ve): "<<endl;
for (size_t i=0; i<d_size; i++) {
for (size_t j=0; j<=i; j++) {
c=GetIndex(i, j, d_size);
cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
}
- cout<<endl;
+ cout<<endl;
}
cout<<"MLE likelihood = "<<logl_H0<<endl;
-
+
vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
for (size_t i=0; i<d_size; i++) {
v_beta.push_back(0.0);
@@ -3495,143 +3974,143 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
v_Vbeta.push_back(0.0);
}
}
-
+
gsl_matrix_memcpy (V_g_null, V_g);
gsl_matrix_memcpy (V_e_null, V_e);
- gsl_matrix_memcpy (B_null, B);
-
-
- //start reading genotypes and analyze
-
+ gsl_matrix_memcpy (B_null, B);
+
+
+ //start reading genotypes and analyze
+
//calculate n_bit and c, the number of bit for each snp
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
-
+
//print the first three majic numbers
for (int i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
}
-
+
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
if (indicator_snp[t]==0) {continue;}
-
+
//if (t>=0) {break;}
//if (snpInfo[t].rs_number!="MAG18140902") {continue;}
//cout<<t<<endl;
-
+
infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
-
+
//read genotypes
- x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
for (int i=0; i<n_bit; ++i) {
infile.read(ch,1);
b=ch[0];
for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
if (indicator_idv[ci_total]==0) {ci_total++; continue;}
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
}
else {
- if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
-
+
ci_total++;
ci_test++;
}
}
-
+
x_mean/=(double)(ni_test-n_miss);
-
- for (size_t i=0; i<ni_test; ++i) {
+
+ for (size_t i=0; i<ni_test; ++i) {
geno=gsl_vector_get(x,i);
if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
if (x_mean>1) {
gsl_vector_set(x, i, 2-geno);
}
- }
-
+ }
+
/*
- if (t==0) {
+ if (t==0) {
ofstream outfile ("./snp1.txt", ofstream::out);
if (!outfile) {cout<<"error writing file: "<<endl; return;}
for (size_t i=0; i<x->size; i++) {
outfile<<gsl_vector_get(x, i)<<endl;
}
outfile.clear();
- outfile.close();
+ outfile.close();
}
*/
-
+
//calculate statistics
- time_start=clock();
+ time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//initial values
gsl_matrix_memcpy (V_g, V_g_null);
gsl_matrix_memcpy (V_e, V_e_null);
gsl_matrix_memcpy (B, B_null);
-
+
time_start=clock();
-
+
//3 is before 1
if (a_mode==3 || a_mode==4) {
p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-
+
if (p_score<p_nr && crt==1) {
logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
}
- }
-
+ }
+
if (a_mode==2 || a_mode==4) {
logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
//calculate beta and Vbeta
p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
if (p_lrt<p_nr) {
logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
//calculate beta and Vbeta
p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
- p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
if (crt==1) {
p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
- }
+ }
}
- }
-
+ }
+
if (a_mode==1 || a_mode==4) {
logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
+
if (p_wald<p_nr) {
logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-
+
if (crt==1) {
p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
}
}
}
-
+
//cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
-
+
if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
-
+
time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+
//store summary data
//SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
for (size_t i=0; i<d_size; i++) {
- v_beta[i]=gsl_vector_get (beta, i);
+ v_beta[i]=gsl_vector_get (beta, i);
}
c=0;
@@ -3643,17 +4122,17 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
c++;
}
}
-
+
MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
sumStat.push_back(SNPs);
- }
- cout<<endl;
-
+ }
+ cout<<endl;
+
//cout<<"time_opt = "<<time_opt<<endl;
-
+
infile.close();
infile.clear();
-
+
gsl_matrix_free(U_hat);
gsl_matrix_free(E_hat);
gsl_matrix_free(OmegaU);
@@ -3662,27 +4141,27 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
gsl_matrix_free(UltVehiBX);
gsl_matrix_free(UltVehiU);
gsl_matrix_free(UltVehiE);
-
+
gsl_matrix_free(Hi_all);
gsl_matrix_free(Hiy_all);
gsl_matrix_free(xHi_all);
gsl_matrix_free(Hessian);
-
+
gsl_vector_free(x);
-
+
gsl_matrix_free(Y);
- gsl_matrix_free(X);
+ gsl_matrix_free(X);
gsl_matrix_free(V_g);
gsl_matrix_free(V_e);
gsl_matrix_free(B);
gsl_vector_free(beta);
gsl_matrix_free(Vbeta);
-
+
gsl_matrix_free(V_g_null);
gsl_matrix_free(V_e_null);
gsl_matrix_free(B_null);
gsl_matrix_free(se_B_null);
-
+
return;
}
@@ -3693,11 +4172,11 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
//both B and se_B are d by c matrices
void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const size_t em_iter, const size_t nr_iter, const double em_prec, const double nr_prec, const double l_min, const double l_max, const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, gsl_matrix *se_B)
{
- size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
size_t dc_size=d_size*c_size, v_size=d_size*(d_size+1)/2;
double logl, crt_a, crt_b, crt_c;
-
+
//large matrices for EM
gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
@@ -3706,22 +4185,22 @@ void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl
gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
- gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
-
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
//large matrices for NR
gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
-
+
//transpose matrices
gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
gsl_matrix *W=gsl_matrix_alloc (c_size, n_size);
gsl_matrix_transpose_memcpy (Y, UtY);
gsl_matrix_transpose_memcpy (W, UtW);
-
+
//initial, EM, NR, and calculate B
- MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max, n_region, V_g, V_e, B);
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max, n_region, V_g, V_e, B);
logl=MphEM ('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
logl=MphNR ('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
MphCalcBeta (eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
@@ -3735,15 +4214,837 @@ void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl
gsl_matrix_free(UltVehiBX);
gsl_matrix_free(UltVehiU);
gsl_matrix_free(UltVehiE);
-
+
gsl_matrix_free(Hi_all);
gsl_matrix_free(Hiy_all);
gsl_matrix_free(xHi_all);
gsl_matrix_free(Hessian);
-
+
gsl_matrix_free(Y);
gsl_matrix_free(W);
-
+
return;
}
+
+// MVLMM::AnalyzeBimbamGXE: per-SNP SNP-by-environment scan over the text genotype file file_geno; one MPHSUMSTAT is appended to sumStat for every SNP with indicator_snp[t]!=0.
+// Inputs: U and eval are handed to the Mph* fitting routines and U^T is applied to env and to each genotype vector; UtW (n_size x covariates) and UtY (n_size x d_size) are transposed into X and Y.
+// Side effects: overwrites the *_remle_null / *_mle_null members, time_UtX and time_opt, prints the null-model estimates to cout, and returns early after an error message if the file cannot be opened.
+void MVLMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env)
+{
+ igzstream infile (file_geno.c_str(), igzstream::in);
+// ifstream infile (file_geno.c_str(), ifstream::in);
+ if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+
+ clock_t time_start=clock();
+ time_UtX=0; time_opt=0;
+
+ string line;
+ char *ch_ptr;
+
+ // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
+ double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
+ double crt_a, crt_b, crt_c;
+ int n_miss, c_phen;
+ double geno, x_mean;
+ size_t c=0;
+ // double s=0.0;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2; // c_size counts the covariates plus the env row plus the SNP main-effect row
+ size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2; // v_size = number of entries in one triangle of a d x d matrix
+
+ //large matrices for EM
+ gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
+ //large matrices for NR
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
+
+ gsl_vector *x=gsl_vector_alloc (n_size);
+ gsl_vector *x_miss=gsl_vector_alloc (n_size);
+ // X has c_size+1 rows: covariates (rows 0..c_size-3), U^T env (row c_size-2), U^T x (row c_size-1), U^T (x*env) (row c_size); B has one column per row of X
+ gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
+ gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_vector *beta=gsl_vector_alloc (d_size);
+ gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
+
+ //null estimates for initial values; including env but not including x
+ gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
+ gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
+ // *_sub1 views: covariates + env (null model fitted once); *_sub2 views: additionally the SNP main-effect row (H0 refitted per SNP)
+ gsl_matrix_view X_sub1=gsl_matrix_submatrix (X, 0, 0, c_size-1, n_size);
+ gsl_matrix_view B_sub1=gsl_matrix_submatrix (B, 0, 0, d_size, c_size-1);
+ gsl_matrix_view xHi_all_sub1=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*(c_size-1), d_size*n_size);
+
+ gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub2=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+
+ gsl_matrix_transpose_memcpy (Y, UtY);
+ // fill the fixed part of X: covariate rows from UtW, then row c_size-2 = U^T env
+ gsl_matrix_view X_sub0=gsl_matrix_submatrix (X, 0, 0, c_size-2, n_size);
+ gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
+ gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+ // the two SNP-dependent rows of X and the matching columns of B start at zero
+ gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
+ gsl_vector_set_zero(&X_row1.vector);
+ gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row2.vector);
+
+ gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
+ gsl_vector_set_zero(&B_col1.vector);
+ gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col2.vector);
+ // null model (covariates + env): initial values, then EM and NR in 'R' mode (reported as REMLE below), then coefficients and their standard errors
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+ // store the upper triangles of V_g/V_e and the matching Hessian diagonals; NOTE(review): VVg_remle_null/VVe_remle_null are appended to without a clear() here — confirm they are empty on entry
+ c=0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i=0; i<se_B_null1->size1; i++) {
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
+ }
+ logl_remle_H0=logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+ // print lower triangles; each se is the square root of the Hessian diagonal entry at GetIndex(i, j, d_size) (offset by v_size for Ve)
+ cout<<"REMLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE likelihood = "<<logl_H0<<endl;
+
+ // repeat the null fit in 'L' mode (reported as MLE below), starting from the 'R' estimates still held in V_g, V_e and B
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+ // NOTE(review): as above, VVg_mle_null/VVe_mle_null are appended to without a clear() — confirm they are empty on entry
+ c=0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i=0; i<se_B_null1->size1; i++) {
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
+ }
+ logl_mle_H0=logl_H0;
+
+ cout<<"MLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE likelihood = "<<logl_H0<<endl;
+
+ // per-SNP output buffers, sized once (d_size entries for beta, d_size*(d_size+1)/2 for each covariance) and overwritten for every SNP
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i=0; i<d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+ // keep the 'L'-mode null fit (the last one run above) as the starting point restored for every SNP
+ gsl_matrix_memcpy (V_g_null, V_g);
+ gsl_matrix_memcpy (V_e_null, V_e);
+ gsl_matrix_memcpy (B_null, B);
+
+ //start reading genotypes and analyze
+ for (size_t t=0; t<indicator_snp.size(); ++t) {
+ //if (t>=1) {break;}
+ !safeGetline(infile, line).eof(); // NOTE(review): the negated eof() result is discarded, so running out of input is not detected here
+ if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);}
+ if (indicator_snp[t]==0) {continue;}
+ // skip the first three fields (presumably SNP id and two alleles — TODO confirm); delimiters are space, comma and tab. NOTE(review): strtok writes into the buffer returned by line.c_str()
+ ch_ptr=strtok ((char *)line.c_str(), " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ ch_ptr=strtok (NULL, " , \t");
+ // x_miss marks observed (1.0) versus "NA" (0.0) entries among the individuals kept by indicator_idv
+ x_mean=0.0; c_phen=0; n_miss=0;
+ gsl_vector_set_zero(x_miss);
+ for (size_t i=0; i<ni_total; ++i) {
+ ch_ptr=strtok (NULL, " , \t");
+ if (indicator_idv[i]==0) {continue;}
+ // NOTE(review): ch_ptr is not checked for NULL; a line with fewer than ni_total genotype fields would hand NULL to strcmp
+ if (strcmp(ch_ptr, "NA")==0) {gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;}
+ else {
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
+ x_mean+=geno;
+ }
+ c_phen++;
+ }
+ // NOTE(review): the denominator is zero when every tested individual is "NA" (n_miss==ni_test)
+ x_mean/=(double)(ni_test-n_miss);
+ // impute missing entries with the mean, then recode every entry as 2-geno when the mean exceeds 1
+ for (size_t i=0; i<ni_test; ++i) {
+ if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
+ geno=gsl_vector_get(x, i);
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ //rotate the SNP (row c_size-1) and the SNP-by-env product (row c_size) by U^T; x is overwritten with x*env in between
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); // accumulated in minutes
+
+ //initial values
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+ // per-SNP H0 (covariates + env + SNP main effect), fitted on the *_sub2 views; with a_mode==4 the 'L' fit runs last, so its logl_H0 is the one used by the LRT below. Not run for a_mode==1
+ if (a_mode==2 || a_mode==3 || a_mode==4) {
+ if (a_mode==3 || a_mode==4) {
+ logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ }
+ }
+
+
+ time_start=clock();
+ // tests on the interaction row X_row2 given X_sub2: a_mode 3 or 4 fills p_score, 2 or 4 fills p_lrt, 1 or 4 fills p_wald; PCRT is applied when crt==1
+ //3 is before 1; NOTE(review): the score test is evaluated at V_g_null/V_e_null (fit without the SNP main effect), not at the per-SNP refit — confirm intended
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta); // return value is replaced on the next line; the call is kept for beta and Vbeta
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+ // only SNPs passing the p_nr threshold get the NR refinement of the full model
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+ // genotypes were recoded as 2-geno when x_mean>1, so flip the sign of the reported effect to match
+ if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); // accumulated in minutes
+
+ //store summary data
+ //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+ // flatten the upper triangles of V_g, V_e and Vbeta row by row
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
+ // p-values of tests not selected by a_mode keep whatever value they last held (0 initially)
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+
+ infile.close();
+ infile.clear();
+ // release every GSL object allocated above
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+ gsl_vector_free(x_miss);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null1);
+ gsl_matrix_free(se_B_null2);
+
+ return;
+}
+
+
+
+
+
+
+
+void MVLMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env)
+{
+ string file_bed=file_bfile+".bed";
+ ifstream infile (file_bed.c_str(), ios::binary);
+ if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+
+ clock_t time_start=clock();
+ time_UtX=0; time_opt=0;
+
+ char ch[1];
+ bitset<8> b;
+
+ // double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
+ double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
+ double crt_a, crt_b, crt_c;
+ int n_bit, n_miss, ci_total, ci_test;
+ double geno, x_mean;
+ size_t c=0;
+ // double s=0.0;
+ size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2+2;
+ size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
+
+ //large matrices for EM
+ gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *OmegaE=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiY=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiBX=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiU=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *UltVehiE=gsl_matrix_alloc (d_size, n_size);
+
+ //large matrices for NR
+ gsl_matrix *Hi_all=gsl_matrix_alloc (d_size, d_size*n_size); //each dxd block is H_k^{-1}
+ gsl_matrix *Hiy_all=gsl_matrix_alloc (d_size, n_size); //each column is H_k^{-1}y_k
+ gsl_matrix *xHi_all=gsl_matrix_alloc (dc_size, d_size*n_size); //each dcxdc block is x_k\otimes H_k^{-1}
+ gsl_matrix *Hessian=gsl_matrix_alloc (v_size*2, v_size*2);
+
+ gsl_vector *x=gsl_vector_alloc (n_size);
+
+ gsl_matrix *Y=gsl_matrix_alloc (d_size, n_size);
+ gsl_matrix *X=gsl_matrix_alloc (c_size+1, n_size);
+ gsl_matrix *V_g=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_vector *beta=gsl_vector_alloc (d_size);
+ gsl_matrix *Vbeta=gsl_matrix_alloc (d_size, d_size);
+
+ //null estimates for initial values
+ gsl_matrix *V_g_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *V_e_null=gsl_matrix_alloc (d_size, d_size);
+ gsl_matrix *B_null=gsl_matrix_alloc (d_size, c_size+1);
+ gsl_matrix *se_B_null1=gsl_matrix_alloc (d_size, c_size-1);
+ gsl_matrix *se_B_null2=gsl_matrix_alloc (d_size, c_size);
+
+ gsl_matrix_view X_sub1=gsl_matrix_submatrix (X, 0, 0, c_size-1, n_size);
+ gsl_matrix_view B_sub1=gsl_matrix_submatrix (B, 0, 0, d_size, c_size-1);
+ gsl_matrix_view xHi_all_sub1=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*(c_size-1), d_size*n_size);
+
+ gsl_matrix_view X_sub2=gsl_matrix_submatrix (X, 0, 0, c_size, n_size);
+ gsl_matrix_view B_sub2=gsl_matrix_submatrix (B, 0, 0, d_size, c_size);
+ gsl_matrix_view xHi_all_sub2=gsl_matrix_submatrix (xHi_all, 0, 0, d_size*c_size, d_size*n_size);
+
+ gsl_matrix_transpose_memcpy (Y, UtY);
+
+ gsl_matrix_view X_sub0=gsl_matrix_submatrix (X, 0, 0, c_size-2, n_size);
+ gsl_matrix_transpose_memcpy (&X_sub0.matrix, UtW);
+ gsl_vector_view X_row0=gsl_matrix_row(X, c_size-2);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &X_row0.vector);
+
+ gsl_vector_view X_row1=gsl_matrix_row(X, c_size-1);
+ gsl_vector_set_zero(&X_row1.vector);
+ gsl_vector_view X_row2=gsl_matrix_row(X, c_size);
+ gsl_vector_set_zero(&X_row2.vector);
+
+ gsl_vector_view B_col1=gsl_matrix_column(B, c_size-1);
+ gsl_vector_set_zero(&B_col1.vector);
+ gsl_vector_view B_col2=gsl_matrix_column(B, c_size);
+ gsl_vector_set_zero(&B_col2.vector);
+
+ //time_start=clock();
+ MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub1.matrix, Y, l_min, l_max, n_region, V_g, V_e, &B_sub1.matrix);
+
+ logl_H0=MphEM ('R', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('R', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+ //cout<<"time for REML in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+
+ c=0;
+ Vg_remle_null.clear();
+ Ve_remle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_remle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_remle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_remle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_remle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_remle_null.clear();
+ se_beta_remle_null.clear();
+ for (size_t i=0; i<se_B_null1->size1; i++) {
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_remle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_remle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
+ }
+ logl_remle_H0=logl_H0;
+
+ cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
+ cout.precision(4);
+ cout<<"REMLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"REMLE likelihood = "<<logl_H0<<endl;
+
+ //time_start=clock();
+ logl_H0=MphEM ('L', em_iter, em_prec, eval, &X_sub1.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub1.matrix);
+ logl_H0=MphNR ('L', nr_iter, nr_prec, eval, &X_sub1.matrix, Y, Hi_all, &xHi_all_sub1.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub1.matrix, Y, V_g, V_e, UltVehiY, &B_sub1.matrix, se_B_null1);
+ //cout<<"time for MLE in the null = "<<(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0)<<endl;
+
+ c=0;
+ Vg_mle_null.clear();
+ Ve_mle_null.clear();
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ Vg_mle_null.push_back(gsl_matrix_get (V_g, i, j) );
+ Ve_mle_null.push_back(gsl_matrix_get (V_e, i, j) );
+ VVg_mle_null.push_back(gsl_matrix_get (Hessian, c, c) );
+ VVe_mle_null.push_back(gsl_matrix_get (Hessian, c+v_size, c+v_size) );
+ c++;
+ }
+ }
+ beta_mle_null.clear();
+ se_beta_mle_null.clear();
+ for (size_t i=0; i<se_B_null1->size1; i++) {
+ for (size_t j=0; j<se_B_null1->size2; j++) {
+ beta_mle_null.push_back(gsl_matrix_get(B, i, j) );
+ se_beta_mle_null.push_back(gsl_matrix_get(se_B_null1, i, j) );
+ }
+ }
+ logl_mle_H0=logl_H0;
+
+ cout<<"MLE estimate for Vg in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_g, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Vg): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c, c))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE estimate for Ve in the null model: "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ cout<<gsl_matrix_get(V_e, i, j)<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"se(Ve): "<<endl;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=0; j<=i; j++) {
+ c=GetIndex(i, j, d_size);
+ cout<<sqrt(gsl_matrix_get(Hessian, c+v_size, c+v_size))<<"\t";
+ }
+ cout<<endl;
+ }
+ cout<<"MLE likelihood = "<<logl_H0<<endl;
+
+ vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
+ for (size_t i=0; i<d_size; i++) {
+ v_beta.push_back(0.0);
+ }
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg.push_back(0.0);
+ v_Ve.push_back(0.0);
+ v_Vbeta.push_back(0.0);
+ }
+ }
+
+ gsl_matrix_memcpy (V_g_null, V_g);
+ gsl_matrix_memcpy (V_e_null, V_e);
+ gsl_matrix_memcpy (B_null, B);
+
+
+ //start reading genotypes and analyze
+
+ //calculate n_bit, the number of bytes used to store each snp (4 genotypes per byte)
+ if (ni_total%4==0) {n_bit=ni_total/4;}
+ else {n_bit=ni_total/4+1; }
+
+ //read past the first three magic numbers
+ for (int i=0; i<3; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ }
+
+ for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
+ if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);}
+ if (indicator_snp[t]==0) {continue;}
+
+ //if (t>=0) {break;}
+ //if (snpInfo[t].rs_number!="MAG18140902") {continue;}
+ //cout<<t<<endl;
+
+ infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers
+
+ //read genotypes
+ x_mean=0.0; n_miss=0; ci_total=0; ci_test=0;
+ for (int i=0; i<n_bit; ++i) {
+ infile.read(ch,1);
+ b=ch[0];
+ for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0;
+ if ((i==(n_bit-1)) && ci_total==(int)ni_total) {break;}
+ if (indicator_idv[ci_total]==0) {ci_total++; continue;}
+
+ if (b[2*j]==0) {
+ if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
+ else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+ }
+ else {
+ if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
+ else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+ }
+
+ ci_total++;
+ ci_test++;
+ }
+ }
+
+ x_mean/=(double)(ni_test-n_miss);
+
+ for (size_t i=0; i<ni_test; ++i) {
+ geno=gsl_vector_get(x,i);
+ if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
+ if (x_mean>1) {
+ gsl_vector_set(x, i, 2-geno);
+ }
+ }
+
+ /*
+ if (t==0) {
+ ofstream outfile ("./snp1.txt", ofstream::out);
+ if (!outfile) {cout<<"error writing file: "<<endl; return;}
+ for (size_t i=0; i<x->size; i++) {
+ outfile<<gsl_vector_get(x, i)<<endl;
+ }
+ outfile.clear();
+ outfile.close();
+ }
+ */
+
+ //calculate statistics
+ time_start=clock();
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row1.vector);
+ gsl_vector_mul (x, env);
+ gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row2.vector);
+ time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //initial values
+ gsl_matrix_memcpy (V_g, V_g_null);
+ gsl_matrix_memcpy (V_e, V_e_null);
+ gsl_matrix_memcpy (B, B_null);
+
+ if (a_mode==2 || a_mode==3 || a_mode==4) {
+ if (a_mode==3 || a_mode==4) {
+ logl_H0=MphEM ('R', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('R', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H0=MphEM ('L', em_iter/10, em_prec*10, eval, &X_sub2.matrix, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, &B_sub2.matrix);
+ logl_H0=MphNR ('L', nr_iter/10, nr_prec*10, eval, &X_sub2.matrix, Y, Hi_all, &xHi_all_sub2.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ MphCalcBeta (eval, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, &B_sub2.matrix, se_B_null2);
+ }
+ }
+
+ time_start=clock();
+
+ //3 is before 1
+ if (a_mode==3 || a_mode==4) {
+ p_score=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+
+ if (p_score<p_nr && crt==1) {
+ logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+ }
+ }
+
+ if (a_mode==2 || a_mode==4) {
+ logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+ if (p_lrt<p_nr) {
+ logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+
+ //calculate beta and Vbeta
+ p_lrt=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+ p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+ if (crt==1) {
+ p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ if (a_mode==1 || a_mode==4) {
+ logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (p_wald<p_nr) {
+ logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+ p_wald=MphCalcP (eval, &X_row2.vector, &X_sub2.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+
+ if (crt==1) {
+ p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
+ }
+ }
+ }
+
+ //cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
+
+ if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+
+ time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+ //store summary data
+ //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+ for (size_t i=0; i<d_size; i++) {
+ v_beta[i]=gsl_vector_get (beta, i);
+ }
+
+ c=0;
+ for (size_t i=0; i<d_size; i++) {
+ for (size_t j=i; j<d_size; j++) {
+ v_Vg[c]=gsl_matrix_get (V_g, i, j);
+ v_Ve[c]=gsl_matrix_get (V_e, i, j);
+ v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+ c++;
+ }
+ }
+
+ MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+ sumStat.push_back(SNPs);
+ }
+ cout<<endl;
+
+ //cout<<"time_opt = "<<time_opt<<endl;
+
+ infile.close();
+ infile.clear();
+
+ gsl_matrix_free(U_hat);
+ gsl_matrix_free(E_hat);
+ gsl_matrix_free(OmegaU);
+ gsl_matrix_free(OmegaE);
+ gsl_matrix_free(UltVehiY);
+ gsl_matrix_free(UltVehiBX);
+ gsl_matrix_free(UltVehiU);
+ gsl_matrix_free(UltVehiE);
+
+ gsl_matrix_free(Hi_all);
+ gsl_matrix_free(Hiy_all);
+ gsl_matrix_free(xHi_all);
+ gsl_matrix_free(Hessian);
+
+ gsl_vector_free(x);
+
+ gsl_matrix_free(Y);
+ gsl_matrix_free(X);
+ gsl_matrix_free(V_g);
+ gsl_matrix_free(V_e);
+ gsl_matrix_free(B);
+ gsl_vector_free(beta);
+ gsl_matrix_free(Vbeta);
+
+ gsl_matrix_free(V_g_null);
+ gsl_matrix_free(V_e_null);
+ gsl_matrix_free(B_null);
+ gsl_matrix_free(se_B_null1);
+ gsl_matrix_free(se_B_null2);
+
+ return;
+}
diff --git a/src/mvlmm.h b/src/mvlmm.h
index 129879c..9ff567c 100644
--- a/src/mvlmm.h
+++ b/src/mvlmm.h
@@ -1,22 +1,22 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011 Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __MVLMM_H__
+#ifndef __MVLMM_H__
#define __MVLMM_H__
#include "gsl/gsl_vector.h"
@@ -38,17 +38,18 @@ using namespace std;
class MVLMM {
-
+
public:
// IO related parameters
int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests
size_t d_pace; //display pace
-
+
string file_bfile;
string file_geno;
+ string file_oxford;
string file_out;
string path_out;
-
+
// MVLMM related parameters
double l_min;
double l_max;
@@ -61,7 +62,7 @@ public:
size_t em_iter, nr_iter;
double em_prec, nr_prec;
size_t crt;
-
+
// Summary statistics
size_t ni_total, ni_test; //number of individuals
size_t ns_total, ns_test; //number of snps
@@ -69,22 +70,25 @@ public:
size_t n_ph;
double time_UtX; //time spent on calculating UtX
double time_opt; //time spent on optimization iterations
-
+
vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
-
+
vector<SNPINFO> snpInfo; //record SNP information
-
+
// Not included in PARAM
vector<MPHSUMSTAT> sumStat; //Output SNPSummary Data
-
+
// Main functions
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY);
void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY);
+ void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY);
+ void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env);
+ void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env);
void WriteFiles ();
-
+
};
void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const size_t em_iter, const size_t nr_iter, const double em_prec, const double nr_prec, const double l_min, const double l_max, const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, gsl_matrix *se_B);
diff --git a/src/param.cpp b/src/param.cpp
index 7a89ff8..c4b234a 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -24,6 +24,15 @@
#include <cmath>
#include <algorithm>
+#include "gsl/gsl_randist.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_blas.h"
+
+#include "eigenlib.h"
+#include "mathfunc.h"
#ifdef FORCE_FLOAT
#include "param_float.h"
@@ -39,12 +48,12 @@ using namespace std;
-PARAM::PARAM(void):
+PARAM::PARAM(void):
mode_silence (false), a_mode (0), k_mode(1), d_pace (100000),
file_out("result"), path_out("./output/"),
miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999),
l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001),nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0),
-pheno_mean(0),
+pheno_mean(0), noconstrain (false),
h_min(-1), h_max(-1), h_scale(-1),
rho_min(0.0), rho_max(1.0), rho_scale(-1),
logp_min(0.0), logp_max(0.0), logp_scale(-1),
@@ -55,53 +64,64 @@ n_accept(0),
n_mh(10),
geo_mean(2000.0),
randseed(-1),
+window_cm(0), window_bp(0), window_ns(0),
error(false),
- n_cvt(1), n_vc(1),
+ni_subsample(0), n_cvt(1), n_vc(1),
time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
{}
//read files
//obtain ns_total, ng_total, ns_test, ni_test
-void PARAM::ReadFiles (void)
+void PARAM::ReadFiles (void)
{
string file_str;
- if (!file_mk.empty()) {
+
+
+ if (!file_cat.empty()) {
+ if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;}
+ }
+
+ if (!file_var.empty()) {
+ if (ReadFile_var (file_var, mapRS2var)==false) {error=true;}
+ }
+
+ if (!file_mk.empty()) {
if (CountFileLines (file_mk, n_vc)==false) {error=true;}
}
-
+
if (!file_snps.empty()) {
if (ReadFile_snps (file_snps, setSnps)==false) {error=true;}
} else {
setSnps.clear();
}
-
+
//for prediction
if (!file_epm.empty()) {
if (ReadFile_est (file_epm, est_column, mapRS2est)==false) {error=true;}
-
+
if (!file_bfile.empty()) {
file_str=file_bfile+".bim";
- if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
+ if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
+
file_str=file_bfile+".fam";
- if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
+ if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
}
-
- if (!file_geno.empty()) {
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
-
- if (CountFileLines (file_geno, ns_total)==false) {error=true;}
+
+ if (!file_geno.empty()) {
+ if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
+
+ if (CountFileLines (file_geno, ns_total)==false) {error=true;}
}
-
+
if (!file_ebv.empty() ) {
if (ReadFile_column (file_ebv, indicator_bv, vec_bv, 1)==false) {error=true;}
}
-
+
if (!file_log.empty() ) {
if (ReadFile_log (file_log, pheno_mean)==false) {error=true;}
}
-
+
//convert indicator_pheno to indicator_idv
int k=1;
for (size_t i=0; i<indicator_pheno.size(); i++) {
@@ -111,46 +131,80 @@ void PARAM::ReadFiles (void)
}
indicator_idv.push_back(k);
}
-
+
ns_test=0;
-
+
return;
}
-
+
//read covariates before the genotype files
if (!file_cvt.empty() ) {
if (ReadFile_cvt (file_cvt, indicator_cvt, cvt, n_cvt)==false) {error=true;}
if ((indicator_cvt).size()==0) {
n_cvt=1;
- }
+ }
} else {
n_cvt=1;
}
+ if (!file_gxe.empty() ) {
+ if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) {error=true;}
+ }
+ if (!file_weight.empty() ) {
+ if (ReadFile_column (file_weight, indicator_weight, weight, 1)==false) {error=true;}
+ }
+
+
+ // WJA added
+ //read genotype and phenotype file for bgen format
+ if (!file_oxford.empty()) {
+ file_str=file_oxford+".sample";
+ if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,indicator_cvt, cvt, n_cvt)==false) {error=true;}
+ if ((indicator_cvt).size()==0) {
+ n_cvt=1;
+ }
+ // n_cvt=1;
+
+ //post-process covariates and phenotypes, obtain ni_test, save all useful covariates
+ ProcessCvtPhen();
+
+
+ //obtain covariate matrix
+ gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
+ CopyCvt (W);
+
+ file_str=file_oxford+".bgen";
+ if (ReadFile_bgen (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test)==false) {error=true;}
+ gsl_matrix_free(W);
+
+ ns_total=indicator_snp.size();
+ }
+
+
//read genotype and phenotype file for plink format
if (!file_bfile.empty()) {
file_str=file_bfile+".bim";
- if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
-
+ if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
+
file_str=file_bfile+".fam";
if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;}
-
+
//post-process covariates and phenotypes, obtain ni_test, save all useful covariates
ProcessCvtPhen();
-
+
//obtain covariate matrix
gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
CopyCvt (W);
-
+
file_str=file_bfile+".bed";
if (ReadFile_bed (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test)==false) {error=true;}
-
+
gsl_matrix_free(W);
-
+
ns_total=indicator_snp.size();
}
-
+
//read genotype and phenotype file for bimbam format
if (!file_geno.empty()) {
//annotation file before genotype file
@@ -163,7 +217,7 @@ void PARAM::ReadFiles (void)
//post-process covariates and phenotypes, obtain ni_test, save all useful covariates
ProcessCvtPhen();
-
+
//obtain covariate matrix
gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
CopyCvt (W);
@@ -171,13 +225,13 @@ void PARAM::ReadFiles (void)
if (ReadFile_geno (file_geno, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test)==false) {error=true;}
gsl_matrix_free(W);
-
+
ns_total=indicator_snp.size();
}
-
+
if (!file_gene.empty()) {
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
-
+
//convert indicator_pheno to indicator_idv
int k=1;
for (size_t i=0; i<indicator_pheno.size(); i++) {
@@ -187,32 +241,39 @@ void PARAM::ReadFiles (void)
}
indicator_idv.push_back(k);
}
-
- if (ReadFile_gene (file_gene, vec_read, snpInfo, ng_total)==false) {error=true;}
+
+ //post-process covariates and phenotypes, obtain ni_test, save all useful covariates
+ ProcessCvtPhen();
+
+ //obtain covariate matrix
+ gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
+ CopyCvt (W);
+
+ if (ReadFile_gene (file_gene, vec_read, snpInfo, ng_total)==false) {error=true;}
+
+ //W is local to this branch and is not handed to ReadFile_gene;
+ //release it before the branch ends so the allocation does not leak
+ gsl_matrix_free(W);
}
-
-
+
+
//read is after gene file
if (!file_read.empty() ) {
if (ReadFile_column (file_read, indicator_read, vec_read, 1)==false) {error=true;}
-
- ni_test=0;
+
+ ni_test=0;
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
indicator_idv[i]*=indicator_read[i];
ni_test+=indicator_idv[i];
}
-
+
if (ni_test==0) {
error=true;
cout<<"error! number of analyzed individuals equals 0. "<<endl;
return;
}
}
-
+
//for ridge prediction, read phenotype only
if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) {
- if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
-
+ if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;}
+
//post-process covariates and phenotypes, obtain ni_test, save all useful covariates
ProcessCvtPhen();
}
@@ -225,37 +286,43 @@ void PARAM::ReadFiles (void)
-void PARAM::CheckParam (void)
-{
+void PARAM::CheckParam (void)
+{
struct stat fileInfo;
string str;
-
+
//check parameters
if (k_mode!=1 && k_mode!=2) {cout<<"error! unknown kinship/relatedness input mode: "<<k_mode<<endl; error=true;}
- if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=21 && a_mode!=22 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61)
- {cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm or -predict is sepcified correctly."<<endl; error=true;}
+ if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=71)
+ {cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;}
if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;}
if (maf_level>0.5) {cout<<"error! maf level needs to be between 0 and 0.5. current value = "<<maf_level<<endl; error=true;}
if (hwe_level>1) {cout<<"error! hwe level needs to be between 0 and 1. current value = "<<hwe_level<<endl; error=true;}
if (r2_level>1) {cout<<"error! r2 level needs to be between 0 and 1. current value = "<<r2_level<<endl; error=true;}
-
- if (l_max<l_min) {cout<<"error! maximum lambda value must be larger than the minimal value. current values = "<<l_max<<" and "<<l_min<<endl; error=true;}
+
+ if (l_max<l_min) {cout<<"error! maximum lambda value must be larger than the minimal value. current values = "<<l_max<<" and "<<l_min<<endl; error=true;}
if (h_max<h_min) {cout<<"error! maximum h value must be larger than the minimal value. current values = "<<h_max<<" and "<<h_min<<endl; error=true;}
if (s_max<s_min) {cout<<"error! maximum s value must be larger than the minimal value. current values = "<<s_max<<" and "<<s_min<<endl; error=true;}
if (rho_max<rho_min) {cout<<"error! maximum rho value must be larger than the minimal value. current values = "<<rho_max<<" and "<<rho_min<<endl; error=true;}
if (logp_max<logp_min) {cout<<"error! maximum logp value must be larger than the minimal value. current values = "<<logp_max/log(10)<<" and "<<logp_min/log(10)<<endl; error=true;}
-
+
if (h_max>1) {cout<<"error! h values must be bewtween 0 and 1. current values = "<<h_max<<" and "<<h_min<<endl; error=true;}
if (rho_max>1) {cout<<"error! rho values must be between 0 and 1. current values = "<<rho_max<<" and "<<rho_min<<endl; error=true;}
if (logp_max>0) {cout<<"error! maximum logp value must be smaller than 0. current values = "<<logp_max/log(10)<<" and "<<logp_min/log(10)<<endl; error=true;}
if (l_max<l_min) {cout<<"error! maximum lambda value must be larger than the minimal value. current values = "<<l_max<<" and "<<l_min<<endl; error=true;}
-
+
if (h_scale>1.0) {cout<<"error! hscale value must be between 0 and 1. current value = "<<h_scale<<endl; error=true;}
if (rho_scale>1.0) {cout<<"error! rscale value must be between 0 and 1. current value = "<<rho_scale<<endl; error=true;}
if (logp_scale>1.0) {cout<<"error! pscale value must be between 0 and 1. current value = "<<logp_scale<<endl; error=true;}
if (rho_max==1 && rho_min==1 && a_mode==12) {cout<<"error! ridge regression does not support a rho parameter. current values = "<<rho_max<<" and "<<rho_min<<endl; error=true;}
-
+
+ if (window_cm<0) {cout<<"error! windowcm values must be non-negative. current values = "<<window_cm<<endl; error=true;}
+
+ if (window_cm==0 && window_bp==0 && window_ns==0) {
+ window_bp=1000000;
+ }
+
//check p_column, and (no need to) sort p_column into ascending order
if (p_column.size()==0) {
p_column.push_back(1);
@@ -266,12 +333,12 @@ void PARAM::CheckParam (void)
}
}
}
-
+
//sort (p_column.begin(), p_column.end() );
n_ph=p_column.size();
-
-
-
+
+
+
//only lmm option (and one prediction option) can deal with multiple phenotypes
//and no gene expression files
if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=43) {
@@ -280,11 +347,11 @@ void PARAM::CheckParam (void)
if (n_ph>1 && !file_gene.empty() ) {
cout<<"error! multiple phenotype analysis option not allowed with gene expression files. "<<endl; error=true;
}
-
+
if (p_nr>1) {
cout<<"error! pnr value must be between 0 and 1. current value = "<<p_nr<<endl; error=true;
}
-
+
//check est_column
if (est_column.size()==0) {
if (file_ebv.empty()) {
@@ -299,10 +366,10 @@ void PARAM::CheckParam (void)
est_column.push_back(7);
}
}
-
- if (est_column.size()!=4) {cout<<"error! -en not followed by four numbers. current number = "<<est_column.size()<<endl; error=true;}
+
+ if (est_column.size()!=4) {cout<<"error! -en not followed by four numbers. current number = "<<est_column.size()<<endl; error=true;}
if (est_column[0]==0) {cout<<"error! -en rs column can not be zero. current number = "<<est_column.size()<<endl; error=true;}
-
+
//check if files are compatible with each other, and if files exist
if (!file_bfile.empty()) {
str=file_bfile+".bim";
@@ -310,44 +377,101 @@ void PARAM::CheckParam (void)
str=file_bfile+".bed";
if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .bed file: "<<str<<endl; error=true;}
str=file_bfile+".fam";
- if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .fam file: "<<str<<endl; error=true;}
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .fam file: "<<str<<endl; error=true;}
+ }
+
+ if (!file_oxford.empty()) {
+ //the .bgen/.sample pair is located through the oxford prefix (the same prefix
+ //ReadFiles uses), so the existence check must be built from file_oxford
+ str=file_oxford+".bgen";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .bgen file: "<<str<<endl; error=true;}
+ str=file_oxford+".sample";
+ if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open .sample file: "<<str<<endl; error=true;}
}
-
+
if ((!file_geno.empty() || !file_gene.empty()) ) {
str=file_pheno;
if (stat(str.c_str(),&fileInfo)==-1) {cout<<"error! fail to open phenotype file: "<<str<<endl; error=true;}
- }
-
+ }
+
str=file_geno;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mean genotype file: "<<str<<endl; error=true;}
-
+
str=file_gene;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open gene expression file: "<<str<<endl; error=true;}
-
+
+ str=file_cat;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open category file: "<<str<<endl; error=true;}
+
+ str=file_var;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open category file: "<<str<<endl; error=true;}
+
+ str=file_beta;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open beta file: "<<str<<endl; error=true;}
+
+ str=file_cor;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open correlation file: "<<str<<endl; error=true;}
+
+ str=file_q;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open q file: "<<str<<endl; error=true;}
+
+ str=file_s;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open s file: "<<str<<endl; error=true;}
+
+ str=file_v;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open v file: "<<str<<endl; error=true;}
+
+ str=file_mq;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mq file: "<<str<<endl; error=true;}
+
+ str=file_ms;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open ms file: "<<str<<endl; error=true;}
+
+ str=file_mv;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open mv file: "<<str<<endl; error=true;}
+
size_t flag=0;
if (!file_bfile.empty()) {flag++;}
if (!file_geno.empty()) {flag++;}
if (!file_gene.empty()) {flag++;}
-
- if (flag!=1 && a_mode!=43 && a_mode!=5 && a_mode!=61) {
+ // WJA added
+ if (!file_oxford.empty()) {flag++;}
+
+ if (flag!=1 && a_mode!=27 && a_mode!=28 && a_mode!=43 && a_mode!=5 && a_mode!=61 && a_mode!=62) {
cout<<"error! either plink binary files, or bimbam mean genotype files, or gene expression files are required."<<endl; error=true;
}
-
- if (file_pheno.empty() && (a_mode==43 || a_mode==5 || a_mode==61) ) {
+
+ if (file_pheno.empty() && (a_mode==43 || a_mode==5) ) {
cout<<"error! phenotype file is required."<<endl; error=true;
}
-
+
+ //input requirements for a_mode 61/62: one of three input combinations must be complete
+ if (a_mode==61 || a_mode==62) {
+ if (!file_pheno.empty()) {
+ //phenotype given: a relatedness matrix (single file, eigen pair, or list of matrices) is needed
+ if (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && file_mk.empty() ) {
+ cout<<"error! missing relatedness file. "<<endl; error=true;
+ }
+ } else if (!file_cor.empty()) {
+ //cor file is present on this branch, so the file that can be missing is the beta file
+ if (file_beta.empty() ) {
+ cout<<"error! missing beta file."<<endl; error=true;
+ }
+ } else {
+ //otherwise a complete mq/ms/mv triple or a complete q/s/v triple is required
+ if ( (file_mq.empty() || file_ms.empty() || file_mv.empty() ) && (file_q.empty() || file_s.empty() || file_v.empty() ) ) {
+ cout<<"error! either phenotype/kinship files or ms/mq/mv s/q/v files are required."<<endl; error=true;
+ }
+ }
+ }
+
+
+
if (!file_epm.empty() && file_bfile.empty() && file_geno.empty() ) {cout<<"error! estimated parameter file also requires genotype file."<<endl; error=true;}
if (!file_ebv.empty() && file_kin.empty()) {cout<<"error! estimated breeding value file also requires relatedness file."<<endl; error=true;}
-
+
if (!file_log.empty() && pheno_mean!=0) {cout<<"error! either log file or mu value can be provide."<<endl; error=true;}
-
+
str=file_snps;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open snps file: "<<str<<endl; error=true;}
-
+
str=file_log;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open log file: "<<str<<endl; error=true;}
-
+
str=file_anno;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open annotation file: "<<str<<endl; error=true;}
@@ -356,52 +480,75 @@ void PARAM::CheckParam (void)
str=file_mk;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open relatedness matrix file: "<<str<<endl; error=true;}
-
+
str=file_cvt;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open covariates file: "<<str<<endl; error=true;}
-
+
+ str=file_gxe;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open environmental covariate file: "<<str<<endl; error=true;}
+
+ str=file_weight;
+ if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open the residual weight file: "<<str<<endl; error=true;}
+
str=file_epm;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open estimated parameter file: "<<str<<endl; error=true;}
-
+
str=file_ebv;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open estimated breeding value file: "<<str<<endl; error=true;}
-
+
str=file_read;
if (!str.empty() && stat(str.c_str(),&fileInfo)==-1 ) {cout<<"error! fail to open total read file: "<<str<<endl; error=true;}
-
+
//check if files are compatible with analysis mode
if (k_mode==2 && !file_geno.empty() ) {cout<<"error! use \"-km 1\" when using bimbam mean genotype file. "<<endl; error=true;}
-
- if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 || a_mode==5 || a_mode==31) && (file_kin.empty() && (file_ku.empty()||file_kd.empty())) ) {cout<<"error! missing relatedness file. "<<endl; error=true;}
- if (a_mode==61 && (file_kin.empty() && (file_ku.empty()||file_kd.empty()) && file_mk.empty() ) ) {cout<<"error! missing relatedness file. "<<endl; error=true;}
+ if ((a_mode==1 || a_mode==2 || a_mode==3 || a_mode==4 || a_mode==5 || a_mode==31) && (file_kin.empty() && (file_ku.empty()||file_kd.empty())) ) {cout<<"error! missing relatedness file. "<<endl; error=true;}
if ((a_mode==43) && file_kin.empty()) {cout<<"error! missing relatedness file. -predict option requires -k option to provide a relatedness file."<<endl; error=true;}
-
+
if ((a_mode==11 || a_mode==12 || a_mode==13) && !file_cvt.empty() ) {cout<<"error! -bslmm option does not support covariates files."<<endl; error=true;}
-
+
if (a_mode==41 || a_mode==42) {
- if (!file_cvt.empty() ) {cout<<"error! -predict option does not support covariates files."<<endl; error=true;}
- if (file_epm.empty() ) {cout<<"error! -predict option requires estimated parameter files."<<endl; error=true;}
+ if (!file_cvt.empty() ) {cout<<"error! -predict option does not support covariates files."<<endl; error=true;}
+ if (file_epm.empty() ) {cout<<"error! -predict option requires estimated parameter files."<<endl; error=true;}
+ }
+
+ if (file_beta.empty() && (a_mode==27 || a_mode==28) ) {
+ cout<<"error! beta effects file is required."<<endl; error=true;
}
return;
}
-
+
void PARAM::CheckData (void) {
- if ((file_cvt).empty() || (indicator_cvt).size()==0) {
- n_cvt=1;
+ if(file_oxford.empty()) // WJA NOTE: I added this condition so that covariates can be added through sample, probably not exactly what is wanted
+
+ {
+ if ((file_cvt).empty() || (indicator_cvt).size()==0) {
+ n_cvt=1;
+ }
}
+
if ( (indicator_cvt).size()!=0 && (indicator_cvt).size()!=(indicator_idv).size()) {
error=true;
cout<<"error! number of rows in the covariates file do not match the number of individuals. "<<endl;
return;
}
-
+ if ( (indicator_gxe).size()!=0 && (indicator_gxe).size()!=(indicator_idv).size()) {
+ error=true;
+ cout<<"error! number of rows in the gxe file do not match the number of individuals. "<<endl;
+ return;
+ }
+ if ( (indicator_weight).size()!=0 && (indicator_weight).size()!=(indicator_idv).size()) {
+ error=true;
+ cout<<"error! number of rows in the weight file do not match the number of individuals. "<<endl;
+ return;
+ }
+
if ( (indicator_read).size()!=0 && (indicator_read).size()!=(indicator_idv).size()) {
error=true;
cout<<"error! number of rows in the total read file do not match the number of individuals. "<<endl;
@@ -411,13 +558,13 @@ void PARAM::CheckData (void) {
//calculate ni_total and ni_test, and set indicator_idv to 0 whenever indicator_cvt=0
//and calculate np_obs and np_miss
ni_total=(indicator_idv).size();
-
- ni_test=0;
+
+ ni_test=0;
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
if (indicator_idv[i]==0) {continue;}
ni_test++;
}
-
+
ni_cvt=0;
for (size_t i=0; i<indicator_cvt.size(); i++) {
if (indicator_cvt[i]==0) {continue;}
@@ -429,8 +576,16 @@ void PARAM::CheckData (void) {
if (indicator_cvt.size()!=0) {
if (indicator_cvt[i]==0) {continue;}
}
-
- for (size_t j=0; j<indicator_pheno[i].size(); j++) {
+
+ if (indicator_gxe.size()!=0) {
+ if (indicator_gxe[i]==0) {continue;}
+ }
+
+ if (indicator_weight.size()!=0) {
+ if (indicator_weight[i]==0) {continue;}
+ }
+
+ for (size_t j=0; j<indicator_pheno[i].size(); j++) {
if (indicator_pheno[i][j]==0) {
np_miss++;
} else {
@@ -441,101 +596,103 @@ void PARAM::CheckData (void) {
/*
if ((indicator_cvt).size()!=0) {
- ni_test=0;
+ ni_test=0;
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
indicator_idv[i]*=indicator_cvt[i];
ni_test+=indicator_idv[i];
}
- }
-
+ }
+
if ((indicator_read).size()!=0) {
- ni_test=0;
+ ni_test=0;
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
indicator_idv[i]*=indicator_read[i];
ni_test+=indicator_idv[i];
}
}
*/
- if (ni_test==0) {
+ if (ni_test==0 && file_cor.empty() && file_mq.empty() && file_q.empty() && file_beta.empty() ) {
error=true;
cout<<"error! number of analyzed individuals equals 0. "<<endl;
return;
}
-
+
if (a_mode==43) {
if (ni_cvt==ni_test) {
error=true;
- cout<<"error! no individual has missing phenotypes."<<endl;
+ cout<<"error! no individual has missing phenotypes."<<endl;
return;
}
if ((np_obs+np_miss)!=(ni_cvt*n_ph)) {
error=true;
//cout<<ni_cvt<<"\t"<<ni_test<<"\t"<<ni_total<<"\t"<<np_obs<<"\t"<<np_miss<<"\t"<<indicator_cvt.size()<<endl;
- cout<<"error! number of phenotypes do not match the summation of missing and observed phenotypes."<<endl;
+ cout<<"error! number of phenotypes do not match the summation of missing and observed phenotypes."<<endl;
return;
}
}
//output some information
- cout<<"## number of total individuals = "<<ni_total<<endl;
- if (a_mode==43) {
- cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
- cout<<"## number of individuals with full phenotypes = "<<ni_test<<endl;
- } else {
- cout<<"## number of analyzed individuals = "<<ni_test<<endl;
+ if (file_cor.empty() && file_mq.empty() && file_q.empty() ) {
+ cout<<"## number of total individuals = "<<ni_total<<endl;
+ if (a_mode==43) {
+ cout<<"## number of analyzed individuals = "<<ni_cvt<<endl;
+ cout<<"## number of individuals with full phenotypes = "<<ni_test<<endl;
+ } else {
+ cout<<"## number of analyzed individuals = "<<ni_test<<endl;
+ }
+ cout<<"## number of covariates = "<<n_cvt<<endl;
+ cout<<"## number of phenotypes = "<<n_ph<<endl;
+ if (a_mode==43) {
+ cout<<"## number of observed data = "<<np_obs<<endl;
+ cout<<"## number of missing data = "<<np_miss<<endl;
+ }
+ if (!file_gene.empty()) {
+ cout<<"## number of total genes = "<<ng_total<<endl;
+ } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) {
+ cout<<"## number of total SNPs = "<<ns_total<<endl;
+ cout<<"## number of analyzed SNPs = "<<ns_test<<endl;
+ } else {}
}
- cout<<"## number of covariates = "<<n_cvt<<endl;
- cout<<"## number of phenotypes = "<<n_ph<<endl;
- if (a_mode==43) {
- cout<<"## number of observed data = "<<np_obs<<endl;
- cout<<"## number of missing data = "<<np_miss<<endl;
- }
- if (!file_gene.empty()) {
- cout<<"## number of total genes = "<<ng_total<<endl;
- } else if (file_epm.empty() && a_mode!=43 && a_mode!=5) {
- cout<<"## number of total SNPs = "<<ns_total<<endl;
- cout<<"## number of analyzed SNPs = "<<ns_test<<endl;
- } else {}
-
+
//set d_pace to 1000 for gene expression
if (!file_gene.empty() && d_pace==100000) {
d_pace=1000;
}
-
+
//for case-control studies, count #cases and #controls
int flag_cc=0;
- if (a_mode==13) {
+ if (a_mode==13) {
ni_case=0;
ni_control=0;
for (size_t i=0; i<indicator_idv.size(); i++) {
if (indicator_idv[i]==0) {continue;}
-
+
if (pheno[i][0]==0) {ni_control++;}
else if (pheno[i][0]==1) {ni_case++;}
else {flag_cc=1;}
}
- cout<<"## number of cases = "<<ni_case<<endl;
- cout<<"## number of controls = "<<ni_control<<endl;
- }
-
+ cout<<"## number of cases = "<<ni_case<<endl;
+ cout<<"## number of controls = "<<ni_control<<endl;
+ }
+
if (flag_cc==1) {cout<<"Unexpected non-binary phenotypes for case/control analysis. Use default (BSLMM) analysis instead."<<endl; a_mode=11;}
-
+
//set parameters for BSLMM
//and check for predict
if (a_mode==11 || a_mode==12 || a_mode==13) {
- if (a_mode==11) {n_mh=1;}
+ if (a_mode==11) {n_mh=1;}
if (logp_min==0) {logp_min=-1.0*log((double)ns_test);}
-
+
if (h_scale==-1) {h_scale=min(1.0, 10.0/sqrt((double)ni_test) );}
if (rho_scale==-1) {rho_scale=min(1.0, 10.0/sqrt((double)ni_test) );}
if (logp_scale==-1) {logp_scale=min(1.0, 5.0/sqrt((double)ni_test) );}
-
+
if (h_min==-1) {h_min=0.0;}
if (h_max==-1) {h_max=1.0;}
-
+
if (s_max>ns_test) {s_max=ns_test; cout<<"s_max is re-set to the number of analyzed SNPs."<<endl;}
if (s_max<s_min) {cout<<"error! maximum s value must be larger than the minimal value. current values = "<<s_max<<" and "<<s_min<<endl; error=true;}
- } else if (a_mode==41 || a_mode==42) {
+ } else if (a_mode==41 || a_mode==42) {
if (indicator_bv.size()!=0) {
if (indicator_idv.size()!=indicator_bv.size()) {
cout<<"error! number of rows in the phenotype file does not match that in the estimated breeding value file: "<<indicator_idv.size()<<"\t"<<indicator_bv.size()<<endl;
@@ -555,18 +712,18 @@ void PARAM::CheckData (void) {
//file_mk needs to contain more than one line
if (n_vc==1 && !file_mk.empty()) {cout<<"error! -mk file should contain more than one line."<<endl; error=true;}
-
+
return;
}
-void PARAM::PrintSummary ()
+void PARAM::PrintSummary ()
{
if (n_ph==1) {
cout<<"pve estimate ="<<pve_null<<endl;
cout<<"se(pve) ="<<pve_se_null<<endl;
} else {
-
+
}
return;
}
@@ -575,7 +732,7 @@ void PARAM::PrintSummary ()
void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
string file_str;
-
+
if (!file_bfile.empty()) {
file_str=file_bfile+".bed";
if (ReadFile_bed (file_str, indicator_idv, indicator_snp, UtX, K, calc_K)==false) {error=true;}
@@ -583,91 +740,563 @@ void PARAM::ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
else {
if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, UtX, K, calc_K)==false) {error=true;}
}
-
+
return;
}
-
+
+
+void PARAM::ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K) {
+ string file_str;
+
+ if (!file_bfile.empty()) {
+ file_str=file_bfile+".bed";
+ if (ReadFile_bed (file_str, indicator_idv, indicator_snp, Xt, K, calc_K, ni_test, ns_test)==false) {error=true;}
+ } else {
+ if (ReadFile_geno (file_geno, indicator_idv, indicator_snp, Xt, K, calc_K, ni_test, ns_test)==false) {error=true;}
+ }
+
+ return;
+}
+
void PARAM::CalcKin (gsl_matrix *matrix_kin) {
string file_str;
-
+
gsl_matrix_set_zero (matrix_kin);
-
- if (!file_bfile.empty() ) {
+
+ if (!file_bfile.empty() ) {
file_str=file_bfile+".bed";
if (PlinkKin (file_str, indicator_snp, a_mode-20, d_pace, matrix_kin)==false) {error=true;}
}
+ else if (!file_oxford.empty() ) {
+ file_str=file_oxford+".bgen";
+ if (bgenKin (file_str, indicator_snp, a_mode-20, d_pace, matrix_kin)==false) {error=true;}
+ }
else {
file_str=file_geno;
if (BimbamKin (file_str, indicator_snp, a_mode-20, d_pace, matrix_kin)==false) {error=true;}
}
-
+
+ return;
+}
+
+
+
+//from an existing n by nd G matrix, compute the d by d S matrix
+void compKtoS (const gsl_matrix *G, gsl_matrix *S) {
+ size_t n_vc=S->size1, ni_test=G->size1;
+ double di, dj, tr_KiKj, sum_Ki, sum_Kj, s_Ki, s_Kj, s_KiKj, si, sj, d;
+
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ tr_KiKj=0; sum_Ki=0; sum_Kj=0; s_KiKj=0; si=0; sj=0;
+ for (size_t l=0; l<ni_test; l++) {
+ s_Ki=0; s_Kj=0;
+ for (size_t k=0; k<ni_test; k++) {
+ di=gsl_matrix_get(G, l, k+ni_test*i);
+ dj=gsl_matrix_get(G, l, k+ni_test*j);
+ s_Ki+=di; s_Kj+=dj;
+
+ tr_KiKj+=di*dj; sum_Ki+=di; sum_Kj+=dj;
+ if (l==k) {si+=di; sj+=dj;}
+ }
+ s_KiKj+=s_Ki*s_Kj;
+ }
+
+ sum_Ki/=(double)ni_test;
+ sum_Kj/=(double)ni_test;
+ s_KiKj/=(double)ni_test;
+ si-=sum_Ki;
+ sj-=sum_Kj;
+ d=tr_KiKj-2*s_KiKj+sum_Ki*sum_Kj;
+ d=d/(si*sj)-1/(double)(ni_test-1);
+
+ gsl_matrix_set (S, i, j, d);
+ if (i!=j) {gsl_matrix_set (S, j, i, d);}
+ }
+ }
+ //cout<<tr_KiKj<<" "<<s_KiKj<<" "<<sum_Ki<<" "<<sum_Kj<<" "<<si<<" "<<sj<<" "<<d*1000000<<endl;
+ return;
+}
+
+
+
+//copied from lmm.cpp; is used in the following function compKtoQ
+//map a pair of numbers (a, b), each between 1 and n_cvt+2, to an index between 0 and [(n_cvt+2)^2+(n_cvt+2)]/2-1; the order of a and b does not matter
+size_t GetabIndex (const size_t a, const size_t b, const size_t n_cvt) {
+ if (a>n_cvt+2 || b>n_cvt+2 || a<=0 || b<=0) {cout<<"error in GetabIndex."<<endl; return 0;}
+ size_t index;
+ size_t l, h;
+ if (b>a) {l=a; h=b;} else {l=b; h=a;}
+
+ size_t n=n_cvt+2;
+ index=(2*n-l+2)*(l-1)/2+h-l;
+
+ return index;
+}
+
+//from an existing n by nd (centered) G matrix, compute the d+1 by d*(d+1) Q matrix
+//where inside i'th d+1 by d+1 matrix, each element is tr(KiKjKiKl)-r*tr(KjKiKl)-r*tr(KlKiKj)+r^2*tr(KjKl), where r=n/(n-1)
+void compKtoQ (const gsl_matrix *G, gsl_matrix *Q) {
+ size_t n_vc=G->size2/G->size1, ni_test=G->size1;
+
+ gsl_matrix *KiKj=gsl_matrix_alloc(ni_test, n_vc*(n_vc+1)/2*ni_test);
+ gsl_vector *trKiKjKi=gsl_vector_alloc ( n_vc*n_vc );
+ gsl_vector *trKiKj=gsl_vector_alloc( n_vc*(n_vc+1)/2 );
+ gsl_vector *trKi=gsl_vector_alloc(n_vc);
+
+ double d, tr, r=(double)ni_test/(double)(ni_test-1);
+ size_t t, t_ij, t_il, t_jl, t_ii;
+
+ //compute KiKj for all pairs of i and j (including the identity matrix)
+ t=0;
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_const_view Ki=gsl_matrix_const_submatrix(G, 0, i*ni_test, ni_test, ni_test);
+ for (size_t j=i; j<n_vc; j++) {
+ gsl_matrix_const_view Kj=gsl_matrix_const_submatrix(G, 0, j*ni_test, ni_test, ni_test);
+ gsl_matrix_view KiKj_sub=gsl_matrix_submatrix (KiKj, 0, t*ni_test, ni_test, ni_test);
+ eigenlib_dgemm ("N", "N", 1.0, &Ki.matrix, &Kj.matrix, 0.0, &KiKj_sub.matrix);
+ t++;
+ }
+ }
+ /*
+ for (size_t i=0; i<5; i++) {
+ for (size_t j=0; j<5; j++) {
+ cout<<gsl_matrix_get (G, i, j)<<" ";
+ }
+ cout<<endl;
+ }
+ */
+
+ //compute trKi, trKiKj
+ t=0;
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ tr+=gsl_matrix_get (KiKj, k, t*ni_test+k);
+ }
+ gsl_vector_set (trKiKj, t, tr);
+
+ t++;
+ }
+
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ tr+=gsl_matrix_get (G, k, i*ni_test+k);
+ }
+ gsl_vector_set (trKi, i, tr);
+ }
+
+ //compute trKiKjKi (it is not symmetric w.r.t. i and j)
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=0; j<n_vc; j++) {
+ tr=0;
+ t=GetabIndex (i+1, j+1, n_vc-2);
+ for (size_t k=0; k<ni_test; k++) {
+ gsl_vector_const_view KiKj_row=gsl_matrix_const_subrow (KiKj, k, t*ni_test, ni_test);
+ gsl_vector_const_view KiKj_col=gsl_matrix_const_column (KiKj, t*ni_test+k);
+
+ gsl_vector_const_view Ki_col=gsl_matrix_const_column (G, i*ni_test+k);
+
+ if (i<=j) {
+ gsl_blas_ddot (&KiKj_row.vector, &Ki_col.vector, &d);
+ tr+=d;
+ } else {
+ gsl_blas_ddot (&KiKj_col.vector, &Ki_col.vector, &d);
+ tr+=d;
+ }
+ }
+ gsl_vector_set (trKiKjKi, i*n_vc+j, tr);
+ }
+ }
+
+ //compute Q
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=0; j<n_vc+1; j++) {
+ for (size_t l=j; l<n_vc+1; l++) {
+ if (j!=n_vc && l!=n_vc) {
+ t_ij=GetabIndex (i+1, j+1, n_vc-2);
+ t_il=GetabIndex (i+1, l+1, n_vc-2);
+ t_jl=GetabIndex (j+1, l+1, n_vc-2);
+
+ //cout<<ni_test<<" "<<r<<t_ij<<" "<<t_il<<" "<<t_jl<<" "<<endl;
+ tr=0;
+ for (size_t k=0; k<ni_test; k++) {
+ gsl_vector_const_view KiKj_row=gsl_matrix_const_subrow (KiKj, k, t_ij*ni_test, ni_test);
+ gsl_vector_const_view KiKj_col=gsl_matrix_const_column (KiKj, t_ij*ni_test+k);
+ gsl_vector_const_view KiKl_row=gsl_matrix_const_subrow (KiKj, k, t_il*ni_test, ni_test);
+ gsl_vector_const_view KiKl_col=gsl_matrix_const_column (KiKj, t_il*ni_test+k);
+
+ gsl_vector_const_view Kj_row=gsl_matrix_const_subrow (G, k, j*ni_test, ni_test);
+ gsl_vector_const_view Kl_row=gsl_matrix_const_subrow (G, k, l*ni_test, ni_test);
+
+ if (i<=j && i<=l) {
+ gsl_blas_ddot (&KiKj_row.vector, &KiKl_col.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Kj_row.vector, &KiKl_col.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KiKj_col.vector, &d);
+ tr-=r*d;
+ } else if (i<=j && i>l) {
+ gsl_blas_ddot (&KiKj_row.vector, &KiKl_row.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KiKj_col.vector, &d);
+ tr-=r*d;
+ } else if (i>j && i<=l) {
+ gsl_blas_ddot (&KiKj_col.vector, &KiKl_col.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Kj_row.vector, &KiKl_col.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d);
+ tr-=r*d;
+ } else {
+ gsl_blas_ddot (&KiKj_col.vector, &KiKl_row.vector, &d);
+ tr+=d;
+ gsl_blas_ddot (&Kj_row.vector, &KiKl_row.vector, &d);
+ tr-=r*d;
+ gsl_blas_ddot (&Kl_row.vector, &KiKj_row.vector, &d);
+ tr-=r*d;
+ }
+ }
+
+ tr+=r*r*gsl_vector_get (trKiKj, t_jl);
+ } else if (j!=n_vc && l==n_vc) {
+ t_ij=GetabIndex (i+1, j+1, n_vc-2);
+ tr=gsl_vector_get (trKiKjKi, i*n_vc+j)-2*r*gsl_vector_get (trKiKj, t_ij)+r*r*gsl_vector_get (trKi, j);
+ } else if (j==n_vc && l==n_vc) {
+ t_ii=GetabIndex (i+1, i+1, n_vc-2);
+ tr=gsl_vector_get (trKiKj, t_ii)-2*r*gsl_vector_get (trKi, i)+r*r*(double)(ni_test-1);
+ }
+
+ gsl_matrix_set (Q, j, i*(n_vc+1)+l, tr);
+ if (l!=j) {gsl_matrix_set (Q, l, i*(n_vc+1)+j, tr);}
+ }
+ }
+ }
+
+ gsl_matrix_scale (Q, 1.0/pow((double)ni_test, 2) );
+
+ gsl_matrix_free(KiKj);
+ gsl_vector_free(trKiKjKi);
+ gsl_vector_free(trKiKj);
+ gsl_vector_free(trKi);
+
+ return;
+}
+
+
+
+//perform jackknife sampling for variance of S
+void JacknifeGtoS (const gsl_matrix *G, gsl_matrix *S, gsl_matrix *Svar) {
+ size_t n_vc=Svar->size1, ni_test=G->size1;
+ vector<vector<vector<double> > > tr_KiKj, s_KiKj;
+ vector<vector<double> > sum_Ki, s_Ki, si;
+ vector<double> vec_tmp;
+ double di, dj, d, m, v;
+
+ //initialize and set all elements to zero
+ for (size_t i=0; i<ni_test; i++) {
+ vec_tmp.push_back(0);
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ sum_Ki.push_back(vec_tmp);
+ s_Ki.push_back(vec_tmp);
+ si.push_back(vec_tmp);
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ tr_KiKj.push_back(sum_Ki);
+ s_KiKj.push_back(sum_Ki);
+ }
+
+ //run jackknife
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t l=0; l<ni_test; l++) {
+ for (size_t k=0; k<ni_test; k++) {
+ di=gsl_matrix_get(G, l, k+ni_test*i);
+
+ for (size_t t=0; t<ni_test; t++) {
+ if (t==l || t==k) {continue;}
+ sum_Ki[i][t]+=di;
+ if (l==k) {si[i][t]+=di;}
+ }
+ s_Ki[i][l]+=di;
+ }
+ }
+
+ for (size_t t=0; t<ni_test; t++) {
+ sum_Ki[i][t]/=(double)(ni_test-1);
+ }
+ }
+
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ for (size_t l=0; l<ni_test; l++) {
+ for (size_t k=0; k<ni_test; k++) {
+ di=gsl_matrix_get(G, l, k+ni_test*i);
+ dj=gsl_matrix_get(G, l, k+ni_test*j);
+ d=di*dj;
+
+ for (size_t t=0; t<ni_test; t++) {
+ if (t==l || t==k) {continue;}
+ tr_KiKj[i][j][t]+=d;
+ }
+ }
+
+ for (size_t t=0; t<ni_test; t++) {
+ if (t==l) {continue;}
+ di=gsl_matrix_get(G, l, t+ni_test*i);
+ dj=gsl_matrix_get(G, l, t+ni_test*j);
+
+ s_KiKj[i][j][t]+=(s_Ki[i][l]-di)*(s_Ki[j][l]-dj);
+ }
+ }
+
+ for (size_t t=0; t<ni_test; t++) {
+ s_KiKj[i][j][t]/=(double)(ni_test-1);
+ }
+
+ m=0; v=0;
+ for (size_t t=0; t<ni_test; t++) {
+ d=tr_KiKj[i][j][t]-2*s_KiKj[i][j][t]+sum_Ki[i][t]*sum_Ki[j][t];
+ d/=(si[i][t]-sum_Ki[i][t])*(si[j][t]-sum_Ki[j][t]);
+ d-=1/(double)(ni_test-2);
+
+ m+=d; v+=d*d;
+ }
+ m/=(double)ni_test;
+ v/=(double)ni_test;
+ v-=m*m;
+ v*=(double)(ni_test-1);
+
+ gsl_matrix_set (Svar, i, j, v);
+ d=gsl_matrix_get (S, i, j);
+ d=(double)ni_test*d-(double)(ni_test-1)*m;
+ gsl_matrix_set (S, i, j, d);
+ if (i!=j) {gsl_matrix_set (Svar, j, i, v); gsl_matrix_set (S, j, i, d);}
+ }
+ }
+
+ return;
+}
+
+
+
+//compute the d by d S matrix with its d by d variance matrix of Svar, and the d+1 by d(d+1) matrix of Q for V(q)
+void PARAM::CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q) {
+ string file_str;
+
+ gsl_matrix_set_zero (S);
+ gsl_matrix_set_zero (Svar);
+ gsl_matrix_set_zero (Q);
+
+ //compute the kinship matrix G for multiple categories; these matrices are not centered, for convenience of jackknife sampling
+ gsl_matrix *G=gsl_matrix_alloc (ni_test, n_vc*ni_test);
+ gsl_matrix_set_zero (G);
+
+ if (!file_bfile.empty() ) {
+ file_str=file_bfile+".bed";
+ if (PlinkKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;}
+ } else {
+ file_str=file_geno;
+ if (BimbamKin (file_str, indicator_idv, indicator_snp, a_mode-24, d_pace, mapRS2cat, mapRS2var, snpInfo, G)==false) {error=true;}
+ }
+
+ //center and scale every kinship matrix inside G
+ double d;
+ for (size_t i=0; i<n_vc; i++) {
+ gsl_matrix_view K=gsl_matrix_submatrix(G, 0, i*ni_test, ni_test, ni_test);
+ CenterMatrix(&K.matrix);
+ d=ScaleMatrix(&K.matrix);
+ }
+
+ //based on G, compute S
+ compKtoS (G, S);
+
+ //based on G, compute a matrix Q that can be used to calculate the variance of q
+ compKtoQ (G, Q);
+
+ /*
+ //set up random environment
+ gsl_rng_env_setup();
+ gsl_rng *gsl_r;
+ const gsl_rng_type * gslType;
+ gslType = gsl_rng_default;
+ if (randseed<0) {
+ time_t rawtime;
+ time (&rawtime);
+ tm * ptm = gmtime (&rawtime);
+
+ randseed = (unsigned) (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec);
+ }
+ gsl_r = gsl_rng_alloc(gslType);
+ gsl_rng_set(gsl_r, randseed);
+
+ //bootstrap: in each iteration, sample individuals and compute S_pmt
+ size_t n_pmt=100;
+ vector<size_t> idv_order, idv_remove;
+ for (size_t i=0; i<ni_test; i++) {
+ idv_order.push_back(i);
+ }
+ for (size_t i=0; i<n_pmt; i++) {
+ idv_remove.push_back(0);
+ }
+ gsl_ran_choose (gsl_r, static_cast<void*>(&idv_remove[0]), n_pmt, static_cast<void*>(&idv_order[0]), ni_test, sizeof(size_t));
+
+ gsl_matrix *S_pmt=gsl_matrix_alloc(n_vc, n_vc*n_pmt);
+ for (size_t i=0; i<n_pmt; i++) {
+ gsl_matrix_view S_sub=gsl_matrix_submatrix (S_pmt, 0, n_vc*i, n_vc, n_vc);
+ compKtoS (G, idv_remove[i], &S_sub.matrix);
+ }
+
+ //based on S_pmt, compute Svar
+ double m, v, d;
+ for (size_t i=0; i<n_vc; i++) {
+ for (size_t j=i; j<n_vc; j++) {
+ m=0; v=0;
+ for (size_t t=0; t<n_pmt; t++) {
+ d=gsl_matrix_get(S_pmt, i, j);
+ m+=d; v+=d*d;
+ }
+ m/=(double)n_pmt; v/=(double)n_pmt;
+ v=v-m*m;
+ gsl_matrix_set(Svar, i, j, v);
+ if (i!=j) {gsl_matrix_set(Svar, j, i, v);}
+ }
+ }
+ */
+
+ //compute Svar and update S with jackknife
+ JacknifeGtoS (G, S, Svar);
+
+ gsl_matrix_free(G);
+ return;
+}
+
+
+
+void PARAM::WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix)
+{
+ string file_str;
+ file_str=path_out+"/"+file_out;
+ file_str+=".";
+ file_str+=suffix;
+ file_str+=".txt";
+
+ ofstream outfile (file_str.c_str(), ofstream::out);
+ if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+
+ outfile.precision(10);
+
+ for (size_t i=0; i<q->size; ++i) {
+ outfile<<gsl_vector_get (q, i)<<endl;
+ }
+
+ for (size_t i=0; i<s->size; ++i) {
+ outfile<<gsl_vector_get (s, i)<<endl;
+ }
+
+ outfile<<n_total<<endl;
+
+ outfile.close();
+ outfile.clear();
+ return;
+}
+
+
+
+void PARAM::WriteVar (const string suffix)
+{
+ string file_str, rs;
+ file_str=path_out+"/"+file_out;
+ file_str+=".";
+ file_str+=suffix;
+ file_str+=".txt.gz";
+
+ ogzstream outfile (file_str.c_str(), ogzstream::out);
+ if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+
+ outfile.precision(10);
+
+ for (size_t i=0; i<indicator_snp.size(); i++) {
+ if (indicator_snp[i]==0) {continue;}
+ rs=snpInfo[i].rs_number;
+ if (mapRS2var.count(rs)!=0) {
+ outfile<<rs<<"\t"<<mapRS2var.at(rs)<<endl;
+ }
+ }
+
+ outfile.close();
+ outfile.clear();
return;
}
-
-void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix)
+void PARAM::WriteMatrix (const gsl_matrix *matrix_U, const string suffix)
{
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".";
file_str+=suffix;
- file_str+=".txt";
-
+ file_str+=".txt";
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
-
+
outfile.precision(10);
-
+
for (size_t i=0; i<matrix_U->size1; ++i) {
for (size_t j=0; j<matrix_U->size2; ++j) {
outfile<<gsl_matrix_get (matrix_U, i, j)<<"\t";
}
outfile<<endl;
}
-
+
outfile.close();
outfile.clear();
return;
}
-void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix)
+void PARAM::WriteVector (const gsl_vector *vector_D, const string suffix)
{
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".";
file_str+=suffix;
file_str+=".txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
-
+
outfile.precision(10);
-
+
for (size_t i=0; i<vector_D->size; ++i) {
outfile<<gsl_vector_get (vector_D, i)<<endl;
}
-
+
outfile.close();
outfile.clear();
return;
}
-void PARAM::CheckCvt ()
+void PARAM::CheckCvt ()
{
if (indicator_cvt.size()==0) {return;}
-
+
size_t ci_test=0;
-
+
gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt);
-
+
for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
for (size_t j=0; j<n_cvt; ++j) {
@@ -679,14 +1308,14 @@ void PARAM::CheckCvt ()
size_t flag_ipt=0;
double v_min, v_max;
set<size_t> set_remove;
-
+
//check if any column is an intercept
for (size_t i=0; i<W->size2; i++) {
gsl_vector_view w_col=gsl_matrix_column (W, i);
gsl_vector_minmax (&w_col.vector, &v_min, &v_max);
if (v_min==v_max) {flag_ipt=1; set_remove.insert (i);}
}
-
+
//add an intercept term if needed
if (n_cvt==set_remove.size()) {
indicator_cvt.clear();
@@ -697,19 +1326,19 @@ void PARAM::CheckCvt ()
if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
cvt[i].push_back(1.0);
}
-
+
n_cvt++;
- } else {}
-
+ } else {}
+
gsl_matrix_free(W);
-
+
return;
}
//post-process phenotypes, covariates
void PARAM::ProcessCvtPhen ()
-{
+{
//convert indicator_pheno to indicator_idv
int k=1;
indicator_idv.clear();
@@ -720,27 +1349,88 @@ void PARAM::ProcessCvtPhen ()
}
indicator_idv.push_back(k);
}
-
+
//remove individuals with missing covariates
if ((indicator_cvt).size()!=0) {
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
indicator_idv[i]*=indicator_cvt[i];
}
}
-
+
+ //remove individuals with missing gxe variables
+ if ((indicator_gxe).size()!=0) {
+ for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
+ indicator_idv[i]*=indicator_gxe[i];
+ }
+ }
+
+ //remove individuals with missing residual weights
+ if ((indicator_weight).size()!=0) {
+ for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
+ indicator_idv[i]*=indicator_weight[i];
+ }
+ }
+
//obtain ni_test
- ni_test=0;
+ ni_test=0;
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
- if (indicator_idv[i]==0) {continue;}
+ if (indicator_idv[i]==0) {continue;}
ni_test++;
}
-
+
+
+
+ //if subsample number is set, perform a random sub-sampling to determine the subsampled ids
+ if (ni_subsample!=0) {
+ if (ni_test<ni_subsample) {
+ cout<<"error! number of subsamples is less than number of analyzed individuals. "<<endl;
+ } else {
+ //set up random environment
+ gsl_rng_env_setup();
+ gsl_rng *gsl_r;
+ const gsl_rng_type * gslType;
+ gslType = gsl_rng_default;
+ if (randseed<0) {
+ time_t rawtime;
+ time (&rawtime);
+ tm * ptm = gmtime (&rawtime);
+
+ randseed = (unsigned) (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec);
+ }
+ gsl_r = gsl_rng_alloc(gslType);
+ gsl_rng_set(gsl_r, randseed);
+
+ //from ni_test, sub-sample ni_subsample
+ vector<size_t> a, b;
+ for (size_t i=0; i<ni_subsample; i++) {
+ a.push_back(0);
+ }
+ for (size_t i=0; i<ni_test; i++) {
+ b.push_back(i);
+ }
+
+ gsl_ran_choose (gsl_r, static_cast<void*>(&a[0]), ni_subsample, static_cast<void*>(&b[0]), ni_test, sizeof (size_t) );
+
+ //re-set indicator_idv and ni_test
+ int j=0;
+ for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
+ if (indicator_idv[i]==0) {continue;}
+ if(find(a.begin(), a.end(), j) == a.end()) {
+ indicator_idv[i]=0;
+ }
+ j++;
+ }
+ ni_test=ni_subsample;
+ }
+ }
+
+ //check ni_test
if (ni_test==0) {
error=true;
cout<<"error! number of analyzed individuals equals 0. "<<endl;
return;
}
-
+
//check covariates to see if they are correlated with each other, and to see if the intercept term is included
//after getting ni_test
//add or remove covariates
@@ -749,24 +1439,24 @@ void PARAM::ProcessCvtPhen ()
} else {
vector<double> cvt_row;
cvt_row.push_back(1);
-
+
for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) {
indicator_cvt.push_back(1);
-
+
cvt.push_back(cvt_row);
}
}
-
+
return;
}
-void PARAM::CopyCvt (gsl_matrix *W)
+void PARAM::CopyCvt (gsl_matrix *W)
{
size_t ci_test=0;
-
+
for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
if (indicator_idv[i]==0 || indicator_cvt[i]==0) {continue;}
for (size_t j=0; j<n_cvt; ++j) {
@@ -774,57 +1464,85 @@ void PARAM::CopyCvt (gsl_matrix *W)
}
ci_test++;
}
-
+
+ return;
+}
+
+
+void PARAM::CopyGxe (gsl_vector *env)
+{
+ size_t ci_test=0;
+
+ for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
+ if (indicator_idv[i]==0 || indicator_gxe[i]==0) {continue;}
+ gsl_vector_set (env, ci_test, gxe[i]);
+ ci_test++;
+ }
+
+ return;
+}
+
+void PARAM::CopyWeight (gsl_vector *w)
+{
+ size_t ci_test=0;
+
+ for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
+ if (indicator_idv[i]==0 || indicator_weight[i]==0) {continue;}
+ gsl_vector_set (w, ci_test, weight[i]);
+ ci_test++;
+ }
+
return;
}
//if flag=0, then use indicator_idv to load W and Y
//else, use indicator_cvt to load them
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag)
+void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag)
{
size_t ci_test=0;
-
+
for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
if (flag==0) {
if (indicator_idv[i]==0) {continue;}
} else {
if (indicator_cvt[i]==0) {continue;}
}
-
+
gsl_vector_set (y, ci_test, (pheno)[i][0]);
-
+
for (size_t j=0; j<n_cvt; ++j) {
gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
}
ci_test++;
}
-
+
return;
}
//if flag=0, then use indicator_idv to load W and Y
//else, use indicator_cvt to load them
-void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag)
+void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag)
{
size_t ci_test=0;
-
+
for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
if (flag==0) {
if (indicator_idv[i]==0) {continue;}
} else {
if (indicator_cvt[i]==0) {continue;}
- }
-
- for (size_t j=0; j<n_ph; ++j) {
+ }
+
+ for (size_t j=0; j<n_ph; ++j) {
gsl_matrix_set (Y, ci_test, j, (pheno)[i][j]);
}
for (size_t j=0; j<n_cvt; ++j) {
gsl_matrix_set (W, ci_test, j, (cvt)[i][j]);
}
+
ci_test++;
}
-
+
return;
}
@@ -832,18 +1550,18 @@ void PARAM::CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag)
-void PARAM::CopyRead (gsl_vector *log_N)
+void PARAM::CopyRead (gsl_vector *log_N)
{
size_t ci_test=0;
-
+
for (vector<int>::size_type i=0; i<indicator_idv.size(); ++i) {
if (indicator_idv[i]==0) {continue;}
- gsl_vector_set (log_N, ci_test, log(vec_read[i]) );
+ gsl_vector_set (log_N, ci_test, log(vec_read[i]) );
ci_test++;
}
-
+
return;
}
-
-
+
+
diff --git a/src/param.h b/src/param.h
index fa18181..3c3b42e 100644
--- a/src/param.h
+++ b/src/param.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __PARAM_H__
+#ifndef __PARAM_H__
#define __PARAM_H__
#include <vector>
@@ -39,14 +39,17 @@ public:
string a_major;
size_t n_miss;
double missingness;
- double maf;
+ double maf;
+ size_t n_idv;//number of non-missing individuals
+ size_t n_nb;//number of neighbours on the right hand side
+ size_t file_position;//snp location on file
};
//results for lmm
class SUMSTAT {
public:
double beta; //REML estimator for beta
- double se; //SE for beta
+ double se; //SE for beta
double lambda_remle; //REML estimator for lambda
double lambda_mle; //MLE estimator for lambda
double p_wald; //p value from a Wald test
@@ -75,50 +78,87 @@ public:
double rho;
double pge;
double logp;
-
+
size_t n_gamma;
};
+//header class
+class HEADER
+{
+
+public:
+ size_t rs_col;
+ size_t chr_col;
+ size_t pos_col;
+ size_t cm_col;
+ size_t a1_col;
+ size_t a0_col;
+ size_t z_col;
+ size_t beta_col;
+ size_t sebeta_col;
+ size_t chisq_col;
+ size_t p_col;
+ size_t n_col;
+ size_t nmis_col;
+ size_t nobs_col;
+ size_t af_col;
+ size_t var_col;
+ size_t ws_col;
+ size_t cor_col;
+ size_t coln;//number of columns
+};
+
class PARAM {
-public:
+public:
// IO related parameters
bool mode_silence;
int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests
- int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value;
+ int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; //which phenotype column needs analysis
size_t d_pace; //display pace
-
+
string file_bfile;
string file_geno;
string file_pheno;
string file_anno; //optional
+ string file_gxe; //optional
string file_cvt; //optional
+ string file_cat;
+ string file_var;
+ string file_beta;
+ string file_cor;
string file_kin;
string file_ku, file_kd;
string file_mk;
+ string file_q, file_mq;
+ string file_s, file_ms;
+ string file_v, file_mv;
+ string file_weight;
string file_out;
string path_out;
-
+
+
string file_epm; //estimated parameter file
string file_ebv; //estimated breeding value file
string file_log; //log file containing mean estimate
-
+
string file_read; //file containing total number of reads
string file_gene; //gene expression file
-
+
string file_snps; //file containing analyzed snps or genes
-
-
-
- // QC related parameters
+// WJA Added
+ string file_oxford;
+
+
+ // QC related parameters
double miss_level;
- double maf_level;
+ double maf_level;
double hwe_level;
double r2_level;
-
+
// LMM related parameters
double l_min;
double l_max;
@@ -130,7 +170,7 @@ public:
vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null;
vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null;
- double p_nr;
+ double p_nr;
double em_prec, nr_prec;
size_t em_iter, nr_iter;
size_t crt;
@@ -138,15 +178,16 @@ public:
//for fitting multiple variance components
//the first three are of size n_vc, and the next two are of size n_vc+1
+ bool noconstrain;
vector<double> v_traceG;
vector<double> v_pve;
vector<double> v_se_pve;
vector<double> v_sigma2;
- vector<double> v_se_sigma2;
+ vector<double> v_se_sigma2;
vector<double> v_beta;
- vector<double> v_se_beta;
-
+ vector<double> v_se_beta;
+
// BSLMM MCMC related parameters
double h_min, h_max, h_scale; //priors for h
double rho_min, rho_max, rho_scale; //priors for rho
@@ -163,7 +204,12 @@ public:
double trace_G;
HYPBSLMM cHyp_initial;
-
+
+ //VARCOV related parameters
+ double window_cm;
+ size_t window_bp;
+ size_t window_ns;
+
// Summary statistics
bool error;
size_t ni_total, ni_test, ni_cvt; //number of individuals
@@ -171,6 +217,8 @@ public:
size_t ns_total, ns_test; //number of snps
size_t ng_total, ng_test; //number of genes
size_t ni_control, ni_case; //number of controls and number of cases
+ size_t ni_subsample; //number of subsampled individuals
+ size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel
size_t n_cvt; //number of covariates
size_t n_ph; //number of phenotypes
size_t n_vc; //number of variance components (including the diagonal matrix)
@@ -186,42 +234,54 @@ public:
// Data
vector<vector<double> > pheno; //a vector record all phenotypes, NA replaced with -9
- vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9
+ vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9
+ vector<double> gxe; //a vector record the gxe variable (presumably the environmental variable used for gene-environment interaction; one entry per individual -- confirm against the reader), NA replaced with -9
+ vector<double> weight; //a vector record weights for the individuals, which is useful for animal breeding studies
vector<vector<int> > indicator_pheno; //a matrix record when a phenotype is missing for an individual; 0 missing, 1 available
vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
vector<int> indicator_cvt; //indicator for covariates, 0 missing, 1 available for analysis
-
+ vector<int> indicator_gxe; //indicator for gxe, 0 missing, 1 available for analysis
+ vector<int> indicator_weight; //indicator for weight, 0 missing, 1 available for analysis
+
vector<int> indicator_bv; //indicator for estimated breeding value file, 0 missing, 1 available for analysis
vector<int> indicator_read; //indicator for read file, 0 missing, 1 available for analysis
vector<double> vec_read; //total number of reads
vector<double> vec_bv; //breeding values
vector<size_t> est_column;
-
+
map<string, int> mapID2num; //map small ID number to number, from 0 to n-1
map<string, string> mapRS2chr; //map rs# to chromosome location
map<string, long int> mapRS2bp; //map rs# to base position
map<string, double> mapRS2cM; //map rs# to cM
map<string, double> mapRS2est; //map rs# to parameters
-
+ map<string, size_t> mapRS2cat; //map rs# to category number
+ map<string, double> mapRS2var; //map rs# to a real-valued "var" quantity (presumably a per-SNP variance -- confirm against the code that fills it); the category number lives in mapRS2cat
+
vector<SNPINFO> snpInfo; //record SNP information
set<string> setSnps; //a set of snps for analysis
-
+
//constructor
PARAM();
-
+
//functions
- void ReadFiles ();
- void CheckParam ();
- void CheckData ();
+ void ReadFiles ();
+ void CheckParam ();
+ void CheckData ();
void PrintSummary ();
- void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K);
void CheckCvt ();
void CopyCvt (gsl_matrix *W);
+ void CopyGxe (gsl_vector *gxe);
+ void CopyWeight (gsl_vector *w);
void ProcessCvtPhen();
void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
void CalcKin (gsl_matrix *matrix_kin);
+ void CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q);
+ void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix);
+ void WriteVar (const string suffix);
void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
void WriteVector (const gsl_vector *vector_D, const string suffix);
void CopyRead (gsl_vector *log_N);