aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Carbonetto2017-05-25 22:20:02 -0500
committerPeter Carbonetto2017-05-25 22:20:02 -0500
commit40e338ab5d1754f36b6a12b6f29eee74792e3b3e (patch)
treea5e47c0d1c5b383a737a604f594db238785ea387
parent991cba8fc62c792bfe43afb8659154e97bc8f309 (diff)
downloadpangemma-40e338ab5d1754f36b6a12b6f29eee74792e3b3e.tar.gz
A couple small adjustments to README.
-rw-r--r--README.md8
-rw-r--r--src/varcov.cpp143
-rw-r--r--src/varcov.h15
3 files changed, 36 insertions, 130 deletions
diff --git a/README.md b/README.md
index 334d3d9..12081d4 100644
--- a/README.md
+++ b/README.md
@@ -118,8 +118,7 @@ In both cases, we recommend downloading the
### Using precompiled binaries
1. Go to the [latest stable release](latest_release) and download the
-file appropriate for your platform: `gemma.linux.gz` or
-`gemma.macosx.gz`.
+file appropriate for your platform: gemma.linux.gz or gemma.macosx.gz.
2. Run `gunzip gemma.linux.gz` or `gunzip gemma.linux.gz` to
decompress the file.
@@ -133,8 +132,9 @@ example, `gemma.linux` was built using `gcc 4.8.5`, so you should have
`gcc 4.8.x`. If the libraries are installed somewhere non-standard,
you can tell where GEMMA can find the libraries by setting the
`LD_LIBRARY_PATH` environment variable. If you have the wrong C++
-compiler version, then the program will likely complain about dynamic
-linking errors.
+compiler version, or the libraries are in a place where GEMMA cannot
+find them, then the program will complain about dynamic linking
+errors.
### Building the binaries from source
diff --git a/src/varcov.cpp b/src/varcov.cpp
index fdc6f10..66cf3c4 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -1,6 +1,6 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Copyright (C) 2011-2017 Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -51,12 +51,8 @@
#include "mathfunc.h"
#endif
-
using namespace std;
-
-
-
void VARCOV::CopyFromParam (PARAM &cPar)
{
d_pace=cPar.d_pace;
@@ -79,17 +75,14 @@ void VARCOV::CopyFromParam (PARAM &cPar)
return;
}
-
void VARCOV::CopyToParam (PARAM &cPar)
{
cPar.time_opt=time_opt;
return;
}
-
-
-//chr rs ps n_idv allele1 allele0 af var window_size r2 (r2_11,n_11,r2_12,n_12...r2_1m,n_1m)
-void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, const vector<vector<double> > &Cov_mat)
+void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
+ const vector<vector<double> > &Cov_mat)
{
string file_cov;
file_cov=path_out+"/"+file_out;
@@ -134,11 +127,6 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, const
return;
}
-
-
-
-// sort SNPs first based on chromosomes then based on bp (or cm, if bp is not available)
-//if chr1=chr2 and bp1=bp2, then return with the same order
bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2)
{
int c_chr=snpInfo1.chr.compare(snpInfo2.chr);
@@ -160,12 +148,10 @@ bool CompareSNPinfo (const SNPINFO &snpInfo1, const SNPINFO &snpInfo2)
}
-// do not sort SNPs (because gzip files do not support random access)
-// then calculate n_nb, the number of neighbours, for each snp
+// Do not sort SNPs (because gzip files do not support random access)
+// then calculate n_nb, the number of neighbours, for each snp.
void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort)
{
- // stable_sort(snpInfo_sort.begin(), snpInfo_sort.end(), CompareSNPinfo);
-
size_t t2=0, n_nb=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (indicator_snp[t]==0) {continue;}
@@ -191,9 +177,7 @@ void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort)
return;
}
-
-
-//vector double is centered to have mean 0
+// Vector double is centered to have mean 0.
void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec)
{
cov_vec.clear();
@@ -215,94 +199,22 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec)
return;
}
-/*
-//somehow this can produce nan for some snps; perhaps due to missing values?
-//vector int is not centered, and have 0/1/2 values only
-//missing value is -9; to calculate covariance, these values are replaced by
-void Calc_Cor(const vector<vector<int> > &X_mat, vector<double> &cov_vec)
-{
- cov_vec.clear();
-
- int d1, d2, m1, m2, n1, n2, n12;
- double m1d, m2d, m12d, v1d, v2d, v;
-
- vector<int> x_vec=X_mat[0];
-
- //calculate m2
- m2=0; n2=0;
- for (size_t j=0; j<x_vec.size(); j++) {
- d2=x_vec[j];
- if (d2==-9) {continue;}
- m2+=d2; n2++;
- }
- m2d=(double)m2/(double)n2;
-
- for (size_t i=0; i<X_mat.size(); i++) {
- //calculate m1
- m1=0; n1=0;
- for (size_t j=0; j<x_vec.size(); j++) {
- d1=X_mat[i][j];
- if (d1==-9) {continue;}
- m1+=d1; n1++;
- }
- m1d=(double)m1/(double)n1;
-
- //calculate v1
- m1=0; m2=0; m12d=0; n12=0;
- for (size_t j=0; j<x_vec.size(); j++) {
- d1=X_mat[i][j];
- if (d1==-9) {
- n12++;
- } else if (d1!=0) {
- if (d1==1) {m12d+=1;} else if (d1==2) {m12d+=4;} else {m12d+=(double)(d1*d1);}
- }
- }
-
- v1d=((double)m12d+m1d*(double)m2+m2d*(double)m1+(double)n12*m1d*m2d)/(double)x_vec.size()-m1d*m2d;
-
- //calculate covariance
- if (i!=0) {
- m1=0; m2=0; m12d=0; n12=0;
- for (size_t j=0; j<x_vec.size(); j++) {
- d1=X_mat[i][j]; d2=x_vec[j];
- if (d1==-9 && d2==-9) {
- n12++;
- } else if (d1==-9) {
- m2+=d2;
- } else if (d2==-9) {
- m1+=d1;
- } else if (d1!=0 && d2!=0) {
- if (d1==1) {m12d+=d2;} else if (d1==2) {m12d+=d2+d2;} else {m12d+=(double)(d1*d2);}
- }
- }
-
- v=((double)m12d+m1d*(double)m2+m2d*(double)m1+(double)n12*m1d*m2d)/(double)x_vec.size()-m1d*m2d;
- v/=sqrt(v1d*v2d);
- } else {
- v2d=v1d;
- v=v1d/(double)x_vec.size();
- }
-
- cov_vec.push_back(v);
- }
- return;
-}
-*/
-
-
-//read the genotype file again, calculate r2 between pairs of SNPs within a window, output the file every 10K SNPs
-//the output results are sorted based on chr and bp
-//output format similar to assoc.txt files (remember n_miss is replaced by n_idv)
-
-//r2 between the current SNP and every following SNPs within the window_size (which can vary if cM was used)
-//read bimbam mean genotype file and calculate the covariance matrix for neighboring SNPs
-//output values at 10000-SNP-interval
+// Read the genotype file again, calculate r2 between pairs of SNPs
+// within a window, output the file every 10K SNPs the output results
+// are sorted based on chr and bp output format similar to assoc.txt
+// files (remember n_miss is replaced by n_idv).
+//
+// r2 between the current SNP and every following SNPs within the
+// window_size (which can vary if cM was used) read bimbam mean
+// genotype file and calculate the covariance matrix for neighboring
+// SNPs output values at 10000-SNP-interval.
+//
void VARCOV::AnalyzeBimbam ()
{
igzstream infile (file_geno.c_str(), igzstream::in);
if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
- //calculate the number of right-hand-side neighbours for each SNP
+ // Calculate the number of right-hand-side neighbours for each SNP.
vector<SNPINFO> snpInfo_sub;
CalcNB(snpInfo);
@@ -327,9 +239,9 @@ void VARCOV::AnalyzeBimbam ()
int n_nb=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%d_pace==0 || t==(indicator_snp.size()-1))
+ {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
if (indicator_snp[t]==0) {continue;}
- // if (t>=2) {break;}
if (X_mat.size()==0) {
n_nb=snpInfo[t].n_nb+1;
@@ -340,7 +252,7 @@ void VARCOV::AnalyzeBimbam ()
for (int i=0; i<n_nb; i++) {
if (X_mat.size()==0) {t2=t;}
- //read a line of the snp is filtered out
+ // Read a line of the snp is filtered out.
inc=0;
while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
t2++; inc++;
@@ -365,7 +277,7 @@ void VARCOV::AnalyzeBimbam ()
X_mat.erase(X_mat.begin());
- //write out var/cov values
+ // Write out var/cov values.
if (Cov_mat.size()==10000) {
WriteCov (1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
@@ -387,16 +299,13 @@ void VARCOV::AnalyzeBimbam ()
return;
}
-
-
-
void VARCOV::AnalyzePlink ()
{
string file_bed=file_bfile+".bed";
ifstream infile (file_bed.c_str(), ios::binary);
if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
- //calculate the number of right-hand-side neighbours for each SNP
+ // Calculate the number of right-hand-side neighbours for each SNP.
vector<SNPINFO> snpInfo_sub;
CalcNB(snpInfo);
@@ -421,9 +330,9 @@ void VARCOV::AnalyzePlink ()
int n_nb=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
- if (t%d_pace==0 || t==(indicator_snp.size()-1)) {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
+ if (t%d_pace==0 || t==(indicator_snp.size()-1))
+ {ProgressBar ("Reading SNPs ", t, indicator_snp.size()-1);}
if (indicator_snp[t]==0) {continue;}
- // if (t>=2) {break;}
if (X_mat.size()==0) {
n_nb=snpInfo[t].n_nb+1;
@@ -434,7 +343,7 @@ void VARCOV::AnalyzePlink ()
for (int i=0; i<n_nb; i++) {
if (X_mat.size()==0) {t2=t;}
- //read a line of the snp is filtered out
+ // Read a line of the snp is filtered out.
inc=0;
while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
t2++; inc++;
@@ -459,7 +368,7 @@ void VARCOV::AnalyzePlink ()
X_mat.erase(X_mat.begin());
- //write out var/cov values
+ // Write out var/cov values.
if (Cov_mat.size()==10000) {
WriteCov (1, snpInfo_sub, Cov_mat);
Cov_mat.clear();
diff --git a/src/varcov.h b/src/varcov.h
index b380b8c..a7fcd9c 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -1,6 +1,6 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Copyright (C) 2011-2017 Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -22,7 +22,6 @@
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
-
#ifdef FORCE_FLOAT
#include "param_float.h"
#include "io_float.h"
@@ -33,13 +32,10 @@
using namespace std;
-
-
-
class VARCOV {
public:
- // IO related parameters
+ // IO related parameters.
string file_out;
string path_out;
string file_geno;
@@ -53,16 +49,17 @@ public:
double time_opt;
- // Class specific parameters
+ // Class specific parameters.
double window_cm;
size_t window_bp;
size_t window_ns;
- // Main functions
+ // Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
void CalcNB (vector<SNPINFO> &snpInfo_sort);
- void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, const vector<vector<double> > &Cov_mat);
+ void WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
+ const vector<vector<double> > &Cov_mat);
void AnalyzeBimbam ();
void AnalyzePlink ();
};