aboutsummaryrefslogtreecommitdiff
path: root/src/param.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/param.h')
-rw-r--r--src/param.h319
1 files changed, 182 insertions, 137 deletions
diff --git a/src/param.h b/src/param.h
index 72b7d85..18d5e36 100644
--- a/src/param.h
+++ b/src/param.h
@@ -1,6 +1,6 @@
/*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011 Xiang Zhou
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright (C) 2011-2017 Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __PARAM_H__
@@ -27,8 +27,6 @@
using namespace std;
-
-
class SNPINFO {
public:
string chr;
@@ -40,37 +38,36 @@ public:
size_t n_miss;
double missingness;
double maf;
- size_t n_idv;//number of non-missing individuals
- size_t n_nb;//number of neighbours on the right hand side
- size_t file_position;//snp location on file
+ size_t n_idv; // Number of non-missing individuals.
+ size_t n_nb; // Number of neighbours on the right hand side.
+ size_t file_position; // SNP location in file.
};
-//results for lmm
+// Results for LMM.
class SUMSTAT {
public:
- double beta; //REML estimator for beta
- double se; //SE for beta
- double lambda_remle; //REML estimator for lambda
- double lambda_mle; //MLE estimator for lambda
- double p_wald; //p value from a Wald test
- double p_lrt; //p value from a likelihood ratio test
- double p_score; //p value from a score test
+ double beta; // REML estimator for beta.
+ double se; // SE for beta.
+ double lambda_remle; // REML estimator for lambda.
+ double lambda_mle; // MLE estimator for lambda.
+ double p_wald; // p value from a Wald test.
+ double p_lrt; // p value from a likelihood ratio test.
+ double p_score; // p value from a score test.
};
-//results for mvlmm
+// Results for mvLMM.
class MPHSUMSTAT {
public:
- vector<double> v_beta; //REML estimator for beta
- double p_wald; //p value from a Wald test
- double p_lrt; //p value from a likelihood ratio test
- double p_score; //p value from a score test
- vector<double> v_Vg; //estimator for Vg, right half
- vector<double> v_Ve; //estimator for Ve, right half
- vector<double> v_Vbeta; //estimator for Vbeta, right half
+ vector<double> v_beta; // REML estimator for beta.
+ double p_wald; // p value from a Wald test.
+ double p_lrt; // p value from a likelihood ratio test.
+ double p_score; // p value from a score test.
+ vector<double> v_Vg; // Estimator for Vg, right half.
+ vector<double> v_Ve; // Estimator for Ve, right half.
+ vector<double> v_Vbeta; // Estimator for Vbeta, right half.
};
-
-//hyper-parameters for bslmm
+// Hyper-parameters for BSLMM.
class HYPBSLMM {
public:
double h;
@@ -78,15 +75,11 @@ public:
double rho;
double pge;
double logp;
-
size_t n_gamma;
};
-
-//header class
-class HEADER
-{
-
+// Header class.
+class HEADER {
public:
size_t rs_col;
size_t chr_col;
@@ -108,28 +101,26 @@ public:
size_t var_col;
size_t ws_col;
size_t cor_col;
- size_t coln;//number of columns
+ size_t coln; // Number of columns.
set<size_t> catc_col;
set<size_t> catd_col;
};
-
-
class PARAM {
public:
- // IO related parameters
+ // IO-related parameters.
bool mode_silence;
- int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests
- int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value;
- vector<size_t> p_column; //which phenotype column needs analysis
- size_t d_pace; //display pace
+ int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
+ int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
+ vector<size_t> p_column; // Which phenotype column needs analysis.
+ size_t d_pace; // Display pace
string file_bfile, file_mbfile;
string file_geno, file_mgeno;
string file_pheno;
- string file_anno; //optional
- string file_gxe; //optional
- string file_cvt; //optional
+ string file_anno; // Optional.
+ string file_gxe; // Optional.
+ string file_cvt; // Optional.
string file_cat, file_mcat;
string file_catc, file_mcatc;
string file_var;
@@ -144,26 +135,23 @@ public:
string file_bf, file_hyp;
string path_out;
-
- string file_epm; //estimated parameter file
- string file_ebv; //estimated breeding value file
- string file_log; //log file containing mean estimate
-
- string file_read; //file containing total number of reads
- string file_gene; //gene expression file
-
- string file_snps; //file containing analyzed snps or genes
-// WJA Added
+ string file_epm; // Estimated parameter file.
+ string file_ebv; // Estimated breeding value file.
+ string file_log; // Log file containing mean estimate.
+ string file_read; // File containing total number of reads.
+ string file_gene; // Gene expression file.
+ string file_snps; // File containing analyzed SNPs or genes.
+
+ // WJA added.
string file_oxford;
-
- // QC related parameters
+ // QC-related parameters.
double miss_level;
double maf_level;
double hwe_level;
double r2_level;
- // LMM related parameters
+ // LMM-related parameters.
double l_min;
double l_max;
size_t n_region;
@@ -172,16 +160,18 @@ public:
double pve_null, pve_se_null, pve_total, se_pve_total;
double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null;
vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
- vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null;
- vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null;
+ vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null;
+ vector<double> VVe_mle_null;
+ vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null;
+ vector<double> se_beta_mle_null;
double p_nr;
double em_prec, nr_prec;
size_t em_iter, nr_iter;
size_t crt;
- double pheno_mean; //phenotype mean from bslmm fitting or for prediction
+ double pheno_mean; // Phenotype mean from BSLMM fitting or prediction.
- //for fitting multiple variance components
- //the first three are of size n_vc, and the next two are of size n_vc+1
+ // For fitting multiple variance components.
+ // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1.
bool noconstrain;
vector<double> v_traceG;
vector<double> v_pve;
@@ -194,98 +184,141 @@ public:
vector<double> v_beta;
vector<double> v_se_beta;
- // BSLMM MCMC related parameters
- double h_min, h_max, h_scale; //priors for h
- double rho_min, rho_max, rho_scale; //priors for rho
- double logp_min, logp_max, logp_scale; //priors for log(pi)
+ // BSLMM/MCMC-related parameters.
+ double h_min, h_max, h_scale; // Priors for h.
+ double rho_min, rho_max, rho_scale; // Priors for rho.
+ double logp_min, logp_max, logp_scale; // Priors for log(pi).
size_t h_ngrid, rho_ngrid;
- size_t s_min, s_max; //minimum and maximum number of gammas
- size_t w_step; //number of warm up/burn in iterations
- size_t s_step; //number of sampling iterations
- size_t r_pace; //record pace
- size_t w_pace; //write pace
- size_t n_accept; //number of acceptance
- size_t n_mh; //number of MH steps within each iteration
- double geo_mean; //mean of the geometric distribution
+ size_t s_min, s_max; // Min & max. number of gammas.
+ size_t w_step; // # warm up/burn in iter.
+ size_t s_step; // # sampling iterations.
+ size_t r_pace; // Record pace.
+ size_t w_pace; // Write pace.
+ size_t n_accept; // Number of acceptance.
+ size_t n_mh; // # MH steps in each iter.
+ double geo_mean; // Mean of geometric dist.
long int randseed;
double trace_G;
HYPBSLMM cHyp_initial;
- //VARCOV related parameters
+ // VARCOV-related parameters.
double window_cm;
size_t window_bp;
size_t window_ns;
- //vc related parameters
+ // vc-related parameters.
size_t n_block;
- // Summary statistics
+ // Summary statistics.
bool error;
- size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; //number of individuals
- size_t np_obs, np_miss; //number of observed and missing phenotypes
- size_t ns_total, ns_test, ns_study, ns_ref; //number of snps
- size_t ng_total, ng_test; //number of genes
- size_t ni_control, ni_case; //number of controls and number of cases
- size_t ni_subsample; //number of subsampled individuals
- //size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel
- size_t n_cvt; //number of covariates
- size_t n_cat; //number of continuous categories
- size_t n_ph; //number of phenotypes
- size_t n_vc; //number of variance components (including the diagonal matrix)
- double time_total; //record total time
- double time_G; //time spent on reading files the second time and calculate K
- double time_eigen; //time spent on eigen-decomposition
- double time_UtX; //time spent on calculating UX and Uy
- double time_UtZ; //time spent on calculating UtZ, for probit BSLMM
- double time_opt; //time spent on optimization iterations/or mcmc
- double time_Omega; //time spent on calculating Omega
- double time_hyp; //time spent on sampling hyper-parameters, in PMM
- double time_Proposal; //time spend on constructing the proposal distribution (i.e. the initial lmm or lm analysis)
-
- // Data
- vector<vector<double> > pheno; //a vector record all phenotypes, NA replaced with -9
- vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9
- vector<double> gxe; //a vector record all covariates, NA replaced with -9
- vector<double> weight; //a vector record weights for the individuals, which is useful for animal breeding studies
- vector<vector<int> > indicator_pheno; //a matrix record when a phenotype is missing for an individual; 0 missing, 1 available
- vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
- vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
- vector< vector<int> > mindicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
- vector<int> indicator_cvt; //indicator for covariates, 0 missing, 1 available for analysis
- vector<int> indicator_gxe; //indicator for gxe, 0 missing, 1 available for analysis
- vector<int> indicator_weight; //indicator for weight, 0 missing, 1 available for analysis
-
- vector<int> indicator_bv; //indicator for estimated breeding value file, 0 missing, 1 available for analysis
- vector<int> indicator_read; //indicator for read file, 0 missing, 1 available for analysis
- vector<double> vec_read; //total number of reads
- vector<double> vec_bv; //breeding values
+
+ // Number of individuals.
+ size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+
+ // Number of observed and missing phenotypes.
+ size_t np_obs, np_miss;
+
+ // Number of SNPs.
+ size_t ns_total, ns_test, ns_study, ns_ref;
+
+ size_t ng_total, ng_test; // Number of genes.
+ size_t ni_control, ni_case; // Number of controls and number of cases.
+ size_t ni_subsample; // Number of subsampled individuals.
+ size_t n_cvt; // Number of covariates.
+ size_t n_cat; // Number of continuous categories.
+ size_t n_ph; // Number of phenotypes.
+ size_t n_vc; // Number of variance components
+ // (including the diagonal matrix).
+ double time_total; // Record total time.
+ double time_G; // Time spent on reading files the
+ // second time and calculate K.
+ double time_eigen; // Time spent on eigen-decomposition.
+ double time_UtX; // Time spent on calculating UX and Uy.
+ double time_UtZ; // Time calculating UtZ for probit BSLMM.
+ double time_opt; // Time on optimization iterations/MCMC.
+ double time_Omega; // Time spent on calculating Omega.
+ double time_hyp; // Time sampling hyperparameters in PMM.
+ double time_Proposal; // Time spent on constructing the
+ // proposal distribution (i.e. the
+ // initial LMM or LM analysis).
+
+ // Data.
+ // Vector recording all phenotypes (NA replaced with -9).
+ vector<vector<double> > pheno;
+
+ // Vector recording all covariates (NA replaced with -9).
+ vector<vector<double> > cvt;
+
+ // Vector recording all covariates (NA replaced with -9).
+ vector<double> gxe;
+
+ // Vector recording weights for the individuals, which is
+ // useful for animal breeding studies.
+ vector<double> weight;
+
+ // Matrix recording when a phenotype is missing for an
+ // individual; 0 missing, 1 available.
+ vector<vector<int> > indicator_pheno;
+
+ // Indicator for individuals (phenotypes): 0 missing, 1
+ // available for analysis
+ vector<int> indicator_idv;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector<int> indicator_snp;
+
+ // Sequence indicator for SNPs: 0 ignored because of (a) maf,
+ // (b) miss, (c) non-poly; 1 available for analysis.
+ vector< vector<int> > mindicator_snp;
+
+ // Indicator for covariates: 0 missing, 1 available for
+ // analysis.
+ vector<int> indicator_cvt;
+
+ // Indicator for gxe: 0 missing, 1 available for analysis.
+ vector<int> indicator_gxe;
+
+ // Indicator for weight: 0 missing, 1 available for analysis.
+ vector<int> indicator_weight;
+
+ // Indicator for estimated breeding value file: 0 missing, 1
+ // available for analysis.
+ vector<int> indicator_bv;
+
+ // Indicator for read file: 0 missing, 1 available for analysis.
+ vector<int> indicator_read;
+ vector<double> vec_read; // Total number of reads.
+ vector<double> vec_bv; // Breeding values.
vector<size_t> est_column;
- map<string, int> mapID2num; //map small ID number to number, from 0 to n-1
- map<string, string> mapRS2chr; //map rs# to chromosome location
- map<string, long int> mapRS2bp; //map rs# to base position
- map<string, double> mapRS2cM; //map rs# to cM
- map<string, double> mapRS2est; //map rs# to parameters
- map<string, size_t> mapRS2cat; //map rs# to category number
- map<string, vector<double> > mapRS2catc; //map rs# to continuous categories
- map<string, double> mapRS2wsnp; //map rs# to snp weights
- map<string, vector<double> > mapRS2wcat; //map rs# to snp cat weights
-
- vector<SNPINFO> snpInfo; //record SNP information
- vector< vector<SNPINFO> > msnpInfo; //record SNP information
- set<string> setSnps; //a set of snps for analysis
-
- //constructor
+ map<string, int> mapID2num; // Map small ID to number, 0 to n-1.
+ map<string, string> mapRS2chr; // Map rs# to chromosome location.
+ map<string, long int> mapRS2bp; // Map rs# to base position.
+ map<string, double> mapRS2cM; // Map rs# to cM.
+ map<string, double> mapRS2est; // Map rs# to parameters.
+ map<string, size_t> mapRS2cat; // Map rs# to category number.
+ map<string, vector<double> > mapRS2catc; // Map rs# to cont. cat's.
+ map<string, double> mapRS2wsnp; // Map rs# to SNP weights.
+ map<string, vector<double> > mapRS2wcat; // Map rs# to SNP cat weights.
+
+ vector<SNPINFO> snpInfo; // Record SNP information.
+ vector< vector<SNPINFO> > msnpInfo; // Record SNP information.
+ set<string> setSnps; // Set of snps for analysis.
+
+ // Constructor.
PARAM();
- //functions
+ // Functions.
void ReadFiles ();
void CheckParam ();
void CheckData ();
void PrintSummary ();
- void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
- void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K,
+ const bool calc_K);
+ void ReadGenotypes (vector<vector<unsigned char> > &Xt,
+ gsl_matrix *K, const bool calc_K);
void CheckCvt ();
void CopyCvt (gsl_matrix *W);
void CopyA (size_t flag, gsl_matrix *A);
@@ -295,15 +328,27 @@ public:
void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
void CalcKin (gsl_matrix *matrix_kin);
- void CalcS (const map<string, double> &mapRS2wA, const map<string, double> &mapRS2wK, const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns);
- void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix);
+ void CalcS (const map<string, double> &mapRS2wA,
+ const map<string, double> &mapRS2wK,
+ const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K,
+ gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns);
+ void WriteVector (const gsl_vector *q, const gsl_vector *s,
+ const size_t n_total, const string suffix);
void WriteVar (const string suffix);
void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
void WriteVector (const gsl_vector *vector_D, const string suffix);
void CopyRead (gsl_vector *log_N);
- void ObtainWeight (const set<string> &setSnps_beta, map<string, double> &mapRS2wK);
- void UpdateWeight (const size_t pve_flag, const map<string, double> &mapRS2wK, const size_t ni_test, const gsl_vector *ns, map<string, double> &mapRS2wA);
- void UpdateSNPnZ (const map<string, double> &mapRS2wA, const map<string, string> &mapRS2A1, const map<string, double> &mapRS2z, gsl_vector *w, gsl_vector *z, vector<size_t> &vec_cat);
+ void ObtainWeight (const set<string> &setSnps_beta, map<string,
+ double> &mapRS2wK);
+ void UpdateWeight (const size_t pve_flag,
+ const map<string,double> &mapRS2wK,
+ const size_t ni_test, const gsl_vector *ns,
+ map<string, double> &mapRS2wA);
+ void UpdateSNPnZ (const map<string, double> &mapRS2wA,
+ const map<string, string> &mapRS2A1,
+ const map<string, double> &mapRS2z,
+ gsl_vector *w, gsl_vector *z,
+ vector<size_t> &vec_cat);
void UpdateSNP (const map<string, double> &mapRS2wA);
};