aboutsummaryrefslogtreecommitdiff
path: root/src/param.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/param.h')
-rw-r--r--src/param.h122
1 files changed, 91 insertions, 31 deletions
diff --git a/src/param.h b/src/param.h
index fa18181..3c3b42e 100644
--- a/src/param.h
+++ b/src/param.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __PARAM_H__
+#ifndef __PARAM_H__
#define __PARAM_H__
#include <vector>
@@ -39,14 +39,17 @@ public:
string a_major;
size_t n_miss;
double missingness;
- double maf;
+ double maf;
+ size_t n_idv;//number of non-missing individuals
+ size_t n_nb;//number of neighbours on the right hand side
+ size_t file_position;//snp location on file
};
//results for lmm
class SUMSTAT {
public:
double beta; //REML estimator for beta
- double se; //SE for beta
+ double se; //SE for beta
double lambda_remle; //REML estimator for lambda
double lambda_mle; //MLE estimator for lambda
double p_wald; //p value from a Wald test
@@ -75,50 +78,87 @@ public:
double rho;
double pge;
double logp;
-
+
size_t n_gamma;
};
+//header class
+class HEADER
+{
+
+public:
+ size_t rs_col;
+ size_t chr_col;
+ size_t pos_col;
+ size_t cm_col;
+ size_t a1_col;
+ size_t a0_col;
+ size_t z_col;
+ size_t beta_col;
+ size_t sebeta_col;
+ size_t chisq_col;
+ size_t p_col;
+ size_t n_col;
+ size_t nmis_col;
+ size_t nobs_col;
+ size_t af_col;
+ size_t var_col;
+ size_t ws_col;
+ size_t cor_col;
+ size_t coln;//number of columns
+};
+
class PARAM {
-public:
+public:
// IO related parameters
bool mode_silence;
int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests
- int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value;
+ int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; //which phenotype column needs analysis
size_t d_pace; //display pace
-
+
string file_bfile;
string file_geno;
string file_pheno;
string file_anno; //optional
+ string file_gxe; //optional
string file_cvt; //optional
+ string file_cat;
+ string file_var;
+ string file_beta;
+ string file_cor;
string file_kin;
string file_ku, file_kd;
string file_mk;
+ string file_q, file_mq;
+ string file_s, file_ms;
+ string file_v, file_mv;
+ string file_weight;
string file_out;
string path_out;
-
+
+
string file_epm; //estimated parameter file
string file_ebv; //estimated breeding value file
string file_log; //log file containing mean estimate
-
+
string file_read; //file containing total number of reads
string file_gene; //gene expression file
-
+
string file_snps; //file containing analyzed snps or genes
-
-
-
- // QC related parameters
+// WJA Added
+ string file_oxford;
+
+
+ // QC related parameters
double miss_level;
- double maf_level;
+ double maf_level;
double hwe_level;
double r2_level;
-
+
// LMM related parameters
double l_min;
double l_max;
@@ -130,7 +170,7 @@ public:
vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null;
vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null;
- double p_nr;
+ double p_nr;
double em_prec, nr_prec;
size_t em_iter, nr_iter;
size_t crt;
@@ -138,15 +178,16 @@ public:
//for fitting multiple variance components
//the first three are of size n_vc, and the next two are of size n_vc+1
+ bool noconstrain;
vector<double> v_traceG;
vector<double> v_pve;
vector<double> v_se_pve;
vector<double> v_sigma2;
- vector<double> v_se_sigma2;
+ vector<double> v_se_sigma2;
vector<double> v_beta;
- vector<double> v_se_beta;
-
+ vector<double> v_se_beta;
+
// BSLMM MCMC related parameters
double h_min, h_max, h_scale; //priors for h
double rho_min, rho_max, rho_scale; //priors for rho
@@ -163,7 +204,12 @@ public:
double trace_G;
HYPBSLMM cHyp_initial;
-
+
+ //VARCOV related parameters
+ double window_cm;
+ size_t window_bp;
+ size_t window_ns;
+
// Summary statistics
bool error;
size_t ni_total, ni_test, ni_cvt; //number of individuals
@@ -171,6 +217,8 @@ public:
size_t ns_total, ns_test; //number of snps
size_t ng_total, ng_test; //number of genes
size_t ni_control, ni_case; //number of controls and number of cases
+ size_t ni_subsample; //number of subsampled individuals
+ size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel
size_t n_cvt; //number of covariates
size_t n_ph; //number of phenotypes
size_t n_vc; //number of variance components (including the diagonal matrix)
@@ -186,42 +234,54 @@ public:
// Data
vector<vector<double> > pheno; //a vector record all phenotypes, NA replaced with -9
- vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9
+ vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9
+ vector<double> gxe; //a vector record all covariates, NA replaced with -9
+ vector<double> weight; //a vector record weights for the individuals, which is useful for animal breeding studies
vector<vector<int> > indicator_pheno; //a matrix record when a phenotype is missing for an individual; 0 missing, 1 available
vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis
vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
vector<int> indicator_cvt; //indicator for covariates, 0 missing, 1 available for analysis
-
+ vector<int> indicator_gxe; //indicator for gxe, 0 missing, 1 available for analysis
+ vector<int> indicator_weight; //indicator for weight, 0 missing, 1 available for analysis
+
vector<int> indicator_bv; //indicator for estimated breeding value file, 0 missing, 1 available for analysis
vector<int> indicator_read; //indicator for read file, 0 missing, 1 available for analysis
vector<double> vec_read; //total number of reads
vector<double> vec_bv; //breeding values
vector<size_t> est_column;
-
+
map<string, int> mapID2num; //map small ID number to number, from 0 to n-1
map<string, string> mapRS2chr; //map rs# to chromosome location
map<string, long int> mapRS2bp; //map rs# to base position
map<string, double> mapRS2cM; //map rs# to cM
map<string, double> mapRS2est; //map rs# to parameters
-
+ map<string, size_t> mapRS2cat; //map rs# to category number
+ map<string, double> mapRS2var; //map rs# to category number
+
vector<SNPINFO> snpInfo; //record SNP information
set<string> setSnps; //a set of snps for analysis
-
+
//constructor
PARAM();
-
+
//functions
- void ReadFiles ();
- void CheckParam ();
- void CheckData ();
+ void ReadFiles ();
+ void CheckParam ();
+ void CheckData ();
void PrintSummary ();
- void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+ void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K);
void CheckCvt ();
void CopyCvt (gsl_matrix *W);
+ void CopyGxe (gsl_vector *gxe);
+ void CopyWeight (gsl_vector *w);
void ProcessCvtPhen();
void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
void CalcKin (gsl_matrix *matrix_kin);
+ void CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q);
+ void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix);
+ void WriteVar (const string suffix);
void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
void WriteVector (const gsl_vector *vector_D, const string suffix);
void CopyRead (gsl_vector *log_N);