about summary refs log tree commit diff
path: root/src/param.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/param.h')
-rw-r--r--src/param.h122
1 files changed, 91 insertions, 31 deletions
diff --git a/src/param.h b/src/param.h
index fa18181..3c3b42e 100644
--- a/src/param.h
+++ b/src/param.h
@@ -16,7 +16,7 @@
     along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
-#ifndef __PARAM_H__                
+#ifndef __PARAM_H__
 #define __PARAM_H__
 
 #include <vector>
@@ -39,14 +39,17 @@ public:
 	string a_major;
 	size_t n_miss;
 	double missingness;
-	double maf;	
+	double maf;
+	size_t n_idv;//number of non-missing individuals
+	size_t n_nb;//number of neighbours on the right hand side
+	size_t file_position;//snp location on file
 };
 
 //results for lmm
 class SUMSTAT {
 public:
 	double beta;				//REML estimator for beta
-	double se;				//SE for beta  
+	double se;				//SE for beta
 	double lambda_remle;		//REML estimator for lambda
 	double lambda_mle;		//MLE estimator for lambda
 	double p_wald;			//p value from a Wald test
@@ -75,50 +78,87 @@ public:
 	double rho;
 	double pge;
 	double logp;
-	
+
 	size_t n_gamma;
 };
 
 
+//header class
+class HEADER
+{
+
+public:
+    size_t rs_col;
+    size_t chr_col;
+    size_t pos_col;
+    size_t cm_col;
+    size_t a1_col;
+    size_t a0_col;
+    size_t z_col;
+    size_t beta_col;
+    size_t sebeta_col;
+    size_t chisq_col;
+    size_t p_col;
+    size_t n_col;
+    size_t nmis_col;
+    size_t nobs_col;
+    size_t af_col;
+    size_t var_col;
+    size_t ws_col;
+    size_t cor_col;
+    size_t coln;//number of columns
+};
+
 
 
 class PARAM {
-public:	
+public:
 	// IO related parameters
 	bool mode_silence;
 	int a_mode;				//analysis mode, 1/2/3/4 for Frequentist tests
-	int k_mode;				//kinship read mode: 1: n by n matrix, 2: id/id/k_value; 		
+	int k_mode;				//kinship read mode: 1: n by n matrix, 2: id/id/k_value;
 	vector<size_t> p_column;			//which phenotype column needs analysis
 	size_t d_pace;		//display pace
-	
+
 	string file_bfile;
 	string file_geno;
 	string file_pheno;
 	string file_anno;		//optional
+	string file_gxe;		//optional
 	string file_cvt;		//optional
+	string file_cat;
+	string file_var;
+	string file_beta;
+	string file_cor;
 	string file_kin;
 	string file_ku, file_kd;
 	string file_mk;
+	string file_q, file_mq;
+	string file_s, file_ms;
+	string file_v, file_mv;
+	string file_weight;
 	string file_out;
 	string path_out;
-	
+
+
 	string file_epm;		//estimated parameter file
 	string file_ebv;		//estimated breeding value file
 	string file_log;		//log file containing mean estimate
-	
+
 	string file_read;		//file containing total number of reads
 	string file_gene;		//gene expression file
-	
+
 	string file_snps;		//file containing analyzed snps or genes
-	
-	
-	
-	// QC related parameters	
+// WJA Added
+	string file_oxford;
+
+
+	// QC related parameters
 	double miss_level;
-	double maf_level;	
+	double maf_level;
 	double hwe_level;
 	double r2_level;
-	
+
 	// LMM related parameters
 	double l_min;
 	double l_max;
@@ -130,7 +170,7 @@ public:
 	vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null;
 	vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null;
 	vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null;
-	double p_nr;	
+	double p_nr;
 	double em_prec, nr_prec;
 	size_t em_iter, nr_iter;
 	size_t crt;
@@ -138,15 +178,16 @@ public:
 
 	//for fitting multiple variance components
 	//the first three are of size n_vc, and the next two are of size n_vc+1
+	bool noconstrain;
 	vector<double> v_traceG;
 	vector<double> v_pve;
 	vector<double> v_se_pve;
 
 	vector<double> v_sigma2;
-	vector<double> v_se_sigma2;	
+	vector<double> v_se_sigma2;
 	vector<double> v_beta;
-	vector<double> v_se_beta;	
-	
+	vector<double> v_se_beta;
+
 	// BSLMM MCMC related parameters
 	double h_min, h_max, h_scale;			//priors for h
 	double rho_min, rho_max, rho_scale;		//priors for rho
@@ -163,7 +204,12 @@ public:
 	double trace_G;
 
 	HYPBSLMM cHyp_initial;
-		
+
+	//VARCOV related parameters
+	double window_cm;
+	size_t window_bp;
+	size_t window_ns;
+
 	// Summary statistics
 	bool error;
 	size_t ni_total, ni_test, ni_cvt;	//number of individuals
@@ -171,6 +217,8 @@ public:
 	size_t ns_total, ns_test;	//number of snps
 	size_t ng_total, ng_test;	//number of genes
 	size_t ni_control, ni_case;	//number of controls and number of cases
+	size_t ni_subsample;            //number of subsampled individuals
+	size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel
 	size_t n_cvt;			//number of covariates
 	size_t n_ph;			//number of phenotypes
 	size_t n_vc;			//number of variance components (including the diagonal matrix)
@@ -186,42 +234,54 @@ public:
 
 	// Data
 	vector<vector<double> > pheno;			//a vector record all phenotypes, NA replaced with -9
-	vector<vector<double> > cvt;			//a vector record all covariates, NA replaced with -9	
+	vector<vector<double> > cvt;			//a vector record all covariates, NA replaced with -9
+	vector<double> gxe;			//a vector record all covariates, NA replaced with -9
+	vector<double> weight;                          //a vector record weights for the individuals, which is useful for animal breeding studies
 	vector<vector<int> > indicator_pheno;			//a matrix record when a phenotype is missing for an individual; 0 missing, 1 available
 	vector<int> indicator_idv;				//indicator for individuals (phenotypes), 0 missing, 1 available for analysis
 	vector<int> indicator_snp;				//sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis
 	vector<int> indicator_cvt;				//indicator for covariates, 0 missing, 1 available for analysis
-	
+	vector<int> indicator_gxe;				//indicator for gxe, 0 missing, 1 available for analysis
+	vector<int> indicator_weight;                           //indicator for weight, 0 missing, 1 available for analysis
+
 	vector<int> indicator_bv;				//indicator for estimated breeding value file, 0 missing, 1 available for analysis
 	vector<int> indicator_read;				//indicator for read file, 0 missing, 1 available for analysis
 	vector<double> vec_read;				//total number of reads
 	vector<double> vec_bv;					//breeding values
 	vector<size_t> est_column;
-	
+
 	map<string, int> mapID2num;		//map small ID number to number, from 0 to n-1
 	map<string, string> mapRS2chr;		//map rs# to chromosome location
 	map<string, long int> mapRS2bp;		//map rs# to base position
 	map<string, double> mapRS2cM;		//map rs# to cM
 	map<string, double> mapRS2est;			//map rs# to parameters
-	
+	map<string, size_t> mapRS2cat;          //map rs# to category number
+	map<string, double> mapRS2var;          //map rs# to category number
+
 	vector<SNPINFO> snpInfo;		//record SNP information
 	set<string> setSnps;			//a set of snps for analysis
-	
+
 	//constructor
 	PARAM();
-	
+
 	//functions
-	void ReadFiles ();		
-	void CheckParam (); 
-	void CheckData ();	
+	void ReadFiles ();
+	void CheckParam ();
+	void CheckData ();
 	void PrintSummary ();
-	void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);	
+	void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
+	void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K);
 	void CheckCvt ();
 	void CopyCvt (gsl_matrix *W);
+	void CopyGxe (gsl_vector *gxe);
+	void CopyWeight (gsl_vector *w);
 	void ProcessCvtPhen();
 	void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag);
 	void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag);
 	void CalcKin (gsl_matrix *matrix_kin);
+	void CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q);
+	void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix);
+	void WriteVar (const string suffix);
 	void WriteMatrix (const gsl_matrix *matrix_U, const string suffix);
 	void WriteVector (const gsl_vector *vector_D, const string suffix);
 	void CopyRead (gsl_vector *log_N);