diff options
Diffstat (limited to 'src/param.h')
-rw-r--r-- | src/param.h | 122 |
1 files changed, 91 insertions, 31 deletions
diff --git a/src/param.h b/src/param.h index fa18181..3c3b42e 100644 --- a/src/param.h +++ b/src/param.h @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __PARAM_H__ +#ifndef __PARAM_H__ #define __PARAM_H__ #include <vector> @@ -39,14 +39,17 @@ public: string a_major; size_t n_miss; double missingness; - double maf; + double maf; + size_t n_idv;//number of non-missing individuals + size_t n_nb;//number of neighbours on the right hand side + size_t file_position;//snp location on file }; //results for lmm class SUMSTAT { public: double beta; //REML estimator for beta - double se; //SE for beta + double se; //SE for beta double lambda_remle; //REML estimator for lambda double lambda_mle; //MLE estimator for lambda double p_wald; //p value from a Wald test @@ -75,50 +78,87 @@ public: double rho; double pge; double logp; - + size_t n_gamma; }; +//header class +class HEADER +{ + +public: + size_t rs_col; + size_t chr_col; + size_t pos_col; + size_t cm_col; + size_t a1_col; + size_t a0_col; + size_t z_col; + size_t beta_col; + size_t sebeta_col; + size_t chisq_col; + size_t p_col; + size_t n_col; + size_t nmis_col; + size_t nobs_col; + size_t af_col; + size_t var_col; + size_t ws_col; + size_t cor_col; + size_t coln;//number of columns +}; + class PARAM { -public: +public: // IO related parameters bool mode_silence; int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests - int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value; + int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector<size_t> p_column; //which phenotype column needs analysis size_t d_pace; //display pace - + string file_bfile; string file_geno; string file_pheno; string file_anno; //optional + string file_gxe; //optional string file_cvt; //optional + string file_cat; + string file_var; + string file_beta; + string file_cor; string file_kin; string file_ku, file_kd; string file_mk; + string file_q, file_mq; + string file_s, file_ms; + string file_v, file_mv; + string file_weight; string file_out; string path_out; - + + string file_epm; //estimated parameter file string file_ebv; //estimated breeding value file string file_log; //log file containing mean estimate - + string file_read; //file containing total number of reads string file_gene; //gene expression file - + string file_snps; //file containing analyzed snps or genes - - - - // QC related parameters +// WJA Added + string file_oxford; + + + // QC related parameters double miss_level; - double maf_level; + double maf_level; double hwe_level; double r2_level; - + // LMM related parameters double l_min; double l_max; @@ -130,7 +170,7 @@ public: vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null; vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null; - double p_nr; + double p_nr; double em_prec, nr_prec; size_t em_iter, nr_iter; size_t crt; @@ -138,15 +178,16 @@ public: //for fitting multiple variance components //the first three are of size n_vc, and the next two are of size n_vc+1 + bool noconstrain; vector<double> v_traceG; vector<double> v_pve; vector<double> v_se_pve; vector<double> v_sigma2; - vector<double> v_se_sigma2; + vector<double> v_se_sigma2; vector<double> v_beta; - vector<double> v_se_beta; - + vector<double> v_se_beta; + // BSLMM MCMC related parameters double h_min, h_max, h_scale; //priors for h double rho_min, rho_max, rho_scale; //priors for rho @@ -163,7 +204,12 @@ public: double trace_G; HYPBSLMM cHyp_initial; - + + //VARCOV related parameters + double window_cm; + size_t window_bp; + size_t window_ns; + // Summary statistics bool error; size_t ni_total, ni_test, ni_cvt; //number of individuals @@ -171,6 +217,8 @@ public: size_t ns_total, ns_test; //number of snps size_t ng_total, ng_test; //number of genes size_t ni_control, ni_case; //number of controls and number of cases + size_t ni_subsample; //number of subsampled individuals + size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel size_t n_cvt; //number of covariates size_t n_ph; //number of phenotypes size_t n_vc; //number of variance components (including the diagonal matrix) @@ -186,42 +234,54 @@ public: // Data vector<vector<double> > pheno; //a vector record all phenotypes, NA replaced with -9 - vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9 + vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9 + vector<double> gxe; //a vector record all covariates, NA replaced with -9 + vector<double> weight; //a vector record weights for the individuals, which is useful for animal breeding studies vector<vector<int> > indicator_pheno; //a matrix record when a phenotype is missing for an individual; 0 missing, 1 available vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis vector<int> indicator_cvt; //indicator for covariates, 0 missing, 1 available for analysis - + vector<int> indicator_gxe; //indicator for gxe, 0 missing, 1 available for analysis + vector<int> indicator_weight; //indicator for weight, 0 missing, 1 available for analysis + vector<int> indicator_bv; //indicator for estimated breeding value file, 0 missing, 1 available for analysis vector<int> indicator_read; //indicator for read file, 0 missing, 1 available for analysis vector<double> vec_read; //total number of reads vector<double> vec_bv; //breeding values vector<size_t> est_column; - + map<string, int> mapID2num; //map small ID number to number, from 0 to n-1 map<string, string> mapRS2chr; //map rs# to chromosome location map<string, long int> mapRS2bp; //map rs# to base position map<string, double> mapRS2cM; //map rs# to cM map<string, double> mapRS2est; //map rs# to parameters - + map<string, size_t> mapRS2cat; //map rs# to category number + map<string, double> mapRS2var; //map rs# to category number + vector<SNPINFO> snpInfo; //record SNP information set<string> setSnps; //a set of snps for analysis - + //constructor PARAM(); - + //functions - void ReadFiles (); - void CheckParam (); - void CheckData (); + void ReadFiles (); + void CheckParam (); + void CheckData (); void PrintSummary (); - void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); + void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); + void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K); void CheckCvt (); void CopyCvt (gsl_matrix *W); + void CopyGxe (gsl_vector *gxe); + void CopyWeight (gsl_vector *w); void ProcessCvtPhen(); void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag); void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag); void CalcKin (gsl_matrix *matrix_kin); + void CalcS (gsl_matrix *S, gsl_matrix *Svar, gsl_matrix *Q); + void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix); + void WriteVar (const string suffix); void WriteMatrix (const gsl_matrix *matrix_U, const string suffix); void WriteVector (const gsl_vector *vector_D, const string suffix); void CopyRead (gsl_vector *log_N); |