diff options
author | Pjotr Prins | 2015-02-28 12:31:46 +0300 |
---|---|---|
committer | Pjotr Prins | 2015-02-28 12:31:46 +0300 |
commit | d73ef7975657ed937d4a603ca32d6ca547292ec5 (patch) | |
tree | 96ab40bb75af7e3fcde74d83638b439b9db213a6 /wqflask | |
parent | 7fee695b79984912dd610a55e84db602780becac (diff) | |
download | genenetwork2-d73ef7975657ed937d4a603ca32d6ca547292ec5.tar.gz |
Started adaptations for introducing multi-core version
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/wqflask/my_pylmm/pyLMM/input.py | 2 | ||||
-rwxr-xr-x | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 46 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py | 55 |
3 files changed, 77 insertions, 26 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py index dfe28a4e..fdf02e42 100755 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py @@ -260,4 +260,4 @@ class plink: L.append(D[self.indivs[i]]) P = P[L,:] - return P
\ No newline at end of file + return P diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index d7d60e6e..54a40e1f 100755 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -47,12 +47,13 @@ print("sys.path2:", sys.path) has_gn2=True +from utility.benchmark import Bench +from utility import temp_data + try: - from utility.benchmark import Bench - from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks except ImportError: - print("WARNING: Standalone version\n") + print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False pass @@ -253,20 +254,21 @@ def run_other(pheno_vector, genotype_matrix, restricted_max_likelihood=True, refit=False, - tempdata=None): + tempdata=None, # <---- can not be None + is_testing=False): + """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) + calculations ("calculate_kinship" and "GWAS" take the majority of time) """ - print("In run_other") with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -321,7 +323,7 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) -def calculate_kinship(genotype_matrix, temp_data): +def calculate_kinship(genotype_matrix, temp_data, is_testing=False): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -331,10 +333,12 @@ def calculate_kinship(genotype_matrix, temp_data): """ n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("n is:", n) - print("m is:", m) + print("genotype matrix n is:", n) + print("genotype matrix m is:", m) keep = [] for counter in range(m): + if is_testing and counter>8: + break #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers not_number = np.isnan(genotype_matrix[:,counter]) @@ -478,7 +482,7 @@ class LMM: is not done consistently. """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True): + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True,is_testing=False): """ The constructor takes a phenotype vector or array of size n. @@ -487,7 +491,8 @@ class LMM: X0 is an optional covariate matrix of size n x q, where there are q covariates. When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - + + self.is_testing = True if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose @@ -507,14 +512,14 @@ class LMM: self.nonmissing = x print("this K is:", pf(K)) - + if len(Kva) == 0 or len(Kve) == 0: if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() Kva,Kve = linalg.eigh(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - + self.K = K self.Kva = Kva self.Kve = Kve @@ -745,7 +750,7 @@ def gn2_main(): genotype_matrix = np.array(params['genotype_matrix']), restricted_max_likelihood = params['restricted_max_likelihood'], refit = params['refit'], - tempdata = tempdata) + tempdata = tempdata, is_testing=False) results_key = "pylmm:results:" + params['temp_uuid'] @@ -755,21 +760,12 @@ def gn2_main(): #Pushing json_results into a list where it is the only item because blpop needs a list Redis.rpush(results_key, json_results) Redis.expire(results_key, 60*60) - -# This is the main version used without Genenetwork2's environment -def cli_main(): - ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), - covariate_matrix = np.array(params['covariate_matrix']), - plink_input_file = params['input_file_name'], - kinship_matrix = np.array(params['kinship_matrix']), - refit = params['refit'], - tempdata = tempdata) if __name__ == '__main__': if has_gn2: gn2_main() else: - cli_main() + print("Run from pylmmGWAS.py instead") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py new file mode 100644 index 00000000..abb72081 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py @@ -0,0 +1,55 @@ +import sys +import time +import numpy as np +from numpy.distutils.system_info import get_info; +from scipy import linalg +from scipy import optimize +from scipy import stats + +useNumpy = None +hasBLAS = None + +def matrix_initialize(useBLAS): + global useNumpy # module based variable + if useBLAS and useNumpy == None: + print get_info('blas_opt') + try: + linalg.fblas + sys.stderr.write("INFO: using linalg.fblas\n") + useNumpy = False + hasBLAS = True + except AttributeError: + sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n") + useNumpy = True + else: + sys.stderr.write("INFO: using numpy.dot\n") + useNumpy=True + +def matrixMult(A,B): + global useNumpy # module based variable + + if useNumpy: + return np.dot(A,B) + + # If the matrices are in Fortran order then the computations will be faster + # when using dgemm. Otherwise, the function will copy the matrix and that takes time. + if not A.flags['F_CONTIGUOUS']: + AA = A.T + transA = True + else: + AA = A + transA = False + + if not B.flags['F_CONTIGUOUS']: + BB = B.T + transB = True + else: + BB = B + transB = False + + return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) + +def matrixMultT(M): + # res = np.dot(W,W.T) + # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False) + return matrixMult(M,M.T) |