From c48d1b4415a455a1f96ddbe8f2f89cf88b2dc283 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 15 Mar 2015 14:10:05 +0300 Subject: Added multi-core kinship support --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 35 +++++++++++++++---------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 18 +++++++++++++--- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 9ab48510..28f2042d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -73,12 +73,11 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,options): +def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): numThreads = None - if options.numThreads: - numThreads = int(options.numThreads) - options.computeSize = 1000 - matrix_initialize(options.useBLAS) + if numThreads: + numThreads = int(numThreads) + matrix_initialize(useBLAS) sys.stderr.write(str(G.shape)+"\n") n = G.shape[1] # inds @@ -92,9 +91,9 @@ def kinship(G,options): p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() print "CPU cores:",cpu_num - iterations = snps/options.computeSize+1 - if options.testing: - iterations = 8 + iterations = snps/computeSize+1 + # if testing: + # iterations = 8 # jobs = range(0,8) # range(0,iterations) results = [] @@ -103,14 +102,14 @@ def kinship(G,options): completed = 0 for job in range(iterations): - if options.verbose: - sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize))) - W = compute_W(job,G,n,snps,options.computeSize) + if verbose: + sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize))) + W = compute_W(job,G,n,snps,computeSize) if numThreads == 1: # Single-core compute_matrixMult(job,W,q) j,x = q.get() - if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n") + if verbose: sys.stderr.write("Job "+str(j)+" finished\n") K_j = x # print j,K_j[:,0] K = K + K_j @@ -122,7 +121,7 @@ def kinship(G,options): time.sleep(0.1) try: j,x = q.get_nowait() - if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n") + if verbose: sys.stderr.write("Job "+str(j)+" finished\n") K_j = x # print j,K_j[:,0] K = K + K_j @@ -134,7 +133,7 @@ def kinship(G,options): # results contains the growing result set for job in range(len(results)-completed): j,x = q.get(True,15) - if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n") + if verbose: sys.stderr.write("Job "+str(j)+" finished\n") K_j = x # print j,K_j[:,0] K = K + K_j @@ -143,13 +142,13 @@ def kinship(G,options): K = K / float(snps) # outFile = 'runtest.kin' - # if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) + # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) # np.savetxt(outFile,K) - # if options.saveKvaKve: - # if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n") + # if saveKvaKve: + # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n") # Kva,Kve = linalg.eigh(K) - # if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) + # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) # np.savetxt(outFile+".kva",Kva) # np.savetxt(outFile+".kve",Kve) return K diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index d6830787..bfb95045 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -49,6 +49,8 @@ has_gn2=True from utility.benchmark import Bench from utility import temp_data +from kinship import kinship, kinship_full +import genotype try: from wqflask.my_pylmm.pyLMM import chunks @@ -270,7 +272,7 @@ def run_other(pheno_vector, print("In run_other") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -326,7 +328,15 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) -def calculate_kinship(genotype_matrix, temp_data=None): +def calculate_kinship_new(genotype_matrix, temp_data=None): + """ + Call the new kinship calculation where genotype_matrix contains + inds (columns) by snps (rows). + """ + G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix.T) + return kinship(G),G.T + +def calculate_kinship_old(genotype_matrix, temp_data=None): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -366,7 +376,9 @@ def calculate_kinship(genotype_matrix, temp_data=None): genotype_matrix = genotype_matrix[:,keep] print("genotype_matrix: ", pf(genotype_matrix)) kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - return kinship_matrix + return kinship_matrix,genotype_matrix + +calculate_kinship = calculate_kinship_new # alias def GWAS(pheno_vector, genotype_matrix, -- cgit v1.2.3