From e6e3b12eeb3fc57b9652468304c1fd14a0a816d0 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 10:08:29 +0300 Subject: Add callback handlers --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 38 ++++++++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 41 ++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gn2.py create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/standalone.py (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py new file mode 100644 index 00000000..e0c6c8a7 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -0,0 +1,38 @@ +# Genenetwork2 specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# + +from __future__ import absolute_import, print_function, division + +import sys +import logging + +# logging.basicConfig(level=logging.DEBUG) + +def progress(location, count, total): + print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logging.debug, + info = logging.info, + warning = logging.warning, + error = logging.error, + critical = logging.critical, + progress = progress + ) + +# ----- Minor test cases: + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py new file mode 100644 index 00000000..a806729e --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -0,0 +1,41 @@ +# Standalone specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) + +from __future__ import absolute_import, print_function, division + +import sys +import logging + +logging.basicConfig(level=logging.DEBUG) + +def progress(location, count, total): + logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logging.debug, + info = logging.info, + warning = logging.warning, + error = logging.error, + critical = logging.critical, + progress = progress + ) + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) -- cgit v1.2.3 From 178cdbbd1a52cfcab975ab27b36e148009cc3577 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:05:39 +0300 Subject: Introducing callbacks --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 17 ++++++++++++-- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 17 +++++++------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 +++++++++- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 34 ++++++++++++++++++++++++++-- 5 files changed, 71 insertions(+), 14 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index e0c6c8a7..4702c670 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -5,13 +5,25 @@ from __future__ import absolute_import, print_function, division +import numpy as np import sys import logging # logging.basicConfig(level=logging.DEBUG) +# np.set_printoptions() def progress(location, count, total): - print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + """ + Progress update + """ + logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + print(msg,m.shape,"=\n",m) def callbacks(): return dict( @@ -22,7 +34,8 @@ def callbacks(): warning = logging.warning, error = logging.error, critical = logging.critical, - progress = progress + progress = progress, + mprint = mprint ) # ----- Minor test cases: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 0c43587e..43e7fe36 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -155,20 +155,21 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, verbose=True): +def kvakve(K, callbacks): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - + info = callbacks()['info'] + mprint = callbacks()['mprint'] + + info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) - if verbose: - print("Kva is: ", Kva.shape, Kva) - print("Kve is: ", Kve.shape, Kve) + mprint("Kva",Kva) + mprint("Kve",Kve) - if sum(Kva < 1e-6): - if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0))) + if sum(Kva < 0): + info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0))) Kva[Kva < 1e-6] = 1e-6 return Kva,Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8a24d98b..5ad644e2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -54,11 +54,14 @@ import genotype import phenotype import gwas +# ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks + from gn2 import callbacks except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False + from standalone import callbacks pass #np.seterr('raise') @@ -594,7 +597,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K) + Kva,Kve = kvakve(K,callbacks) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d4b3ac82..6aefb9d3 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,6 +24,16 @@ from scipy import optimize from optmatrix import matrixMult import kinship +# A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import callbacks +except ImportError: + print("WARNING: Standalone version missing the Genenetwork2 environment\n") + has_gn2=False + from standalone import callbacks + pass + def calculateKinship(W,center=False): """ W is an n x m matrix encoding SNP minor alleles. @@ -184,7 +194,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K) + Kva,Kve = kinship.kvakve(K,callbacks) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index a806729e..bbee3cd7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -8,13 +8,29 @@ from __future__ import absolute_import, print_function, division +import numpy as np import sys import logging logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) def progress(location, count, total): - logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") def callbacks(): return dict( @@ -25,7 +41,8 @@ def callbacks(): warning = logging.warning, error = logging.error, critical = logging.critical, - progress = progress + progress = progress, + mprint = mprint ) # ----- Minor test cases: @@ -39,3 +56,16 @@ if __name__ == '__main__': wrln("Hello %i" % 34) progress = callbacks()['progress'] progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) -- cgit v1.2.3 From 876e80148984274dfd3b8281677c7541504feb59 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:18:58 +0300 Subject: Added uses as syntax sugar for callbacks --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 5 ++--- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 +++--- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 8 ++++---- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 9 +++++++++ 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 43e7fe36..d3792570 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -155,13 +155,12 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, callbacks): +def kvakve(K, uses): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - info = callbacks()['info'] - mprint = callbacks()['mprint'] + info,mprint = uses('info','mprint') info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5ad644e2..2076bc84 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,11 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import callbacks + from gn2 import uses except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False - from standalone import callbacks + from standalone import uses pass #np.seterr('raise') @@ -597,7 +597,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K,callbacks) + Kva,Kve = kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index 6aefb9d3..5b93ae0d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,14 +24,14 @@ from scipy import optimize from optmatrix import matrixMult import kinship -# A trick to decide on the environment: +# ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import callbacks + from gn2 import uses except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False - from standalone import callbacks + from standalone import uses pass def calculateKinship(W,center=False): @@ -194,7 +194,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K,callbacks) + Kva,Kve = kinship.kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index bbee3cd7..705da21f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -44,6 +44,12 @@ def callbacks(): progress = progress, mprint = mprint ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] # ----- Minor test cases: @@ -69,3 +75,6 @@ if __name__ == '__main__': [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") -- cgit v1.2.3 From 7f937ef3265f007c25ec2c386bc399a708bcdd5e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:46:06 +0300 Subject: Introduce sugar for callbacks --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 26 +++++++++++++-- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 49 +++++++++------------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 3 +- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 14 ++++---- 5 files changed, 50 insertions(+), 44 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index 4702c670..c71b9f22 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -37,11 +37,17 @@ def callbacks(): progress = progress, mprint = mprint ) - + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + # ----- Minor test cases: if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) + # logging.basicConfig(level=logging.DEBUG) logging.debug("Test %i" % (1)) d = callbacks()['debug'] d("TEST") @@ -49,3 +55,19 @@ if __name__ == '__main__': wrln("Hello %i" % 34) progress = callbacks()['progress'] progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index d3792570..62f7be47 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -74,46 +74,39 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): - numThreads = None - if numThreads: - numThreads = int(numThreads) +def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize(useBLAS) - - sys.stderr.write(str(G.shape)+"\n") + + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") + info("%i SNPs" % (m)) assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() - print "CPU cores:",cpu_num - print snps,computeSize + info("CPU cores: %i" % cpu_num) iterations = snps/computeSize+1 - # if testing: - # iterations = 8 - # jobs = range(0,8) # range(0,iterations) results = [] - K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals completed = 0 for job in range(iterations): - if verbose: - sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize))) + info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize))) W = compute_W(job,G,n,snps,computeSize) if numThreads == 1: # Single-core compute_matrixMult(job,W,q) j,x = q.get() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + progress("kinship",j,iterations) K_j = x - # print j,K_j[:,0] K = K + K_j else: # Multi-core @@ -123,39 +116,27 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): time.sleep(0.1) try: j,x = q.get_nowait() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) except Queue.Empty: pass if numThreads == None or numThreads > 1: - # results contains the growing result set for job in range(len(results)-completed): j,x = q.get(True,15) - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) K = K / float(snps) - - # outFile = 'runtest.kin' - # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) - # np.savetxt(outFile,K) - - # if saveKvaKve: - # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n") - # Kva,Kve = linalg.eigh(K) - # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) - # np.savetxt(outFile+".kva",Kva) - # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, uses): +def kvakve(K,uses): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 2076bc84..5182e73c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -395,7 +395,7 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): print("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) print("call calculate_kinship_new") - return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) + return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): """ diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 324c4f2c..e3e8659c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -25,6 +25,7 @@ from lmm import gn2_load_redis, calculate_kinship_old from kinship import kinship, kinship_full import genotype import phenotype +from standalone import uses usage = """ python runlmm.py [options] command @@ -193,7 +194,7 @@ elif cmd == 'kinship': k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G.T,uses) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 705da21f..538007f1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -12,11 +12,13 @@ import numpy as np import sys import logging +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) def progress(location, count, total): - logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): """ @@ -36,11 +38,11 @@ def callbacks(): return dict( write = sys.stdout.write, writeln = print, - debug = logging.debug, - info = logging.info, - warning = logging.warning, - error = logging.error, - critical = logging.critical, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, progress = progress, mprint = mprint ) -- cgit v1.2.3 From 204805157912aebb92967241850453f07729e2f6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:00:01 +0300 Subject: Warning to stderr --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5182e73c..66c952aa 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -59,9 +59,9 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses + sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass #np.seterr('raise') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index 5b93ae0d..aa6b473d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -29,7 +29,7 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses pass -- cgit v1.2.3 From f1056b9f4128fb91fbaf738914395697aa485b2e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:09:21 +0300 Subject: Warning to stderr --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 ++++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 66c952aa..95272818 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,10 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses + from gn2 import uses, set_progress_storage except ImportError: has_gn2=False - from standalone import uses + import standalone as handlers + from standalone import uses, set_progress_storage sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass @@ -816,6 +817,7 @@ def gn2_redis(key,species,new_code=True): params = json.loads(json_params) tempdata = temp_data.TempData(params['temp_uuid']) + set_progress_storage(tempdata) print('kinship', np.array(params['kinship_matrix'])) print('pheno', np.array(params['pheno_vector'])) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 538007f1..e20d4092 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,7 +17,12 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) +def set_progress_storage(location): + global storage + storage = location + def progress(location, count, total): + storage['percentage'] = round(count*100.0)/total) logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): -- cgit v1.2.3 From 6b8321d77e915dc5aec0c272c1cb84c2af3e6191 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:17:59 +0300 Subject: Replace progress meter --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 7 ++++++- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++----------- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index c71b9f22..f8033ac5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,11 +12,16 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() +def set_progress_storage(location): + global storage + storage = location + def progress(location, count, total): """ Progress update """ - logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + storage.store("percent_complete",round(count*100.0)/total) + logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): """ diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 95272818..eab7d91d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,6 +65,8 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass +progress,info = uses('progress','info') + #np.seterr('raise') #def run_human(pheno_vector, @@ -171,10 +173,7 @@ def run_human(pheno_vector, #if count > 1000: # break count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - tempdata.store("percent_complete", percent_complete) + progress("human",count,total_snps) #with Bench("actual association"): ps, ts = human_association(snp, @@ -431,10 +430,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - - percent_complete = int(round((counter/m)*45)) - if temp_data != None: - temp_data.store("percent_complete", percent_complete) + progress('kinship_old',counter,m) genotype_matrix = genotype_matrix[:,keep] print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) @@ -539,9 +535,8 @@ def GWAS(pheno_vector, lmm_ob.fit(X=x) ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood) - percent_complete = 45 + int(round((counter/m)*55)) - temp_data.store("percent_complete", percent_complete) - + progress("gwas_old",counter,m) + p_values.append(ps) t_statistics.append(ts) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index e20d4092..b3d480c3 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -22,7 +22,7 @@ def set_progress_storage(location): storage = location def progress(location, count, total): - storage['percentage'] = round(count*100.0)/total) + storage.store("percent_complete",round(count*100.0)/total) logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): -- cgit v1.2.3 From de84be30502af4be014fa4c0a2e7b54e51cff6f6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:36:03 +0300 Subject: Progress handler --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 19 ++++++++++++++----- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index f8033ac5..b487ea25 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,17 +12,26 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() +last_location = None +last_progress = 0 + def set_progress_storage(location): global storage storage = location def progress(location, count, total): - """ - Progress update - """ - storage.store("percent_complete",round(count*100.0)/total) - logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + global last_location + global last_progress + + perc = round(count*100.0/total) + # print(last_progress,";",perc) + if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): + storage.store("percent_complete",perc) + logger.info("Progress: %s %d%%" % (location,perc)) + last_location = location + last_progress = perc + def mprint(msg,data): """ Array/matrix print function diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index b3d480c3..7cc3e871 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,13 +17,24 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) +last_location = None +last_progress = 0 + def set_progress_storage(location): global storage storage = location def progress(location, count, total): - storage.store("percent_complete",round(count*100.0)/total) - logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + global last_location + global last_progress + + perc = round(count*100.0/total) + # print(last_progress,";",perc) + if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): + storage.store("percent_complete",perc) + logger.info("Progress: %s %d%%" % (location,perc)) + last_location = location + last_progress = perc def mprint(msg,data): """ -- cgit v1.2.3 From f0653da318cac9736777495e40de6853227904ec Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:21:12 +0300 Subject: Cleaned up gwas.py to use uses and moved Redis call back into lmm.py --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 70 +++++++++++----------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 10 ++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 31 +++++++----- 3 files changed, 52 insertions(+), 59 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index b901c0e2..8b344a90 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -19,7 +19,6 @@ import pdb import time -import sys # from utility import temp_data import lmm2 @@ -36,12 +35,10 @@ def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - # print("COMPUTE SNP",j,snp_ids,"\n") result = [] for snp_id in snp_ids: snp,id = snp_id x = snp.reshape((n,1)) # all the SNPs - # print "X=",x # if refit: # L.fit(X=snp,REML=REML) ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) @@ -51,32 +48,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): q = compute_snp.q q.put([j,result]) return j - # PS.append(ps) - # TS.append(ts) - # return len(result) - # compute.q.put(result) - # return None def f_init(q): compute_snp.q = q -def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): +def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): """ - Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows) + GWAS. The G matrix should be n inds (cols) x m snps (rows) """ + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads kfile2 = False reml = restricted_max_likelihood - sys.stderr.write(str(G.shape)+"\n") + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") - # print "***** GWAS: G",G.shape,G + info("%s SNPs",snps) assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association @@ -85,19 +78,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) if not refit: - if verbose: sys.stderr.write("Computing fit for null model\n") + info("Computing fit for null model") lmm2.fit() # follow GN model in run_other - if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma)) - - # outFile = "test.out" - # out = open(outFile,'w') - out = sys.stderr - - def outputResult(id,beta,betaSD,ts,ps): - out.write(formatResult(id,beta,betaSD,ts,ps)) - def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - - # printOutHead() + info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma)) + res = [] # Set up the pool @@ -106,26 +90,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): p = mp.Pool(numThreads, f_init, [q]) collect = [] - # Buffers for pvalues and t-stats - # PS = [] - # TS = [] count = 0 job = 0 jobs_running = 0 + jobs_completed = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 if count % 1000 == 0: job += 1 - if verbose: - sys.stderr.write("Job %d At SNP %d\n" % (job,count)) + debug("Job %d At SNP %d" % (job,count)) if numThreads == 1: - print "Running on 1 THREAD" + debug("Running on 1 THREAD") compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) @@ -134,8 +116,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): while jobs_running > cpu_num: try: j,lst = q.get_nowait() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) jobs_running -= 1 except Queue.Empty: @@ -150,24 +133,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): if numThreads==1 or count<1000 or len(collect)>0: job += 1 - print "Collect final batch size %i job %i @%i: " % (len(collect), job, count) + debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count)) compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() res.append((j,lst)) - print "count=",count," running=",jobs_running," collect=",len(collect) + debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect))) for job in range(jobs_running): j,lst = q.get(True,15) # time out - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) - print "Before sort",[res1[0] for res1 in res] + mprint("Before sort",[res1[0] for res1 in res]) res = sorted(res,key=lambda x: x[0]) - # if verbose: - # print "res=",res[0][0:10] - print "After sort",[res1[0] for res1 in res] - print [len(res1[1]) for res1 in res] + mprint("After sort",[res1[0] for res1 in res]) + info([len(res1[1]) for res1 in res]) ts = [item[0] for j,res1 in res for item in res1] ps = [item[1] for j,res1 in res for item in res1] return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index eab7d91d..1e00002a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,11 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses, set_progress_storage + from gn2 import uses, progress_set_func except ImportError: has_gn2=False import standalone as handlers - from standalone import uses, set_progress_storage + from standalone import uses, progress_set_func sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass @@ -348,6 +348,7 @@ def run_other_new(pheno_vector, t_stats, p_values = gwas.gwas(Y, G.T, K, + uses, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() @@ -812,7 +813,10 @@ def gn2_redis(key,species,new_code=True): params = json.loads(json_params) tempdata = temp_data.TempData(params['temp_uuid']) - set_progress_storage(tempdata) + def update_tempdata(loc,i,total): + tempdata.store("percent_complete",round(i*100.0/total)) + debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) + progress_set_func(update_tempdata) print('kinship', np.array(params['kinship_matrix'])) print('pheno', np.array(params['pheno_vector'])) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 7cc3e871..36bf8fd5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,24 +17,31 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) -last_location = None -last_progress = 0 +progress_location = None +progress_current = None +progress_prev_perc = None -def set_progress_storage(location): - global storage - storage = location +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func def progress(location, count, total): - global last_location - global last_progress + global progress_location + global progress_prev_perc perc = round(count*100.0/total) - # print(last_progress,";",perc) - if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): - storage.store("percent_complete",perc) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) logger.info("Progress: %s %d%%" % (location,perc)) - last_location = location - last_progress = perc + progress_location = location + progress_prev_perc = perc def mprint(msg,data): """ -- cgit v1.2.3 From 9b8a958494364fc6470cfe93f90d179e0bc7a787 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:23:06 +0300 Subject: Aligned gn2 handlers with standalone --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index b487ea25..f30cf1e6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,25 +12,31 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() -last_location = None -last_progress = 0 +progress_location = None +progress_current = None +progress_prev_perc = None -def set_progress_storage(location): - global storage - storage = location +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func def progress(location, count, total): - global last_location - global last_progress + global progress_location + global progress_prev_perc perc = round(count*100.0/total) - # print(last_progress,";",perc) - if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): - storage.store("percent_complete",perc) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) logger.info("Progress: %s %d%%" % (location,perc)) - last_location = location - last_progress = perc - + progress_location = location + progress_prev_perc = perc def mprint(msg,data): """ -- cgit v1.2.3 From 130afd633fc50cbccaf2d12e5e643eb5f8b98c6f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:32:21 +0300 Subject: Add uses debug --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 1e00002a..e0fc8305 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,7 +65,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,info = uses('progress','info') +progress,debug,info = uses('progress','debug','info') #np.seterr('raise') -- cgit v1.2.3 From 803c3c56c37e448fd52fa102fdb6eef8431154cc Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:36:35 +0300 Subject: Tagging 0.50-gn2-pre2 --- wqflask/wqflask/my_pylmm/README.md | 35 +++++++++++++++++------------- wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 +- 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index f6b0e72d..a84b5be2 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,21 +1,26 @@ -# RELEASE NOTES +# Genenetwork2/pylmm RELEASE NOTES -## 0.50-gn2-pre1 release +## 0.50-gn2-pre2 -This is the first test release of multi-core pylmm into GN2. Both -kinship calculation and GWAS have been made multi-threaded by -introducing the Python multiprocessing module. Note that only -run_other has been updated to use the new routines (so human is still -handled the old way). I have taken care that we can still run both -old-style and new-style LMM (through passing the 'new_code' -boolean). This could be an option in the web server for users to -select and test for any unexpected differences (of which there should -be none, naturally ;). +- Added abstractions for progress meter and info/debug statements; + Redis perc_complete is now updated through a lambda -The current version can handle missing phenotypes, but as they are -removed there is no way for GN2 to know what SNPs the P-values belong -to. A future version will pass a SNP index to allow for missing -phenotypes. +## 0.50-gn2-pre1 (release) + +- This is the first test release of multi-core pylmm into GN2. Both + kinship calculation and GWAS have been made multi-threaded by + introducing the Python multiprocessing module. Note that only + run_other has been updated to use the new routines (so human is + still handled the old way). I have taken care that we can still run + both old-style and new-style LMM (through passing the 'new_code' + boolean). This could be an option in the web server for users to + select and test for any unexpected differences (of which there + should be none, naturally ;). + +- The current version can handle missing phenotypes, but as they are + removed there is no way for GN2 to know what SNPs the P-values + belong to. A future version will pass a SNP index to allow for + missing phenotypes. \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index c40c3221..6ab60d02 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -1 +1 @@ -PYLMM_VERSION="0.50-gn2-pre1" +PYLMM_VERSION="0.50-gn2-pre2" -- cgit v1.2.3 From 8e9d7cde41800766fec835ca0c4a55c6327e05c8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 20 Mar 2015 11:47:10 +0300 Subject: Trying to get kinship_old back in lmm1 --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 14 +++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 35 ++++++++++++++--------------- wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++-- 4 files changed, 29 insertions(+), 26 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 62f7be47..be12417e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,17 +28,21 @@ import time from optmatrix import matrix_initialize, matrixMultT -def kinship_full(G): +def kinship_full(G,uses): """ Calculate the Kinship matrix using a full dot multiplication """ - print G.shape + info,mprint = uses('info','mprint') + + # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds - sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" - m = np.dot(G.T,G) + info("%d SNPs",m) + assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n) + # m = np.dot(G.T,G) + m = matrixMultT(G.T) m = m/G.shape[0] + # mprint("kinship_full K",m) return m def compute_W(job,G,n,snps,compute_size): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index e0fc8305..c040e3c2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,7 +65,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,debug,info = uses('progress','debug','info') +progress,mprint,debug,info = uses('progress','mprint','debug','info') #np.seterr('raise') @@ -277,7 +277,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -331,7 +331,7 @@ def run_other_new(pheno_vector, # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) with Bench("Calculate Kinship"): - K,G = calculate_kinship(G, tempdata) + K,G = calculate_kinship_new(G, tempdata) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -393,9 +393,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ - print("call genotype.normalize") + info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - print("call calculate_kinship_new") + info("call calculate_kinship_new") return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): @@ -406,11 +406,11 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): normalizes the resulting vectors and returns the RRM matrix. """ - print("call calculate_kinship_old") + info("call calculate_kinship_old") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("genotype 2D matrix n (inds) is:", n) - print("genotype 2D matrix m (snps) is:", m) + info("genotype 2D matrix n (inds) is: %d" % (n)) + info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] for counter in range(m): @@ -431,14 +431,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - progress('kinship_old',counter,m) + progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) - kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - return kinship_matrix,genotype_matrix - -calculate_kinship = calculate_kinship_new # alias + mprint("After kinship (old) genotype_matrix: ", genotype_matrix) + # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) + # return kinship_matrix,genotype_matrix + return kinship_full(genotype_matrix.T,uses),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -464,9 +463,9 @@ def GWAS(pheno_vector, refit - refit the variance component for each SNP """ - if kinship_eigen_vals == None: + if kinship_eigen_vals is None: kinship_eigen_vals = [] - if kinship_eigen_vectors == None: + if kinship_eigen_vectors is None: kinship_eigen_vectors = [] n = genotype_matrix.shape[0] @@ -570,7 +569,7 @@ class LMM: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose #x = Y != -9 @@ -663,7 +662,7 @@ class LMM: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 682ba371..4c8175f7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -24,7 +24,7 @@ def remove_missing(y,g,verbose=False): Remove missing data from matrices, make sure the genotype data has individuals as rows """ - assert(y!=None) + assert(y is not None) assert(y.shape[0] == g.shape[0]) y1 = y diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index e3e8659c..6a38da56 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -134,7 +134,7 @@ elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: + if y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T @@ -165,7 +165,7 @@ elif cmd == 'redis': assert p1==0.0897, "p1=%f" % p1 assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert round(sum(ps)) == 4070 + assert int(sum(ps)) == 4070 assert len(ps) == 8000 elif cmd == 'kinship': G = g -- cgit v1.2.3 From 38594c7781b587a24be14b9631a73662ee3fdc2b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 20 Mar 2015 12:18:03 +0300 Subject: Fall back on calculate_kinship_new again --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index c040e3c2..a649029c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -277,7 +277,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 6a38da56..88e2a033 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -184,7 +184,7 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(np.copy(G)) + K = kinship_full(np.copy(G),uses) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) -- cgit v1.2.3 From 490e0919b2757f6815a7e6c7f0cb08e55e1cd02e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 10:32:11 +0200 Subject: Percentage complete: Add method description --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8844118f..200424ba 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -814,6 +814,9 @@ def gn2_redis(key,species,new_code=True): tempdata = temp_data.TempData(params['temp_uuid']) def update_tempdata(loc,i,total): + """ + This is the single method that updates Redis for percentage complete! + """ tempdata.store("percent_complete",round(i*100.0/total)) debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) -- cgit v1.2.3 From 6fc112431c0edb0ecae6cd5fa45716c349094a7f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 11:49:43 +0200 Subject: Use of is vs == when testing None --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 200424ba..f0473f99 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -278,7 +278,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -880,7 +880,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), - kinship_matrix= k, + kinship_matrix = k, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index aa6b473d..d67e1205 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -85,7 +85,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): print("genotype matrix n is:", n) print("genotype matrix m is:", m) - if X0 == None: + if X0 is None: X0 = np.ones((n,1)) # Remove missing values in Y and adjust associated parameters @@ -173,7 +173,7 @@ class LMM2: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose @@ -260,7 +260,7 @@ class LMM2: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: X = self.X0t + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] X = self.X0t_stack @@ -316,7 +316,7 @@ class LMM2: Given this optimum, the function computes the LL and associated ML solutions. """ - if X == None: X = self.X0t + if X is None: X = self.X0t else: #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] @@ -340,7 +340,7 @@ class LMM2: def association(self,X,h=None,stack=True,REML=True,returnBeta=False): """ Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. + If h is None, the optimal h stored in optH is used. """ if False: @@ -358,7 +358,7 @@ class LMM2: self.X0t_stack[:,(self.q)] = m X = self.X0t_stack - if h == None: h = self.optH + if h is None: h = self.optH L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) q = len(beta) -- cgit v1.2.3 From 8b88be4f48baa6cd0cc3c37a851144d5b1dc24af Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 13:01:22 +0200 Subject: Refactoring genotype normalization --- wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 19 ++++++++++--------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9 +++++---- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 ++ 3 files changed, 17 insertions(+), 13 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py index 315fd824..49f32e3a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py @@ -37,14 +37,15 @@ def normalize(ind_g): Run for every SNP list (for one individual) and return normalized SNP genotype values with missing data filled in """ - g1 = np.copy(ind_g) # avoid side effects - x = True - np.isnan(ind_g) # Matrix of True/False - m = ind_g[x].mean() # Global mean value - s = np.sqrt(ind_g[x].var()) # Global stddev - g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data - if s == 0: - g1 = g1 - m # Subtract the mean + g = np.copy(ind_g) # copy to avoid side effects + missing = np.isnan(g) + values = g[True - missing] + mean = values.mean() # Global mean value + stddev = np.sqrt(values.var()) # Global stddev + g[missing] = mean # Plug-in mean values for missing data + if stddev == 0: + g = g - mean # Subtract the mean else: - g1 = (g1 - m) / s # Normalize the deviation - return g1 + g = (g - mean) / stddev # Normalize the deviation + return g diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index f0473f99..035f31e8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -414,6 +414,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] + mprint("G (before old normalize)",genotype_matrix) for counter in range(m): #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers @@ -435,10 +436,10 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - mprint("After kinship (old) genotype_matrix: ", genotype_matrix) - # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - # return kinship_matrix,genotype_matrix - return kinship_full(genotype_matrix.T,uses),genotype_matrix + mprint("G (after old normalize)",genotype_matrix.T) + kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) + return kinship_matrix,genotype_matrix + # return kinship_full(genotype_matrix.T,uses),genotype_matrix def GWAS(pheno_vector, genotype_matrix, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 88e2a033..fc7a4b9d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -106,6 +106,8 @@ if options.geno: if cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') Y = y G = g print "Original G",G.shape, "\n", G -- cgit v1.2.3 From 153317412a090d5b17bc176ff7da2e61e6ec4f2c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 09:55:42 +0200 Subject: Make the new version of genotype normalization default --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 15 ++++++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++ 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index f30cf1e6..7bceb089 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -45,15 +45,20 @@ def mprint(msg,data): m = np.array(data) print(msg,m.shape,"=\n",m) +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + def callbacks(): return dict( write = sys.stdout.write, writeln = print, - debug = logging.debug, - info = logging.info, - warning = logging.warning, - error = logging.error, - critical = logging.critical, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, + fatal = fatal, progress = progress, mprint = mprint ) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 035f31e8..8be3fc6f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -66,7 +66,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,mprint,debug,info = uses('progress','mprint','debug','info') +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') #np.seterr('raise') @@ -278,7 +278,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -408,6 +408,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): """ info("call calculate_kinship_old") + fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] info("genotype 2D matrix n (inds) is: %d" % (n)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index fc7a4b9d..ef0bdd7e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_old +from lmm import gn2_load_redis, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -190,7 +190,7 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None) + K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 36bf8fd5..40b2021d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -57,6 +57,10 @@ def mprint(msg,data): m[-2][0:3]," ... ",m[-2][-3:],"\n ", m[-1][0:3]," ... ",m[-1][-3:],"]") +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + def callbacks(): return dict( write = sys.stdout.write, @@ -66,6 +70,7 @@ def callbacks(): warning = logger.warning, error = logger.error, critical = logger.critical, + fatal = fatal, progress = progress, mprint = mprint ) -- cgit v1.2.3 From 0f132d0cc4a77e69ab593fd9c8a2d5218d083ed7 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 10:15:49 +0200 Subject: Release 0.50-gn2 --- wqflask/wqflask/my_pylmm/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index a84b5be2..4845ec03 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,11 +1,15 @@ # Genenetwork2/pylmm RELEASE NOTES -## 0.50-gn2-pre2 +## 0.50-gn2 (April 2nd, 2015) + +- Replaced the GN2 genotype normalization + +## 0.50-gn2-pre2 (March 18, 2015) - Added abstractions for progress meter and info/debug statements; Redis perc_complete is now updated through a lambda -## 0.50-gn2-pre1 (release) +## 0.50-gn2-pre1 (release, March 17, 2015) - This is the first test release of multi-core pylmm into GN2. Both kinship calculation and GWAS have been made multi-threaded by -- cgit v1.2.3 From 43295e57621e9a08ca4cb90e95cc14a87e0d8b5e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 12:04:14 +0200 Subject: Create test geno iterator --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 9 +++++++-- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ef0bdd7e..5a4bd268 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -99,11 +99,16 @@ if options.pheno: y = tsvreader.pheno(options.pheno) print y.shape -if options.geno: +if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape -if cmd == 'redis_new': +if cmd == 'iterator': + print "ITERATE over SNPs" + def pretty(snpid,values): + print snpid,values + print tsvreader.geno_iter(options.geno,pretty) +elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first if options.remove_missing_phenotypes: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index b4027fa3..7fe75e3f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -74,3 +74,28 @@ def geno(fn): G = np.array(G1) return G +def geno(fn): + G1 = [] + def append(id,values): + G1.append(values) # <--- slow + geno_iter(fn,append) + G = np.array(G1) + return G + +def geno_iter(fn,func): + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + func(id,gs2) -- cgit v1.2.3 From 5151bc389aa98415da9f4d49b3c279ed1380ea7d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 12:14:43 +0200 Subject: Prepare iterator --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 5a4bd268..036bf7d5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -104,10 +104,17 @@ if options.geno and cmd != 'iterator': print g.shape if cmd == 'iterator': - print "ITERATE over SNPs" - def pretty(snpid,values): - print snpid,values - print tsvreader.geno_iter(options.geno,pretty) + def snp_iterator(func): + tsvreader.geno_iter(options.geno,func) + + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first -- cgit v1.2.3 From b9c79ef58ff6ec4da3e65290ea802c783bb17742 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 13:40:42 +0200 Subject: Passing in an iterator --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 33 ++++++++++++++++++++++++++++- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 6 ++---- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 26 ++++++++++++++++++++--- 3 files changed, 57 insertions(+), 8 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8be3fc6f..07b55726 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -875,6 +875,9 @@ def gn2_main(): gn2_redis(key,species) def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): + """ + This function emulates current GN2 behaviour by pre-loading Redis + """ print("Loading Redis from parsed data") if kinship == None: k = None @@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.expire(key, 60*60) return gn2_redis(key,species,new_code) - + +def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): + """ + This function emulates GN2 behaviour by pre-loading Redis with + a SNP iterator + """ + print("Loading Redis using a SNP iterator") + if kinship == None: + k = None + else: + k = kinship.tolist() + params = dict(pheno_vector = pheno.tolist(), + genotype_matrix = geno_iterator.tolist(), + kinship_matrix = k, + restricted_max_likelihood = True, + refit = False, + temp_uuid = "testrun_temp_uuid", + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + + return gn2_redis(key,species,new_code) + + if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") if has_gn2: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 036bf7d5..3b0672b4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_new +from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator': print g.shape if cmd == 'iterator': - def snp_iterator(func): - tsvreader.geno_iter(options.geno,func) - if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') + snp_iterator = tsvreader.geno_iter(options.geno) ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) print np.array(ps) print len(ps),sum(ps) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index 7fe75e3f..27daf43f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -76,13 +76,12 @@ def geno(fn): def geno(fn): G1 = [] - def append(id,values): + for id,values in geno_iter(fn): G1.append(values) # <--- slow - geno_iter(fn,append) G = np.array(G1) return G -def geno_iter(fn,func): +def geno_callback(fn,func): hab_mapper = {'A':0,'H':1,'B':2,'-':3} pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] @@ -99,3 +98,24 @@ def geno_iter(fn,func): gs = list(row[1]) gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] func(id,gs2) + +def geno_iter(fn): + """ + Yield a tuple of snpid and values + """ + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + yield (id,gs2) -- cgit v1.2.3 From 146b4a45c28b7d3ba4bf982cfaf93eda2e71d1ea Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 10:58:53 +0200 Subject: Refactoring GN2 interface --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 07b55726..6e22e6c9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -805,7 +805,7 @@ class LMM: pl.title(title) -def gn2_redis(key,species,new_code=True): +def gwas_using_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new version @@ -861,18 +861,6 @@ def gn2_redis(key,species,new_code=True): Redis.expire(results_key, 60*60) return ps, ts -# This is the main function used by Genenetwork2 (with environment) -def gn2_main(): - parser = argparse.ArgumentParser(description='Run pyLMM') - parser.add_argument('-k', '--key') - parser.add_argument('-s', '--species') - - opts = parser.parse_args() - - key = opts.key - species = opts.species - - gn2_redis(key,species) def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): """ @@ -898,7 +886,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gn2_redis(key,species,new_code) + return gwas_using_redis(key,species,new_code) def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): """ @@ -925,7 +913,23 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gn2_redis(key,species,new_code) + return gwas_using_redis(key,species,new_code) + +# This is the main function used by Genenetwork2 (with environment) +# +# Note that this calling route will become OBSOLETE (we should use runlmm.py +# instead) +def gn2_main(): + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + parser.add_argument('-s', '--species') + + opts = parser.parse_args() + + key = opts.key + species = opts.species + + gwas_using_redis(key,species) if __name__ == '__main__': -- cgit v1.2.3 From fabbcac393627badf0542377fc22325ae7e96f3d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 11:15:29 +0200 Subject: Passing in an iterator --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 6e22e6c9..b8650938 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -891,15 +891,21 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): """ This function emulates GN2 behaviour by pre-loading Redis with - a SNP iterator + a SNP iterator, for this it sets a key for every genotype (SNP) """ print("Loading Redis using a SNP iterator") + for i,genotypes in enumerate(geno_iterator): + gkey = key+'_geno_'+str(i) + Redis.set(gkey, genotypes) + Redis.expire(gkey, 60*60) + if kinship == None: k = None else: k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno_iterator.tolist(), + genotype_matrix = "iterator", + genotypes = i, kinship_matrix = k, restricted_max_likelihood = True, refit = False, @@ -913,7 +919,7 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species,new_code) + return gwas_using_redis(key,species) # This is the main function used by Genenetwork2 (with environment) # -- cgit v1.2.3 From 7d13eec7f67578aa75d8430bb5ed74a4dd825b51 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 12:10:55 +0200 Subject: Refactoring Redis use to one function --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 ++++++++++++++++++-------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 +- 2 files changed, 38 insertions(+), 31 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index b8650938..88ca6a7f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -805,6 +805,36 @@ class LMM: pl.title(title) +def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): + """ + Invoke pylmm using a genotype (SNP) iterator + """ + info("gwas_without_redis") + print('pheno', y) + + if species == "human" : + print('kinship', k ) + ps, ts = run_human(pheno_vector = y, + covariate_matrix = cov, + plink_input_file = inputfn, + kinship_matrix = k, + refit = refit, tempdata=tempdata) + else: + print('geno', geno.shape, geno) + + if new_code: + ps, ts = run_other_new(pheno_vector = y, + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit, + tempdata = tempdata) + else: + ps, ts = run_other_old(pheno_vector = y, + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit, + tempdata = tempdata) + def gwas_using_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new @@ -823,33 +853,7 @@ def gwas_using_redis(key,species,new_code=True): debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) - - print('pheno', np.array(params['pheno_vector'])) - - if species == "human" : - print('kinship', np.array(params['kinship_matrix'])) - ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), - covariate_matrix = np.array(params['covariate_matrix']), - plink_input_file = params['input_file_name'], - kinship_matrix = np.array(params['kinship_matrix']), - refit = params['refit'], - tempdata = tempdata) - else: - geno = np.array(params['genotype_matrix']) - print('geno', geno.shape, geno) - - if new_code: - ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) - else: - ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) + ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -874,6 +878,8 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", @@ -888,7 +894,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): return gwas_using_redis(key,species,new_code) -def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): +def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): """ This function emulates GN2 behaviour by pre-loading Redis with a SNP iterator, for this it sets a key for every genotype (SNP) @@ -907,6 +913,8 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): genotype_matrix = "iterator", genotypes = i, kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", @@ -918,7 +926,6 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): json_params = json.dumps(params) Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species) # This is the main function used by Genenetwork2 (with environment) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 3b0672b4..ab698e41 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new +from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -107,7 +107,7 @@ if cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) + ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) print np.array(ps) print len(ps),sum(ps) # Test results -- cgit v1.2.3 From fc6f0ef9fc8d2607e70c775c51ca55f50806cc7a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 13:13:09 +0200 Subject: temp_data is no longer passed around --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 50 +++++++++++++++----------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +- 2 files changed, 24 insertions(+), 28 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 88ca6a7f..9e25f56d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -81,8 +81,7 @@ def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, - refit=False, - tempdata=None): + refit=False): v = np.isnan(pheno_vector) keep = True - v @@ -262,23 +261,19 @@ def human_association(snp, def run_other_old(pheno_vector, genotype_matrix, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -297,24 +292,19 @@ def run_other_old(pheno_vector, genotype_matrix, kinship_matrix, restricted_max_likelihood=True, - refit=False, - temp_data=tempdata) + refit=False) Bench().report() return p_values, t_stats def run_other_new(pheno_vector, genotype_matrix, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ @@ -332,7 +322,7 @@ def run_other_new(pheno_vector, # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) with Bench("Calculate Kinship"): - K,G = calculate_kinship_new(G, tempdata) + K,G = calculate_kinship_new(G) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -815,25 +805,24 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): if species == "human" : print('kinship', k ) ps, ts = run_human(pheno_vector = y, - covariate_matrix = cov, - plink_input_file = inputfn, - kinship_matrix = k, - refit = refit, tempdata=tempdata) + covariate_matrix = cov, + plink_input_file = inputfn, + kinship_matrix = k, + refit = refit) else: print('geno', geno.shape, geno) if new_code: ps, ts = run_other_new(pheno_vector = y, - genotype_matrix = geno, - restricted_max_likelihood = reml, - refit = refit, - tempdata = tempdata) + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit) else: ps, ts = run_other_old(pheno_vector = y, genotype_matrix = geno, restricted_max_likelihood = reml, - refit = refit, - tempdata = tempdata) + refit = refit) + return ps,ts def gwas_using_redis(key,species,new_code=True): """ @@ -853,7 +842,14 @@ def gwas_using_redis(key,species,new_code=True): debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) - ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + def narray(key): + print(key) + v = params[key] + if v is not None: + v = np.array(v) + return v + + ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ab698e41..3801529e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -200,7 +200,7 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None) + K2,G = calculate_kinship_new(np.copy(G).T) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) -- cgit v1.2.3 From 3c738e6901ecc2ec0b4c1c667f20ebe3dc186f5c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 13:17:56 +0200 Subject: Rename gwas_using_redis to gwas_with_redis --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 9e25f56d..ad6375e9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -795,7 +795,7 @@ class LMM: pl.title(title) -def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): +def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code): """ Invoke pylmm using a genotype (SNP) iterator """ @@ -824,10 +824,10 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): refit = refit) return ps,ts -def gwas_using_redis(key,species,new_code=True): +def gwas_with_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new - version + version. All the Redis code goes here! """ json_params = Redis.get(key) @@ -849,7 +849,7 @@ def gwas_using_redis(key,species,new_code=True): v = np.array(v) return v - ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -888,7 +888,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species,new_code) + return gwas_with_redis(key,species,new_code) def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): """ @@ -922,7 +922,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): json_params = json.dumps(params) Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species) + return gwas_with_redis(key,species) # This is the main function used by Genenetwork2 (with environment) # @@ -938,7 +938,7 @@ def gn2_main(): key = opts.key species = opts.species - gwas_using_redis(key,species) + gwas_with_redis(key,species) if __name__ == '__main__': -- cgit v1.2.3 From e9865707ef447b8bc23eb8c872703f156936499d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 14:03:32 +0200 Subject: - Calculate n,m from the start - added test function to runlmm.py to run without Redis (25% faster) --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++++++------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 17 +++++++++++++++-- 2 files changed, 25 insertions(+), 9 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index ad6375e9..e51742c4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -795,9 +795,9 @@ class LMM: pl.title(title) -def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code): +def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): """ - Invoke pylmm using a genotype (SNP) iterator + Invoke pylmm using genotype as a matrix or as a (SNP) iterator. """ info("gwas_without_redis") print('pheno', y) @@ -848,8 +848,11 @@ def gwas_with_redis(key,species,new_code=True): if v is not None: v = np.array(v) return v - - ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + + y = narray('pheno_vector') + n = len(y) + m = params['num_genotypes'] + ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -873,6 +876,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), + num_genotypes = geno.shape[1], kinship_matrix = k, covariate_matrix = None, input_file_name = None, @@ -881,8 +885,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): temp_uuid = "testrun_temp_uuid", # meta data - timestamp = datetime.datetime.now().isoformat(), - ) + timestamp = datetime.datetime.now().isoformat()) json_params = json.dumps(params) Redis.set(key, json_params) @@ -907,7 +910,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = "iterator", - genotypes = i, + num_genotypes = i, kinship_matrix = k, covariate_matrix = None, input_file_name = None, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 3801529e..f095bb73 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new +from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas from kinship import kinship, kinship_full import genotype import phenotype @@ -103,7 +103,20 @@ if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape -if cmd == 'iterator': +if cmd == 'run': + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + snp_iterator = tsvreader.geno_iter(options.geno) + n = len(y) + m = g.shape[1] + ps, ts = run_gwas('other',n,m,k,y,g.T) + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + +elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) -- cgit v1.2.3 From 163fe965bc1dcb807124c1c70c965d48bf2c2688 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 09:52:24 +0200 Subject: Consolidate tests now they all agree for redis, redis_new and run --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 74 +++++++++++++------------------- 1 file changed, 30 insertions(+), 44 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index f095bb73..2d02e195 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -27,6 +27,8 @@ import genotype import phenotype from standalone import uses +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + usage = """ python runlmm.py [options] command @@ -103,6 +105,29 @@ if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape +def check_results(ps,ts): + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + # sys.stderr.write(options.geno+"\n") + if options.geno == 'data/small.geno': + info("Validating results for "+options.geno) + assert p1==0.0708, "p1=%f" % p1 + assert p2==0.1417, "p2=%f" % p2 + if options.geno == 'data/small_na.geno': + info("Validating results for "+options.geno) + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 + if options.geno == 'data/test8000.geno': + info("Validating results for "+options.geno) + # assert p1==0.8984, "p1=%f" % p1 + # assert p2==0.9621, "p2=%f" % p2 + assert round(sum(ps)) == 4070 + assert len(ps) == 8000 + info("Run completed") + if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') @@ -110,22 +135,13 @@ if cmd == 'run': n = len(y) m = g.shape[1] ps, ts = run_gwas('other',n,m,k,y,g.T) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - + check_results(ps,ts) elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) + check_results(ps,ts) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first @@ -138,23 +154,7 @@ elif cmd == 'redis_new': gt = G.T G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") - if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - # assert p1==0.8984, "p1=%f" % p1 - # assert p2==0.9621, "p2=%f" % p2 - assert round(sum(ps)) == 4070 - assert len(ps) == 8000 + check_results(ps,ts) elif cmd == 'redis': # Emulating the redis setup of GN2 G = g @@ -177,21 +177,7 @@ elif cmd == 'redis': gt = G.T G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) - print np.array(ps) - print len(ps),sum(ps) - # Test results 4070.02346579 - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") - if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - assert int(sum(ps)) == 4070 - assert len(ps) == 8000 + check_results(ps,ts) elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G @@ -235,4 +221,4 @@ elif cmd == 'kinship': assert k3==1.4352, "k3=%f" % k3 else: - print "Doing nothing" + fatal("Doing nothing") -- cgit v1.2.3 From 99fef2888f02551191cf6031c2c7222fce27e360 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 12:33:07 +0200 Subject: Run works without transposes --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 21 +++++++--- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 24 +++++++---- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 +++++++++++++++++++---------- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 43 ++++++++++--------- wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 35 +++++++++++++--- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 21 +++++----- 6 files changed, 136 insertions(+), 73 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index 8b344a90..ae3769d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -19,7 +19,7 @@ import pdb import time -# from utility import temp_data +import sys import lmm2 import os @@ -31,6 +31,18 @@ from lmm2 import LMM2 import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses +except ImportError: + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + has_gn2=False + from standalone import uses + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + + def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" @@ -52,12 +64,11 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): def f_init(q): compute_snp.q = q -def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): +def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): """ GWAS. The G matrix should be n inds (cols) x m snps (rows) """ - progress,debug,info,mprint = uses('progress','debug','info','mprint') - + info("In gwas.gwas") matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads @@ -70,7 +81,7 @@ def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): m = G.shape[0] # snps snps = m info("%s SNPs",snps) - assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) + assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index be12417e..1c157fd8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,12 +28,21 @@ import time from optmatrix import matrix_initialize, matrixMultT -def kinship_full(G,uses): +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + +def kinship_full(G): """ Calculate the Kinship matrix using a full dot multiplication """ - info,mprint = uses('info','mprint') - # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds @@ -78,8 +87,7 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): - progress,debug,info,mprint = uses('progress','debug','info','mprint') +def kinship(G,computeSize=1000,numThreads=None,useBLAS=False): matrix_initialize(useBLAS) @@ -89,7 +97,7 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): m = G.shape[0] # snps snps = m info("%i SNPs" % (m)) - assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) + assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) @@ -140,13 +148,11 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): K = K / float(snps) return K -def kvakve(K,uses): +def kvakve(K): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - info,mprint = uses('info','mprint') - info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) mprint("Kva",Kva) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index e51742c4..82bd7f0b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -64,7 +64,6 @@ except ImportError: import standalone as handlers from standalone import uses, progress_set_func sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") - pass progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') @@ -296,8 +295,8 @@ def run_other_old(pheno_vector, Bench().report() return p_values, t_stats -def run_other_new(pheno_vector, - genotype_matrix, +def run_other_new(n,m,pheno_vector, + geno, restricted_max_likelihood=True, refit=False): @@ -312,8 +311,7 @@ def run_other_new(pheno_vector, print("REML=",restricted_max_likelihood," REFIT=",refit) # Adjust phenotypes - Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True) - print("Removed missing phenotypes",Y.shape) + n,Y,keep = phenotype.remove_missing_new(n,pheno_vector) # if options.maf_normalization: # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) @@ -321,8 +319,9 @@ def run_other_new(pheno_vector, # if not options.skip_genotype_normalization: # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + geno = geno[:,keep] with Bench("Calculate Kinship"): - K,G = calculate_kinship_new(G) + K,G = calculate_kinship_new(geno) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -337,9 +336,8 @@ def run_other_new(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = gwas.gwas(Y, - G.T, + G, K, - uses, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() @@ -378,18 +376,30 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) +def calculate_kinship_new(genotype_matrix): + """ + Call the new kinship calculation where genotype_matrix contains + inds (columns) by snps (rows). + """ + assert type(genotype_matrix) is np.ndarray + info("call genotype.normalize") + G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix) + mprint("G",genotype_matrix) + info("call calculate_kinship_new") + return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME) -def calculate_kinship_new(genotype_matrix, temp_data=None): +def calculate_kinship_iter(geno): """ Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ + assert type(genotype_matrix) is iter info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) info("call calculate_kinship_new") - return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) + return kinship(G) -def calculate_kinship_old(genotype_matrix, temp_data=None): +def calculate_kinship_old(genotype_matrix): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -430,7 +440,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): mprint("G (after old normalize)",genotype_matrix.T) kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix,genotype_matrix - # return kinship_full(genotype_matrix.T,uses),genotype_matrix + # return kinship_full(genotype_matrix.T),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -586,7 +596,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K,uses) + Kva,Kve = kvakve(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) @@ -794,12 +804,11 @@ class LMM: pl.ylabel("Probability of data") pl.title(title) - def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): """ Invoke pylmm using genotype as a matrix or as a (SNP) iterator. """ - info("gwas_without_redis") + info("run_gwas") print('pheno', y) if species == "human" : @@ -813,8 +822,8 @@ def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,ne print('geno', geno.shape, geno) if new_code: - ps, ts = run_other_new(pheno_vector = y, - genotype_matrix = geno, + ps, ts = run_other_new(n,m,pheno_vector = y, + geno = geno, restricted_max_likelihood = reml, refit = refit) else: @@ -849,10 +858,20 @@ def gwas_with_redis(key,species,new_code=True): v = np.array(v) return v + def narrayT(key): + m = narray(key) + if m is not None: + return m.T + return m + + # We are transposing before we enter run_gwas - this should happen on the webserver + # side (or when reading data from file) + k = narray('kinship_matrix') + g = narrayT('genotype_matrix') y = narray('pheno_vector') n = len(y) m = params['num_genotypes'] - ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -864,19 +883,19 @@ def gwas_with_redis(key,species,new_code=True): Redis.expire(results_key, 60*60) return ps, ts - def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): """ - This function emulates current GN2 behaviour by pre-loading Redis + This function emulates current GN2 behaviour by pre-loading Redis (note the input + genotype is transposed to emulate GN2 (FIXME!) """ - print("Loading Redis from parsed data") + info("Loading Redis from parsed data") if kinship == None: k = None else: k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno.tolist(), - num_genotypes = geno.shape[1], + genotype_matrix = geno.T.tolist(), + num_genotypes = geno.shape[0], kinship_matrix = k, covariate_matrix = None, input_file_name = None, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d67e1205..358bf27e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -32,7 +32,6 @@ except ImportError: sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses - pass def calculateKinship(W,center=False): """ @@ -149,28 +148,32 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): class LMM2: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. + """This is a simple version of EMMA/fastLMM. + + The main purpose of this module is to take a phenotype vector (Y), + a set of covariates (X) and a kinship matrix (K) and to optimize + this model by finding the maximum-likelihood estimates for the + model parameters. There are three model parameters: heritability + (h), covariate coefficients (beta) and the total phenotypic + variance (sigma). Heritability as defined here is the proportion + of the total variance (sigma) that is attributed to the kinship + matrix. + + For simplicity, we assume that everything being input is a numpy + array. If this is not the case, the module may throw an error as + conversion from list to numpy array is not done consistently. """ def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): - """ - The constructor takes a phenotype vector or array Y of size n. - It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """The constructor takes a phenotype vector or array Y of size n. It + takes a kinship matrix K of size n x n. Kva and Kve can be + computed as Kva,Kve = linalg.eigh(K) and cached. If they are + not provided, the constructor will calculate them. X0 is an + optional covariate matrix of size n x q, where there are q + covariates. When this parameter is not provided, the + constructor will set X0 to an n x 1 matrix of all ones to + represent a mean effect. """ if X0 is None: @@ -194,7 +197,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K,uses) + Kva,Kve = kinship.kvakve(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 4c8175f7..7b652515 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -19,22 +19,47 @@ import sys import numpy as np -def remove_missing(y,g,verbose=False): +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + +def remove_missing(n,y,g): """ Remove missing data from matrices, make sure the genotype data has individuals as rows """ assert(y is not None) - assert(y.shape[0] == g.shape[0]) + assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape) y1 = y g1 = g v = np.isnan(y) keep = True - v if v.sum(): - if verbose: - sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) + info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) y1 = y[keep] g1 = g[keep,:] - return y1,g1,keep + n = y1.shape[0] + return n,y1,g1,keep + +def remove_missing_new(n,y): + """ + Remove missing data. Returns new n,y,keep + """ + assert(y is not None) + y1 = y + v = np.isnan(y) + keep = True - v + if v.sum(): + info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum())) + y1 = y[keep] + n = y1.shape[0] + return n,y1,keep diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 2d02e195..d248dee2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -131,16 +131,15 @@ def check_results(ps,ts): if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') - snp_iterator = tsvreader.geno_iter(options.geno) n = len(y) m = g.shape[1] - ps, ts = run_gwas('other',n,m,k,y,g.T) + ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP check_results(ps,ts) elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') - snp_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) + geno_iterator = tsvreader.geno_iter(options.geno) + ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator) check_results(ps,ts) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing @@ -150,10 +149,9 @@ elif cmd == 'redis_new': Y = y G = g print "Original G",G.shape, "\n", G - - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) + # gt = G.T + # G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True) check_results(ps,ts) elif cmd == 'redis': # Emulating the redis setup of GN2 @@ -174,9 +172,10 @@ elif cmd == 'redis': g = None gnt = None - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) + # gt = G.T + # G = None + mprint("G",G) + ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) check_results(ps,ts) elif cmd == 'kinship': G = g -- cgit v1.2.3 From 49f5eb3e825c953bc7f6da87460ccfe9b891d493 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 13:01:44 +0200 Subject: Fixing transpose issues --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 1 - wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 26 ++++++++++++-------------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index ae3769d4..247a8729 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -36,7 +36,6 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 82bd7f0b..6f03eaf7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -288,7 +288,7 @@ def run_other_old(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = GWAS(pheno_vector, - genotype_matrix, + genotype_matrix.T, kinship_matrix, restricted_max_likelihood=True, refit=False) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index d248dee2..44d5c0f4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -108,26 +108,25 @@ if options.geno and cmd != 'iterator': def check_results(ps,ts): print np.array(ps) print len(ps),sum(ps) - # Test results p1 = round(ps[0],4) p2 = round(ps[-1],4) - # sys.stderr.write(options.geno+"\n") if options.geno == 'data/small.geno': info("Validating results for "+options.geno) - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 + assert p1==0.7387, "p1=%f" % p1 + assert p2==0.7387, "p2=%f" % p2 if options.geno == 'data/small_na.geno': info("Validating results for "+options.geno) - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 + assert p1==0.062, "p1=%f" % p1 + assert p2==0.062, "p2=%f" % p2 if options.geno == 'data/test8000.geno': info("Validating results for "+options.geno) - # assert p1==0.8984, "p1=%f" % p1 - # assert p2==0.9621, "p2=%f" % p2 assert round(sum(ps)) == 4070 assert len(ps) == 8000 info("Run completed") - + +if y is not None: + n = y.shape[0] + if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') @@ -159,7 +158,7 @@ elif cmd == 'redis': print "Original G",G.shape, "\n", G if y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,gnt) G = g.T print "Removed missing phenotypes",G.shape, "\n", G else: @@ -174,7 +173,6 @@ elif cmd == 'redis': # gt = G.T # G = None - mprint("G",G) ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) check_results(ps,ts) elif cmd == 'kinship': @@ -182,7 +180,7 @@ elif cmd == 'kinship': print "Original G",G.shape, "\n", G if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,g.T) G = g.T print "Removed missing phenotypes",G.shape, "\n", G if options.maf_normalization: @@ -194,7 +192,7 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(np.copy(G),uses) + K = kinship_full(np.copy(G)) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) @@ -204,7 +202,7 @@ elif cmd == 'kinship': k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T,uses) + K3 = kinship(G.T) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) -- cgit v1.2.3 From 17f453e50ebac657d9f3096811d92bedc9bfc064 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 13:15:48 +0200 Subject: Regression tests --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 44d5c0f4..52c3c80a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -196,20 +196,20 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G).T) + K2,G = calculate_kinship_new(np.copy(G)) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) if options.geno == 'data/small.geno': - assert k1==0.8, "k1=%f" % k1 - assert k2==0.7939, "k2=%f" % k2 - assert k3==0.7939, "k3=%f" % k3 + assert k1==0.8333, "k1=%f" % k1 + assert k2==0.9375, "k2=%f" % k2 + assert k3==0.9375, "k3=%f" % k3 if options.geno == 'data/small_na.geno': assert k1==0.8333, "k1=%f" % k1 assert k2==0.7172, "k2=%f" % k2 -- cgit v1.2.3