From e6e3b12eeb3fc57b9652468304c1fd14a0a816d0 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 10:08:29 +0300 Subject: Add callback handlers --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 38 ++++++++++++++++++++++++++ wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 41 ++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gn2.py create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/standalone.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py new file mode 100644 index 00000000..e0c6c8a7 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -0,0 +1,38 @@ +# Genenetwork2 specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# + +from __future__ import absolute_import, print_function, division + +import sys +import logging + +# logging.basicConfig(level=logging.DEBUG) + +def progress(location, count, total): + print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logging.debug, + info = logging.info, + warning = logging.warning, + error = logging.error, + critical = logging.critical, + progress = progress + ) + +# ----- Minor test cases: + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py new file mode 100644 index 00000000..a806729e --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -0,0 +1,41 @@ +# Standalone specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) + +from __future__ import absolute_import, print_function, division + +import sys +import logging + +logging.basicConfig(level=logging.DEBUG) + +def progress(location, count, total): + logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logging.debug, + info = logging.info, + warning = logging.warning, + error = logging.error, + critical = logging.critical, + progress = progress + ) + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) -- cgit v1.2.3 From 178cdbbd1a52cfcab975ab27b36e148009cc3577 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:05:39 +0300 Subject: Introducing callbacks --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 17 ++++++++++++-- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 17 +++++++------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 +++++++++- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 34 ++++++++++++++++++++++++++-- 5 files changed, 71 insertions(+), 14 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index e0c6c8a7..4702c670 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -5,13 +5,25 @@ from __future__ import absolute_import, print_function, division +import numpy as np import sys import logging # logging.basicConfig(level=logging.DEBUG) +# np.set_printoptions() def progress(location, count, total): - print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + """ + Progress update + """ + logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + print(msg,m.shape,"=\n",m) def callbacks(): return dict( @@ -22,7 +34,8 @@ def callbacks(): warning = logging.warning, error = logging.error, critical = logging.critical, - progress = progress + progress = progress, + mprint = mprint ) # ----- Minor test cases: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 0c43587e..43e7fe36 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -155,20 +155,21 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, verbose=True): +def kvakve(K, callbacks): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - + info = callbacks()['info'] + mprint = callbacks()['mprint'] + + info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) - if verbose: - print("Kva is: ", Kva.shape, Kva) - print("Kve is: ", Kve.shape, Kve) + mprint("Kva",Kva) + mprint("Kve",Kve) - if sum(Kva < 1e-6): - if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0))) + if sum(Kva < 0): + info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0))) Kva[Kva < 1e-6] = 1e-6 return Kva,Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8a24d98b..5ad644e2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -54,11 +54,14 @@ import genotype import phenotype import gwas +# ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks + from gn2 import callbacks except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False + from standalone import callbacks pass #np.seterr('raise') @@ -594,7 +597,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K) + Kva,Kve = kvakve(K,callbacks) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d4b3ac82..6aefb9d3 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,6 +24,16 @@ from scipy import optimize from optmatrix import matrixMult import kinship +# A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import callbacks +except ImportError: + print("WARNING: Standalone version missing the Genenetwork2 environment\n") + has_gn2=False + from standalone import callbacks + pass + def calculateKinship(W,center=False): """ W is an n x m matrix encoding SNP minor alleles. @@ -184,7 +194,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K) + Kva,Kve = kinship.kvakve(K,callbacks) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index a806729e..bbee3cd7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -8,13 +8,29 @@ from __future__ import absolute_import, print_function, division +import numpy as np import sys import logging logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) def progress(location, count, total): - logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total))) + logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") def callbacks(): return dict( @@ -25,7 +41,8 @@ def callbacks(): warning = logging.warning, error = logging.error, critical = logging.critical, - progress = progress + progress = progress, + mprint = mprint ) # ----- Minor test cases: @@ -39,3 +56,16 @@ if __name__ == '__main__': wrln("Hello %i" % 34) progress = callbacks()['progress'] progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) -- cgit v1.2.3 From 876e80148984274dfd3b8281677c7541504feb59 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:18:58 +0300 Subject: Added uses as syntax sugar for callbacks --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 5 ++--- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 +++--- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 8 ++++---- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 9 +++++++++ 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 43e7fe36..d3792570 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -155,13 +155,12 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, callbacks): +def kvakve(K, uses): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - info = callbacks()['info'] - mprint = callbacks()['mprint'] + info,mprint = uses('info','mprint') info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5ad644e2..2076bc84 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,11 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import callbacks + from gn2 import uses except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False - from standalone import callbacks + from standalone import uses pass #np.seterr('raise') @@ -597,7 +597,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K,callbacks) + Kva,Kve = kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index 6aefb9d3..5b93ae0d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,14 +24,14 @@ from scipy import optimize from optmatrix import matrixMult import kinship -# A trick to decide on the environment: +# ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import callbacks + from gn2 import uses except ImportError: print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False - from standalone import callbacks + from standalone import uses pass def calculateKinship(W,center=False): @@ -194,7 +194,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K,callbacks) + Kva,Kve = kinship.kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index bbee3cd7..705da21f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -44,6 +44,12 @@ def callbacks(): progress = progress, mprint = mprint ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] # ----- Minor test cases: @@ -69,3 +75,6 @@ if __name__ == '__main__': [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") -- cgit v1.2.3 From 7f937ef3265f007c25ec2c386bc399a708bcdd5e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 11:46:06 +0300 Subject: Introduce sugar for callbacks --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 26 +++++++++++++-- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 49 +++++++++------------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 3 +- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 14 ++++---- 5 files changed, 50 insertions(+), 44 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index 4702c670..c71b9f22 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -37,11 +37,17 @@ def callbacks(): progress = progress, mprint = mprint ) - + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + # ----- Minor test cases: if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) + # logging.basicConfig(level=logging.DEBUG) logging.debug("Test %i" % (1)) d = callbacks()['debug'] d("TEST") @@ -49,3 +55,19 @@ if __name__ == '__main__': wrln("Hello %i" % 34) progress = callbacks()['progress'] progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index d3792570..62f7be47 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -74,46 +74,39 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): - numThreads = None - if numThreads: - numThreads = int(numThreads) +def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize(useBLAS) - - sys.stderr.write(str(G.shape)+"\n") + + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") + info("%i SNPs" % (m)) assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() - print "CPU cores:",cpu_num - print snps,computeSize + info("CPU cores: %i" % cpu_num) iterations = snps/computeSize+1 - # if testing: - # iterations = 8 - # jobs = range(0,8) # range(0,iterations) results = [] - K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals completed = 0 for job in range(iterations): - if verbose: - sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize))) + info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize))) W = compute_W(job,G,n,snps,computeSize) if numThreads == 1: # Single-core compute_matrixMult(job,W,q) j,x = q.get() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + progress("kinship",j,iterations) K_j = x - # print j,K_j[:,0] K = K + K_j else: # Multi-core @@ -123,39 +116,27 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): time.sleep(0.1) try: j,x = q.get_nowait() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) except Queue.Empty: pass if numThreads == None or numThreads > 1: - # results contains the growing result set for job in range(len(results)-completed): j,x = q.get(True,15) - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) K = K / float(snps) - - # outFile = 'runtest.kin' - # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) - # np.savetxt(outFile,K) - - # if saveKvaKve: - # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n") - # Kva,Kve = linalg.eigh(K) - # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) - # np.savetxt(outFile+".kva",Kva) - # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, uses): +def kvakve(K,uses): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 2076bc84..5182e73c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -395,7 +395,7 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): print("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) print("call calculate_kinship_new") - return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) + return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): """ diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 324c4f2c..e3e8659c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -25,6 +25,7 @@ from lmm import gn2_load_redis, calculate_kinship_old from kinship import kinship, kinship_full import genotype import phenotype +from standalone import uses usage = """ python runlmm.py [options] command @@ -193,7 +194,7 @@ elif cmd == 'kinship': k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G.T,uses) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 705da21f..538007f1 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -12,11 +12,13 @@ import numpy as np import sys import logging +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) def progress(location, count, total): - logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): """ @@ -36,11 +38,11 @@ def callbacks(): return dict( write = sys.stdout.write, writeln = print, - debug = logging.debug, - info = logging.info, - warning = logging.warning, - error = logging.error, - critical = logging.critical, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, progress = progress, mprint = mprint ) -- cgit v1.2.3 From 204805157912aebb92967241850453f07729e2f6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:00:01 +0300 Subject: Warning to stderr --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5182e73c..66c952aa 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -59,9 +59,9 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses + sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass #np.seterr('raise') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index 5b93ae0d..aa6b473d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -29,7 +29,7 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses pass -- cgit v1.2.3 From f1056b9f4128fb91fbaf738914395697aa485b2e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:09:21 +0300 Subject: Warning to stderr --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 ++++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 66c952aa..95272818 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,10 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses + from gn2 import uses, set_progress_storage except ImportError: has_gn2=False - from standalone import uses + import standalone as handlers + from standalone import uses, set_progress_storage sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass @@ -816,6 +817,7 @@ def gn2_redis(key,species,new_code=True): params = json.loads(json_params) tempdata = temp_data.TempData(params['temp_uuid']) + set_progress_storage(tempdata) print('kinship', np.array(params['kinship_matrix'])) print('pheno', np.array(params['pheno_vector'])) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 538007f1..e20d4092 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,7 +17,12 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) +def set_progress_storage(location): + global storage + storage = location + def progress(location, count, total): + storage['percentage'] = round(count*100.0)/total) logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): -- cgit v1.2.3 From 6b8321d77e915dc5aec0c272c1cb84c2af3e6191 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:17:59 +0300 Subject: Replace progress meter --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 7 ++++++- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++----------- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index c71b9f22..f8033ac5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,11 +12,16 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() +def set_progress_storage(location): + global storage + storage = location + def progress(location, count, total): """ Progress update """ - logging.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + storage.store("percent_complete",round(count*100.0)/total) + logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): """ diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 95272818..eab7d91d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,6 +65,8 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass +progress,info = uses('progress','info') + #np.seterr('raise') #def run_human(pheno_vector, @@ -171,10 +173,7 @@ def run_human(pheno_vector, #if count > 1000: # break count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - tempdata.store("percent_complete", percent_complete) + progress("human",count,total_snps) #with Bench("actual association"): ps, ts = human_association(snp, @@ -431,10 +430,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - - percent_complete = int(round((counter/m)*45)) - if temp_data != None: - temp_data.store("percent_complete", percent_complete) + progress('kinship_old',counter,m) genotype_matrix = genotype_matrix[:,keep] print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) @@ -539,9 +535,8 @@ def GWAS(pheno_vector, lmm_ob.fit(X=x) ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood) - percent_complete = 45 + int(round((counter/m)*55)) - temp_data.store("percent_complete", percent_complete) - + progress("gwas_old",counter,m) + p_values.append(ps) t_statistics.append(ts) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index e20d4092..b3d480c3 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -22,7 +22,7 @@ def set_progress_storage(location): storage = location def progress(location, count, total): - storage['percentage'] = round(count*100.0)/total) + storage.store("percent_complete",round(count*100.0)/total) logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) def mprint(msg,data): -- cgit v1.2.3 From de84be30502af4be014fa4c0a2e7b54e51cff6f6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 12:36:03 +0300 Subject: Progress handler --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 19 ++++++++++++++----- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index f8033ac5..b487ea25 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,17 +12,26 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() +last_location = None +last_progress = 0 + def set_progress_storage(location): global storage storage = location def progress(location, count, total): - """ - Progress update - """ - storage.store("percent_complete",round(count*100.0)/total) - logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + global last_location + global last_progress + + perc = round(count*100.0/total) + # print(last_progress,";",perc) + if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): + storage.store("percent_complete",perc) + logger.info("Progress: %s %d%%" % (location,perc)) + last_location = location + last_progress = perc + def mprint(msg,data): """ Array/matrix print function diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index b3d480c3..7cc3e871 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,13 +17,24 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) +last_location = None +last_progress = 0 + def set_progress_storage(location): global storage storage = location def progress(location, count, total): - storage.store("percent_complete",round(count*100.0)/total) - logger.info("Progress: %s %d%%" % (location,round(count*100.0/total))) + global last_location + global last_progress + + perc = round(count*100.0/total) + # print(last_progress,";",perc) + if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): + storage.store("percent_complete",perc) + logger.info("Progress: %s %d%%" % (location,perc)) + last_location = location + last_progress = perc def mprint(msg,data): """ -- cgit v1.2.3 From f0653da318cac9736777495e40de6853227904ec Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:21:12 +0300 Subject: Cleaned up gwas.py to use uses and moved Redis call back into lmm.py --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 70 +++++++++++----------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 10 ++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 31 +++++++----- 3 files changed, 52 insertions(+), 59 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index b901c0e2..8b344a90 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -19,7 +19,6 @@ import pdb import time -import sys # from utility import temp_data import lmm2 @@ -36,12 +35,10 @@ def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - # print("COMPUTE SNP",j,snp_ids,"\n") result = [] for snp_id in snp_ids: snp,id = snp_id x = snp.reshape((n,1)) # all the SNPs - # print "X=",x # if refit: # L.fit(X=snp,REML=REML) ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) @@ -51,32 +48,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): q = compute_snp.q q.put([j,result]) return j - # PS.append(ps) - # TS.append(ts) - # return len(result) - # compute.q.put(result) - # return None def f_init(q): compute_snp.q = q -def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): +def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): """ - Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows) + GWAS. The G matrix should be n inds (cols) x m snps (rows) """ + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads kfile2 = False reml = restricted_max_likelihood - sys.stderr.write(str(G.shape)+"\n") + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") - # print "***** GWAS: G",G.shape,G + info("%s SNPs",snps) assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association @@ -85,19 +78,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) if not refit: - if verbose: sys.stderr.write("Computing fit for null model\n") + info("Computing fit for null model") lmm2.fit() # follow GN model in run_other - if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma)) - - # outFile = "test.out" - # out = open(outFile,'w') - out = sys.stderr - - def outputResult(id,beta,betaSD,ts,ps): - out.write(formatResult(id,beta,betaSD,ts,ps)) - def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - - # printOutHead() + info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma)) + res = [] # Set up the pool @@ -106,26 +90,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): p = mp.Pool(numThreads, f_init, [q]) collect = [] - # Buffers for pvalues and t-stats - # PS = [] - # TS = [] count = 0 job = 0 jobs_running = 0 + jobs_completed = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 if count % 1000 == 0: job += 1 - if verbose: - sys.stderr.write("Job %d At SNP %d\n" % (job,count)) + debug("Job %d At SNP %d" % (job,count)) if numThreads == 1: - print "Running on 1 THREAD" + debug("Running on 1 THREAD") compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) @@ -134,8 +116,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): while jobs_running > cpu_num: try: j,lst = q.get_nowait() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) jobs_running -= 1 except Queue.Empty: @@ -150,24 +133,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): if numThreads==1 or count<1000 or len(collect)>0: job += 1 - print "Collect final batch size %i job %i @%i: " % (len(collect), job, count) + debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count)) compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() res.append((j,lst)) - print "count=",count," running=",jobs_running," collect=",len(collect) + debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect))) for job in range(jobs_running): j,lst = q.get(True,15) # time out - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) - print "Before sort",[res1[0] for res1 in res] + mprint("Before sort",[res1[0] for res1 in res]) res = sorted(res,key=lambda x: x[0]) - # if verbose: - # print "res=",res[0][0:10] - print "After sort",[res1[0] for res1 in res] - print [len(res1[1]) for res1 in res] + mprint("After sort",[res1[0] for res1 in res]) + info([len(res1[1]) for res1 in res]) ts = [item[0] for j,res1 in res for item in res1] ps = [item[1] for j,res1 in res for item in res1] return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index eab7d91d..1e00002a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -57,11 +57,11 @@ import gwas # ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses, set_progress_storage + from gn2 import uses, progress_set_func except ImportError: has_gn2=False import standalone as handlers - from standalone import uses, set_progress_storage + from standalone import uses, progress_set_func sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass @@ -348,6 +348,7 @@ def run_other_new(pheno_vector, t_stats, p_values = gwas.gwas(Y, G.T, K, + uses, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() @@ -812,7 +813,10 @@ def gn2_redis(key,species,new_code=True): params = json.loads(json_params) tempdata = temp_data.TempData(params['temp_uuid']) - set_progress_storage(tempdata) + def update_tempdata(loc,i,total): + tempdata.store("percent_complete",round(i*100.0/total)) + debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) + progress_set_func(update_tempdata) print('kinship', np.array(params['kinship_matrix'])) print('pheno', np.array(params['pheno_vector'])) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 7cc3e871..36bf8fd5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -17,24 +17,31 @@ logger = logging.getLogger('lmm2') logging.basicConfig(level=logging.DEBUG) np.set_printoptions(precision=3,suppress=True) -last_location = None -last_progress = 0 +progress_location = None +progress_current = None +progress_prev_perc = None -def set_progress_storage(location): - global storage - storage = location +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func def progress(location, count, total): - global last_location - global last_progress + global progress_location + global progress_prev_perc perc = round(count*100.0/total) - # print(last_progress,";",perc) - if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): - storage.store("percent_complete",perc) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) logger.info("Progress: %s %d%%" % (location,perc)) - last_location = location - last_progress = perc + progress_location = location + progress_prev_perc = perc def mprint(msg,data): """ -- cgit v1.2.3 From 9b8a958494364fc6470cfe93f90d179e0bc7a787 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:23:06 +0300 Subject: Aligned gn2 handlers with standalone --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index b487ea25..f30cf1e6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -12,25 +12,31 @@ import logging # logging.basicConfig(level=logging.DEBUG) # np.set_printoptions() -last_location = None -last_progress = 0 +progress_location = None +progress_current = None +progress_prev_perc = None -def set_progress_storage(location): - global storage - storage = location +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func def progress(location, count, total): - global last_location - global last_progress + global progress_location + global progress_prev_perc perc = round(count*100.0/total) - # print(last_progress,";",perc) - if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5): - storage.store("percent_complete",perc) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) logger.info("Progress: %s %d%%" % (location,perc)) - last_location = location - last_progress = perc - + progress_location = location + progress_prev_perc = perc def mprint(msg,data): """ -- cgit v1.2.3 From 130afd633fc50cbccaf2d12e5e643eb5f8b98c6f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:32:21 +0300 Subject: Add uses debug --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 1e00002a..e0fc8305 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,7 +65,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,info = uses('progress','info') +progress,debug,info = uses('progress','debug','info') #np.seterr('raise') -- cgit v1.2.3 From 803c3c56c37e448fd52fa102fdb6eef8431154cc Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 18 Mar 2015 13:36:35 +0300 Subject: Tagging 0.50-gn2-pre2 --- wqflask/wqflask/my_pylmm/README.md | 35 +++++++++++++++++------------- wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 +- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index f6b0e72d..a84b5be2 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,21 +1,26 @@ -# RELEASE NOTES +# Genenetwork2/pylmm RELEASE NOTES -## 0.50-gn2-pre1 release +## 0.50-gn2-pre2 -This is the first test release of multi-core pylmm into GN2. Both -kinship calculation and GWAS have been made multi-threaded by -introducing the Python multiprocessing module. Note that only -run_other has been updated to use the new routines (so human is still -handled the old way). I have taken care that we can still run both -old-style and new-style LMM (through passing the 'new_code' -boolean). This could be an option in the web server for users to -select and test for any unexpected differences (of which there should -be none, naturally ;). +- Added abstractions for progress meter and info/debug statements; + Redis perc_complete is now updated through a lambda -The current version can handle missing phenotypes, but as they are -removed there is no way for GN2 to know what SNPs the P-values belong -to. A future version will pass a SNP index to allow for missing -phenotypes. +## 0.50-gn2-pre1 (release) + +- This is the first test release of multi-core pylmm into GN2. Both + kinship calculation and GWAS have been made multi-threaded by + introducing the Python multiprocessing module. Note that only + run_other has been updated to use the new routines (so human is + still handled the old way). I have taken care that we can still run + both old-style and new-style LMM (through passing the 'new_code' + boolean). This could be an option in the web server for users to + select and test for any unexpected differences (of which there + should be none, naturally ;). + +- The current version can handle missing phenotypes, but as they are + removed there is no way for GN2 to know what SNPs the P-values + belong to. A future version will pass a SNP index to allow for + missing phenotypes. \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index c40c3221..6ab60d02 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -1 +1 @@ -PYLMM_VERSION="0.50-gn2-pre1" +PYLMM_VERSION="0.50-gn2-pre2" -- cgit v1.2.3 From 8e9d7cde41800766fec835ca0c4a55c6327e05c8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 20 Mar 2015 11:47:10 +0300 Subject: Trying to get kinship_old back in lmm1 --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 14 +++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 35 ++++++++++++++--------------- wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++-- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 62f7be47..be12417e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,17 +28,21 @@ import time from optmatrix import matrix_initialize, matrixMultT -def kinship_full(G): +def kinship_full(G,uses): """ Calculate the Kinship matrix using a full dot multiplication """ - print G.shape + info,mprint = uses('info','mprint') + + # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds - sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" - m = np.dot(G.T,G) + info("%d SNPs",m) + assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n) + # m = np.dot(G.T,G) + m = matrixMultT(G.T) m = m/G.shape[0] + # mprint("kinship_full K",m) return m def compute_W(job,G,n,snps,compute_size): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index e0fc8305..c040e3c2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -65,7 +65,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,debug,info = uses('progress','debug','info') +progress,mprint,debug,info = uses('progress','mprint','debug','info') #np.seterr('raise') @@ -277,7 +277,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -331,7 +331,7 @@ def run_other_new(pheno_vector, # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) with Bench("Calculate Kinship"): - K,G = calculate_kinship(G, tempdata) + K,G = calculate_kinship_new(G, tempdata) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -393,9 +393,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ - print("call genotype.normalize") + info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - print("call calculate_kinship_new") + info("call calculate_kinship_new") return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): @@ -406,11 +406,11 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): normalizes the resulting vectors and returns the RRM matrix. """ - print("call calculate_kinship_old") + info("call calculate_kinship_old") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("genotype 2D matrix n (inds) is:", n) - print("genotype 2D matrix m (snps) is:", m) + info("genotype 2D matrix n (inds) is: %d" % (n)) + info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] for counter in range(m): @@ -431,14 +431,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - progress('kinship_old',counter,m) + progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) - kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - return kinship_matrix,genotype_matrix - -calculate_kinship = calculate_kinship_new # alias + mprint("After kinship (old) genotype_matrix: ", genotype_matrix) + # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) + # return kinship_matrix,genotype_matrix + return kinship_full(genotype_matrix.T,uses),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -464,9 +463,9 @@ def GWAS(pheno_vector, refit - refit the variance component for each SNP """ - if kinship_eigen_vals == None: + if kinship_eigen_vals is None: kinship_eigen_vals = [] - if kinship_eigen_vectors == None: + if kinship_eigen_vectors is None: kinship_eigen_vectors = [] n = genotype_matrix.shape[0] @@ -570,7 +569,7 @@ class LMM: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose #x = Y != -9 @@ -663,7 +662,7 @@ class LMM: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 682ba371..4c8175f7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -24,7 +24,7 @@ def remove_missing(y,g,verbose=False): Remove missing data from matrices, make sure the genotype data has individuals as rows """ - assert(y!=None) + assert(y is not None) assert(y.shape[0] == g.shape[0]) y1 = y diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index e3e8659c..6a38da56 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -134,7 +134,7 @@ elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: + if y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T @@ -165,7 +165,7 @@ elif cmd == 'redis': assert p1==0.0897, "p1=%f" % p1 assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert round(sum(ps)) == 4070 + assert int(sum(ps)) == 4070 assert len(ps) == 8000 elif cmd == 'kinship': G = g -- cgit v1.2.3 From 38594c7781b587a24be14b9631a73662ee3fdc2b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 20 Mar 2015 12:18:03 +0300 Subject: Fall back on calculate_kinship_new again --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index c040e3c2..a649029c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -277,7 +277,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 6a38da56..88e2a033 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -184,7 +184,7 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(np.copy(G)) + K = kinship_full(np.copy(G),uses) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) -- cgit v1.2.3 From 490e0919b2757f6815a7e6c7f0cb08e55e1cd02e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 10:32:11 +0200 Subject: Percentage complete: Add method description --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8844118f..200424ba 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -814,6 +814,9 @@ def gn2_redis(key,species,new_code=True): tempdata = temp_data.TempData(params['temp_uuid']) def update_tempdata(loc,i,total): + """ + This is the single method that updates Redis for percentage complete! + """ tempdata.store("percent_complete",round(i*100.0/total)) debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) -- cgit v1.2.3 From 6fc112431c0edb0ecae6cd5fa45716c349094a7f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 11:49:43 +0200 Subject: Use of is vs == when testing None --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 200424ba..f0473f99 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -278,7 +278,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -880,7 +880,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), - kinship_matrix= k, + kinship_matrix = k, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index aa6b473d..d67e1205 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -85,7 +85,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): print("genotype matrix n is:", n) print("genotype matrix m is:", m) - if X0 == None: + if X0 is None: X0 = np.ones((n,1)) # Remove missing values in Y and adjust associated parameters @@ -173,7 +173,7 @@ class LMM2: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose @@ -260,7 +260,7 @@ class LMM2: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: X = self.X0t + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] X = self.X0t_stack @@ -316,7 +316,7 @@ class LMM2: Given this optimum, the function computes the LL and associated ML solutions. """ - if X == None: X = self.X0t + if X is None: X = self.X0t else: #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] @@ -340,7 +340,7 @@ class LMM2: def association(self,X,h=None,stack=True,REML=True,returnBeta=False): """ Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. + If h is None, the optimal h stored in optH is used. """ if False: @@ -358,7 +358,7 @@ class LMM2: self.X0t_stack[:,(self.q)] = m X = self.X0t_stack - if h == None: h = self.optH + if h is None: h = self.optH L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) q = len(beta) -- cgit v1.2.3 From 8b88be4f48baa6cd0cc3c37a851144d5b1dc24af Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 30 Mar 2015 13:01:22 +0200 Subject: Refactoring genotype normalization --- wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 19 ++++++++++--------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9 +++++---- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 ++ 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py index 315fd824..49f32e3a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py @@ -37,14 +37,15 @@ def normalize(ind_g): Run for every SNP list (for one individual) and return normalized SNP genotype values with missing data filled in """ - g1 = np.copy(ind_g) # avoid side effects - x = True - np.isnan(ind_g) # Matrix of True/False - m = ind_g[x].mean() # Global mean value - s = np.sqrt(ind_g[x].var()) # Global stddev - g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data - if s == 0: - g1 = g1 - m # Subtract the mean + g = np.copy(ind_g) # copy to avoid side effects + missing = np.isnan(g) + values = g[True - missing] + mean = values.mean() # Global mean value + stddev = np.sqrt(values.var()) # Global stddev + g[missing] = mean # Plug-in mean values for missing data + if stddev == 0: + g = g - mean # Subtract the mean else: - g1 = (g1 - m) / s # Normalize the deviation - return g1 + g = (g - mean) / stddev # Normalize the deviation + return g diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index f0473f99..035f31e8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -414,6 +414,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] + mprint("G (before old normalize)",genotype_matrix) for counter in range(m): #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers @@ -435,10 +436,10 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - mprint("After kinship (old) genotype_matrix: ", genotype_matrix) - # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - # return kinship_matrix,genotype_matrix - return kinship_full(genotype_matrix.T,uses),genotype_matrix + mprint("G (after old normalize)",genotype_matrix.T) + kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) + return kinship_matrix,genotype_matrix + # return kinship_full(genotype_matrix.T,uses),genotype_matrix def GWAS(pheno_vector, genotype_matrix, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 88e2a033..fc7a4b9d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -106,6 +106,8 @@ if options.geno: if cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') Y = y G = g print "Original G",G.shape, "\n", G -- cgit v1.2.3 From 153317412a090d5b17bc176ff7da2e61e6ec4f2c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 09:55:42 +0200 Subject: Make the new version of genotype normalization default --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 15 ++++++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++ 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index f30cf1e6..7bceb089 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -45,15 +45,20 @@ def mprint(msg,data): m = np.array(data) print(msg,m.shape,"=\n",m) +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + def callbacks(): return dict( write = sys.stdout.write, writeln = print, - debug = logging.debug, - info = logging.info, - warning = logging.warning, - error = logging.error, - critical = logging.critical, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, + fatal = fatal, progress = progress, mprint = mprint ) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 035f31e8..8be3fc6f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -66,7 +66,7 @@ except ImportError: sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass -progress,mprint,debug,info = uses('progress','mprint','debug','info') +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') #np.seterr('raise') @@ -278,7 +278,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -408,6 +408,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): """ info("call calculate_kinship_old") + fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] info("genotype 2D matrix n (inds) is: %d" % (n)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index fc7a4b9d..ef0bdd7e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_old +from lmm import gn2_load_redis, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -190,7 +190,7 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None) + K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py index 36bf8fd5..40b2021d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -57,6 +57,10 @@ def mprint(msg,data): m[-2][0:3]," ... ",m[-2][-3:],"\n ", m[-1][0:3]," ... ",m[-1][-3:],"]") +def fatal(msg): + logger.critical(msg) + raise Exception(msg) + def callbacks(): return dict( write = sys.stdout.write, @@ -66,6 +70,7 @@ def callbacks(): warning = logger.warning, error = logger.error, critical = logger.critical, + fatal = fatal, progress = progress, mprint = mprint ) -- cgit v1.2.3 From 0f132d0cc4a77e69ab593fd9c8a2d5218d083ed7 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 10:15:49 +0200 Subject: Release 0.50-gn2 --- wqflask/wqflask/my_pylmm/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index a84b5be2..4845ec03 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,11 +1,15 @@ # Genenetwork2/pylmm RELEASE NOTES -## 0.50-gn2-pre2 +## 0.50-gn2 (April 2nd, 2015) + +- Replaced the GN2 genotype normalization + +## 0.50-gn2-pre2 (March 18, 2015) - Added abstractions for progress meter and info/debug statements; Redis perc_complete is now updated through a lambda -## 0.50-gn2-pre1 (release) +## 0.50-gn2-pre1 (release, March 17, 2015) - This is the first test release of multi-core pylmm into GN2. Both kinship calculation and GWAS have been made multi-threaded by -- cgit v1.2.3 From 43295e57621e9a08ca4cb90e95cc14a87e0d8b5e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 12:04:14 +0200 Subject: Create test geno iterator --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 9 +++++++-- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ef0bdd7e..5a4bd268 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -99,11 +99,16 @@ if options.pheno: y = tsvreader.pheno(options.pheno) print y.shape -if options.geno: +if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape -if cmd == 'redis_new': +if cmd == 'iterator': + print "ITERATE over SNPs" + def pretty(snpid,values): + print snpid,values + print tsvreader.geno_iter(options.geno,pretty) +elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first if options.remove_missing_phenotypes: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index b4027fa3..7fe75e3f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -74,3 +74,28 @@ def geno(fn): G = np.array(G1) return G +def geno(fn): + G1 = [] + def append(id,values): + G1.append(values) # <--- slow + geno_iter(fn,append) + G = np.array(G1) + return G + +def geno_iter(fn,func): + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + func(id,gs2) -- cgit v1.2.3 From 5151bc389aa98415da9f4d49b3c279ed1380ea7d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 12:14:43 +0200 Subject: Prepare iterator --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 5a4bd268..036bf7d5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -104,10 +104,17 @@ if options.geno and cmd != 'iterator': print g.shape if cmd == 'iterator': - print "ITERATE over SNPs" - def pretty(snpid,values): - print snpid,values - print tsvreader.geno_iter(options.geno,pretty) + def snp_iterator(func): + tsvreader.geno_iter(options.geno,func) + + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first -- cgit v1.2.3 From b9c79ef58ff6ec4da3e65290ea802c783bb17742 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 2 Apr 2015 13:40:42 +0200 Subject: Passing in an iterator --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 33 ++++++++++++++++++++++++++++- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 6 ++---- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 26 ++++++++++++++++++++--- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8be3fc6f..07b55726 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -875,6 +875,9 @@ def gn2_main(): gn2_redis(key,species) def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): + """ + This function emulates current GN2 behaviour by pre-loading Redis + """ print("Loading Redis from parsed data") if kinship == None: k = None @@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.expire(key, 60*60) return gn2_redis(key,species,new_code) - + +def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): + """ + This function emulates GN2 behaviour by pre-loading Redis with + a SNP iterator + """ + print("Loading Redis using a SNP iterator") + if kinship == None: + k = None + else: + k = kinship.tolist() + params = dict(pheno_vector = pheno.tolist(), + genotype_matrix = geno_iterator.tolist(), + kinship_matrix = k, + restricted_max_likelihood = True, + refit = False, + temp_uuid = "testrun_temp_uuid", + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + + return gn2_redis(key,species,new_code) + + if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") if has_gn2: diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 036bf7d5..3b0672b4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship_new +from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator': print g.shape if cmd == 'iterator': - def snp_iterator(func): - tsvreader.geno_iter(options.geno,func) - if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') + snp_iterator = tsvreader.geno_iter(options.geno) ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) print np.array(ps) print len(ps),sum(ps) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index 7fe75e3f..27daf43f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -76,13 +76,12 @@ def geno(fn): def geno(fn): G1 = [] - def append(id,values): + for id,values in geno_iter(fn): G1.append(values) # <--- slow - geno_iter(fn,append) G = np.array(G1) return G -def geno_iter(fn,func): +def geno_callback(fn,func): hab_mapper = {'A':0,'H':1,'B':2,'-':3} pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] @@ -99,3 +98,24 @@ def geno_iter(fn,func): gs = list(row[1]) gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] func(id,gs2) + +def geno_iter(fn): + """ + Yield a tuple of snpid and values + """ + hab_mapper = {'A':0,'H':1,'B':2,'-':3} + pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] + + print fn + with open(fn,'r') as tsvin: + assert(tsvin.readline().strip() == "# Genotype format version 1.0") + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsvin.readline() + tsv = csv.reader(tsvin, delimiter='\t') + for row in tsv: + id = row[0] + gs = list(row[1]) + gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] + yield (id,gs2) -- cgit v1.2.3 From 146b4a45c28b7d3ba4bf982cfaf93eda2e71d1ea Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 10:58:53 +0200 Subject: Refactoring GN2 interface --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 07b55726..6e22e6c9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -805,7 +805,7 @@ class LMM: pl.title(title) -def gn2_redis(key,species,new_code=True): +def gwas_using_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new version @@ -861,18 +861,6 @@ def gn2_redis(key,species,new_code=True): Redis.expire(results_key, 60*60) return ps, ts -# This is the main function used by Genenetwork2 (with environment) -def gn2_main(): - parser = argparse.ArgumentParser(description='Run pyLMM') - parser.add_argument('-k', '--key') - parser.add_argument('-s', '--species') - - opts = parser.parse_args() - - key = opts.key - species = opts.species - - gn2_redis(key,species) def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): """ @@ -898,7 +886,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gn2_redis(key,species,new_code) + return gwas_using_redis(key,species,new_code) def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): """ @@ -925,7 +913,23 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gn2_redis(key,species,new_code) + return gwas_using_redis(key,species,new_code) + +# This is the main function used by Genenetwork2 (with environment) +# +# Note that this calling route will become OBSOLETE (we should use runlmm.py +# instead) +def gn2_main(): + parser = argparse.ArgumentParser(description='Run pyLMM') + parser.add_argument('-k', '--key') + parser.add_argument('-s', '--species') + + opts = parser.parse_args() + + key = opts.key + species = opts.species + + gwas_using_redis(key,species) if __name__ == '__main__': -- cgit v1.2.3 From fabbcac393627badf0542377fc22325ae7e96f3d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 11:15:29 +0200 Subject: Passing in an iterator --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 6e22e6c9..b8650938 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -891,15 +891,21 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): """ This function emulates GN2 behaviour by pre-loading Redis with - a SNP iterator + a SNP iterator, for this it sets a key for every genotype (SNP) """ print("Loading Redis using a SNP iterator") + for i,genotypes in enumerate(geno_iterator): + gkey = key+'_geno_'+str(i) + Redis.set(gkey, genotypes) + Redis.expire(gkey, 60*60) + if kinship == None: k = None else: k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno_iterator.tolist(), + genotype_matrix = "iterator", + genotypes = i, kinship_matrix = k, restricted_max_likelihood = True, refit = False, @@ -913,7 +919,7 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species,new_code) + return gwas_using_redis(key,species) # This is the main function used by Genenetwork2 (with environment) # -- cgit v1.2.3 From 7d13eec7f67578aa75d8430bb5ed74a4dd825b51 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 12:10:55 +0200 Subject: Refactoring Redis use to one function --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 ++++++++++++++++++-------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 +- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index b8650938..88ca6a7f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -805,6 +805,36 @@ class LMM: pl.title(title) +def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): + """ + Invoke pylmm using a genotype (SNP) iterator + """ + info("gwas_without_redis") + print('pheno', y) + + if species == "human" : + print('kinship', k ) + ps, ts = run_human(pheno_vector = y, + covariate_matrix = cov, + plink_input_file = inputfn, + kinship_matrix = k, + refit = refit, tempdata=tempdata) + else: + print('geno', geno.shape, geno) + + if new_code: + ps, ts = run_other_new(pheno_vector = y, + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit, + tempdata = tempdata) + else: + ps, ts = run_other_old(pheno_vector = y, + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit, + tempdata = tempdata) + def gwas_using_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new @@ -823,33 +853,7 @@ def gwas_using_redis(key,species,new_code=True): debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) - - print('pheno', np.array(params['pheno_vector'])) - - if species == "human" : - print('kinship', np.array(params['kinship_matrix'])) - ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), - covariate_matrix = np.array(params['covariate_matrix']), - plink_input_file = params['input_file_name'], - kinship_matrix = np.array(params['kinship_matrix']), - refit = params['refit'], - tempdata = tempdata) - else: - geno = np.array(params['genotype_matrix']) - print('geno', geno.shape, geno) - - if new_code: - ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) - else: - ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']), - genotype_matrix = geno, - restricted_max_likelihood = params['restricted_max_likelihood'], - refit = params['refit'], - tempdata = tempdata) + ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -874,6 +878,8 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", @@ -888,7 +894,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): return gwas_using_redis(key,species,new_code) -def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): +def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): """ This function emulates GN2 behaviour by pre-loading Redis with a SNP iterator, for this it sets a key for every genotype (SNP) @@ -907,6 +913,8 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): genotype_matrix = "iterator", genotypes = i, kinship_matrix = k, + covariate_matrix = None, + input_file_name = None, restricted_max_likelihood = True, refit = False, temp_uuid = "testrun_temp_uuid", @@ -918,7 +926,6 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator): json_params = json.dumps(params) Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species) # This is the main function used by Genenetwork2 (with environment) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 3b0672b4..ab698e41 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new +from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new from kinship import kinship, kinship_full import genotype import phenotype @@ -107,7 +107,7 @@ if cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator) + ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) print np.array(ps) print len(ps),sum(ps) # Test results -- cgit v1.2.3 From fc6f0ef9fc8d2607e70c775c51ca55f50806cc7a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 13:13:09 +0200 Subject: temp_data is no longer passed around --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 50 +++++++++++++++----------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 88ca6a7f..9e25f56d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -81,8 +81,7 @@ def run_human(pheno_vector, covariate_matrix, plink_input_file, kinship_matrix, - refit=False, - tempdata=None): + refit=False): v = np.isnan(pheno_vector) keep = True - v @@ -262,23 +261,19 @@ def human_association(snp, def run_other_old(pheno_vector, genotype_matrix, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -297,24 +292,19 @@ def run_other_old(pheno_vector, genotype_matrix, kinship_matrix, restricted_max_likelihood=True, - refit=False, - temp_data=tempdata) + refit=False) Bench().report() return p_values, t_stats def run_other_new(pheno_vector, genotype_matrix, restricted_max_likelihood=True, - refit=False, - tempdata=None # <---- can not be None - ): + refit=False): """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics restricted_max_likelihood -- whether to use restricted max likelihood; True or False refit -- whether to refit the variance component for each marker - temp_data -- TempData object that stores the progress for each major step of the - calculations ("calculate_kinship" and "GWAS" take the majority of time) """ @@ -332,7 +322,7 @@ def run_other_new(pheno_vector, # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) with Bench("Calculate Kinship"): - K,G = calculate_kinship_new(G, tempdata) + K,G = calculate_kinship_new(G) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -815,25 +805,24 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): if species == "human" : print('kinship', k ) ps, ts = run_human(pheno_vector = y, - covariate_matrix = cov, - plink_input_file = inputfn, - kinship_matrix = k, - refit = refit, tempdata=tempdata) + covariate_matrix = cov, + plink_input_file = inputfn, + kinship_matrix = k, + refit = refit) else: print('geno', geno.shape, geno) if new_code: ps, ts = run_other_new(pheno_vector = y, - genotype_matrix = geno, - restricted_max_likelihood = reml, - refit = refit, - tempdata = tempdata) + genotype_matrix = geno, + restricted_max_likelihood = reml, + refit = refit) else: ps, ts = run_other_old(pheno_vector = y, genotype_matrix = geno, restricted_max_likelihood = reml, - refit = refit, - tempdata = tempdata) + refit = refit) + return ps,ts def gwas_using_redis(key,species,new_code=True): """ @@ -853,7 +842,14 @@ def gwas_using_redis(key,species,new_code=True): debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) - ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + def narray(key): + print(key) + v = params[key] + if v is not None: + v = np.array(v) + return v + + ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ab698e41..3801529e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -200,7 +200,7 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None) + K2,G = calculate_kinship_new(np.copy(G).T) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) -- cgit v1.2.3 From 3c738e6901ecc2ec0b4c1c667f20ebe3dc186f5c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 13:17:56 +0200 Subject: Rename gwas_using_redis to gwas_with_redis --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 9e25f56d..ad6375e9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -795,7 +795,7 @@ class LMM: pl.title(title) -def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): +def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code): """ Invoke pylmm using a genotype (SNP) iterator """ @@ -824,10 +824,10 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code): refit = refit) return ps,ts -def gwas_using_redis(key,species,new_code=True): +def gwas_with_redis(key,species,new_code=True): """ Invoke pylmm using Redis as a container. new_code runs the new - version + version. All the Redis code goes here! """ json_params = Redis.get(key) @@ -849,7 +849,7 @@ def gwas_using_redis(key,species,new_code=True): v = np.array(v) return v - ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -888,7 +888,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species,new_code) + return gwas_with_redis(key,species,new_code) def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): """ @@ -922,7 +922,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): json_params = json.dumps(params) Redis.set(key, json_params) Redis.expire(key, 60*60) - return gwas_using_redis(key,species) + return gwas_with_redis(key,species) # This is the main function used by Genenetwork2 (with environment) # @@ -938,7 +938,7 @@ def gn2_main(): key = opts.key species = opts.species - gwas_using_redis(key,species) + gwas_with_redis(key,species) if __name__ == '__main__': -- cgit v1.2.3 From e9865707ef447b8bc23eb8c872703f156936499d Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 3 Apr 2015 14:03:32 +0200 Subject: - Calculate n,m from the start - added test function to runlmm.py to run without Redis (25% faster) --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++++++------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 17 +++++++++++++++-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index ad6375e9..e51742c4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -795,9 +795,9 @@ class LMM: pl.title(title) -def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code): +def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): """ - Invoke pylmm using a genotype (SNP) iterator + Invoke pylmm using genotype as a matrix or as a (SNP) iterator. """ info("gwas_without_redis") print('pheno', y) @@ -848,8 +848,11 @@ def gwas_with_redis(key,species,new_code=True): if v is not None: v = np.array(v) return v - - ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + + y = narray('pheno_vector') + n = len(y) + m = params['num_genotypes'] + ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -873,6 +876,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = geno.tolist(), + num_genotypes = geno.shape[1], kinship_matrix = k, covariate_matrix = None, input_file_name = None, @@ -881,8 +885,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): temp_uuid = "testrun_temp_uuid", # meta data - timestamp = datetime.datetime.now().isoformat(), - ) + timestamp = datetime.datetime.now().isoformat()) json_params = json.dumps(params) Redis.set(key, json_params) @@ -907,7 +910,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), genotype_matrix = "iterator", - genotypes = i, + num_genotypes = i, kinship_matrix = k, covariate_matrix = None, input_file_name = None, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 3801529e..f095bb73 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new +from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas from kinship import kinship, kinship_full import genotype import phenotype @@ -103,7 +103,20 @@ if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape -if cmd == 'iterator': +if cmd == 'run': + if options.remove_missing_phenotypes: + raise Exception('Can not use --remove-missing-phenotypes with LMM2') + snp_iterator = tsvreader.geno_iter(options.geno) + n = len(y) + m = g.shape[1] + ps, ts = run_gwas('other',n,m,k,y,g.T) + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + +elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) -- cgit v1.2.3 From 163fe965bc1dcb807124c1c70c965d48bf2c2688 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 09:52:24 +0200 Subject: Consolidate tests now they all agree for redis, redis_new and run --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 74 +++++++++++++------------------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index f095bb73..2d02e195 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -27,6 +27,8 @@ import genotype import phenotype from standalone import uses +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + usage = """ python runlmm.py [options] command @@ -103,6 +105,29 @@ if options.geno and cmd != 'iterator': g = tsvreader.geno(options.geno) print g.shape +def check_results(ps,ts): + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + # sys.stderr.write(options.geno+"\n") + if options.geno == 'data/small.geno': + info("Validating results for "+options.geno) + assert p1==0.0708, "p1=%f" % p1 + assert p2==0.1417, "p2=%f" % p2 + if options.geno == 'data/small_na.geno': + info("Validating results for "+options.geno) + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 + if options.geno == 'data/test8000.geno': + info("Validating results for "+options.geno) + # assert p1==0.8984, "p1=%f" % p1 + # assert p2==0.9621, "p2=%f" % p2 + assert round(sum(ps)) == 4070 + assert len(ps) == 8000 + info("Run completed") + if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') @@ -110,22 +135,13 @@ if cmd == 'run': n = len(y) m = g.shape[1] ps, ts = run_gwas('other',n,m,k,y,g.T) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - + check_results(ps,ts) elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') snp_iterator = tsvreader.geno_iter(options.geno) ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) + check_results(ps,ts) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing # phenotypes are handled by the first @@ -138,23 +154,7 @@ elif cmd == 'redis_new': gt = G.T G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) - print np.array(ps) - print len(ps),sum(ps) - # Test results - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") - if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - # assert p1==0.8984, "p1=%f" % p1 - # assert p2==0.9621, "p2=%f" % p2 - assert round(sum(ps)) == 4070 - assert len(ps) == 8000 + check_results(ps,ts) elif cmd == 'redis': # Emulating the redis setup of GN2 G = g @@ -177,21 +177,7 @@ elif cmd == 'redis': gt = G.T G = None ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) - print np.array(ps) - print len(ps),sum(ps) - # Test results 4070.02346579 - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - sys.stderr.write(options.geno+"\n") - if options.geno == 'data/small.geno': - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - assert int(sum(ps)) == 4070 - assert len(ps) == 8000 + check_results(ps,ts) elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G @@ -235,4 +221,4 @@ elif cmd == 'kinship': assert k3==1.4352, "k3=%f" % k3 else: - print "Doing nothing" + fatal("Doing nothing") -- cgit v1.2.3 From 99fef2888f02551191cf6031c2c7222fce27e360 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 12:33:07 +0200 Subject: Run works without transposes --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 21 +++++++--- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 24 +++++++---- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 +++++++++++++++++++---------- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 43 ++++++++++--------- wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 35 +++++++++++++--- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 21 +++++----- 6 files changed, 136 insertions(+), 73 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index 8b344a90..ae3769d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -19,7 +19,7 @@ import pdb import time -# from utility import temp_data +import sys import lmm2 import os @@ -31,6 +31,18 @@ from lmm2 import LMM2 import multiprocessing as mp # Multiprocessing is part of the Python stdlib import Queue +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses +except ImportError: + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + has_gn2=False + from standalone import uses + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + + def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" @@ -52,12 +64,11 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): def f_init(q): compute_snp.q = q -def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): +def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): """ GWAS. The G matrix should be n inds (cols) x m snps (rows) """ - progress,debug,info,mprint = uses('progress','debug','info','mprint') - + info("In gwas.gwas") matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads @@ -70,7 +81,7 @@ def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): m = G.shape[0] # snps snps = m info("%s SNPs",snps) - assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) + assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index be12417e..1c157fd8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,12 +28,21 @@ import time from optmatrix import matrix_initialize, matrixMultT -def kinship_full(G,uses): +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + +def kinship_full(G): """ Calculate the Kinship matrix using a full dot multiplication """ - info,mprint = uses('info','mprint') - # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds @@ -78,8 +87,7 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): - progress,debug,info,mprint = uses('progress','debug','info','mprint') +def kinship(G,computeSize=1000,numThreads=None,useBLAS=False): matrix_initialize(useBLAS) @@ -89,7 +97,7 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): m = G.shape[0] # snps snps = m info("%i SNPs" % (m)) - assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) + assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) @@ -140,13 +148,11 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): K = K / float(snps) return K -def kvakve(K,uses): +def kvakve(K): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - info,mprint = uses('info','mprint') - info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) mprint("Kva",Kva) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index e51742c4..82bd7f0b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -64,7 +64,6 @@ except ImportError: import standalone as handlers from standalone import uses, progress_set_func sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") - pass progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') @@ -296,8 +295,8 @@ def run_other_old(pheno_vector, Bench().report() return p_values, t_stats -def run_other_new(pheno_vector, - genotype_matrix, +def run_other_new(n,m,pheno_vector, + geno, restricted_max_likelihood=True, refit=False): @@ -312,8 +311,7 @@ def run_other_new(pheno_vector, print("REML=",restricted_max_likelihood," REFIT=",refit) # Adjust phenotypes - Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True) - print("Removed missing phenotypes",Y.shape) + n,Y,keep = phenotype.remove_missing_new(n,pheno_vector) # if options.maf_normalization: # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) @@ -321,8 +319,9 @@ def run_other_new(pheno_vector, # if not options.skip_genotype_normalization: # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + geno = geno[:,keep] with Bench("Calculate Kinship"): - K,G = calculate_kinship_new(G) + K,G = calculate_kinship_new(geno) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -337,9 +336,8 @@ def run_other_new(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = gwas.gwas(Y, - G.T, + G, K, - uses, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() @@ -378,18 +376,30 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) +def calculate_kinship_new(genotype_matrix): + """ + Call the new kinship calculation where genotype_matrix contains + inds (columns) by snps (rows). + """ + assert type(genotype_matrix) is np.ndarray + info("call genotype.normalize") + G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix) + mprint("G",genotype_matrix) + info("call calculate_kinship_new") + return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME) -def calculate_kinship_new(genotype_matrix, temp_data=None): +def calculate_kinship_iter(geno): """ Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ + assert type(genotype_matrix) is iter info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) info("call calculate_kinship_new") - return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) + return kinship(G) -def calculate_kinship_old(genotype_matrix, temp_data=None): +def calculate_kinship_old(genotype_matrix): """ genotype_matrix is an n x m matrix encoding SNP minor alleles. @@ -430,7 +440,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): mprint("G (after old normalize)",genotype_matrix.T) kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix,genotype_matrix - # return kinship_full(genotype_matrix.T,uses),genotype_matrix + # return kinship_full(genotype_matrix.T),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -586,7 +596,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K,uses) + Kva,Kve = kvakve(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) @@ -794,12 +804,11 @@ class LMM: pl.ylabel("Probability of data") pl.title(title) - def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): """ Invoke pylmm using genotype as a matrix or as a (SNP) iterator. """ - info("gwas_without_redis") + info("run_gwas") print('pheno', y) if species == "human" : @@ -813,8 +822,8 @@ def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,ne print('geno', geno.shape, geno) if new_code: - ps, ts = run_other_new(pheno_vector = y, - genotype_matrix = geno, + ps, ts = run_other_new(n,m,pheno_vector = y, + geno = geno, restricted_max_likelihood = reml, refit = refit) else: @@ -849,10 +858,20 @@ def gwas_with_redis(key,species,new_code=True): v = np.array(v) return v + def narrayT(key): + m = narray(key) + if m is not None: + return m.T + return m + + # We are transposing before we enter run_gwas - this should happen on the webserver + # side (or when reading data from file) + k = narray('kinship_matrix') + g = narrayT('genotype_matrix') y = narray('pheno_vector') n = len(y) m = params['num_genotypes'] - ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) results_key = "pylmm:results:" + params['temp_uuid'] @@ -864,19 +883,19 @@ def gwas_with_redis(key,species,new_code=True): Redis.expire(results_key, 60*60) return ps, ts - def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): """ - This function emulates current GN2 behaviour by pre-loading Redis + This function emulates current GN2 behaviour by pre-loading Redis (note the input + genotype is transposed to emulate GN2 (FIXME!) """ - print("Loading Redis from parsed data") + info("Loading Redis from parsed data") if kinship == None: k = None else: k = kinship.tolist() params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno.tolist(), - num_genotypes = geno.shape[1], + genotype_matrix = geno.T.tolist(), + num_genotypes = geno.shape[0], kinship_matrix = k, covariate_matrix = None, input_file_name = None, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d67e1205..358bf27e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -32,7 +32,6 @@ except ImportError: sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses - pass def calculateKinship(W,center=False): """ @@ -149,28 +148,32 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): class LMM2: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. + """This is a simple version of EMMA/fastLMM. + + The main purpose of this module is to take a phenotype vector (Y), + a set of covariates (X) and a kinship matrix (K) and to optimize + this model by finding the maximum-likelihood estimates for the + model parameters. There are three model parameters: heritability + (h), covariate coefficients (beta) and the total phenotypic + variance (sigma). Heritability as defined here is the proportion + of the total variance (sigma) that is attributed to the kinship + matrix. + + For simplicity, we assume that everything being input is a numpy + array. If this is not the case, the module may throw an error as + conversion from list to numpy array is not done consistently. """ def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): - """ - The constructor takes a phenotype vector or array Y of size n. - It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """The constructor takes a phenotype vector or array Y of size n. It + takes a kinship matrix K of size n x n. Kva and Kve can be + computed as Kva,Kve = linalg.eigh(K) and cached. If they are + not provided, the constructor will calculate them. X0 is an + optional covariate matrix of size n x q, where there are q + covariates. When this parameter is not provided, the + constructor will set X0 to an n x 1 matrix of all ones to + represent a mean effect. """ if X0 is None: @@ -194,7 +197,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K,uses) + Kva,Kve = kinship.kvakve(K) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 4c8175f7..7b652515 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -19,22 +19,47 @@ import sys import numpy as np -def remove_missing(y,g,verbose=False): +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func +except ImportError: + has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + +progress,debug,info,mprint = uses('progress','debug','info','mprint') + +def remove_missing(n,y,g): """ Remove missing data from matrices, make sure the genotype data has individuals as rows """ assert(y is not None) - assert(y.shape[0] == g.shape[0]) + assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape) y1 = y g1 = g v = np.isnan(y) keep = True - v if v.sum(): - if verbose: - sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) + info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) y1 = y[keep] g1 = g[keep,:] - return y1,g1,keep + n = y1.shape[0] + return n,y1,g1,keep + +def remove_missing_new(n,y): + """ + Remove missing data. Returns new n,y,keep + """ + assert(y is not None) + y1 = y + v = np.isnan(y) + keep = True - v + if v.sum(): + info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum())) + y1 = y[keep] + n = y1.shape[0] + return n,y1,keep diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 2d02e195..d248dee2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -131,16 +131,15 @@ def check_results(ps,ts): if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') - snp_iterator = tsvreader.geno_iter(options.geno) n = len(y) m = g.shape[1] - ps, ts = run_gwas('other',n,m,k,y,g.T) + ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP check_results(ps,ts) elif cmd == 'iterator': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') - snp_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator) + geno_iterator = tsvreader.geno_iter(options.geno) + ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator) check_results(ps,ts) elif cmd == 'redis_new': # The main difference between redis_new and redis is that missing @@ -150,10 +149,9 @@ elif cmd == 'redis_new': Y = y G = g print "Original G",G.shape, "\n", G - - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) + # gt = G.T + # G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True) check_results(ps,ts) elif cmd == 'redis': # Emulating the redis setup of GN2 @@ -174,9 +172,10 @@ elif cmd == 'redis': g = None gnt = None - gt = G.T - G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) + # gt = G.T + # G = None + mprint("G",G) + ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) check_results(ps,ts) elif cmd == 'kinship': G = g -- cgit v1.2.3 From 49f5eb3e825c953bc7f6da87460ccfe9b891d493 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 13:01:44 +0200 Subject: Fixing transpose issues --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 1 - wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 26 ++++++++++++-------------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index ae3769d4..247a8729 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -36,7 +36,6 @@ try: from wqflask.my_pylmm.pyLMM import chunks from gn2 import uses except ImportError: - sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") has_gn2=False from standalone import uses diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 82bd7f0b..6f03eaf7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -288,7 +288,7 @@ def run_other_old(pheno_vector, with Bench("Doing GWAS"): t_stats, p_values = GWAS(pheno_vector, - genotype_matrix, + genotype_matrix.T, kinship_matrix, restricted_max_likelihood=True, refit=False) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index d248dee2..44d5c0f4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -108,26 +108,25 @@ if options.geno and cmd != 'iterator': def check_results(ps,ts): print np.array(ps) print len(ps),sum(ps) - # Test results p1 = round(ps[0],4) p2 = round(ps[-1],4) - # sys.stderr.write(options.geno+"\n") if options.geno == 'data/small.geno': info("Validating results for "+options.geno) - assert p1==0.0708, "p1=%f" % p1 - assert p2==0.1417, "p2=%f" % p2 + assert p1==0.7387, "p1=%f" % p1 + assert p2==0.7387, "p2=%f" % p2 if options.geno == 'data/small_na.geno': info("Validating results for "+options.geno) - assert p1==0.0897, "p1=%f" % p1 - assert p2==0.0405, "p2=%f" % p2 + assert p1==0.062, "p1=%f" % p1 + assert p2==0.062, "p2=%f" % p2 if options.geno == 'data/test8000.geno': info("Validating results for "+options.geno) - # assert p1==0.8984, "p1=%f" % p1 - # assert p2==0.9621, "p2=%f" % p2 assert round(sum(ps)) == 4070 assert len(ps) == 8000 info("Run completed") - + +if y is not None: + n = y.shape[0] + if cmd == 'run': if options.remove_missing_phenotypes: raise Exception('Can not use --remove-missing-phenotypes with LMM2') @@ -159,7 +158,7 @@ elif cmd == 'redis': print "Original G",G.shape, "\n", G if y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,gnt) G = g.T print "Removed missing phenotypes",G.shape, "\n", G else: @@ -174,7 +173,6 @@ elif cmd == 'redis': # gt = G.T # G = None - mprint("G",G) ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) check_results(ps,ts) elif cmd == 'kinship': @@ -182,7 +180,7 @@ elif cmd == 'kinship': print "Original G",G.shape, "\n", G if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g = phenotype.remove_missing(y,g.T,options.verbose) + n,Y,g,keep = phenotype.remove_missing(n,y,g.T) G = g.T print "Removed missing phenotypes",G.shape, "\n", G if options.maf_normalization: @@ -194,7 +192,7 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(np.copy(G),uses) + K = kinship_full(np.copy(G)) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) @@ -204,7 +202,7 @@ elif cmd == 'kinship': k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T,uses) + K3 = kinship(G.T) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) -- cgit v1.2.3 From 17f453e50ebac657d9f3096811d92bedc9bfc064 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 4 Apr 2015 13:15:48 +0200 Subject: Regression tests --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 44d5c0f4..52c3c80a 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -196,20 +196,20 @@ elif cmd == 'kinship': print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G).T) + K2,G = calculate_kinship_new(np.copy(G)) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) if options.geno == 'data/small.geno': - assert k1==0.8, "k1=%f" % k1 - assert k2==0.7939, "k2=%f" % k2 - assert k3==0.7939, "k3=%f" % k3 + assert k1==0.8333, "k1=%f" % k1 + assert k2==0.9375, "k2=%f" % k2 + assert k3==0.9375, "k3=%f" % k3 if options.geno == 'data/small_na.geno': assert k1==0.8333, "k1=%f" % k1 assert k2==0.7172, "k2=%f" % k2 -- cgit v1.2.3 From 102523493e2f8a7660c63f117f1d8dfd009eff02 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 13 Apr 2015 08:14:43 +0000 Subject: Improved assertion message --- wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py index b4027fa3..b24ffe8f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py @@ -56,7 +56,8 @@ def geno(fn): print fn with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Genotype format version 1.0") + line = tsvin.readline().strip() + assert line == "# Genotype format version 1.0", line tsvin.readline() tsvin.readline() tsvin.readline() -- cgit v1.2.3 From 6cef9c3b27d92383f89a432f6fa0e9fd16107f66 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 13 Apr 2015 10:15:59 +0200 Subject: Added examples for convertlmm.py --- wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py index 3b6b5d70..4312fed0 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py @@ -1,5 +1,5 @@ -# This is a converter for common LMM formats, so as to keep complexity -# outside the main routines. +# This is a converter for common LMM formats, so as to keep file +# reader complexity outside the main routines. # Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) # @@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen Convert files for runlmm.py processing. Writes to stdout by default. try --help for more information + +Examples: + + python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno + + python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno """ # if len(args) == 0: -- cgit v1.2.3 From 85ccb971687fda00538b248722454ea2aa514e27 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 13 Apr 2015 10:16:11 +0200 Subject: Started on INSTALL information --- INSTALL.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 INSTALL.md diff --git a/INSTALL.md b/INSTALL.md new file mode 100644 index 00000000..26eacc3a --- /dev/null +++ b/INSTALL.md @@ -0,0 +1,26 @@ +# INSTALL Genenetwork2 (GN2) + +## Fetch GN2 from github + +Clone the repository (currently ~800Mb) to local + + git clone git@github.com:genenetwork2/genenetwork2.git + +## Dependencies + +GN2 requires + +* redis +* mysql + +## Required python modules + +Install the following python modules: + +* Flask +* pyyaml +* redis +* qtlreaper +* numarray +* pp +* Flask-SQLAlchemy -- cgit v1.2.3 From 25bb886b733362edea657c72e7d29172b7e22755 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Apr 2015 14:33:26 +0200 Subject: INSTALL --- INSTALL.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index 26eacc3a..84b3d37c 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -24,3 +24,41 @@ Install the following python modules: * numarray * pp * Flask-SQLAlchemy + +## Set up local file settings.py + +```python +LOGFILE = """/tmp/flask_gn_log""" + +#This is needed because Flask turns key errors into a +#400 bad request response with no exception/log +TRAP_BAD_REQUEST_ERRORS = True + +DB_URI = """mysql://gn2:password@localhost/db_webqtl""" +SQLALCHEMY_DATABASE_URI = 'mysql://gn2:password@localhost/db_webqtl' + +# http://pythonhosted.org/Flask-Security/configuration.html +SECURITY_CONFIRMABLE = True +SECURITY_TRACKABLE = True +SECURITY_REGISTERABLE = True +SECURITY_RECOVERABLE = True + +SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org" +SECURITY_POST_LOGIN_VIEW = "/thank_you" +SQLALCHEMY_POOL_RECYCLE = 3600 + +SERVER_PORT = 5051 + +SECRET_HMAC_CODE = '*' +``` + +```sh + export WQFLASK_SETTINGS=$HOME/settings.py + source /home/pjotr/ve27/bin/activate + cd genenetwork2/wqflask + python ./runserver.py + + or + + python ./secure_server.py +``` -- cgit v1.2.3 From bb8e466e00c622f7b28209378c1871a1d8469572 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 17 Apr 2015 14:41:40 +0200 Subject: spacing --- INSTALL.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 84b3d37c..38d15090 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -53,12 +53,12 @@ SECRET_HMAC_CODE = '*' ``` ```sh - export WQFLASK_SETTINGS=$HOME/settings.py - source /home/pjotr/ve27/bin/activate - cd genenetwork2/wqflask - python ./runserver.py +export WQFLASK_SETTINGS=$HOME/settings.py +source /home/pjotr/ve27/bin/activate +cd genenetwork2/wqflask +python ./runserver.py - or +or - python ./secure_server.py +python ./secure_server.py ``` -- cgit v1.2.3 From 0929b16a5183538811260aef5c37f7406c302026 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 08:54:07 +0200 Subject: Use reduced outputter for GN2 logs too --- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index 7bceb089..40b2021d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -1,7 +1,10 @@ -# Genenetwork2 specific methods and callback handler +# Standalone specific methods and callback handler # # Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) # +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) from __future__ import absolute_import, print_function, division @@ -9,8 +12,10 @@ import numpy as np import sys import logging -# logging.basicConfig(level=logging.DEBUG) -# np.set_printoptions() +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') +logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) progress_location = None progress_current = None @@ -37,13 +42,20 @@ def progress(location, count, total): logger.info("Progress: %s %d%%" % (location,perc)) progress_location = location progress_prev_perc = perc - + def mprint(msg,data): """ Array/matrix print function """ m = np.array(data) - print(msg,m.shape,"=\n",m) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") def fatal(msg): logger.critical(msg) @@ -68,7 +80,7 @@ def uses(*funcs): Some sugar """ return [callbacks()[func] for func in funcs] - + # ----- Minor test cases: if __name__ == '__main__': -- cgit v1.2.3 From a1d8f68d5428a4ceec9a2d9a771b000ecabec5e6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 09:36:00 +0000 Subject: pylmm: fix integration problems --- wqflask/runserver.py | 6 ++--- .../wqflask/marker_regression/marker_regression.py | 14 +++++----- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 30 +++++++++++++++------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 19 ++++++++------ wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 18 ++++++++++--- 5 files changed, 56 insertions(+), 31 deletions(-) diff --git a/wqflask/runserver.py b/wqflask/runserver.py index 9d5686a9..fadae6bf 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -20,9 +20,9 @@ from wqflask import app import logging #from themodule import TheHandlerYouWant -file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure") -file_handler.setLevel(logging.DEBUG) -app.logger.addHandler(file_handler) +# file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure") +# file_handler.setLevel(logging.DEBUG) +# app.logger.addHandler(file_handler) import logging_tree logging_tree.printout() diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 7708356b..ae3e062f 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -40,6 +40,7 @@ from utility import temp_data from utility.benchmark import Bench +PYLMM_COMMAND= 'python /home/pjotr/izip/git/opensource/python/gn2/wqflask/wqflask/my_pylmm/pyLMM/lmm.py' class MarkerRegression(object): @@ -272,7 +273,7 @@ class MarkerRegression(object): """) def run_rqtl_geno(self): - print("Calling R/qtl from python") + print("Calling R/qtl") self.geno_to_rqtl_function() @@ -655,8 +656,7 @@ class MarkerRegression(object): Redis.set(key, json_params) Redis.expire(key, 60*60) - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - "other") + command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other") os.system(command) @@ -713,8 +713,8 @@ class MarkerRegression(object): # "refit": False, # "temp_data": tempdata} - print("genotype_matrix:", str(genotype_matrix.tolist())) - print("pheno_vector:", str(pheno_vector.tolist())) + # print("genotype_matrix:", str(genotype_matrix.tolist())) + # print("pheno_vector:", str(pheno_vector.tolist())) params = dict(pheno_vector = pheno_vector.tolist(), genotype_matrix = genotype_matrix.tolist(), @@ -732,7 +732,7 @@ class MarkerRegression(object): Redis.expire(key, 60*60) print("before printing command") - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other") print("command is:", command) print("after printing command") @@ -806,7 +806,7 @@ class MarkerRegression(object): print("Before creating the command") - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "human") print("command is:", command) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py index 7bceb089..b128bfab 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -1,7 +1,10 @@ -# Genenetwork2 specific methods and callback handler +# Standalone specific methods and callback handler # # Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) # +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) from __future__ import absolute_import, print_function, division @@ -9,10 +12,12 @@ import numpy as np import sys import logging -# logging.basicConfig(level=logging.DEBUG) -# np.set_printoptions() +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') +logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) -progress_location = None +progress_location = None progress_current = None progress_prev_perc = None @@ -20,30 +25,37 @@ def progress_default_func(location,count,total): global progress_current value = round(count*100.0/total) progress_current = value - + progress_func = progress_default_func def progress_set_func(func): global progress_func progress_func = func - + def progress(location, count, total): global progress_location global progress_prev_perc - + perc = round(count*100.0/total) if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): progress_func(location, count, total) logger.info("Progress: %s %d%%" % (location,perc)) progress_location = location progress_prev_perc = perc - + def mprint(msg,data): """ Array/matrix print function """ m = np.array(data) - print(msg,m.shape,"=\n",m) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") def fatal(msg): logger.critical(msg) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index b2067b27..6fff5f1d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -42,25 +42,27 @@ from redis import Redis Redis = Redis() import sys -sys.path.append("/home/zas1024/gene/wqflask/") - -has_gn2=True from utility.benchmark import Bench from utility import temp_data -sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/") - from kinship import kinship, kinship_full, kvakve import genotype import phenotype import gwas +has_gn2=True +sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n") +sys.stderr.write("INFO: pylmm file is "+__file__+"\n") + # ---- A trick to decide on the environment: try: - from wqflask.my_pylmm.pyLMM import chunks + sys.stderr.write("INFO: trying loading module\n") + import utility.formatting # this is never used, just to check the environment + sys.stderr.write("INFO: This is a genenetwork2 environment\n") from gn2 import uses, progress_set_func except ImportError: + # Failed to load gn2 has_gn2=False import standalone as handlers from standalone import uses, progress_set_func @@ -856,7 +858,8 @@ def gwas_with_redis(key,species,new_code=True): print(key) v = params[key] if v is not None: - v = np.array(v) + v = np.array(v).astype(np.float) + print(v) return v def narrayT(key): @@ -969,6 +972,6 @@ if __name__ == '__main__': if has_gn2: gn2_main() else: - print("Run from runlmm.py instead") + fatal("Run from runlmm.py instead") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index 358bf27e..c65843ec 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,14 +24,24 @@ from scipy import optimize from optmatrix import matrixMult import kinship +sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n") +sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n") + # ---- A trick to decide on the environment: try: - from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses + sys.stderr.write("INFO: trying loading module\n") + import utility.formatting # this is never used, just to check the environment + sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n") + from gn2 import uses, progress_set_func except ImportError: - sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + # Failed to load gn2 has_gn2=False - from standalone import uses + import standalone as handlers + from standalone import uses, progress_set_func + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + +progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') + def calculateKinship(W,center=False): """ -- cgit v1.2.3 From 02660b9406a97943d4c33946250fc3f08b80c556 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 09:40:57 +0000 Subject: pylmm: fix integration problems --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 6fff5f1d..618f8332 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -854,22 +854,23 @@ def gwas_with_redis(key,species,new_code=True): debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) progress_set_func(update_tempdata) - def narray(key): - print(key) - v = params[key] + def narray(t): + info("Type is "+t) + v = params[t] if v is not None: v = np.array(v).astype(np.float) print(v) return v - def narrayT(key): - m = narray(key) + def narrayT(t): + m = narray(t) if m is not None: return m.T return m # We are transposing before we enter run_gwas - this should happen on the webserver # side (or when reading data from file) + print(params) k = narray('kinship_matrix') g = narrayT('genotype_matrix') y = narray('pheno_vector') -- cgit v1.2.3 From 8706319923b3830a4d8cd63fd9a3f6b9a2b04563 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 10:58:26 +0000 Subject: Fix NA to float tranforms --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 37 +++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 618f8332..5b06c9ae 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -856,30 +856,47 @@ def gwas_with_redis(key,species,new_code=True): def narray(t): info("Type is "+t) - v = params[t] + v = params.get(t) if v is not None: - v = np.array(v).astype(np.float) - print(v) + # Note input values can be array of string or float + v1 = [x if x != 'NA' else 'nan' for x in v] + v = np.array(v1).astype(np.float) return v - def narrayT(t): - m = narray(t) + def marray(t): + info("Type is "+t) + v = params.get(t) + if v is not None: + m = [] + for r in v: + # Note input values can be array of string or float + r1 = [x if x != 'NA' else 'nan' for x in r] + m.append(np.array(r1).astype(np.float)) + return np.array(m) + return np.array(v) + + def marrayT(t): + m = marray(t) if m is not None: return m.T return m # We are transposing before we enter run_gwas - this should happen on the webserver # side (or when reading data from file) - print(params) - k = narray('kinship_matrix') - g = narrayT('genotype_matrix') + k = marray('kinship_matrix') + g = marrayT('genotype_matrix') + mprint("geno",g) y = narray('pheno_vector') n = len(y) - m = params['num_genotypes'] - ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code) + m = params.get('num_genotypes') + if m is None: + m = g.shape[0] + info("m=%d,n=%d" % (m,n)) + ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code) results_key = "pylmm:results:" + params['temp_uuid'] + # fatal(results_key) json_results = json.dumps(dict(p_values = ps, t_stats = ts)) -- cgit v1.2.3 From ced6f0c49c155a2ab47adfe93578d4718504566b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 11:09:20 +0000 Subject: Disable some print statements - will introduce debug levels soon --- wqflask/wqflask/marker_regression/marker_regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index ae3e062f..c80bba8e 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -128,7 +128,7 @@ class MarkerRegression(object): #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary self.qtl_results = [] for qtl in self.filtered_markers: - print("lod score is:", qtl['lod_score']) + # print("lod score is:", qtl['lod_score']) if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y": print("changing to X") self.json_data['chr'].append("X") @@ -145,7 +145,7 @@ class MarkerRegression(object): self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length - print("json_data:", self.json_data) + # print("json_data:", self.json_data) self.js_data = dict( @@ -745,7 +745,7 @@ class MarkerRegression(object): json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) results = json.loads(json_results[1]) p_values = [float(result) for result in results['p_values']] - print("p_values:", p_values) + print("p_values:", p_values[:10]) #p_values = self.trim_results(p_values) t_stats = results['t_stats'] -- cgit v1.2.3 From 1f6386cbddfd02d8abbd4e9bcb502c06be6864d1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 18 Apr 2015 12:41:07 +0000 Subject: Show first 40 LOD scores --- wqflask/wqflask/marker_regression/marker_regression.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c80bba8e..fba34b99 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -127,8 +127,9 @@ class MarkerRegression(object): #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary self.qtl_results = [] - for qtl in self.filtered_markers: - # print("lod score is:", qtl['lod_score']) + for index,qtl in enumerate(self.filtered_markers): + if index<40: + print("lod score is:", qtl['lod_score']) if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y": print("changing to X") self.json_data['chr'].append("X") -- cgit v1.2.3 From 3736bd0044ddee68180a06809847af7542951743 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 09:46:15 +0200 Subject: INSTALL info --- INSTALL.md | 15 ++++++++++++--- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 38d15090..afe22678 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,5 +1,10 @@ # INSTALL Genenetwork2 (GN2) +## Use a Docker image + +A Docker image can be generated from +[here](https://github.com/lomereiter/gn2-docker). + ## Fetch GN2 from github Clone the repository (currently ~800Mb) to local @@ -10,12 +15,14 @@ Clone the repository (currently ~800Mb) to local GN2 requires +* python * redis * mysql ## Required python modules -Install the following python modules: +Install the following python modules (it is probably wise to use a local +Python with environment for this) * Flask * pyyaml @@ -53,12 +60,14 @@ SECRET_HMAC_CODE = '*' ``` ```sh +# Use a working copy of python +export python=$HOME/ve27/bin/python export WQFLASK_SETTINGS=$HOME/settings.py source /home/pjotr/ve27/bin/activate cd genenetwork2/wqflask -python ./runserver.py +$python ./runserver.py or -python ./secure_server.py +$python ./secure_server.py ``` diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index b2067b27..98bbead8 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -856,7 +856,7 @@ def gwas_with_redis(key,species,new_code=True): print(key) v = params[key] if v is not None: - v = np.array(v) + v = np.array(v).astype(np.float) return v def narrayT(key): @@ -969,6 +969,6 @@ if __name__ == '__main__': if has_gn2: gn2_main() else: - print("Run from runlmm.py instead") + fatal("Run from runlmm.py instead") -- cgit v1.2.3 From 5dad3ceb4acd652dd28d183f784005479089aa8a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 10:55:42 +0200 Subject: Restore logger in runserver --- wqflask/runserver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wqflask/runserver.py b/wqflask/runserver.py index fadae6bf..20d79218 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -19,10 +19,9 @@ from wqflask import app #_log.addHandler(_ch) import logging -#from themodule import TheHandlerYouWant -# file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure") -# file_handler.setLevel(logging.DEBUG) -# app.logger.addHandler(file_handler) +file_handler = logging.FileHandler(app.config['LOGFILE']) +file_handler.setLevel(logging.DEBUG) +app.logger.addHandler(file_handler) import logging_tree logging_tree.printout() -- cgit v1.2.3 From 78dde9ccb4c24ea900b7a6d64ef392ec30ac89ea Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 10:58:43 +0200 Subject: INSTALL: refer to information in ./misc --- INSTALL.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index afe22678..a971ff78 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -71,3 +71,7 @@ or $python ./secure_server.py ``` + +## Other information + +Check also the ./misc/ directory for settings \ No newline at end of file -- cgit v1.2.3 From 93f663a2e865484cb6f476fb7a0fa2415410e4fd Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 11:01:56 +0200 Subject: runserver: set port --- wqflask/runserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/runserver.py b/wqflask/runserver.py index 20d79218..4ae91e64 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -27,7 +27,7 @@ import logging_tree logging_tree.printout() app.run(host='0.0.0.0', - port=5003, + port=app.config['SERVER_PORT'], use_debugger=False, threaded=True, use_reloader=True) -- cgit v1.2.3 From 240c2db33b70b7d10a6bdd18e043fc0aa6766715 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 11:04:31 +0200 Subject: Output configi --- wqflask/runserver.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wqflask/runserver.py b/wqflask/runserver.py index 4ae91e64..5a76d1e2 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -18,6 +18,8 @@ from wqflask import app #_ch = logging.StreamHandler() #_log.addHandler(_ch) +print app.config + import logging file_handler = logging.FileHandler(app.config['LOGFILE']) file_handler.setLevel(logging.DEBUG) -- cgit v1.2.3 From e7cbe10d754e1e334746fc43a01e8b9fa3a666c0 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 11:37:32 +0200 Subject: pylmm: Copied benchmark (preparing for a module) --- wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py new file mode 100755 index 00000000..6c6c9f88 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py @@ -0,0 +1,44 @@ +from __future__ import print_function, division, absolute_import + +import collections +import inspect +import time + +class Bench(object): + entries = collections.OrderedDict() + + def __init__(self, name=None): + self.name = name + + def __enter__(self): + if self.name: + print("Starting benchmark: %s" % (self.name)) + else: + print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + self.start_time = time.time() + + def __exit__(self, type, value, traceback): + if self.name: + name = self.name + else: + name = "That" + + time_taken = time.time() - self.start_time + print(" %s took: %f seconds" % (name, (time_taken))) + + if self.name: + Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken + + + @classmethod + def report(cls): + total_time = sum((time_taken for time_taken in cls.entries.itervalues())) + print("\nTiming report\n") + for name, time_taken in cls.entries.iteritems(): + percent = int(round((time_taken/total_time) * 100)) + print("[{}%] {}: {}".format(percent, name, time_taken)) + print() + + def reset(cls): + """Reset the entries""" + cls.entries = collections.OrderedDict() -- cgit v1.2.3 From ac57e839c8ffb52f65e74a9064e4adeb15c76b49 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 11:54:42 +0200 Subject: pylmm: module loading --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 38 ++++++++++++++++------------------ wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 2 +- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 5b06c9ae..135ba1f4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,48 +19,46 @@ from __future__ import absolute_import, print_function, division import sys import time -import argparse +# import argparse import uuid import numpy as np from scipy import linalg from scipy import optimize from scipy import stats -import pdb +# import pdb -import simplejson as json - -import gzip -import zlib +# import gzip +# import zlib import datetime -import cPickle as pickle -import simplejson as json - +# import cPickle as pickle from pprint import pformat as pf -from redis import Redis -Redis = Redis() - -import sys - -from utility.benchmark import Bench -from utility import temp_data - +# pylmm imports from kinship import kinship, kinship_full, kvakve import genotype import phenotype import gwas +from benchmark import Bench + +# The following imports are for exchanging data with the webserver +import simplejson as json +from redis import Redis +Redis = Redis() +from utility import temp_data + +has_gn2=None -has_gn2=True -sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n") +# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n") sys.stderr.write("INFO: pylmm file is "+__file__+"\n") # ---- A trick to decide on the environment: try: - sys.stderr.write("INFO: trying loading module\n") + sys.stderr.write("INFO: lmm try loading module\n") import utility.formatting # this is never used, just to check the environment sys.stderr.write("INFO: This is a genenetwork2 environment\n") from gn2 import uses, progress_set_func + has_gn2=True except ImportError: # Failed to load gn2 has_gn2=False diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index c65843ec..d871d8d2 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -29,7 +29,7 @@ sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n") # ---- A trick to decide on the environment: try: - sys.stderr.write("INFO: trying loading module\n") + sys.stderr.write("INFO: lmm2 try loading module\n") import utility.formatting # this is never used, just to check the environment sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n") from gn2 import uses, progress_set_func -- cgit v1.2.3 From e3adbf898dd537688339c8af1b59ac440aef3848 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 12:05:38 +0200 Subject: pylmm: move temp_data local --- wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 0 wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/temp_data.py | 25 +++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/temp_data.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py old mode 100755 new mode 100644 diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 135ba1f4..2d9ca812 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -45,7 +45,7 @@ from benchmark import Bench import simplejson as json from redis import Redis Redis = Redis() -from utility import temp_data +import temp_data has_gn2=None diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py new file mode 100644 index 00000000..004d45c6 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py @@ -0,0 +1,25 @@ +from __future__ import print_function, division, absolute_import +from redis import Redis + +import simplejson as json + +class TempData(object): + + def __init__(self, temp_uuid): + self.temp_uuid = temp_uuid + self.redis = Redis() + self.key = "tempdata:{}".format(self.temp_uuid) + + def store(self, field, value): + self.redis.hset(self.key, field, value) + self.redis.expire(self.key, 60*15) # Expire in 15 minutes + + def get_all(self): + return self.redis.hgetall(self.key) + + +if __name__ == "__main__": + redis = Redis() + for key in redis.keys(): + for field in redis.hkeys(key): + print("{}.{}={}".format(key, field, redis.hget(key, field))) -- cgit v1.2.3 From 3c1e043dd63fe2d65a0bd44764867254b13aba32 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 12:28:58 +0200 Subject: pylmm: auto add to pythonpath --- INSTALL.md | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 ++++++ wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index a971ff78..9f28ac28 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -16,8 +16,8 @@ Clone the repository (currently ~800Mb) to local GN2 requires * python -* redis -* mysql +* redis-server +* mysql-server ## Required python modules diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 2d9ca812..4e35a4ac 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -34,6 +34,12 @@ import datetime # import cPickle as pickle from pprint import pformat as pf +# Add local dir to PYTHONPATH +import os +cwd = os.path.dirname(__file__) +if sys.path[0] != cwd: + sys.path.insert(1,cwd) + # pylmm imports from kinship import kinship, kinship_full, kvakve import genotype diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 52c3c80a..6b241cd6 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,6 +21,14 @@ from optparse import OptionParser import sys import tsvreader import numpy as np + +# Add local dir to PYTHONPATH +import os +cwd = os.path.dirname(__file__) +if sys.path[0] != cwd: + sys.path.insert(1,cwd) + +# pylmm modules from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas from kinship import kinship, kinship_full import genotype -- cgit v1.2.3 From 6e4a01fbdf2ac72230346d6f474edaa56a623bfe Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 10:40:15 +0000 Subject: pylmm: can now be called from GN2 without path set --- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 4e35a4ac..2a0c7fdc 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,7 +19,6 @@ from __future__ import absolute_import, print_function, division import sys import time -# import argparse import uuid import numpy as np @@ -977,6 +976,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): # Note that this calling route will become OBSOLETE (we should use runlmm.py # instead) def gn2_main(): + import argparse parser = argparse.ArgumentParser(description='Run pyLMM') parser.add_argument('-k', '--key') parser.add_argument('-s', '--species') @@ -991,9 +991,5 @@ def gn2_main(): if __name__ == '__main__': print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") - if has_gn2: - gn2_main() - else: - fatal("Run from runlmm.py instead") - + gn2_main() -- cgit v1.2.3 From 1b71fc64719ba5c5b56d23aad9d9dc45bc2898a9 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 12:57:10 +0200 Subject: pylmm: Release --- wqflask/wqflask/my_pylmm/README.md | 7 +++++++ wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index 4845ec03..b844c845 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,5 +1,12 @@ # Genenetwork2/pylmm RELEASE NOTES +## 0.51-gn2 (April 19, 2015) + +- Improved GN2 integration +- Less matrix transposes +- Able to run pylmm standalone without Redis again (still requires + the modules) + ## 0.50-gn2 (April 2nd, 2015) - Replaced the GN2 genotype normalization diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index 6ab60d02..f33c4e74 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -1 +1 @@ -PYLMM_VERSION="0.50-gn2-pre2" +PYLMM_VERSION="0.51-gn2" -- cgit v1.2.3 From 561ad00c82f440aefd5dbaf741d927cb63a37e0f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 18:57:56 +0200 Subject: Pick up PYLMM_PATH from environment or setting.py --- wqflask/wqflask/marker_regression/marker_regression.py | 10 +++++++++- wqflask/wqflask/model.py | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index fba34b99..8d3eba48 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -25,6 +25,7 @@ from redis import Redis Redis = Redis() from flask import Flask, g +from wqflask import app from base.trait import GeneralTrait from base import data_set @@ -40,7 +41,14 @@ from utility import temp_data from utility.benchmark import Bench -PYLMM_COMMAND= 'python /home/pjotr/izip/git/opensource/python/gn2/wqflask/wqflask/my_pylmm/pyLMM/lmm.py' +import os +if os.environ['PYLMM_PATH'] is None: + PYLMM_PATH=app.config['PYLMM_PATH'] + if PYLMM_PATH is None: + PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM' +if !os.path.isfile(PYLMM_PATH+'lmm.py'): + raise 'PYLMM_PATH unknown or faulty' +PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py' class MarkerRegression(object): diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index fa8c1aab..042cb8df 100755 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -194,4 +194,4 @@ def display_collapsible(number): def user_uuid(): """Unique cookie for a user""" user_uuid = request.cookies.get('user_uuid') - \ No newline at end of file + -- cgit v1.2.3 From 4f4ceddafa1d172515b2ef24658e5cf39730e6c6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 19 Apr 2015 17:03:10 +0000 Subject: Pick up PYLMM_PATH from environment or setting.py --- wqflask/wqflask/marker_regression/marker_regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 8d3eba48..67e1df0d 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -42,11 +42,11 @@ from utility import temp_data from utility.benchmark import Bench import os -if os.environ['PYLMM_PATH'] is None: - PYLMM_PATH=app.config['PYLMM_PATH'] +if os.environ.get('PYLMM_PATH') is None: + PYLMM_PATH=app.config.get('PYLMM_PATH') if PYLMM_PATH is None: PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM' -if !os.path.isfile(PYLMM_PATH+'lmm.py'): +if not os.path.isfile(PYLMM_PATH+'/lmm.py'): raise 'PYLMM_PATH unknown or faulty' PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py' -- cgit v1.2.3 From 85a335df1fe499bc00b7feabc4f301b7a56b2b85 Mon Sep 17 00:00:00 2001 From: pjotrp Date: Mon, 11 May 2015 16:52:10 -0500 Subject: pylmm has moved out of the GN2 source tree to https://github.com/genenetwork/pylmm_gn2 --- wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 1 - wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 44 -- wqflask/wqflask/my_pylmm/pyLMM/chunks.py | 96 --- wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 184 ----- wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 51 -- wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 110 --- wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 165 ----- wqflask/wqflask/my_pylmm/pyLMM/input.py | 267 ------- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 168 ----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 995 --------------------------- wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 433 ------------ wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py | 55 -- wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 65 -- wqflask/wqflask/my_pylmm/pyLMM/plink.py | 107 --- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 229 ------ wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 110 --- wqflask/wqflask/my_pylmm/pyLMM/temp_data.py | 25 - wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 122 ---- 18 files changed, 3227 deletions(-) delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/__init__.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/chunks.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/genotype.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gn2.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gwas.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/input.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/kinship.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/phenotype.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/plink.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/standalone.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/temp_data.py delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py deleted file mode 100644 index f33c4e74..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ /dev/null @@ -1 +0,0 @@ -PYLMM_VERSION="0.51-gn2" diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py deleted file mode 100644 index 6c6c9f88..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import print_function, division, absolute_import - -import collections -import inspect -import time - -class Bench(object): - entries = collections.OrderedDict() - - def __init__(self, name=None): - self.name = name - - def __enter__(self): - if self.name: - print("Starting benchmark: %s" % (self.name)) - else: - print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) - self.start_time = time.time() - - def __exit__(self, type, value, traceback): - if self.name: - name = self.name - else: - name = "That" - - time_taken = time.time() - self.start_time - print(" %s took: %f seconds" % (name, (time_taken))) - - if self.name: - Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken - - - @classmethod - def report(cls): - total_time = sum((time_taken for time_taken in cls.entries.itervalues())) - print("\nTiming report\n") - for name, time_taken in cls.entries.iteritems(): - percent = int(round((time_taken/total_time) * 100)) - print("[{}%] {}: {}".format(percent, name, time_taken)) - print() - - def reset(cls): - """Reset the entries""" - cls.entries = collections.OrderedDict() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py b/wqflask/wqflask/my_pylmm/pyLMM/chunks.py deleted file mode 100644 index 9565fb96..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py +++ /dev/null @@ -1,96 +0,0 @@ -from __future__ import absolute_import, print_function, division - -import math -import time - - -def divide_into_chunks(the_list, number_chunks): - """Divides a list into approximately number_chunks smaller lists - - >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) - [[1, 2, 7], [3, 22, 8], [5, 22, 333]] - >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4) - [[1, 2, 7], [3, 22, 8], [5, 22, 333]] - >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5) - [[1, 2], [7, 3], [22, 8], [5, 22], [333]] - >>> - - """ - length = len(the_list) - - if length == 0: - return [[]] - - if length <= number_chunks: - number_chunks = length - - chunksize = int(math.ceil(length / number_chunks)) - - chunks = [] - for counter in range(0, length, chunksize): - chunks.append(the_list[counter:counter+chunksize]) - - return chunks - -def _confirm_chunk(original, result): - all_chunked = [] - for chunk in result: - all_chunked.extend(chunk) - print("length of all chunked:", len(all_chunked)) - assert original == all_chunked, "You didn't chunk right" - - -def _chunk_test(divide_func): - import random - random.seed(7) - - number_exact = 0 - total_amount_off = 0 - - for test in range(1, 1001): - print("\n\ntest:", test) - number_chunks = random.randint(1, 20) - number_elements = random.randint(0, 100) - the_list = list(range(1, number_elements)) - result = divide_func(the_list, number_chunks) - - print("Dividing list of length {} into approximately {} chunks - got {} chunks".format( - len(the_list), number_chunks, len(result))) - print("result:", result) - - _confirm_chunk(the_list, result) - - amount_off = abs(number_chunks - len(result)) - if amount_off == 0: - number_exact += 1 - else: - total_amount_off += amount_off - - - print("\n{} exact out of {} [Total amount off: {}]".format(number_exact, - test, - total_amount_off)) - assert number_exact == 558 - assert total_amount_off == 1580 - return number_exact, total_amount_off - - -def _main(): - info = dict() - #funcs = (("sam", sam_divide_into_chunks), ("zach", zach_divide_into_chunks)) - funcs = (("only one", divide_into_chunks),) - for name, func in funcs: - start = time.time() - number_exact, total_amount_off = _chunk_test(func) - took = time.time() - start - info[name] = dict(number_exact=number_exact, - total_amount_off=total_amount_off, - took=took) - - print("info is:", info) - -if __name__ == '__main__': - _main() - print("\nConfirming doctests...") - import doctest - doctest.testmod() diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py deleted file mode 100644 index 4312fed0..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py +++ /dev/null @@ -1,184 +0,0 @@ -# This is a converter for common LMM formats, so as to keep file -# reader complexity outside the main routines. - -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -from __future__ import print_function -from optparse import OptionParser -import sys -import os -import numpy as np -# from lmm import LMM, run_other -# import input -import plink - -usage = """ -python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--pheno pname] [--geno gname] - - Convert files for runlmm.py processing. Writes to stdout by default. - - try --help for more information - -Examples: - - python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno - - python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno -""" - -# if len(args) == 0: -# print usage -# sys.exit(1) - -option_parser = OptionParser(usage=usage) -option_parser.add_option("--kinship", dest="kinship", - help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program") -option_parser.add_option("--pheno", dest="pheno", - help="Parse a phenotype file (use with --plink only)") -option_parser.add_option("--geno", dest="geno", - help="Parse a genotype file (use with --plink only)") -option_parser.add_option("--plink", dest="plink", action="store_true", default=False, - help="Parse PLINK style") -# option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True, -# help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.") -option_parser.add_option("--prefix", dest="prefix", - help="Output prefix for output file(s)") -option_parser.add_option("-q", "--quiet", - action="store_false", dest="verbose", default=True, - help="don't print status messages to stdout") -option_parser.add_option("-v", "--verbose", - action="store_true", dest="verbose", default=False, - help="Print extra info") - -(options, args) = option_parser.parse_args() - -writer = None -num_inds = None -snp_names = [] -ind_names = [] - -def msg(s): - sys.stderr.write("INFO: ") - sys.stderr.write(s) - sys.stderr.write("\n") - -def wr(s): - if writer: - writer.write(s) - else: - sys.stdout.write(s) - -def wrln(s): - wr(s) - wr("\n") - - -if options.pheno: - if not options.plink: - raise Exception("Use --plink switch") - # Because plink does not track size we need to read the whole thing first - msg("Converting pheno "+options.pheno) - phenos = [] - count = 0 - count_pheno = None - for line in open(options.pheno,'r'): - count += 1 - list = line.split() - pcount = len(list)-2 - assert(pcount > 0) - if count_pheno == None: - count_pheno = pcount - assert(count_pheno == pcount) - row = [list[0]]+list[2:] - phenos.append(row) - - writer = None - if options.prefix: - writer = open(options.prefix+".pheno","w") - wrln("# Phenotype format version 1.0") - wrln("# Individuals = "+str(count)) - wrln("# Phenotypes = "+str(count_pheno)) - for i in range(count_pheno): - wr("\t"+str(i+1)) - wr("\n") - for i in range(count): - wr("\t".join(phenos[i])) - wr("\n") - num_inds = count - msg(str(count)+" pheno lines written") - -if options.kinship: - is_header = True - count = 0 - msg("Converting kinship "+options.kinship) - writer = None - if options.prefix: - writer = open(options.prefix+".kin","w") - for line in open(options.kinship,'r'): - count += 1 - if is_header: - size = len(line.split()) - wrln("# Kinship format version 1.0") - wrln("# Size="+str(size)) - for i in range(size): - wr("\t"+str(i+1)) - wr("\n") - is_header = False - wr(str(count)) - wr("\t") - wr("\t".join(line.split())) - wr("\n") - num_inds = count - msg(str(count)+" kinship lines written") - -if options.geno: - msg("Converting geno "+options.geno+'.bed') - if not options.plink: - raise Exception("Use --plink switch") - if not num_inds: - raise Exception("Can not figure out the number of individuals, use --pheno or --kinship") - bim_snps = plink.readbim(options.geno+'.bim') - num_snps = len(bim_snps) - writer = None - if options.prefix: - writer = open(options.prefix+".geno","w") - wrln("# Genotype format version 1.0") - wrln("# Individuals = "+str(num_inds)) - wrln("# SNPs = "+str(num_snps)) - wrln("# Encoding = HAB") - for i in range(num_inds): - wr("\t"+str(i+1)) - wr("\n") - - m = [] - def out(i,x): - # wr(str(i)+"\t") - # wr("\t".join(x)) - # wr("\n") - m.append(x) - - snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','-'), out) - - msg("Write transposed genotype matrix") - for g in range(num_snps): - wr(bim_snps[g][1]+"\t") - for i in range(num_inds): - wr(m[g][i]) - wr("\n") - - msg(str(count)+" geno lines written (with "+str(snps)+" snps)") - -msg("Converting done") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py deleted file mode 100644 index 49f32e3a..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ /dev/null @@ -1,51 +0,0 @@ -# Genotype routines - -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import numpy as np -from collections import Counter -import operator - -def replace_missing_with_MAF(snp_g): - """ - Replace the missing genotype with the minor allele frequency (MAF) - in the snp row. It is rather slow! - """ - cnt = Counter(snp_g) - tuples = sorted(cnt.items(), key=operator.itemgetter(1)) - l2 = [t for t in tuples if not np.isnan(t[0])] - maf = l2[0][0] - res = np.array([maf if np.isnan(snp) else snp for snp in snp_g]) - return res - -def normalize(ind_g): - """ - Run for every SNP list (for one individual) and return - normalized SNP genotype values with missing data filled in - """ - g = np.copy(ind_g) # copy to avoid side effects - missing = np.isnan(g) - values = g[True - missing] - mean = values.mean() # Global mean value - stddev = np.sqrt(values.var()) # Global stddev - g[missing] = mean # Plug-in mean values for missing data - if stddev == 0: - g = g - mean # Subtract the mean - else: - g = (g - mean) / stddev # Normalize the deviation - return g - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py deleted file mode 100644 index 821195c8..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py +++ /dev/null @@ -1,110 +0,0 @@ -# Standalone specific methods and callback handler -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# Set the log level with -# -# logging.basicConfig(level=logging.DEBUG) - -from __future__ import absolute_import, print_function, division - -import numpy as np -import sys -import logging - -# logger = logging.getLogger(__name__) -logger = logging.getLogger('lmm2') -logging.basicConfig(level=logging.DEBUG) -np.set_printoptions(precision=3,suppress=True) - -progress_location = None -progress_current = None -progress_prev_perc = None - -def progress_default_func(location,count,total): - global progress_current - value = round(count*100.0/total) - progress_current = value - -progress_func = progress_default_func - -def progress_set_func(func): - global progress_func - progress_func = func - -def progress(location, count, total): - global progress_location - global progress_prev_perc - - perc = round(count*100.0/total) - if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): - progress_func(location, count, total) - logger.info("Progress: %s %d%%" % (location,perc)) - progress_location = location - progress_prev_perc = perc - -def mprint(msg,data): - """ - Array/matrix print function - """ - m = np.array(data) - if m.ndim == 1: - print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) - if m.ndim == 2: - print(msg,m.shape,"=\n[", - m[0][0:3]," ... ",m[0][-3:],"\n ", - m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", - m[-2][0:3]," ... ",m[-2][-3:],"\n ", - m[-1][0:3]," ... ",m[-1][-3:],"]") - -def fatal(msg): - logger.critical(msg) - raise Exception(msg) - -def callbacks(): - return dict( - write = sys.stdout.write, - writeln = print, - debug = logger.debug, - info = logger.info, - warning = logger.warning, - error = logger.error, - critical = logger.critical, - fatal = fatal, - progress = progress, - mprint = mprint - ) - -def uses(*funcs): - """ - Some sugar - """ - return [callbacks()[func] for func in funcs] - -# ----- Minor test cases: - -if __name__ == '__main__': - # logging.basicConfig(level=logging.DEBUG) - logging.debug("Test %i" % (1)) - d = callbacks()['debug'] - d("TEST") - wrln = callbacks()['writeln'] - wrln("Hello %i" % 34) - progress = callbacks()['progress'] - progress("I am half way",50,100) - list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] - mprint("list",list) - matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] - mprint("matrix",matrix) - ix,dx = uses("info","debug") - ix("ix") - dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py deleted file mode 100644 index 247a8729..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ /dev/null @@ -1,165 +0,0 @@ -# pylmm-based GWAS calculation -# -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -#!/usr/bin/python - -import pdb -import time -import sys -import lmm2 - -import os -import numpy as np -import input -from optmatrix import matrix_initialize -from lmm2 import LMM2 - -import multiprocessing as mp # Multiprocessing is part of the Python stdlib -import Queue - -# ---- A trick to decide on the environment: -try: - from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses -except ImportError: - has_gn2=False - from standalone import uses - -progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') - - -def formatResult(id,beta,betaSD,ts,ps): - return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" - -def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - result = [] - for snp_id in snp_ids: - snp,id = snp_id - x = snp.reshape((n,1)) # all the SNPs - # if refit: - # L.fit(X=snp,REML=REML) - ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) - # result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)) - result.append( (ts,ps) ) - if not q: - q = compute_snp.q - q.put([j,result]) - return j - -def f_init(q): - compute_snp.q = q - -def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): - """ - GWAS. The G matrix should be n inds (cols) x m snps (rows) - """ - info("In gwas.gwas") - matrix_initialize() - cpu_num = mp.cpu_count() - numThreads = None # for now use all available threads - kfile2 = False - reml = restricted_max_likelihood - - mprint("G",G) - n = G.shape[1] # inds - inds = n - m = G.shape[0] # snps - snps = m - info("%s SNPs",snps) - assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) - - # CREATE LMM object for association - # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose) - # else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2) - - lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) - if not refit: - info("Computing fit for null model") - lmm2.fit() # follow GN model in run_other - info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma)) - - res = [] - - # Set up the pool - # mp.set_start_method('spawn') - q = mp.Queue() - p = mp.Pool(numThreads, f_init, [q]) - collect = [] - - count = 0 - job = 0 - jobs_running = 0 - jobs_completed = 0 - for snp in G: - snp_id = (snp,'SNPID') - count += 1 - if count % 1000 == 0: - job += 1 - debug("Job %d At SNP %d" % (job,count)) - if numThreads == 1: - debug("Running on 1 THREAD") - compute_snp(job,n,collect,lmm2,reml,q) - collect = [] - j,lst = q.get() - debug("Job "+str(j)+" finished") - jobs_completed += 1 - progress("GWAS2",jobs_completed,snps/1000) - res.append((j,lst)) - else: - p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) - jobs_running += 1 - collect = [] - while jobs_running > cpu_num: - try: - j,lst = q.get_nowait() - debug("Job "+str(j)+" finished") - jobs_completed += 1 - progress("GWAS2",jobs_completed,snps/1000) - res.append((j,lst)) - jobs_running -= 1 - except Queue.Empty: - time.sleep(0.1) - pass - if jobs_running > cpu_num*2: - time.sleep(1.0) - else: - break - - collect.append(snp_id) - - if numThreads==1 or count<1000 or len(collect)>0: - job += 1 - debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count)) - compute_snp(job,n,collect,lmm2,reml,q) - collect = [] - j,lst = q.get() - res.append((j,lst)) - debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect))) - for job in range(jobs_running): - j,lst = q.get(True,15) # time out - debug("Job "+str(j)+" finished") - jobs_completed += 1 - progress("GWAS2",jobs_completed,snps/1000) - res.append((j,lst)) - - mprint("Before sort",[res1[0] for res1 in res]) - res = sorted(res,key=lambda x: x[0]) - mprint("After sort",[res1[0] for res1 in res]) - info([len(res1[1]) for res1 in res]) - ts = [item[0] for j,res1 in res for item in res1] - ps = [item[1] for j,res1 in res for item in res1] - return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py deleted file mode 100644 index 7063fedf..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ /dev/null @@ -1,267 +0,0 @@ -# pylmm is a python-based linear mixed-model solver with applications to GWAS - -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) - -#The program is free for academic use. Please contact Nick Furlotte -# if you are interested in using the software for -#commercial purposes. - -#The software must not be modified and distributed without prior -#permission of the author. - -#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -#A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys -import numpy as np -import struct -import pdb - -class plink: - def __init__(self,fbase,kFile=None,phenoFile=None,type='b',normGenotype=True,readKFile=False): - self.fbase = fbase - self.type = type - self.indivs = self.getIndivs(self.fbase,type) - self.kFile = kFile - self.phenos = None - self.normGenotype = normGenotype - self.phenoFile = phenoFile - # Originally I was using the fastLMM style that has indiv IDs embedded. - # NOW I want to use this module to just read SNPs so I'm allowing - # the programmer to turn off the kinship reading. - self.readKFile = readKFile - - if self.kFile: - self.K = self.readKinship(self.kFile) - elif os.path.isfile("%s.kin" % fbase): - self.kFile = "%s.kin" %fbase - if self.readKFile: - self.K = self.readKinship(self.kFile) - else: - self.kFile = None - self.K = None - - self.getPhenos(self.phenoFile) - - self.fhandle = None - self.snpFileHandle = None - - def __del__(self): - if self.fhandle: self.fhandle.close() - if self.snpFileHandle: self.snpFileHandle.close() - - def getSNPIterator(self): - if not self.type == 'b': - sys.stderr.write("Have only implemented this for binary plink files (bed)\n") - return - - # get the number of snps - file = self.fbase + '.bim' - i = 0 - f = open(file,'r') - for line in f: i += 1 - f.close() - self.numSNPs = i - self.have_read = 0 - self.snpFileHandle = open(file,'r') - - self.BytestoRead = self.N / 4 + (self.N % 4 and 1 or 0) - self._formatStr = 'c'*self.BytestoRead - - file = self.fbase + '.bed' - self.fhandle = open(file,'rb') - - magicNumber = self.fhandle.read(2) - order = self.fhandle.read(1) - if not order == '\x01': - sys.stderr.write("This is not in SNP major order - you did not handle this case\n") - raise StopIteration - - return self - - def __iter__(self): - return self.getSNPIterator() - - def next(self): - if self.have_read == self.numSNPs: - raise StopIteration - X = self.fhandle.read(self.BytestoRead) - XX = [bin(ord(x)) for x in struct.unpack(self._formatStr,X)] - self.have_read += 1 - return self.formatBinaryGenotypes(XX,self.normGenotype),self.snpFileHandle.readline().strip().split()[1] - - def formatBinaryGenotypes(self,X,norm=True): - D = { \ - '00': 0.0, \ - '10': 0.5, \ - '11': 1.0, \ - '01': np.nan \ - } - - D_tped = { \ - '00': '1 1', \ - '10': '1 2', \ - '11': '2 2', \ - '01': '0 0' \ - } - - #D = D_tped - - G = [] - for x in X: - if not len(x) == 10: - xx = x[2:] - x = '0b' + '0'*(8 - len(xx)) + xx - a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) - L = [D[y] for y in [a,b,c,d]] - G += L - # only take the leading values because whatever is left should be null - G = G[:self.N] - G = np.array(G) - if norm: - G = self.normalizeGenotype(G) - return G - - def normalizeGenotype(self,G): - # print "Before",G - # print G.shape - print "call input.normalizeGenotype" - raise "This should not be used" - x = True - np.isnan(G) - m = G[x].mean() - s = np.sqrt(G[x].var()) - G[np.isnan(G)] = m - if s == 0: G = G - m - else: G = (G - m) / s - # print "After",G - return G - - def getPhenos(self,phenoFile=None): - if not phenoFile: - self.phenoFile = phenoFile = self.fbase+".phenos" - if not os.path.isfile(phenoFile): - sys.stderr.write("Could not find phenotype file: %s\n" % (phenoFile)) - return - f = open(phenoFile,'r') - keys = [] - P = [] - for line in f: - v = line.strip().split() - keys.append((v[0],v[1])) - P.append([(x == 'NA' or x == '-9') and np.nan or float(x) for x in v[2:]]) - f.close() - P = np.array(P) - - # reorder to match self.indivs - D = {} - L = [] - for i in range(len(keys)): - D[keys[i]] = i - for i in range(len(self.indivs)): - if not D.has_key(self.indivs[i]): - continue - L.append(D[self.indivs[i]]) - P = P[L,:] - - self.phenos = P - return P - - def getIndivs(self,base,type='b'): - if type == 't': - famFile = "%s.tfam" % base - else: - famFile = "%s.fam" % base - keys = [] - i = 0 - f = open(famFile,'r') - for line in f: - v = line.strip().split() - famId = v[0] - indivId = v[1] - k = (famId.strip(),indivId.strip()) - keys.append(k) - i += 1 - f.close() - - self.N = len(keys) - sys.stderr.write("Read %d individuals from %s\n" % (self.N, famFile)) - - return keys - - def readKinship(self,kFile): - # Assume the fastLMM style - # This will read in the kinship matrix and then reorder it - # according to self.indivs - additionally throwing out individuals - # that are not in both sets - if self.indivs == None or len(self.indivs) == 0: - sys.stderr.write("Did not read any individuals so can't load kinship\n") - return - - sys.stderr.write("Reading kinship matrix from %s\n" % (kFile) ) - - f = open(kFile,'r') - # read indivs - v = f.readline().strip().split("\t")[1:] - keys = [tuple(y.split()) for y in v] - D = {} - for i in range(len(keys)): D[keys[i]] = i - - # read matrix - K = [] - for line in f: - K.append([float(x) for x in line.strip().split("\t")[1:]]) - f.close() - K = np.array(K) - - # reorder to match self.indivs - L = [] - KK = [] - X = [] - for i in range(len(self.indivs)): - if not D.has_key(self.indivs[i]): - X.append(self.indivs[i]) - else: - KK.append(self.indivs[i]) - L.append(D[self.indivs[i]]) - K = K[L,:][:,L] - self.indivs = KK - self.indivs_removed = X - if len(self.indivs_removed): - sys.stderr.write("Removed %d individuals that did not appear in Kinship\n" % (len(self.indivs_removed))) - return K - - def getCovariates(self,covFile=None): - if not os.path.isfile(covFile): - sys.stderr.write("Could not find covariate file: %s\n" % (phenoFile)) - return - f = open(covFile,'r') - keys = [] - P = [] - for line in f: - v = line.strip().split() - keys.append((v[0],v[1])) - P.append([x == 'NA' and np.nan or float(x) for x in v[2:]]) - f.close() - P = np.array(P) - - # reorder to match self.indivs - D = {} - L = [] - for i in range(len(keys)): - D[keys[i]] = i - for i in range(len(self.indivs)): - if not D.has_key(self.indivs[i]): continue - L.append(D[self.indivs[i]]) - P = P[L,:] - - return P diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py deleted file mode 100644 index 1c157fd8..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ /dev/null @@ -1,168 +0,0 @@ -# pylmm kinship calculation -# -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -# env PYTHONPATH=$pylmm_lib_path:./lib python $pylmm_lib_path/runlmm.py --pheno test.pheno --geno test9000.geno kinship --test - -import sys -import os -import numpy as np -from scipy import linalg -import multiprocessing as mp # Multiprocessing is part of the Python stdlib -import Queue -import time - -from optmatrix import matrix_initialize, matrixMultT - -# ---- A trick to decide on the environment: -try: - from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses, progress_set_func -except ImportError: - has_gn2=False - import standalone as handlers - from standalone import uses, progress_set_func - -progress,debug,info,mprint = uses('progress','debug','info','mprint') - -def kinship_full(G): - """ - Calculate the Kinship matrix using a full dot multiplication - """ - # mprint("kinship_full G",G) - m = G.shape[0] # snps - n = G.shape[1] # inds - info("%d SNPs",m) - assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n) - # m = np.dot(G.T,G) - m = matrixMultT(G.T) - m = m/G.shape[0] - # mprint("kinship_full K",m) - return m - -def compute_W(job,G,n,snps,compute_size): - """ - Read 1000 SNPs at a time into matrix and return the result - """ - m = compute_size - W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs) - for j in range(0,compute_size): - pos = job*m + j # real position - if pos >= snps: - W = W[:,range(0,j)] - break - snp = G[job*compute_size+j] - if snp.var() == 0: - continue - W[:,j] = snp # set row to list of SNPs - return W - -def compute_matrixMult(job,W,q = None): - """ - Compute Kinship(W)*j - - For every set of SNPs matrixMult is used to multiply matrices T(W)*W - """ - res = matrixMultT(W) - if not q: q=compute_matrixMult.q - q.put([job,res]) - return job - -def f_init(q): - compute_matrixMult.q = q - -# Calculate the kinship matrix from G (SNPs as rows!), returns K -# -def kinship(G,computeSize=1000,numThreads=None,useBLAS=False): - - matrix_initialize(useBLAS) - - mprint("G",G) - n = G.shape[1] # inds - inds = n - m = G.shape[0] # snps - snps = m - info("%i SNPs" % (m)) - assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) - - q = mp.Queue() - p = mp.Pool(numThreads, f_init, [q]) - cpu_num = mp.cpu_count() - info("CPU cores: %i" % cpu_num) - iterations = snps/computeSize+1 - - results = [] - K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals - - completed = 0 - for job in range(iterations): - info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize))) - W = compute_W(job,G,n,snps,computeSize) - if numThreads == 1: - # Single-core - compute_matrixMult(job,W,q) - j,x = q.get() - debug("Job "+str(j)+" finished") - progress("kinship",j,iterations) - K_j = x - K = K + K_j - else: - # Multi-core - results.append(p.apply_async(compute_matrixMult, (job,W))) - # Do we have a result? - while (len(results)-completed>cpu_num*2): - time.sleep(0.1) - try: - j,x = q.get_nowait() - debug("Job "+str(j)+" finished") - K_j = x - K = K + K_j - completed += 1 - progress("kinship",completed,iterations) - except Queue.Empty: - pass - - if numThreads == None or numThreads > 1: - for job in range(len(results)-completed): - j,x = q.get(True,15) - debug("Job "+str(j)+" finished") - K_j = x - K = K + K_j - completed += 1 - progress("kinship",completed,iterations) - - K = K / float(snps) - return K - -def kvakve(K): - """ - Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned - of small values < 1e-6 (notably smaller than zero) - """ - info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) - Kva,Kve = linalg.eigh(K) - mprint("Kva",Kva) - mprint("Kve",Kve) - - if sum(Kva < 0): - info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0))) - Kva[Kva < 1e-6] = 1e-6 - return Kva,Kve - - - - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py deleted file mode 100644 index 2a0c7fdc..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ /dev/null @@ -1,995 +0,0 @@ -# pylmm is a python-based linear mixed-model solver with applications to GWAS - -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -from __future__ import absolute_import, print_function, division - -import sys -import time -import uuid - -import numpy as np -from scipy import linalg -from scipy import optimize -from scipy import stats -# import pdb - -# import gzip -# import zlib -import datetime -# import cPickle as pickle -from pprint import pformat as pf - -# Add local dir to PYTHONPATH -import os -cwd = os.path.dirname(__file__) -if sys.path[0] != cwd: - sys.path.insert(1,cwd) - -# pylmm imports -from kinship import kinship, kinship_full, kvakve -import genotype -import phenotype -import gwas -from benchmark import Bench - -# The following imports are for exchanging data with the webserver -import simplejson as json -from redis import Redis -Redis = Redis() -import temp_data - -has_gn2=None - -# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n") -sys.stderr.write("INFO: pylmm file is "+__file__+"\n") - -# ---- A trick to decide on the environment: -try: - sys.stderr.write("INFO: lmm try loading module\n") - import utility.formatting # this is never used, just to check the environment - sys.stderr.write("INFO: This is a genenetwork2 environment\n") - from gn2 import uses, progress_set_func - has_gn2=True -except ImportError: - # Failed to load gn2 - has_gn2=False - import standalone as handlers - from standalone import uses, progress_set_func - sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") - -progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') - -#np.seterr('raise') - -#def run_human(pheno_vector, -# covariate_matrix, -# plink_input_file, -# kinship_matrix, -# refit=False, -# loading_progress=None): - -def run_human(pheno_vector, - covariate_matrix, - plink_input_file, - kinship_matrix, - refit=False): - - v = np.isnan(pheno_vector) - keep = True - v - keep = keep.reshape((len(keep),)) - - identifier = str(uuid.uuid4()) - - #print("pheno_vector: ", pf(pheno_vector)) - #print("kinship_matrix: ", pf(kinship_matrix)) - #print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) - - #lmm_vars = pickle.dumps(dict( - # pheno_vector = pheno_vector, - # covariate_matrix = covariate_matrix, - # kinship_matrix = kinship_matrix - #)) - #Redis.hset(identifier, "lmm_vars", lmm_vars) - #Redis.expire(identifier, 60*60) - - if v.sum(): - pheno_vector = pheno_vector[keep] - print("pheno_vector shape is now: ", pf(pheno_vector.shape)) - covariate_matrix = covariate_matrix[keep,:] - print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) - print("keep is: ", pf(keep.shape)) - kinship_matrix = kinship_matrix[keep,:][:,keep] - - print("kinship_matrix:", pf(kinship_matrix)) - - n = kinship_matrix.shape[0] - print("n is:", n) - lmm_ob = LMM(pheno_vector, - kinship_matrix, - covariate_matrix) - lmm_ob.fit() - - - # Buffers for pvalues and t-stats - p_values = [] - t_stats = [] - - #print("input_file: ", plink_input_file) - - with Bench("Opening and loading pickle file"): - with gzip.open(plink_input_file, "rb") as input_file: - data = pickle.load(input_file) - - plink_input = data['plink_input'] - - #plink_input.getSNPIterator() - with Bench("Calculating numSNPs"): - total_snps = data['numSNPs'] - - with Bench("snp iterator loop"): - count = 0 - - with Bench("Create list of inputs"): - inputs = list(plink_input) - - with Bench("Divide into chunks"): - results = chunks.divide_into_chunks(inputs, 64) - - result_store = [] - - key = "plink_inputs" - - # Todo: Delete below line when done testing - Redis.delete(key) - - timestamp = datetime.datetime.utcnow().isoformat() - - # Pickle chunks of input SNPs (from Plink interator) and compress them - #print("Starting adding loop") - for part, result in enumerate(results): - #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL) - holder = pickle.dumps(dict( - identifier = identifier, - part = part, - timestamp = timestamp, - result = result - ), pickle.HIGHEST_PROTOCOL) - - #print("Adding:", part) - Redis.rpush(key, zlib.compress(holder)) - #print("End adding loop") - #print("***** Added to {} queue *****".format(key)) - for snp, this_id in plink_input: - #with Bench("part before association"): - #if count > 1000: - # break - count += 1 - progress("human",count,total_snps) - - #with Bench("actual association"): - ps, ts = human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit) - - #with Bench("after association"): - p_values.append(ps) - t_stats.append(ts) - - return p_values, t_stats - - -#class HumanAssociation(object): -# def __init__(self): -# - -def human_association(snp, - n, - keep, - lmm_ob, - pheno_vector, - covariate_matrix, - kinship_matrix, - refit): - - x = snp[keep].reshape((n,1)) - #x[[1,50,100,200,3000],:] = np.nan - v = np.isnan(x).reshape((-1,)) - - # Check SNPs for missing values - if v.sum(): - keeps = True - v - xs = x[keeps,:] - # If no variation at this snp or all genotypes missing - if keeps.sum() <= 1 or xs.var() <= 1e-6: - return np.nan, np.nan - #p_values.append(np.nan) - #t_stats.append(np.nan) - #continue - - # Its ok to center the genotype - I used options.normalizeGenotype to - # force the removal of missing genotypes as opposed to replacing them with MAF. - - #if not options.normalizeGenotype: - # xs = (xs - xs.mean()) / np.sqrt(xs.var()) - - filtered_pheno = pheno_vector[keeps] - filtered_covariate_matrix = covariate_matrix[keeps,:] - - print("kinship_matrix shape is: ", pf(kinship_matrix.shape)) - print("keeps is: ", pf(keeps.shape)) - filtered_kinship_matrix = kinship_matrix[keeps,:][:,keeps] - filtered_lmm_ob = lmm.LMM(filtered_pheno,filtered_kinship_matrix,X0=filtered_covariate_matrix) - if refit: - filtered_lmm_ob.fit(X=xs) - else: - #try: - filtered_lmm_ob.fit() - #except: pdb.set_trace() - ts,ps,beta,betaVar = Ls.association(xs,returnBeta=True) - else: - if x.var() == 0: - return np.nan, np.nan - #p_values.append(np.nan) - #t_stats.append(np.nan) - #continue - if refit: - lmm_ob.fit(X=x) - ts, ps, beta, betaVar = lmm_ob.association(x) - return ps, ts - - -#def run(pheno_vector, -# genotype_matrix, -# restricted_max_likelihood=True, -# refit=False, -# temp_data=None): - -def run_other_old(pheno_vector, - genotype_matrix, - restricted_max_likelihood=True, - refit=False): - - """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics - - restricted_max_likelihood -- whether to use restricted max likelihood; True or False - refit -- whether to refit the variance component for each marker - - """ - - print("Running the original LMM engine in run_other (old)") - print("REML=",restricted_max_likelihood," REFIT=",refit) - with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix) - - print("kinship_matrix: ", pf(kinship_matrix)) - print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) - - # with Bench("Create LMM object"): - # lmm_ob = LMM(pheno_vector, kinship_matrix) - - # with Bench("LMM_ob fitting"): - # lmm_ob.fit() - - print("run_other_old genotype_matrix: ", genotype_matrix.shape) - print(genotype_matrix) - - with Bench("Doing GWAS"): - t_stats, p_values = GWAS(pheno_vector, - genotype_matrix.T, - kinship_matrix, - restricted_max_likelihood=True, - refit=False) - Bench().report() - return p_values, t_stats - -def run_other_new(n,m,pheno_vector, - geno, - restricted_max_likelihood=True, - refit=False): - - """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics - - restricted_max_likelihood -- whether to use restricted max likelihood; True or False - refit -- whether to refit the variance component for each marker - - """ - - print("Running the new LMM2 engine in run_other_new") - print("REML=",restricted_max_likelihood," REFIT=",refit) - - # Adjust phenotypes - n,Y,keep = phenotype.remove_missing_new(n,pheno_vector) - - # if options.maf_normalization: - # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - # print "MAF replacements: \n",G - # if not options.skip_genotype_normalization: - # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - - geno = geno[:,keep] - with Bench("Calculate Kinship"): - K,G = calculate_kinship_new(geno) - - print("kinship_matrix: ", pf(K)) - print("kinship_matrix.shape: ", pf(K.shape)) - - # with Bench("Create LMM object"): - # lmm_ob = lmm2.LMM2(Y,K) - # with Bench("LMM_ob fitting"): - # lmm_ob.fit() - - print("run_other_new genotype_matrix: ", G.shape) - print(G) - - with Bench("Doing GWAS"): - t_stats, p_values = gwas.gwas(Y, - G, - K, - restricted_max_likelihood=True, - refit=False,verbose=True) - Bench().report() - return p_values, t_stats - -# def matrixMult(A,B): -# return np.dot(A,B) - -def matrixMult(A,B): - - # If there is no fblas then we will revert to np.dot() - - try: - linalg.fblas - except AttributeError: - return np.dot(A,B) - - #print("A is:", pf(A.shape)) - #print("B is:", pf(B.shape)) - - # If the matrices are in Fortran order then the computations will be faster - # when using dgemm. Otherwise, the function will copy the matrix and that takes time. - if not A.flags['F_CONTIGUOUS']: - AA = A.T - transA = True - else: - AA = A - transA = False - - if not B.flags['F_CONTIGUOUS']: - BB = B.T - transB = True - else: - BB = B - transB = False - - return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) - -def calculate_kinship_new(genotype_matrix): - """ - Call the new kinship calculation where genotype_matrix contains - inds (columns) by snps (rows). - """ - assert type(genotype_matrix) is np.ndarray - info("call genotype.normalize") - G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix) - mprint("G",genotype_matrix) - info("call calculate_kinship_new") - return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME) - -def calculate_kinship_iter(geno): - """ - Call the new kinship calculation where genotype_matrix contains - inds (columns) by snps (rows). - """ - assert type(genotype_matrix) is iter - info("call genotype.normalize") - G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - info("call calculate_kinship_new") - return kinship(G) - -def calculate_kinship_old(genotype_matrix): - """ - genotype_matrix is an n x m matrix encoding SNP minor alleles. - - This function takes a matrix oF SNPs, imputes missing values with the maf, - normalizes the resulting vectors and returns the RRM matrix. - - """ - info("call calculate_kinship_old") - fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog") - n = genotype_matrix.shape[0] - m = genotype_matrix.shape[1] - info("genotype 2D matrix n (inds) is: %d" % (n)) - info("genotype 2D matrix m (snps) is: %d" % (m)) - assert m>n, "n should be larger than m (snps>inds)" - keep = [] - mprint("G (before old normalize)",genotype_matrix) - for counter in range(m): - #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) - #Checks if any values in column are not numbers - not_number = np.isnan(genotype_matrix[:,counter]) - - #Gets vector of values for column (no values in vector if not all values in col are numbers) - marker_values = genotype_matrix[True - not_number, counter] - #print("marker_values is:", pf(marker_values)) - - #Gets mean of values in vector - values_mean = marker_values.mean() - - genotype_matrix[not_number,counter] = values_mean - vr = genotype_matrix[:,counter].var() - if vr == 0: - continue - keep.append(counter) - genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - progress('kinship_old normalize genotype',counter,m) - - genotype_matrix = genotype_matrix[:,keep] - mprint("G (after old normalize)",genotype_matrix.T) - kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - return kinship_matrix,genotype_matrix - # return kinship_full(genotype_matrix.T),genotype_matrix - -def GWAS(pheno_vector, - genotype_matrix, - kinship_matrix, - kinship_eigen_vals=None, - kinship_eigen_vectors=None, - covariate_matrix=None, - restricted_max_likelihood=True, - refit=False, - temp_data=None): - """ - Performs a basic GWAS scan using the LMM. This function - uses the LMM module to assess association at each SNP and - does some simple cleanup, such as removing missing individuals - per SNP and re-computing the eigen-decomp - - pheno_vector - n x 1 phenotype vector - genotype_matrix - n x m SNP matrix - kinship_matrix - n x n kinship matrix - kinship_eigen_vals, kinship_eigen_vectors = linalg.eigh(K) - or the eigen vectors and values for K - covariate_matrix - n x q covariate matrix - restricted_max_likelihood - use restricted maximum likelihood - refit - refit the variance component for each SNP - - """ - if kinship_eigen_vals is None: - kinship_eigen_vals = [] - if kinship_eigen_vectors is None: - kinship_eigen_vectors = [] - - n = genotype_matrix.shape[0] - m = genotype_matrix.shape[1] - - if covariate_matrix == None: - covariate_matrix = np.ones((n,1)) - - # Remove missing values in pheno_vector and adjust associated parameters - v = np.isnan(pheno_vector) - if v.sum(): - keep = True - v - print(pheno_vector.shape,pheno_vector) - print(keep.shape,keep) - pheno_vector = pheno_vector[keep] - #genotype_matrix = genotype_matrix[keep,:] - #covariate_matrix = covariate_matrix[keep,:] - #kinship_matrix = kinship_matrix[keep,:][:,keep] - kinship_eigen_vals = [] - kinship_eigen_vectors = [] - - lmm_ob = LMM(pheno_vector, - kinship_matrix, - kinship_eigen_vals, - kinship_eigen_vectors, - covariate_matrix) - if not refit: - lmm_ob.fit() - - p_values = [] - t_statistics = [] - - n = genotype_matrix.shape[0] - m = genotype_matrix.shape[1] - - for counter in range(m): - x = genotype_matrix[:,counter].reshape((n, 1)) - v = np.isnan(x).reshape((-1,)) - if v.sum(): - keep = True - v - xs = x[keep,:] - if xs.var() == 0: - p_values.append(0) - t_statistics.append(np.nan) - continue - - print(genotype_matrix.shape,pheno_vector.shape,keep.shape) - - pheno_vector = pheno_vector[keep] - covariate_matrix = covariate_matrix[keep,:] - kinship_matrix = kinship_matrix[keep,:][:,keep] - lmm_ob_2 = LMM(pheno_vector, - kinship_matrix, - X0=covariate_matrix) - if refit: - lmm_ob_2.fit(X=xs) - else: - lmm_ob_2.fit() - ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood) - else: - if x.var() == 0: - p_values.append(0) - t_statistics.append(np.nan) - continue - - if refit: - lmm_ob.fit(X=x) - ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood) - - progress("gwas_old",counter,m) - - p_values.append(ps) - t_statistics.append(ts) - - return t_statistics, p_values - - -class LMM: - - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. - - """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True): - - """ - The constructor takes a phenotype vector or array of size n. - It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. - """ - - if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) - self.verbose = verbose - - #x = Y != -9 - x = True - np.isnan(Y) - #pdb.set_trace() - if not x.sum() == len(Y): - print("Removing %d missing values from Y\n" % ((True - x).sum())) - if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) - Y = Y[x] - print("x: ", len(x)) - print("K: ", K.shape) - #K = K[x,:][:,x] - X0 = X0[x,:] - Kva = [] - Kve = [] - self.nonmissing = x - - print("this K is:", K.shape, pf(K)) - - if len(Kva) == 0 or len(Kve) == 0: - # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K) - end = time.time() - if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) - - self.K = K - self.Kva = Kva - self.Kve = Kve - print("self.Kva is: ", self.Kva.shape, pf(self.Kva)) - print("self.Kve is: ", self.Kve.shape, pf(self.Kve)) - self.Y = Y - self.X0 = X0 - self.N = self.K.shape[0] - - # ----> Below moved to kinship.kvakve(K) - # if sum(self.Kva < 1e-6): - # if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) - # self.Kva[self.Kva < 1e-6] = 1e-6 - - self.transform() - - def transform(self): - - """ - Computes a transformation on the phenotype vector and the covariate matrix. - The transformation is obtained by left multiplying each parameter by the transpose of the - eigenvector matrix of K (the kinship). - """ - - self.Yt = matrixMult(self.Kve.T, self.Y) - self.X0t = matrixMult(self.Kve.T, self.X0) - self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) - self.q = self.X0t.shape[1] - - def getMLSoln(self,h,X): - - """ - Obtains the maximum-likelihood estimates for the covariate coefficients (beta), - the total variance of the trait (sigma) and also passes intermediates that can - be utilized in other functions. The input parameter h is a value between 0 and 1 and represents - the heritability or the proportion of the total variance attributed to genetics. The X is the - covariate matrix. - """ - - S = 1.0/(h*self.Kva + (1.0 - h)) - Xt = X.T*S - XX = matrixMult(Xt,X) - XX_i = linalg.inv(XX) - beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) - Yt = self.Yt - matrixMult(X,beta) - Q = np.dot(Yt.T*S,Yt) - sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) - return beta,sigma,Q,XX_i,XX - - def LL_brent(self,h,X=None,REML=False): - #brent will not be bounded by the specified bracket. - # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. - if h < 0: return 1e6 - return -self.LL(h,X,stack=False,REML=REML)[0] - - def LL(self,h,X=None,stack=True,REML=False): - - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X is None: - X = self.X0t - elif stack: - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL - - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) - LL = LL + 0.5*LL_REML_part - - return LL,beta,sigma,XX_i - - def getMax(self,H, X=None,REML=False): - - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: - HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - if np.isnan(HOpt[-1][0]): - HOpt[-1][0] = [self.LLs[i-1]] - - if len(HOpt) > 1: - if self.verbose: - sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: - return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: - return H[0] - else: - return H[n-1] - - def fit(self,X=None,ngrids=100,REML=True): - - """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. - """ - - if X == None: - X = self.X0t - else: - #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - H = np.array(range(ngrids)) / float(ngrids) - L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) - self.LLs = L - - hmax = self.getMax(H,X,REML) - L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) - - self.H = H - self.optH = hmax - self.optLL = L - self.optBeta = beta - self.optSigma = sigma - - return hmax,beta,sigma,L - - def association(self,X, h = None, stack=True,REML=True, returnBeta=True): - - """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. - - """ - if stack: - #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - if h == None: - h = self.optH - - L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) - q = len(beta) - ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) - - if returnBeta: - return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma - return ts,ps - - def tstat(self,beta,var,sigma,q): - - """ - Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. - This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. - """ - - ts = beta / np.sqrt(var * sigma) - ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - if not len(ts) == 1 or not len(ps) == 1: - print("ts=",ts) - print("ps=",ps) - raise Exception("Something bad happened :(") - return ts.sum(),ps.sum() - - def plotFit(self,color='b-',title=''): - - """ - Simple function to visualize the likelihood space. It takes the LLs - calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. - The resulting "probabilities" are normalized to one and plotted against heritability. - This can be seen as an approximation to the posterior distribuiton of heritability. - - For diagnostic purposes this lets you see if there is one distinct maximum or multiple - and what the variance of the parameter looks like. - """ - import matplotlib.pyplot as pl - - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - pl.plot(self.H,p,color) - pl.xlabel("Heritability") - pl.ylabel("Probability of data") - pl.title(title) - -def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True): - """ - Invoke pylmm using genotype as a matrix or as a (SNP) iterator. - """ - info("run_gwas") - print('pheno', y) - - if species == "human" : - print('kinship', k ) - ps, ts = run_human(pheno_vector = y, - covariate_matrix = cov, - plink_input_file = inputfn, - kinship_matrix = k, - refit = refit) - else: - print('geno', geno.shape, geno) - - if new_code: - ps, ts = run_other_new(n,m,pheno_vector = y, - geno = geno, - restricted_max_likelihood = reml, - refit = refit) - else: - ps, ts = run_other_old(pheno_vector = y, - genotype_matrix = geno, - restricted_max_likelihood = reml, - refit = refit) - return ps,ts - -def gwas_with_redis(key,species,new_code=True): - """ - Invoke pylmm using Redis as a container. new_code runs the new - version. All the Redis code goes here! - """ - json_params = Redis.get(key) - - params = json.loads(json_params) - - tempdata = temp_data.TempData(params['temp_uuid']) - def update_tempdata(loc,i,total): - """ - This is the single method that updates Redis for percentage complete! - """ - tempdata.store("percent_complete",round(i*100.0/total)) - debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) - progress_set_func(update_tempdata) - - def narray(t): - info("Type is "+t) - v = params.get(t) - if v is not None: - # Note input values can be array of string or float - v1 = [x if x != 'NA' else 'nan' for x in v] - v = np.array(v1).astype(np.float) - return v - - def marray(t): - info("Type is "+t) - v = params.get(t) - if v is not None: - m = [] - for r in v: - # Note input values can be array of string or float - r1 = [x if x != 'NA' else 'nan' for x in r] - m.append(np.array(r1).astype(np.float)) - return np.array(m) - return np.array(v) - - def marrayT(t): - m = marray(t) - if m is not None: - return m.T - return m - - # We are transposing before we enter run_gwas - this should happen on the webserver - # side (or when reading data from file) - k = marray('kinship_matrix') - g = marrayT('genotype_matrix') - mprint("geno",g) - y = narray('pheno_vector') - n = len(y) - m = params.get('num_genotypes') - if m is None: - m = g.shape[0] - info("m=%d,n=%d" % (m,n)) - ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code) - - results_key = "pylmm:results:" + params['temp_uuid'] - - # fatal(results_key) - json_results = json.dumps(dict(p_values = ps, - t_stats = ts)) - - #Pushing json_results into a list where it is the only item because blpop needs a list - Redis.rpush(results_key, json_results) - Redis.expire(results_key, 60*60) - return ps, ts - -def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True): - """ - This function emulates current GN2 behaviour by pre-loading Redis (note the input - genotype is transposed to emulate GN2 (FIXME!) - """ - info("Loading Redis from parsed data") - if kinship == None: - k = None - else: - k = kinship.tolist() - params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = geno.T.tolist(), - num_genotypes = geno.shape[0], - kinship_matrix = k, - covariate_matrix = None, - input_file_name = None, - restricted_max_likelihood = True, - refit = False, - temp_uuid = "testrun_temp_uuid", - - # meta data - timestamp = datetime.datetime.now().isoformat()) - - json_params = json.dumps(params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - - return gwas_with_redis(key,species,new_code) - -def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator): - """ - This function emulates GN2 behaviour by pre-loading Redis with - a SNP iterator, for this it sets a key for every genotype (SNP) - """ - print("Loading Redis using a SNP iterator") - for i,genotypes in enumerate(geno_iterator): - gkey = key+'_geno_'+str(i) - Redis.set(gkey, genotypes) - Redis.expire(gkey, 60*60) - - if kinship == None: - k = None - else: - k = kinship.tolist() - params = dict(pheno_vector = pheno.tolist(), - genotype_matrix = "iterator", - num_genotypes = i, - kinship_matrix = k, - covariate_matrix = None, - input_file_name = None, - restricted_max_likelihood = True, - refit = False, - temp_uuid = "testrun_temp_uuid", - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - return gwas_with_redis(key,species) - -# This is the main function used by Genenetwork2 (with environment) -# -# Note that this calling route will become OBSOLETE (we should use runlmm.py -# instead) -def gn2_main(): - import argparse - parser = argparse.ArgumentParser(description='Run pyLMM') - parser.add_argument('-k', '--key') - parser.add_argument('-s', '--species') - - opts = parser.parse_args() - - key = opts.key - species = opts.species - - gwas_with_redis(key,species) - - -if __name__ == '__main__': - print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!") - gn2_main() - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py deleted file mode 100644 index d871d8d2..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ /dev/null @@ -1,433 +0,0 @@ -# pylmm is a python-based linear mixed-model solver with applications to GWAS - -# Copyright (C) 2013,2014 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import sys -import time -import numpy as np -from scipy.linalg import eigh, inv, det -import scipy.stats as stats # t-tests -from scipy import optimize -from optmatrix import matrixMult -import kinship - -sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n") -sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n") - -# ---- A trick to decide on the environment: -try: - sys.stderr.write("INFO: lmm2 try loading module\n") - import utility.formatting # this is never used, just to check the environment - sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n") - from gn2 import uses, progress_set_func -except ImportError: - # Failed to load gn2 - has_gn2=False - import standalone as handlers - from standalone import uses, progress_set_func - sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") - -progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') - - -def calculateKinship(W,center=False): - """ - W is an n x m matrix encoding SNP minor alleles. - - This function takes a matrix oF SNPs, imputes missing values with the maf, - normalizes the resulting vectors and returns the RRM matrix. - """ - n = W.shape[0] - m = W.shape[1] - keep = [] - for i in range(m): - mn = W[True - np.isnan(W[:,i]),i].mean() - W[np.isnan(W[:,i]),i] = mn - vr = W[:,i].var() - if vr == 0: continue - - keep.append(i) - W[:,i] = (W[:,i] - mn) / np.sqrt(vr) - - W = W[:,keep] - K = matrixMult(W,W.T) * 1.0/float(m) - if center: - P = np.diag(np.repeat(1,n)) - 1/float(n) * np.ones((n,n)) - S = np.trace(matrixMult(matrixMult(P,K),P)) - K_n = (n - 1)*K / S - return K_n - return K - -def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): - """ - - Performs a basic GWAS scan using the LMM. This function - uses the LMM module to assess association at each SNP and - does some simple cleanup, such as removing missing individuals - per SNP and re-computing the eigen-decomp - - Y - n x 1 phenotype vector - X - n x m SNP matrix (genotype matrix) - K - n x n kinship matrix - Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K - X0 - n x q covariate matrix - REML - use restricted maximum likelihood - refit - refit the variance component for each SNP - - """ - n = X.shape[0] - m = X.shape[1] - prins("Initialize GWAS") - print("genotype matrix n is:", n) - print("genotype matrix m is:", m) - - if X0 is None: - X0 = np.ones((n,1)) - - # Remove missing values in Y and adjust associated parameters - v = np.isnan(Y) - if v.sum(): - keep = True - v - keep = keep.reshape((-1,)) - Y = Y[keep] - X = X[keep,:] - X0 = X0[keep,:] - K = K[keep,:][:,keep] - Kva = [] - Kve = [] - - if len(Y) == 0: - return np.ones(m)*np.nan,np.ones(m)*np.nan - - L = LMM(Y,K,Kva,Kve,X0) - if not refit: L.fit() - - PS = [] - TS = [] - - n = X.shape[0] - m = X.shape[1] - - for i in range(m): - x = X[:,i].reshape((n,1)) - v = np.isnan(x).reshape((-1,)) - if v.sum(): - keep = True - v - xs = x[keep,:] - if xs.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue - - Ys = Y[keep] - X0s = X0[keep,:] - Ks = K[keep,:][:,keep] - Ls = LMM(Ys,Ks,X0=X0s) - if refit: - Ls.fit(X=xs) - else: - Ls.fit() - ts,ps = Ls.association(xs,REML=REML) - else: - if x.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue - - if refit: - L.fit(X=x) - ts,ps = L.association(x,REML=REML) - - PS.append(ps) - TS.append(ts) - - return TS,PS - -class LMM2: - - """This is a simple version of EMMA/fastLMM. - - The main purpose of this module is to take a phenotype vector (Y), - a set of covariates (X) and a kinship matrix (K) and to optimize - this model by finding the maximum-likelihood estimates for the - model parameters. There are three model parameters: heritability - (h), covariate coefficients (beta) and the total phenotypic - variance (sigma). Heritability as defined here is the proportion - of the total variance (sigma) that is attributed to the kinship - matrix. - - For simplicity, we assume that everything being input is a numpy - array. If this is not the case, the module may throw an error as - conversion from list to numpy array is not done consistently. - - """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): - - """The constructor takes a phenotype vector or array Y of size n. It - takes a kinship matrix K of size n x n. Kva and Kve can be - computed as Kva,Kve = linalg.eigh(K) and cached. If they are - not provided, the constructor will calculate them. X0 is an - optional covariate matrix of size n x q, where there are q - covariates. When this parameter is not provided, the - constructor will set X0 to an n x 1 matrix of all ones to - represent a mean effect. - """ - - if X0 is None: - X0 = np.ones(len(Y)).reshape(len(Y),1) - self.verbose = verbose - - x = True - np.isnan(Y) - x = x.reshape(-1,) - if not x.sum() == len(Y): - if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) - Y = Y[x] - K = K[x,:][:,x] - X0 = X0[x,:] - Kva = [] - Kve = [] - self.nonmissing = x - - print("this K is:", K.shape, K) - - if len(Kva) == 0 or len(Kve) == 0: - # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K) - end = time.time() - if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) - - self.K = K - self.Kva = Kva - self.Kve = Kve - self.N = self.K.shape[0] - self.Y = Y.reshape((self.N,1)) - self.X0 = X0 - - if sum(self.Kva < 1e-6): - if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) - self.Kva[self.Kva < 1e-6] = 1e-6 - - self.transform() - - def transform(self): - - """ - Computes a transformation on the phenotype vector and the covariate matrix. - The transformation is obtained by left multiplying each parameter by the transpose of the - eigenvector matrix of K (the kinship). - """ - - self.Yt = matrixMult(self.Kve.T, self.Y) - self.X0t = matrixMult(self.Kve.T, self.X0) - self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) - self.q = self.X0t.shape[1] - - def getMLSoln(self,h,X): - - """ - Obtains the maximum-likelihood estimates for the covariate coefficients (beta), - the total variance of the trait (sigma) and also passes intermediates that can - be utilized in other functions. The input parameter h is a value between 0 and 1 and represents - the heritability or the proportion of the total variance attributed to genetics. The X is the - covariate matrix. - """ - - S = 1.0/(h*self.Kva + (1.0 - h)) - Xt = X.T*S - XX = matrixMult(Xt,X) - XX_i = inv(XX) - beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) - Yt = self.Yt - matrixMult(X,beta) - Q = np.dot(Yt.T*S,Yt) - sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) - return beta,sigma,Q,XX_i,XX - - def LL_brent(self,h,X=None,REML=False): - #brent will not be bounded by the specified bracket. - # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. - if h < 0: return 1e6 - return -self.LL(h,X,stack=False,REML=REML)[0] - - def LL(self,h,X=None,stack=True,REML=False): - - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X is None: X = self.X0t - elif stack: - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL - - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(det(matrixMult(X.T,X))) - np.log(det(XX)) - LL = LL + 0.5*LL_REML_part - - - LL = LL.sum() - return LL,beta,sigma,XX_i - - def getMax(self,H, X=None,REML=False): - - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: - HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - if np.isnan(HOpt[-1]): HOpt[-1] = H[i-1] - #if np.isnan(HOpt[-1]): HOpt[-1] = self.LLs[i-1] - #if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] - - if len(HOpt) > 1: - if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: return H[0] - else: return H[n-1] - - - def fit(self,X=None,ngrids=100,REML=True): - - """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. - """ - - if X is None: X = self.X0t - else: - #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack - - H = np.array(range(ngrids)) / float(ngrids) - L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) - self.LLs = L - - hmax = self.getMax(H,X,REML) - L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) - - self.H = H - self.optH = hmax.sum() - self.optLL = L - self.optBeta = beta - self.optSigma = sigma.sum() - - return hmax,beta,sigma,L - - def association(self,X,h=None,stack=True,REML=True,returnBeta=False): - """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h is None, the optimal h stored in optH is used. - - """ - if False: - print "X=",X - print "h=",h - print "q=",self.q - print "self.Kve=",self.Kve - print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack - - if stack: - # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) - m = matrixMult(self.Kve.T,X) - # print "m=",m - m = m[:,0] - self.X0t_stack[:,(self.q)] = m - X = self.X0t_stack - - if h is None: h = self.optH - - L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) - q = len(beta) - ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) - - if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma - return ts,ps - - def tstat(self,beta,var,sigma,q,log=False): - - """ - Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. - This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. - """ - - ts = beta / np.sqrt(var * sigma) - #ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - # sf == survival function - this is more accurate -- could also use logsf if the precision is not good enough - if log: - ps = 2.0 + (stats.t.logsf(np.abs(ts), self.N-q)) - else: - ps = 2.0*(stats.t.sf(np.abs(ts), self.N-q)) - if not len(ts) == 1 or not len(ps) == 1: - raise Exception("Something bad happened :(") - return ts.sum(),ps.sum() - - def plotFit(self,color='b-',title=''): - - """ - Simple function to visualize the likelihood space. It takes the LLs - calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. - The resulting "probabilities" are normalized to one and plotted against heritability. - This can be seen as an approximation to the posterior distribuiton of heritability. - - For diagnostic purposes this lets you see if there is one distinct maximum or multiple - and what the variance of the parameter looks like. - """ - import matplotlib.pyplot as pl - - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - pl.plot(self.H,p,color) - pl.xlabel("Heritability") - pl.ylabel("Probability of data") - pl.title(title) - - def meanAndVar(self): - - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - mn = (self.H * p).sum() - vx = ((self.H - mn)**2 * p).sum() - - return mn,vx - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py deleted file mode 100644 index 5c71db6a..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys -import time -import numpy as np -from numpy.distutils.system_info import get_info; -from scipy import linalg -from scipy import optimize -from scipy import stats - -useNumpy = None -hasBLAS = None - -def matrix_initialize(useBLAS=True): - global useNumpy # module based variable - if useBLAS and useNumpy == None: - print get_info('blas_opt') - try: - linalg.fblas - sys.stderr.write("INFO: using linalg.fblas\n") - useNumpy = False - hasBLAS = True - except AttributeError: - sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n") - useNumpy = True - else: - sys.stderr.write("INFO: using numpy.dot\n") - useNumpy=True - -def matrixMult(A,B): - global useNumpy # module based variable - - if useNumpy: - return np.dot(A,B) - - # If the matrices are in Fortran order then the computations will be faster - # when using dgemm. Otherwise, the function will copy the matrix and that takes time. - if not A.flags['F_CONTIGUOUS']: - AA = A.T - transA = True - else: - AA = A - transA = False - - if not B.flags['F_CONTIGUOUS']: - BB = B.T - transB = True - else: - BB = B - transB = False - - return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) - -def matrixMultT(M): - # res = np.dot(W,W.T) - # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False) - return matrixMult(M,M.T) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py deleted file mode 100644 index 7b652515..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ /dev/null @@ -1,65 +0,0 @@ -# Phenotype routines - -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import sys -import numpy as np - -# ---- A trick to decide on the environment: -try: - from wqflask.my_pylmm.pyLMM import chunks - from gn2 import uses, progress_set_func -except ImportError: - has_gn2=False - import standalone as handlers - from standalone import uses, progress_set_func - -progress,debug,info,mprint = uses('progress','debug','info','mprint') - -def remove_missing(n,y,g): - """ - Remove missing data from matrices, make sure the genotype data has - individuals as rows - """ - assert(y is not None) - assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape) - - y1 = y - g1 = g - v = np.isnan(y) - keep = True - v - if v.sum(): - info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum())) - y1 = y[keep] - g1 = g[keep,:] - n = y1.shape[0] - return n,y1,g1,keep - -def remove_missing_new(n,y): - """ - Remove missing data. Returns new n,y,keep - """ - assert(y is not None) - y1 = y - v = np.isnan(y) - keep = True - v - if v.sum(): - info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum())) - y1 = y[keep] - n = y1.shape[0] - return n,y1,keep - diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py deleted file mode 100644 index 7bd2df91..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py +++ /dev/null @@ -1,107 +0,0 @@ -# Plink module -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# Some of the BED file parsing came from pylmm: -# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -# According to the PLINK information - -# Parse a textual BIM file and return the contents as a list of tuples -# -# Extended variant information file accompanying a .bed binary genotype table. -# -# A text file with no header line, and one line per variant with the following six fields: -# -# Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name -# Variant identifier -# Position in morgans or centimorgans (safe to use dummy value of '0') -# Base-pair coordinate (normally 1-based, but 0 ok; limited to 231-2) -# Allele 1 (corresponding to clear bits in .bed; usually minor) -# Allele 2 (corresponding to set bits in .bed; usually major) -# -# Allele codes can contain more than one character. Variants with negative bp coordinates are ignored by PLINK. Example -# -# 1 mm37-1-3125499 0 3125499 1 2 -# 1 mm37-1-3125701 0 3125701 1 2 -# 1 mm37-1-3187481 0 3187481 1 2 - -import struct -# import numpy as np - -def readbim(fn): - res = [] - for line in open(fn): - list = line.split() - if len([True for e in list if e == 'nan']) == 0: - res.append( (list[0],list[1],int(list[2]),int(list[3]),int(list[4]),int(list[5])) ) - else: - res.append( (list[0],list[1],list[2],float('nan'),float('nan'),float('nan')) ) - return res - -# .bed (PLINK binary biallelic genotype table) -# -# Primary representation of genotype calls at biallelic variants. Must -# be accompanied by .bim and .fam files. Basically contains num SNP -# blocks containing IND (compressed 4 IND into a byte) -# -# Since it is a biallelic format it supports for every individual -# whether the first allele is homozygous (b00), the second allele is -# homozygous (b11), it is heterozygous (b10) or that it is missing -# (b01). - -# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bed -# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam -# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim - -def readbed(fn,inds,encoding,func=None): - - # For every SNP block fetch the individual genotypes using values - # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles - def fetchGenotypes(X): - # D = { \ - # '00': 0.0, \ - # '10': 0.5, \ - # '11': 1.0, \ - # '01': float('nan') \ - # } - - Didx = { '00': 0, '10': 1, '11': 2, '01': 3 } - G = [] - for x in X: - if not len(x) == 10: - xx = x[2:] - x = '0b' + '0'*(8 - len(xx)) + xx - a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4]) - L = [encoding[Didx[y]] for y in [a,b,c,d]] - G += L - G = G[:inds] - # G = np.array(G) - return G - - bytes = inds / 4 + (inds % 4 and 1 or 0) - format = 'c'*bytes - count = 0 - with open(fn,'rb') as f: - magic = f.read(3) - assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01") - while True: - count += 1 - X = f.read(bytes) - if not X: - return(count-1) - XX = [bin(ord(x)) for x in struct.unpack(format,X)] - xs = fetchGenotypes(XX) - func(count,xs) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py deleted file mode 100644 index 6b241cd6..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ /dev/null @@ -1,229 +0,0 @@ -# This is the LMM runner that calls the possible methods using command line -# switches. It acts as a multiplexer where all the invocation complexity -# is kept outside the main LMM routines. -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -from optparse import OptionParser -import sys -import tsvreader -import numpy as np - -# Add local dir to PYTHONPATH -import os -cwd = os.path.dirname(__file__) -if sys.path[0] != cwd: - sys.path.insert(1,cwd) - -# pylmm modules -from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas -from kinship import kinship, kinship_full -import genotype -import phenotype -from standalone import uses - -progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal') - -usage = """ -python runlmm.py [options] command - - runlmm.py processing multiplexer reads standardised input formats - and calls the different routines (writes to stdout) - - Current commands are: - - parse : only parse input files - redis : use Redis to call into GN2 - kinship : calculate (new) kinship matrix - - try --help for more information -""" - - -parser = OptionParser(usage=usage) -# parser.add_option("-f", "--file", dest="input file", -# help="In", metavar="FILE") -parser.add_option("--kinship",dest="kinship", - help="Kinship file format 1.0") -parser.add_option("--pheno",dest="pheno", - help="Phenotype file format 1.0") -parser.add_option("--geno",dest="geno", - help="Genotype file format 1.0") -parser.add_option("--maf-normalization", - action="store_true", dest="maf_normalization", default=False, - help="Apply MAF genotype normalization") -parser.add_option("--genotype-normalization", - action="store_true", dest="genotype_normalization", default=False, - help="Force genotype normalization") -parser.add_option("--remove-missing-phenotypes", - action="store_true", dest="remove_missing_phenotypes", default=False, - help="Remove missing phenotypes") -parser.add_option("-q", "--quiet", - action="store_false", dest="verbose", default=True, - help="don't print status messages to stdout") -parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication") -parser.add_option("-t", "--threads", - type="int", dest="numThreads", - help="Threads to use") -parser.add_option("--saveKvaKve", - action="store_true", dest="saveKvaKve", default=False, - help="Testing mode") -parser.add_option("--test", - action="store_true", dest="testing", default=False, - help="Testing mode") -parser.add_option("--test-kinship", - action="store_true", dest="test_kinship", default=False, - help="Testing mode for Kinship calculation") - -(options, args) = parser.parse_args() - -if len(args) != 1: - print usage - sys.exit(1) - -cmd = args[0] -print "Command: ",cmd - -k = None -y = None -g = None - -if options.kinship: - k = tsvreader.kinship(options.kinship) - print k.shape - -if options.pheno: - y = tsvreader.pheno(options.pheno) - print y.shape - -if options.geno and cmd != 'iterator': - g = tsvreader.geno(options.geno) - print g.shape - -def check_results(ps,ts): - print np.array(ps) - print len(ps),sum(ps) - p1 = round(ps[0],4) - p2 = round(ps[-1],4) - if options.geno == 'data/small.geno': - info("Validating results for "+options.geno) - assert p1==0.7387, "p1=%f" % p1 - assert p2==0.7387, "p2=%f" % p2 - if options.geno == 'data/small_na.geno': - info("Validating results for "+options.geno) - assert p1==0.062, "p1=%f" % p1 - assert p2==0.062, "p2=%f" % p2 - if options.geno == 'data/test8000.geno': - info("Validating results for "+options.geno) - assert round(sum(ps)) == 4070 - assert len(ps) == 8000 - info("Run completed") - -if y is not None: - n = y.shape[0] - -if cmd == 'run': - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - n = len(y) - m = g.shape[1] - ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP - check_results(ps,ts) -elif cmd == 'iterator': - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - geno_iterator = tsvreader.geno_iter(options.geno) - ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator) - check_results(ps,ts) -elif cmd == 'redis_new': - # The main difference between redis_new and redis is that missing - # phenotypes are handled by the first - if options.remove_missing_phenotypes: - raise Exception('Can not use --remove-missing-phenotypes with LMM2') - Y = y - G = g - print "Original G",G.shape, "\n", G - # gt = G.T - # G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True) - check_results(ps,ts) -elif cmd == 'redis': - # Emulating the redis setup of GN2 - G = g - print "Original G",G.shape, "\n", G - if y is not None and options.remove_missing_phenotypes: - gnt = np.array(g).T - n,Y,g,keep = phenotype.remove_missing(n,y,gnt) - G = g.T - print "Removed missing phenotypes",G.shape, "\n", G - else: - Y = y - if options.maf_normalization: - G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - print "MAF replacements: \n",G - if options.genotype_normalization: - G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - g = None - gnt = None - - # gt = G.T - # G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False) - check_results(ps,ts) -elif cmd == 'kinship': - G = g - print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: - gnt = np.array(g).T - n,Y,g,keep = phenotype.remove_missing(n,y,g.T) - G = g.T - print "Removed missing phenotypes",G.shape, "\n", G - if options.maf_normalization: - G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - print "MAF replacements: \n",G - if options.genotype_normalization: - G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - g = None - gnt = None - - if options.test_kinship: - K = kinship_full(np.copy(G)) - print "Genotype",G.shape, "\n", G - print "first Kinship method",K.shape,"\n",K - k1 = round(K[0][0],4) - K2,G = calculate_kinship_new(np.copy(G)) - print "Genotype",G.shape, "\n", G - print "GN2 Kinship method",K2.shape,"\n",K2 - k2 = round(K2[0][0],4) - - print "Genotype",G.shape, "\n", G - K3 = kinship(G) - print "third Kinship method",K3.shape,"\n",K3 - sys.stderr.write(options.geno+"\n") - k3 = round(K3[0][0],4) - if options.geno == 'data/small.geno': - assert k1==0.8333, "k1=%f" % k1 - assert k2==0.9375, "k2=%f" % k2 - assert k3==0.9375, "k3=%f" % k3 - if options.geno == 'data/small_na.geno': - assert k1==0.8333, "k1=%f" % k1 - assert k2==0.7172, "k2=%f" % k2 - assert k3==0.7172, "k3=%f" % k3 - if options.geno == 'data/test8000.geno': - assert k3==1.4352, "k3=%f" % k3 - -else: - fatal("Doing nothing") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py deleted file mode 100644 index 40b2021d..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py +++ /dev/null @@ -1,110 +0,0 @@ -# Standalone specific methods and callback handler -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# Set the log level with -# -# logging.basicConfig(level=logging.DEBUG) - -from __future__ import absolute_import, print_function, division - -import numpy as np -import sys -import logging - -# logger = logging.getLogger(__name__) -logger = logging.getLogger('lmm2') -logging.basicConfig(level=logging.DEBUG) -np.set_printoptions(precision=3,suppress=True) - -progress_location = None -progress_current = None -progress_prev_perc = None - -def progress_default_func(location,count,total): - global progress_current - value = round(count*100.0/total) - progress_current = value - -progress_func = progress_default_func - -def progress_set_func(func): - global progress_func - progress_func = func - -def progress(location, count, total): - global progress_location - global progress_prev_perc - - perc = round(count*100.0/total) - if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): - progress_func(location, count, total) - logger.info("Progress: %s %d%%" % (location,perc)) - progress_location = location - progress_prev_perc = perc - -def mprint(msg,data): - """ - Array/matrix print function - """ - m = np.array(data) - if m.ndim == 1: - print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) - if m.ndim == 2: - print(msg,m.shape,"=\n[", - m[0][0:3]," ... ",m[0][-3:],"\n ", - m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", - m[-2][0:3]," ... ",m[-2][-3:],"\n ", - m[-1][0:3]," ... ",m[-1][-3:],"]") - -def fatal(msg): - logger.critical(msg) - raise Exception(msg) - -def callbacks(): - return dict( - write = sys.stdout.write, - writeln = print, - debug = logger.debug, - info = logger.info, - warning = logger.warning, - error = logger.error, - critical = logger.critical, - fatal = fatal, - progress = progress, - mprint = mprint - ) - -def uses(*funcs): - """ - Some sugar - """ - return [callbacks()[func] for func in funcs] - -# ----- Minor test cases: - -if __name__ == '__main__': - # logging.basicConfig(level=logging.DEBUG) - logging.debug("Test %i" % (1)) - d = callbacks()['debug'] - d("TEST") - wrln = callbacks()['writeln'] - wrln("Hello %i" % 34) - progress = callbacks()['progress'] - progress("I am half way",50,100) - list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, - 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] - mprint("list",list) - matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], - [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] - mprint("matrix",matrix) - ix,dx = uses("info","debug") - ix("ix") - dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py deleted file mode 100644 index 004d45c6..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import print_function, division, absolute_import -from redis import Redis - -import simplejson as json - -class TempData(object): - - def __init__(self, temp_uuid): - self.temp_uuid = temp_uuid - self.redis = Redis() - self.key = "tempdata:{}".format(self.temp_uuid) - - def store(self, field, value): - self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*15) # Expire in 15 minutes - - def get_all(self): - return self.redis.hgetall(self.key) - - -if __name__ == "__main__": - redis = Redis() - for key in redis.keys(): - for field in redis.hkeys(key): - print("{}.{}={}".format(key, field, redis.hget(key, field))) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py deleted file mode 100644 index 66b34ee2..00000000 --- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py +++ /dev/null @@ -1,122 +0,0 @@ -# Standard file readers -# -# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as -# published by the Free Software Foundation, either version 3 of the -# License, or (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import sys -import os -import numpy as np -import csv - -def kinship(fn): - K1 = [] - print fn - with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Kinship format version 1.0") - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - ns = np.genfromtxt(row[1:]) - K1.append(ns) # <--- slow - K = np.array(K1) - return K - -def pheno(fn): - Y1 = [] - print fn - with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Phenotype format version 1.0") - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - ns = np.genfromtxt(row[1:]) - Y1.append(ns) # <--- slow - Y = np.array(Y1) - return Y - -def geno(fn): - G1 = [] - hab_mapper = {'A':0,'H':1,'B':2,'-':3} - pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] - - print fn - with open(fn,'r') as tsvin: - line = tsvin.readline().strip() - assert line == "# Genotype format version 1.0", line - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - # print(row) - id = row[0] - gs = list(row[1]) - # print id,gs - gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] - # print id,gs2 - # ns = np.genfromtxt(row[1:]) - G1.append(gs2) # <--- slow - G = np.array(G1) - return G - -def geno(fn): - G1 = [] - for id,values in geno_iter(fn): - G1.append(values) # <--- slow - G = np.array(G1) - return G - -def geno_callback(fn,func): - hab_mapper = {'A':0,'H':1,'B':2,'-':3} - pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] - - print fn - with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Genotype format version 1.0") - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - id = row[0] - gs = list(row[1]) - gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] - func(id,gs2) - -def geno_iter(fn): - """ - Yield a tuple of snpid and values - """ - hab_mapper = {'A':0,'H':1,'B':2,'-':3} - pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ] - - print fn - with open(fn,'r') as tsvin: - assert(tsvin.readline().strip() == "# Genotype format version 1.0") - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsvin.readline() - tsv = csv.reader(tsvin, delimiter='\t') - for row in tsv: - id = row[0] - gs = list(row[1]) - gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs] - yield (id,gs2) -- cgit v1.2.3 From eef63adae30c1547f4c4189eb59a18d190c3aa08 Mon Sep 17 00:00:00 2001 From: pjotrp Date: Mon, 11 May 2015 17:03:42 -0500 Subject: Moving pylmm out of the tree --- wqflask/base/data_set.py | 2 +- wqflask/utility/chunks.py | 96 ++++ wqflask/wqflask/heatmap/heatmap.py | 635 +++++++++++---------- .../wqflask/marker_regression/marker_regression.py | 4 +- 4 files changed, 417 insertions(+), 320 deletions(-) create mode 100644 wqflask/utility/chunks.py diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 489bd374..9f805fc3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -42,7 +42,7 @@ from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench -from wqflask.my_pylmm.pyLMM import chunks +from wqflask.utility import chunks from maintenance import get_group_samplelists diff --git a/wqflask/utility/chunks.py b/wqflask/utility/chunks.py new file mode 100644 index 00000000..9565fb96 --- /dev/null +++ b/wqflask/utility/chunks.py @@ -0,0 +1,96 @@ +from __future__ import absolute_import, print_function, division + +import math +import time + + +def divide_into_chunks(the_list, number_chunks): + """Divides a list into approximately number_chunks smaller lists + + >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3) + [[1, 2, 7], [3, 22, 8], [5, 22, 333]] + >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4) + [[1, 2, 7], [3, 22, 8], [5, 22, 333]] + >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5) + [[1, 2], [7, 3], [22, 8], [5, 22], [333]] + >>> + + """ + length = len(the_list) + + if length == 0: + return [[]] + + if length <= number_chunks: + number_chunks = length + + chunksize = int(math.ceil(length / number_chunks)) + + chunks = [] + for counter in range(0, length, chunksize): + chunks.append(the_list[counter:counter+chunksize]) + + return chunks + +def _confirm_chunk(original, result): + all_chunked = [] + for chunk in result: + all_chunked.extend(chunk) + print("length of all chunked:", len(all_chunked)) + assert original == all_chunked, "You didn't chunk right" + + +def _chunk_test(divide_func): + import random + random.seed(7) + + number_exact = 0 + total_amount_off = 0 + + for test in range(1, 1001): + print("\n\ntest:", test) + number_chunks = random.randint(1, 20) + number_elements = random.randint(0, 100) + the_list = list(range(1, number_elements)) + result = divide_func(the_list, number_chunks) + + print("Dividing list of length {} into approximately {} chunks - got {} chunks".format( + len(the_list), number_chunks, len(result))) + print("result:", result) + + _confirm_chunk(the_list, result) + + amount_off = abs(number_chunks - len(result)) + if amount_off == 0: + number_exact += 1 + else: + total_amount_off += amount_off + + + print("\n{} exact out of {} [Total amount off: {}]".format(number_exact, + test, + total_amount_off)) + assert number_exact == 558 + assert total_amount_off == 1580 + return number_exact, total_amount_off + + +def _main(): + info = dict() + #funcs = (("sam", sam_divide_into_chunks), ("zach", zach_divide_into_chunks)) + funcs = (("only one", divide_into_chunks),) + for name, func in funcs: + start = time.time() + number_exact, total_amount_off = _chunk_test(func) + took = time.time() - start + info[name] = dict(number_exact=number_exact, + total_amount_off=total_amount_off, + took=took) + + print("info is:", info) + +if __name__ == '__main__': + _main() + print("\nConfirming doctests...") + import doctest + doctest.testmod() diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 9b6b1b69..035736fd 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -1,317 +1,318 @@ -from __future__ import absolute_import, print_function, division - -import sys -sys.path.append(".") - -import gc -import string -import cPickle -import os -import datetime -import time -import pp -import math -import collections -import resource - -import scipy -import numpy as np -from scipy import linalg - -from pprint import pformat as pf - -from htmlgen import HTMLgen2 as HT -import reaper - -from base.trait import GeneralTrait -from base import data_set -from base import species -from base import webqtlConfig -from utility import webqtlUtil -from wqflask.my_pylmm.data import prep_data -from wqflask.my_pylmm.pyLMM import lmm -from wqflask.my_pylmm.pyLMM import input -from utility import helper_functions -from utility import Plot, Bunch -from utility import temp_data - -from MySQLdb import escape_string as escape - -import cPickle as pickle -import simplejson as json - -from pprint import pformat as pf - -from redis import Redis -Redis = Redis() - -from flask import Flask, g - -class Heatmap(object): - - def __init__(self, start_vars, temp_uuid): - - trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] - - helper_functions.get_trait_db_obs(self, trait_db_list) - - self.temp_uuid = temp_uuid - self.num_permutations = 5000 - self.dataset = self.trait_list[0][1] - - self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure - - self.all_sample_list = [] - self.traits = [] - - chrnames = [] - self.species = species.TheSpecies(dataset=self.trait_list[0][1]) - for key in self.species.chromosomes.chromosomes.keys(): - chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) - - for trait_db in self.trait_list: - - this_trait = trait_db[0] - self.traits.append(this_trait.name) - this_sample_data = this_trait.data - - for sample in this_sample_data: - if sample not in self.all_sample_list: - self.all_sample_list.append(sample) - - self.sample_data = [] - for trait_db in self.trait_list: - this_trait = trait_db[0] - this_sample_data = this_trait.data - - #self.sample_data[this_trait.name] = [] - this_trait_vals = [] - for sample in self.all_sample_list: - if sample in this_sample_data: - this_trait_vals.append(this_sample_data[sample].value) - #self.sample_data[this_trait.name].append(this_sample_data[sample].value) - else: - this_trait_vals.append('') - #self.sample_data[this_trait.name].append('') - self.sample_data.append(this_trait_vals) - - self.gen_reaper_results() - #self.gen_pylmm_results() - - #chrnames = [] - lodnames = [] - chr_pos = [] - pos = [] - markernames = [] - - for trait in self.trait_results.keys(): - lodnames.append(trait) - - for marker in self.dataset.group.markers.markers: - #if marker['chr'] not in chrnames: - # chr_ob = [marker['chr'], "filler"] - # chrnames.append(chr_ob) - chr_pos.append(marker['chr']) - pos.append(marker['Mb']) - markernames.append(marker['name']) - - self.json_data['chrnames'] = chrnames - self.json_data['lodnames'] = lodnames - self.json_data['chr'] = chr_pos - self.json_data['pos'] = pos - self.json_data['markernames'] = markernames - - for trait in self.trait_results: - self.json_data[trait] = self.trait_results[trait] - - self.js_data = dict( - json_data = self.json_data - ) - - print("self.js_data:", self.js_data) - - - def gen_reaper_results(self): - self.trait_results = {} - for trait_db in self.trait_list: - self.dataset.group.get_markers() - this_trait = trait_db[0] - #this_db = trait_db[1] - genotype = self.dataset.group.read_genotype_file() - samples, values, variances = this_trait.export_informative() - - trimmed_samples = [] - trimmed_values = [] - for i in range(0, len(samples)): - if samples[i] in self.dataset.group.samplelist: - trimmed_samples.append(samples[i]) - trimmed_values.append(values[i]) - - self.lrs_array = genotype.permutation(strains = trimmed_samples, - trait = trimmed_values, - nperm= self.num_permutations) - - #self.suggestive = self.lrs_array[int(self.num_permutations*0.37-1)] - #self.significant = self.lrs_array[int(self.num_permutations*0.95-1)] - - reaper_results = genotype.regression(strains = trimmed_samples, - trait = trimmed_values) - - - lrs_values = [float(qtl.lrs) for qtl in reaper_results] - print("lrs_values:", lrs_values) - #self.dataset.group.markers.add_pvalues(p_values) - - self.trait_results[this_trait.name] = [] - for qtl in reaper_results: - if qtl.additive > 0: - self.trait_results[this_trait.name].append(-float(qtl.lrs)) - else: - self.trait_results[this_trait.name].append(float(qtl.lrs)) - #for lrs in lrs_values: - # if - # self.trait_results[this_trait.name].append(lrs) - - - #this_db_samples = self.dataset.group.samplelist - #this_sample_data = this_trait.data - ##print("this_sample_data", this_sample_data) - #this_trait_vals = [] - #for index, sample in enumerate(this_db_samples): - # if sample in this_sample_data: - # sample_value = this_sample_data[sample].value - # this_trait_vals.append(sample_value) - # else: - # this_trait_vals.append("x") - - #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) - - #key = "pylmm:input:" + str(self.temp_uuid) - #print("key is:", pf(key)) - - #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - #no_val_samples = self.identify_empty_samples(this_trait_vals) - #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - #genotype_matrix = np.array(trimmed_genotype_data).T - - #print("genotype_matrix:", str(genotype_matrix.tolist())) - #print("pheno_vector:", str(pheno_vector.tolist())) - - #params = dict(pheno_vector = pheno_vector.tolist(), - # genotype_matrix = genotype_matrix.tolist(), - # restricted_max_likelihood = True, - # refit = False, - # temp_uuid = str(self.temp_uuid), - # - # # meta data - # timestamp = datetime.datetime.now().isoformat(), - # ) - # - #json_params = json.dumps(params) - ##print("json_params:", json_params) - #Redis.set(key, json_params) - #Redis.expire(key, 60*60) - #print("before printing command") - # - #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - # "other") - #print("command is:", command) - #print("after printing command") - # - #os.system(command) - # - #json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) - - def gen_pylmm_results(self): - self.trait_results = {} - for trait_db in self.trait_list: - this_trait = trait_db[0] - #this_db = trait_db[1] - self.dataset.group.get_markers() - - this_db_samples = self.dataset.group.samplelist - this_sample_data = this_trait.data - #print("this_sample_data", this_sample_data) - this_trait_vals = [] - for index, sample in enumerate(this_db_samples): - if sample in this_sample_data: - sample_value = this_sample_data[sample].value - this_trait_vals.append(sample_value) - else: - this_trait_vals.append("x") - - pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) - - key = "pylmm:input:" + str(self.temp_uuid) - #print("key is:", pf(key)) - - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples(this_trait_vals) - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - genotype_matrix = np.array(trimmed_genotype_data).T - - #print("genotype_matrix:", str(genotype_matrix.tolist())) - #print("pheno_vector:", str(pheno_vector.tolist())) - - params = dict(pheno_vector = pheno_vector.tolist(), - genotype_matrix = genotype_matrix.tolist(), - restricted_max_likelihood = True, - refit = False, - temp_uuid = str(self.temp_uuid), - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - #print("json_params:", json_params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - print("before printing command") - - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - "other") - print("command is:", command) - print("after printing command") - - os.system(command) - - json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) - results = json.loads(json_results[1]) - p_values = [float(result) for result in results['p_values']] - #print("p_values:", p_values) - self.dataset.group.markers.add_pvalues(p_values) - - self.trait_results[this_trait.name] = [] - for marker in self.dataset.group.markers.markers: - self.trait_results[this_trait.name].append(marker['lod_score']) - - - def identify_empty_samples(self, values): - no_val_samples = [] - for sample_count, val in enumerate(values): - if val == "x": - no_val_samples.append(sample_count) - return no_val_samples - - def trim_genotypes(self, genotype_data, no_value_samples): - trimmed_genotype_data = [] - for marker in genotype_data: - new_genotypes = [] - for item_count, genotype in enumerate(marker): - if item_count in no_value_samples: - continue - try: - genotype = float(genotype) - except ValueError: - genotype = np.nan - pass - new_genotypes.append(genotype) - trimmed_genotype_data.append(new_genotypes) - return trimmed_genotype_data - - \ No newline at end of file +from __future__ import absolute_import, print_function, division + +import sys +sys.path.append(".") + +import gc +import string +import cPickle +import os +import datetime +import time +import pp +import math +import collections +import resource + +import scipy +import numpy as np +from scipy import linalg + +from pprint import pformat as pf + +from htmlgen import HTMLgen2 as HT +import reaper + +from base.trait import GeneralTrait +from base import data_set +from base import species +from base import webqtlConfig +from utility import webqtlUtil +from wqflask.my_pylmm.data import prep_data +# from wqflask.my_pylmm.pyLMM import lmm +# from wqflask.my_pylmm.pyLMM import input +from utility import helper_functions +from utility import Plot, Bunch +from utility import temp_data + +from MySQLdb import escape_string as escape + +import cPickle as pickle +import simplejson as json + +from pprint import pformat as pf + +from redis import Redis +Redis = Redis() + +from flask import Flask, g + +class Heatmap(object): + + def __init__(self, start_vars, temp_uuid): + + trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')] + + helper_functions.get_trait_db_obs(self, trait_db_list) + + self.temp_uuid = temp_uuid + self.num_permutations = 5000 + self.dataset = self.trait_list[0][1] + + self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure + + self.all_sample_list = [] + self.traits = [] + + chrnames = [] + self.species = species.TheSpecies(dataset=self.trait_list[0][1]) + for key in self.species.chromosomes.chromosomes.keys(): + chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) + + for trait_db in self.trait_list: + + this_trait = trait_db[0] + self.traits.append(this_trait.name) + this_sample_data = this_trait.data + + for sample in this_sample_data: + if sample not in self.all_sample_list: + self.all_sample_list.append(sample) + + self.sample_data = [] + for trait_db in self.trait_list: + this_trait = trait_db[0] + this_sample_data = this_trait.data + + #self.sample_data[this_trait.name] = [] + this_trait_vals = [] + for sample in self.all_sample_list: + if sample in this_sample_data: + this_trait_vals.append(this_sample_data[sample].value) + #self.sample_data[this_trait.name].append(this_sample_data[sample].value) + else: + this_trait_vals.append('') + #self.sample_data[this_trait.name].append('') + self.sample_data.append(this_trait_vals) + + self.gen_reaper_results() + #self.gen_pylmm_results() + + #chrnames = [] + lodnames = [] + chr_pos = [] + pos = [] + markernames = [] + + for trait in self.trait_results.keys(): + lodnames.append(trait) + + for marker in self.dataset.group.markers.markers: + #if marker['chr'] not in chrnames: + # chr_ob = [marker['chr'], "filler"] + # chrnames.append(chr_ob) + chr_pos.append(marker['chr']) + pos.append(marker['Mb']) + markernames.append(marker['name']) + + self.json_data['chrnames'] = chrnames + self.json_data['lodnames'] = lodnames + self.json_data['chr'] = chr_pos + self.json_data['pos'] = pos + self.json_data['markernames'] = markernames + + for trait in self.trait_results: + self.json_data[trait] = self.trait_results[trait] + + self.js_data = dict( + json_data = self.json_data + ) + + print("self.js_data:", self.js_data) + + + def gen_reaper_results(self): + self.trait_results = {} + for trait_db in self.trait_list: + self.dataset.group.get_markers() + this_trait = trait_db[0] + #this_db = trait_db[1] + genotype = self.dataset.group.read_genotype_file() + samples, values, variances = this_trait.export_informative() + + trimmed_samples = [] + trimmed_values = [] + for i in range(0, len(samples)): + if samples[i] in self.dataset.group.samplelist: + trimmed_samples.append(samples[i]) + trimmed_values.append(values[i]) + + self.lrs_array = genotype.permutation(strains = trimmed_samples, + trait = trimmed_values, + nperm= self.num_permutations) + + #self.suggestive = self.lrs_array[int(self.num_permutations*0.37-1)] + #self.significant = self.lrs_array[int(self.num_permutations*0.95-1)] + + reaper_results = genotype.regression(strains = trimmed_samples, + trait = trimmed_values) + + + lrs_values = [float(qtl.lrs) for qtl in reaper_results] + print("lrs_values:", lrs_values) + #self.dataset.group.markers.add_pvalues(p_values) + + self.trait_results[this_trait.name] = [] + for qtl in reaper_results: + if qtl.additive > 0: + self.trait_results[this_trait.name].append(-float(qtl.lrs)) + else: + self.trait_results[this_trait.name].append(float(qtl.lrs)) + #for lrs in lrs_values: + # if + # self.trait_results[this_trait.name].append(lrs) + + + #this_db_samples = self.dataset.group.samplelist + #this_sample_data = this_trait.data + ##print("this_sample_data", this_sample_data) + #this_trait_vals = [] + #for index, sample in enumerate(this_db_samples): + # if sample in this_sample_data: + # sample_value = this_sample_data[sample].value + # this_trait_vals.append(sample_value) + # else: + # this_trait_vals.append("x") + + #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) + + #key = "pylmm:input:" + str(self.temp_uuid) + #print("key is:", pf(key)) + + #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] + + #no_val_samples = self.identify_empty_samples(this_trait_vals) + #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + #genotype_matrix = np.array(trimmed_genotype_data).T + + #print("genotype_matrix:", str(genotype_matrix.tolist())) + #print("pheno_vector:", str(pheno_vector.tolist())) + + #params = dict(pheno_vector = pheno_vector.tolist(), + # genotype_matrix = genotype_matrix.tolist(), + # restricted_max_likelihood = True, + # refit = False, + # temp_uuid = str(self.temp_uuid), + # + # # meta data + # timestamp = datetime.datetime.now().isoformat(), + # ) + # + #json_params = json.dumps(params) + ##print("json_params:", json_params) + #Redis.set(key, json_params) + #Redis.expire(key, 60*60) + #print("before printing command") + # + #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + # "other") + #print("command is:", command) + #print("after printing command") + # + #os.system(command) + # + #json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) + + def gen_pylmm_results(self): + # This function is NOT used. If it is, we should use a shared function with marker_regression.py + self.trait_results = {} + for trait_db in self.trait_list: + this_trait = trait_db[0] + #this_db = trait_db[1] + self.dataset.group.get_markers() + + this_db_samples = self.dataset.group.samplelist + this_sample_data = this_trait.data + #print("this_sample_data", this_sample_data) + this_trait_vals = [] + for index, sample in enumerate(this_db_samples): + if sample in this_sample_data: + sample_value = this_sample_data[sample].value + this_trait_vals.append(sample_value) + else: + this_trait_vals.append("x") + + pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals]) + + key = "pylmm:input:" + str(self.temp_uuid) + #print("key is:", pf(key)) + + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] + + no_val_samples = self.identify_empty_samples(this_trait_vals) + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + genotype_matrix = np.array(trimmed_genotype_data).T + + #print("genotype_matrix:", str(genotype_matrix.tolist())) + #print("pheno_vector:", str(pheno_vector.tolist())) + + params = dict(pheno_vector = pheno_vector.tolist(), + genotype_matrix = genotype_matrix.tolist(), + restricted_max_likelihood = True, + refit = False, + temp_uuid = str(self.temp_uuid), + + # meta data + timestamp = datetime.datetime.now().isoformat(), + ) + + json_params = json.dumps(params) + #print("json_params:", json_params) + Redis.set(key, json_params) + Redis.expire(key, 60*60) + print("before printing command") + + command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + "other") + print("command is:", command) + print("after printing command") + + os.system(command) + + json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60) + results = json.loads(json_results[1]) + p_values = [float(result) for result in results['p_values']] + #print("p_values:", p_values) + self.dataset.group.markers.add_pvalues(p_values) + + self.trait_results[this_trait.name] = [] + for marker in self.dataset.group.markers.markers: + self.trait_results[this_trait.name].append(marker['lod_score']) + + + def identify_empty_samples(self, values): + no_val_samples = [] + for sample_count, val in enumerate(values): + if val == "x": + no_val_samples.append(sample_count) + return no_val_samples + + def trim_genotypes(self, genotype_data, no_value_samples): + trimmed_genotype_data = [] + for marker in genotype_data: + new_genotypes = [] + for item_count, genotype in enumerate(marker): + if item_count in no_value_samples: + continue + try: + genotype = float(genotype) + except ValueError: + genotype = np.nan + pass + new_genotypes.append(genotype) + trimmed_genotype_data.append(new_genotypes) + return trimmed_genotype_data + + diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 49521bd6..c5fab4ee 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -37,8 +37,8 @@ from utility import webqtlUtil from wqflask.marker_regression import gemma_mapping #from wqflask.marker_regression import rqtl_mapping from wqflask.my_pylmm.data import prep_data -from wqflask.my_pylmm.pyLMM import lmm -from wqflask.my_pylmm.pyLMM import input +# from wqflask.my_pylmm.pyLMM import lmm +# from wqflask.my_pylmm.pyLMM import input from utility import helper_functions from utility import Plot, Bunch from utility import temp_data -- cgit v1.2.3 From db5072e285e2579a7195d8007587236d5ce9757d Mon Sep 17 00:00:00 2001 From: pjotrp Date: Mon, 11 May 2015 17:04:43 -0500 Subject: Remove outdated code --- .../marker_regression/marker_regression_old.py | 576 --------------------- 1 file changed, 576 deletions(-) delete mode 100644 wqflask/wqflask/marker_regression/marker_regression_old.py diff --git a/wqflask/wqflask/marker_regression/marker_regression_old.py b/wqflask/wqflask/marker_regression/marker_regression_old.py deleted file mode 100644 index 36331250..00000000 --- a/wqflask/wqflask/marker_regression/marker_regression_old.py +++ /dev/null @@ -1,576 +0,0 @@ -from __future__ import absolute_import, print_function, division - -from base.trait import GeneralTrait -from base import data_set #import create_dataset - -from pprint import pformat as pf - -import string -import sys -import datetime -import os -import collections -import uuid - -import numpy as np -from scipy import linalg - -import cPickle as pickle - -import simplejson as json - -from redis import Redis -Redis = Redis() - -from flask import Flask, g - -from base.trait import GeneralTrait -from base import data_set -from base import species -from base import webqtlConfig -from utility import webqtlUtil -from wqflask.my_pylmm.data import prep_data -from wqflask.my_pylmm.pyLMM import lmm -from wqflask.my_pylmm.pyLMM import input -from utility import helper_functions -from utility import Plot, Bunch -from utility import temp_data - -from utility.benchmark import Bench - - -class MarkerRegression(object): - - def __init__(self, start_vars, temp_uuid): - - helper_functions.get_species_dataset_trait(self, start_vars) - - #tempdata = temp_data.TempData(temp_uuid) - - self.samples = [] # Want only ones with values - self.vals = [] - - for sample in self.dataset.group.samplelist: - value = start_vars['value:' + sample] - self.samples.append(str(sample)) - self.vals.append(value) - - self.mapping_method = start_vars['method'] - self.maf = start_vars['maf'] # Minor allele frequency - print("self.maf:", self.maf) - - self.dataset.group.get_markers() - if self.mapping_method == "gemma": - qtl_results = self.run_gemma() - elif self.mapping_method == "plink": - qtl_results = self.run_plink() - #print("qtl_results:", pf(qtl_results)) - elif self.mapping_method == "pylmm": - print("RUNNING PYLMM") - #self.qtl_results = self.gen_data(tempdata) - qtl_results = self.gen_data(str(temp_uuid)) - else: - print("RUNNING NOTHING") - - self.lod_cutoff = 2 - self.filtered_markers = [] - for marker in qtl_results: - if marker['chr'] > 0: - self.filtered_markers.append(marker) - - #Get chromosome lengths for drawing the manhattan plot - chromosome_mb_lengths = {} - for key in self.species.chromosomes.chromosomes.keys(): - chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length - - self.js_data = dict( - this_trait = self.this_trait.name, - data_set = self.dataset.name, - maf = self.maf, - chromosomes = chromosome_mb_lengths, - qtl_results = self.filtered_markers, - ) - - def run_gemma(self): - """Generates p-values for each marker using GEMMA""" - - #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name)) - self.gen_pheno_txt_file() - - os.chdir("/home/zas1024/gene/web/gemma") - - gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % ( - self.dataset.group.name, - self.dataset.group.name, - self.dataset.group.name) - print("gemma_command:" + gemma_command) - - os.system(gemma_command) - - included_markers, p_values = self.parse_gemma_output() - - self.dataset.group.get_specified_markers(markers = included_markers) - - #for marker in self.dataset.group.markers.markers: - # if marker['name'] not in included_markers: - # print("marker:", marker) - # self.dataset.group.markers.markers.remove(marker) - # #del self.dataset.group.markers.markers[marker] - - self.dataset.group.markers.add_pvalues(p_values) - - return self.dataset.group.markers.markers - - - def parse_gemma_output(self): - included_markers = [] - p_values = [] - with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file: - for line in output_file: - if line.startswith("chr"): - continue - else: - included_markers.append(line.split("\t")[1]) - p_values.append(float(line.split("\t")[10])) - #p_values[line.split("\t")[1]] = float(line.split("\t")[10]) - print("p_values: ", p_values) - return included_markers, p_values - - def gen_pheno_txt_file(self): - """Generates phenotype file for GEMMA""" - - #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile: - # for sample, i in enumerate(self.samples): - # print("sample:" + str(i)) - # print("self.vals[i]:" + str(self.vals[sample])) - # outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n") - - with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile: - for i, sample in enumerate(self.samples): - outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n") - - #def gen_plink_for_gemma(self, filename): - # - # make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH, - # webqtlConfig.HTMLPATH, - # self.dataset.group.name, - # webqtlConfig.TMPDIR, - # filename, - # webqtlConfig.TMPDIR, - # filename) - # - # - - def run_plink(self): - - os.chdir("/home/zas1024/plink") - - plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) - - self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename) - - plink_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename) - - os.system(plink_command) - - count, p_values = self.parse_plink_output(plink_output_filename) - #gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % ( - # self.dataset.group.name, - # self.dataset.group.name, - # self.dataset.group.name) - #print("gemma_command:" + gemma_command) - # - #os.system(gemma_command) - # - #included_markers, p_values = self.parse_gemma_output() - # - #self.dataset.group.get_specified_markers(markers = included_markers) - - #for marker in self.dataset.group.markers.markers: - # if marker['name'] not in included_markers: - # print("marker:", marker) - # self.dataset.group.markers.markers.remove(marker) - # #del self.dataset.group.markers.markers[marker] - - print("p_values:", pf(p_values)) - - self.dataset.group.markers.add_pvalues(p_values) - - return self.dataset.group.markers.markers - - - def gen_pheno_txt_file_plink(self, pheno_filename = ''): - ped_sample_list = self.get_samples_from_ped_file() - output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb") - header = 'FID\tIID\t%s\n' % self.this_trait.name - output_file.write(header) - - new_value_list = [] - - #if valueDict does not include some strain, value will be set to -9999 as missing value - for i, sample in enumerate(ped_sample_list): - try: - value = self.vals[i] - value = str(value).replace('value=','') - value = value.strip() - except: - value = -9999 - - new_value_list.append(value) - - - new_line = '' - for i, sample in enumerate(ped_sample_list): - j = i+1 - value = new_value_list[i] - new_line += '%s\t%s\t%s\n'%(sample, sample, value) - - if j%1000 == 0: - output_file.write(newLine) - new_line = '' - - if new_line: - output_file.write(new_line) - - output_file.close() - - # get strain name from ped file in order - def get_samples_from_ped_file(self): - - os.chdir("/home/zas1024/plink") - - ped_file= open("{}.ped".format(self.dataset.group.name),"r") - line = ped_file.readline() - sample_list=[] - - while line: - lineList = string.split(string.strip(line), '\t') - lineList = map(string.strip, lineList) - - sample_name = lineList[0] - sample_list.append(sample_name) - - line = ped_file.readline() - - return sample_list - - ################################################################ - # Generate Chr list, Chr OrderId and Retrieve Length Information - ################################################################ - #def getChrNameOrderIdLength(self,RISet=''): - # try: - # query = """ - # Select - # Chr_Length.Name,Chr_Length.OrderId,Length from Chr_Length, InbredSet - # where - # Chr_Length.SpeciesId = InbredSet.SpeciesId AND - # InbredSet.Name = '%s' - # Order by OrderId - # """ % (self.dataset.group.name) - # results =g.db.execute(query).fetchall() - # ChrList=[] - # ChrLengthMbList=[] - # ChrNameOrderIdDict={} - # ChrOrderIdNameDict={} - # - # for item in results: - # ChrList.append(item[0]) - # ChrNameOrderIdDict[item[0]]=item[1] # key is chr name, value is orderId - # ChrOrderIdNameDict[item[1]]=item[0] # key is orderId, value is chr name - # ChrLengthMbList.append(item[2]) - # - # except: - # ChrList=[] - # ChrNameOrderIdDict={} - # ChrLengthMbList=[] - # - # return ChrList,ChrNameOrderIdDict,ChrOrderIdNameDict,ChrLengthMbList - - - def parse_plink_output(self, output_filename): - plink_results={} - - threshold_p_value = 0.01 - - result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb") - - header_line = result_fp.readline()# read header line - line = result_fp.readline() - - value_list = [] # initialize value list, this list will include snp, bp and pvalue info - p_value_dict = {} - count = 0 - - while line: - #convert line from str to list - line_list = self.build_line_list(line=line) - - # only keep the records whose chromosome name is in db - if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA': - - chr_name = self.species.chromosomes.chromosomes[int(line_list[0])] - snp = line_list[1] - BP = line_list[2] - p_value = float(line_list[-1]) - if threshold_p_value >= 0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - p_value_dict[snp] = p_value - - if plink_results.has_key(chr_name): - value_list = plink_results[chr_name] - - # pvalue range is [0,1] - if threshold_p_value >=0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - value_list.append((snp, BP, p_value)) - count += 1 - - plink_results[chr_name] = value_list - value_list = [] - else: - if threshold_p_value >= 0 and threshold_p_value <= 1: - if p_value < threshold_p_value: - value_list.append((snp, BP, p_value)) - count += 1 - - if value_list: - plink_results[chr_name] = value_list - - value_list=[] - - line = result_fp.readline() - else: - line = result_fp.readline() - - #if p_value_list: - # min_p_value = min(p_value_list) - #else: - # min_p_value = 0 - - return count, p_value_dict - - ###################################################### - # input: line: str,one line read from file - # function: convert line from str to list; - # output: lineList list - ####################################################### - def build_line_list(self, line=None): - - line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns - line_list = [item for item in line_list if item <>''] - line_list = map(string.strip, line_list) - - return line_list - - #def gen_data(self, tempdata): - def gen_data(self, temp_uuid): - """Generates p-values for each marker""" - - pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals]) - - #lmm_uuid = str(uuid.uuid4()) - - key = "pylmm:input:" + temp_uuid - print("key is:", pf(key)) - #with Bench("Loading cache"): - # result = Redis.get(key) - - if self.dataset.group.species == "human": - p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid) - #p_values = self.trim_results(p_values) - - else: - print("NOW CWD IS:", os.getcwd()) - genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - - no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - - genotype_matrix = np.array(trimmed_genotype_data).T - - #print("pheno_vector: ", pf(pheno_vector)) - #print("genotype_matrix: ", pf(genotype_matrix)) - #print("genotype_matrix.shape: ", pf(genotype_matrix.shape)) - - #params = {"pheno_vector": pheno_vector, - # "genotype_matrix": genotype_matrix, - # "restricted_max_likelihood": True, - # "refit": False, - # "temp_data": tempdata} - - params = dict(pheno_vector = pheno_vector.tolist(), - genotype_matrix = genotype_matrix.tolist(), - restricted_max_likelihood = True, - refit = False, - temp_uuid = temp_uuid, - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - json_params = json.dumps(params) - #print("json_params:", json_params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - print("before printing command") - - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - "other") - print("command is:", command) - print("after printing command") - - os.system(command) - - #t_stats, p_values = lmm.run(key) - #lmm.run(key) - - json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) - results = json.loads(json_results[1]) - p_values = [float(result) for result in results['p_values']] - print("p_values:", p_values) - #p_values = self.trim_results(p_values) - t_stats = results['t_stats'] - - #t_stats, p_values = lmm.run( - # pheno_vector, - # genotype_matrix, - # restricted_max_likelihood=True, - # refit=False, - # temp_data=tempdata - #) - #print("p_values:", p_values) - - self.dataset.group.markers.add_pvalues(p_values) - - #self.get_lod_score_cutoff() - - return self.dataset.group.markers.markers - - def trim_results(self, p_values): - print("len_p_values:", len(p_values)) - if len(p_values) > 500: - p_values.sort(reverse=True) - trimmed_values = p_values[:500] - - return trimmed_values - - #def gen_human_results(self, pheno_vector, tempdata): - def gen_human_results(self, pheno_vector, key, temp_uuid): - file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) - - plink_input = input.plink(file_base, type='b') - input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz") - - pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) - covariate_matrix = np.ones((pheno_vector.shape[0],1)) - kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ") - kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs))) - - print("Before creating params") - - params = dict(pheno_vector = pheno_vector.tolist(), - covariate_matrix = covariate_matrix.tolist(), - input_file_name = input_file_name, - kinship_matrix = kinship_matrix.tolist(), - refit = False, - temp_uuid = temp_uuid, - - # meta data - timestamp = datetime.datetime.now().isoformat(), - ) - - print("After creating params") - - json_params = json.dumps(params) - Redis.set(key, json_params) - Redis.expire(key, 60*60) - - print("Before creating the command") - - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, - "human") - - print("command is:", command) - - os.system(command) - - json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) - results = json.loads(json_results[1]) - t_stats = results['t_stats'] - p_values = results['p_values'] - - - #p_values, t_stats = lmm.run_human(key) - - #p_values, t_stats = lmm.run_human( - # pheno_vector, - # covariate_matrix, - # input_file_name, - # kinship_matrix, - # loading_progress=tempdata - # ) - - return p_values, t_stats - - def get_lod_score_cutoff(self): - print("INSIDE GET LOD CUTOFF") - high_qtl_count = 0 - for marker in self.dataset.group.markers.markers: - if marker['lod_score'] > 1: - high_qtl_count += 1 - - if high_qtl_count > 1000: - return 1 - else: - return 0 - - def identify_empty_samples(self): - no_val_samples = [] - for sample_count, val in enumerate(self.vals): - if val == "x": - no_val_samples.append(sample_count) - return no_val_samples - - def trim_genotypes(self, genotype_data, no_value_samples): - trimmed_genotype_data = [] - for marker in genotype_data: - new_genotypes = [] - for item_count, genotype in enumerate(marker): - if item_count in no_value_samples: - continue - try: - genotype = float(genotype) - except ValueError: - genotype = np.nan - pass - new_genotypes.append(genotype) - trimmed_genotype_data.append(new_genotypes) - return trimmed_genotype_data - -def create_snp_iterator_file(group): - plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group) - plink_input = input.plink(plink_file_base, type='b') - - data = dict(plink_input = list(plink_input), - numSNPs = plink_input.numSNPs) - - #input_dict = {} - # - #input_dict['plink_input'] = list(plink_input) - #input_dict['numSNPs'] = plink_input.numSNPs - # - - snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz") - - with gzip.open(snp_file_base, "wb") as fh: - pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL) - -#if __name__ == '__main__': -# import cPickle as pickle -# import gzip -# create_snp_iterator_file("HLC") - -if __name__ == '__main__': - import cPickle as pickle - import gzip - create_snp_iterator_file("HLC") -- cgit v1.2.3 From 246a9ef2d345d6704b97c96d047d06dca13eece0 Mon Sep 17 00:00:00 2001 From: pjotrp Date: Mon, 11 May 2015 17:08:00 -0500 Subject: More pylmm dependencies disabled --- wqflask/wqflask/interval_mapping/interval_mapping.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py index 5511826a..1cd3fc80 100755 --- a/wqflask/wqflask/interval_mapping/interval_mapping.py +++ b/wqflask/wqflask/interval_mapping/interval_mapping.py @@ -24,9 +24,9 @@ from base import data_set from base import species from base import webqtlConfig from utility import webqtlUtil -from wqflask.my_pylmm.data import prep_data -from wqflask.my_pylmm.pyLMM import lmm -from wqflask.my_pylmm.pyLMM import input +# from wqflask.my_pylmm.data import prep_data +# from wqflask.my_pylmm.pyLMM import lmm +# from wqflask.my_pylmm.pyLMM import input from utility import helper_functions from utility import Plot, Bunch from utility import temp_data -- cgit v1.2.3 From bc98e46fc910357ea3aeca5950e94e38d9584f9e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 11 May 2015 22:44:18 +0000 Subject: Moved chunks --- wqflask/base/data_set.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 9f805fc3..1cd57b4b 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -42,7 +42,7 @@ from base import species from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench -from wqflask.utility import chunks +from utility import chunks from maintenance import get_group_samplelists -- cgit v1.2.3 From 06f7ff5ee29c1899a04b7f538564c8a34f43280b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Mon, 11 May 2015 22:55:41 +0000 Subject: Show PYLMM_PATH on error --- wqflask/wqflask/marker_regression/marker_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c5fab4ee..76d05bd8 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -51,7 +51,7 @@ if os.environ.get('PYLMM_PATH') is None: if PYLMM_PATH is None: PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM' if not os.path.isfile(PYLMM_PATH+'/lmm.py'): - raise 'PYLMM_PATH unknown or faulty' + raise Exception('PYLMM_PATH '+PYLMM_PATH+' unknown or faulty') PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py' class MarkerRegression(object): -- cgit v1.2.3