diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/my_pylmm/README.md | 35 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 93 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 70 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 77 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 63 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 9 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 105 |
10 files changed, 326 insertions, 142 deletions
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md index f6b0e72d..a84b5be2 100644 --- a/wqflask/wqflask/my_pylmm/README.md +++ b/wqflask/wqflask/my_pylmm/README.md @@ -1,21 +1,26 @@ -# RELEASE NOTES +# Genenetwork2/pylmm RELEASE NOTES -## 0.50-gn2-pre1 release +## 0.50-gn2-pre2 -This is the first test release of multi-core pylmm into GN2. Both -kinship calculation and GWAS have been made multi-threaded by -introducing the Python multiprocessing module. Note that only -run_other has been updated to use the new routines (so human is still -handled the old way). I have taken care that we can still run both -old-style and new-style LMM (through passing the 'new_code' -boolean). This could be an option in the web server for users to -select and test for any unexpected differences (of which there should -be none, naturally ;). +- Added abstractions for progress meter and info/debug statements; + Redis perc_complete is now updated through a lambda -The current version can handle missing phenotypes, but as they are -removed there is no way for GN2 to know what SNPs the P-values belong -to. A future version will pass a SNP index to allow for missing -phenotypes. +## 0.50-gn2-pre1 (release) + +- This is the first test release of multi-core pylmm into GN2. Both + kinship calculation and GWAS have been made multi-threaded by + introducing the Python multiprocessing module. Note that only + run_other has been updated to use the new routines (so human is + still handled the old way). I have taken care that we can still run + both old-style and new-style LMM (through passing the 'new_code' + boolean). This could be an option in the web server for users to + select and test for any unexpected differences (of which there + should be none, naturally ;). + +- The current version can handle missing phenotypes, but as they are + removed there is no way for GN2 to know what SNPs the P-values + belong to. A future version will pass a SNP index to allow for + missing phenotypes.
\ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py index c40c3221..6ab60d02 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py @@ -1 +1 @@ -PYLMM_VERSION="0.50-gn2-pre1" +PYLMM_VERSION="0.50-gn2-pre2" diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py new file mode 100644 index 00000000..f30cf1e6 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py @@ -0,0 +1,93 @@ +# Genenetwork2 specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# + +from __future__ import absolute_import, print_function, division + +import numpy as np +import sys +import logging + +# logging.basicConfig(level=logging.DEBUG) +# np.set_printoptions() + +progress_location = None +progress_current = None +progress_prev_perc = None + +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func + +def progress(location, count, total): + global progress_location + global progress_prev_perc + + perc = round(count*100.0/total) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) + logger.info("Progress: %s %d%%" % (location,perc)) + progress_location = location + progress_prev_perc = perc + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + print(msg,m.shape,"=\n",m) + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logging.debug, + info = logging.info, + warning = logging.warning, + error = logging.error, + critical = logging.critical, + progress = progress, + mprint = mprint + ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py index b901c0e2..8b344a90 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py @@ -19,7 +19,6 @@ import pdb import time -import sys # from utility import temp_data import lmm2 @@ -36,12 +35,10 @@ def formatResult(id,beta,betaSD,ts,ps): return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n" def compute_snp(j,n,snp_ids,lmm2,REML,q = None): - # print("COMPUTE SNP",j,snp_ids,"\n") result = [] for snp_id in snp_ids: snp,id = snp_id x = snp.reshape((n,1)) # all the SNPs - # print "X=",x # if refit: # L.fit(X=snp,REML=REML) ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True) @@ -51,32 +48,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None): q = compute_snp.q q.put([j,result]) return j - # PS.append(ps) - # TS.append(ts) - # return len(result) - # compute.q.put(result) - # return None def f_init(q): compute_snp.q = q -def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): +def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True): """ - Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows) + GWAS. The G matrix should be n inds (cols) x m snps (rows) """ + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize() cpu_num = mp.cpu_count() numThreads = None # for now use all available threads kfile2 = False reml = restricted_max_likelihood - sys.stderr.write(str(G.shape)+"\n") + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") - # print "***** GWAS: G",G.shape,G + info("%s SNPs",snps) assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds) # CREATE LMM object for association @@ -85,19 +78,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose) if not refit: - if verbose: sys.stderr.write("Computing fit for null model\n") + info("Computing fit for null model") lmm2.fit() # follow GN model in run_other - if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma)) - - # outFile = "test.out" - # out = open(outFile,'w') - out = sys.stderr - - def outputResult(id,beta,betaSD,ts,ps): - out.write(formatResult(id,beta,betaSD,ts,ps)) - def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n") - - # printOutHead() + info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma)) + res = [] # Set up the pool @@ -106,26 +90,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): p = mp.Pool(numThreads, f_init, [q]) collect = [] - # Buffers for pvalues and t-stats - # PS = [] - # TS = [] count = 0 job = 0 jobs_running = 0 + jobs_completed = 0 for snp in G: snp_id = (snp,'SNPID') count += 1 if count % 1000 == 0: job += 1 - if verbose: - sys.stderr.write("Job %d At SNP %d\n" % (job,count)) + debug("Job %d At SNP %d" % (job,count)) if numThreads == 1: - print "Running on 1 THREAD" + debug("Running on 1 THREAD") compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) else: p.apply_async(compute_snp,(job,n,collect,lmm2,reml)) @@ -134,8 +116,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): while jobs_running > cpu_num: try: j,lst = q.get_nowait() - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) jobs_running -= 1 except Queue.Empty: @@ -150,24 +133,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True): if numThreads==1 or count<1000 or len(collect)>0: job += 1 - print "Collect final batch size %i job %i @%i: " % (len(collect), job, count) + debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count)) compute_snp(job,n,collect,lmm2,reml,q) collect = [] j,lst = q.get() res.append((j,lst)) - print "count=",count," running=",jobs_running," collect=",len(collect) + debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect))) for job in range(jobs_running): j,lst = q.get(True,15) # time out - if verbose: - sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + jobs_completed += 1 + progress("GWAS2",jobs_completed,snps/1000) res.append((j,lst)) - print "Before sort",[res1[0] for res1 in res] + mprint("Before sort",[res1[0] for res1 in res]) res = sorted(res,key=lambda x: x[0]) - # if verbose: - # print "res=",res[0][0:10] - print "After sort",[res1[0] for res1 in res] - print [len(res1[1]) for res1 in res] + mprint("After sort",[res1[0] for res1 in res]) + info([len(res1[1]) for res1 in res]) ts = [item[0] for j,res1 in res for item in res1] ps = [item[1] for j,res1 in res for item in res1] return ts,ps diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 0c43587e..be12417e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -28,17 +28,21 @@ import time from optmatrix import matrix_initialize, matrixMultT -def kinship_full(G): +def kinship_full(G,uses): """ Calculate the Kinship matrix using a full dot multiplication """ - print G.shape + info,mprint = uses('info','mprint') + + # mprint("kinship_full G",G) m = G.shape[0] # snps n = G.shape[1] # inds - sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" - m = np.dot(G.T,G) + info("%d SNPs",m) + assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n) + # m = np.dot(G.T,G) + m = matrixMultT(G.T) m = m/G.shape[0] + # mprint("kinship_full K",m) return m def compute_W(job,G,n,snps,compute_size): @@ -74,46 +78,39 @@ def f_init(q): # Calculate the kinship matrix from G (SNPs as rows!), returns K # -def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): - numThreads = None - if numThreads: - numThreads = int(numThreads) +def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False): + progress,debug,info,mprint = uses('progress','debug','info','mprint') + matrix_initialize(useBLAS) - - sys.stderr.write(str(G.shape)+"\n") + + mprint("G",G) n = G.shape[1] # inds inds = n m = G.shape[0] # snps snps = m - sys.stderr.write(str(m)+" SNPs\n") + info("%i SNPs" % (m)) assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds) q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) cpu_num = mp.cpu_count() - print "CPU cores:",cpu_num - print snps,computeSize + info("CPU cores: %i" % cpu_num) iterations = snps/computeSize+1 - # if testing: - # iterations = 8 - # jobs = range(0,8) # range(0,iterations) results = [] - K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals completed = 0 for job in range(iterations): - if verbose: - sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize))) + info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize))) W = compute_W(job,G,n,snps,computeSize) if numThreads == 1: # Single-core compute_matrixMult(job,W,q) j,x = q.get() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") + progress("kinship",j,iterations) K_j = x - # print j,K_j[:,0] K = K + K_j else: # Multi-core @@ -123,52 +120,40 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True): time.sleep(0.1) try: j,x = q.get_nowait() - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) except Queue.Empty: pass if numThreads == None or numThreads > 1: - # results contains the growing result set for job in range(len(results)-completed): j,x = q.get(True,15) - if verbose: sys.stderr.write("Job "+str(j)+" finished\n") + debug("Job "+str(j)+" finished") K_j = x - # print j,K_j[:,0] K = K + K_j completed += 1 + progress("kinship",completed,iterations) K = K / float(snps) - - # outFile = 'runtest.kin' - # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) - # np.savetxt(outFile,K) - - # if saveKvaKve: - # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n") - # Kva,Kve = linalg.eigh(K) - # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile) - # np.savetxt(outFile+".kva",Kva) - # np.savetxt(outFile+".kve",Kve) return K -def kvakve(K, verbose=True): +def kvakve(K,uses): """ Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned of small values < 1e-6 (notably smaller than zero) """ - if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - + info,mprint = uses('info','mprint') + + info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) ) Kva,Kve = linalg.eigh(K) - if verbose: - print("Kva is: ", Kva.shape, Kva) - print("Kve is: ", Kve.shape, Kve) + mprint("Kva",Kva) + mprint("Kve",Kve) - if sum(Kva < 1e-6): - if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0))) + if sum(Kva < 0): + info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0))) Kva[Kva < 1e-6] = 1e-6 return Kva,Kve diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 58ff809b..8844118f 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -55,13 +55,19 @@ import genotype import phenotype import gwas +# ---- A trick to decide on the environment: try: from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses, progress_set_func except ImportError: - print("WARNING: Standalone version missing the Genenetwork2 environment\n") has_gn2=False + import standalone as handlers + from standalone import uses, progress_set_func + sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n") pass +progress,mprint,debug,info = uses('progress','mprint','debug','info') + #np.seterr('raise') #def run_human(pheno_vector, @@ -168,10 +174,7 @@ def run_human(pheno_vector, #if count > 1000: # break count += 1 - - percent_complete = (float(count) / total_snps) * 100 - #print("percent_complete: ", percent_complete) - tempdata.store("percent_complete", percent_complete) + progress("human",count,total_snps) #with Bench("actual association"): ps, ts = human_association(snp, @@ -275,7 +278,7 @@ def run_other_old(pheno_vector, print("Running the original LMM engine in run_other (old)") print("REML=",restricted_max_likelihood," REFIT=",refit) with Bench("Calculate Kinship"): - kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata) + kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata) print("kinship_matrix: ", pf(kinship_matrix)) print("kinship_matrix.shape: ", pf(kinship_matrix.shape)) @@ -329,7 +332,7 @@ def run_other_new(pheno_vector, # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) with Bench("Calculate Kinship"): - K,G = calculate_kinship(G, tempdata) + K,G = calculate_kinship_new(G, tempdata) print("kinship_matrix: ", pf(K)) print("kinship_matrix.shape: ", pf(K.shape)) @@ -346,6 +349,7 @@ def run_other_new(pheno_vector, t_stats, p_values = gwas.gwas(Y, G.T, K, + uses, restricted_max_likelihood=True, refit=False,verbose=True) Bench().report() @@ -390,10 +394,10 @@ def calculate_kinship_new(genotype_matrix, temp_data=None): Call the new kinship calculation where genotype_matrix contains inds (columns) by snps (rows). """ - print("call genotype.normalize") + info("call genotype.normalize") G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix) - print("call calculate_kinship_new") - return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME) + info("call calculate_kinship_new") + return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME) def calculate_kinship_old(genotype_matrix, temp_data=None): """ @@ -403,11 +407,11 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): normalizes the resulting vectors and returns the RRM matrix. """ - print("call calculate_kinship_old") + info("call calculate_kinship_old") n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("genotype 2D matrix n (inds) is:", n) - print("genotype 2D matrix m (snps) is:", m) + info("genotype 2D matrix n (inds) is: %d" % (n)) + info("genotype 2D matrix m (snps) is: %d" % (m)) assert m>n, "n should be larger than m (snps>inds)" keep = [] for counter in range(m): @@ -428,17 +432,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None): continue keep.append(counter) genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) - - percent_complete = int(round((counter/m)*45)) - if temp_data != None: - temp_data.store("percent_complete", percent_complete) + progress('kinship_old normalize genotype',counter,m) genotype_matrix = genotype_matrix[:,keep] - print("After kinship (old) genotype_matrix: ", pf(genotype_matrix)) - kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) - return kinship_matrix,genotype_matrix - -calculate_kinship = calculate_kinship_new # alias + mprint("After kinship (old) genotype_matrix: ", genotype_matrix) + # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) + # return kinship_matrix,genotype_matrix + return kinship_full(genotype_matrix.T,uses),genotype_matrix def GWAS(pheno_vector, genotype_matrix, @@ -464,9 +464,9 @@ def GWAS(pheno_vector, refit - refit the variance component for each SNP """ - if kinship_eigen_vals == None: + if kinship_eigen_vals is None: kinship_eigen_vals = [] - if kinship_eigen_vectors == None: + if kinship_eigen_vectors is None: kinship_eigen_vectors = [] n = genotype_matrix.shape[0] @@ -536,9 +536,8 @@ def GWAS(pheno_vector, lmm_ob.fit(X=x) ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood) - percent_complete = 45 + int(round((counter/m)*55)) - temp_data.store("percent_complete", percent_complete) - + progress("gwas_old",counter,m) + p_values.append(ps) t_statistics.append(ts) @@ -571,7 +570,7 @@ class LMM: When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1) self.verbose = verbose #x = Y != -9 @@ -595,7 +594,7 @@ class LMM: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kvakve(K) + Kva,Kve = kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) @@ -664,7 +663,7 @@ class LMM: REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. """ - if X == None: + if X is None: X = self.X0t elif stack: self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] @@ -814,6 +813,10 @@ def gn2_redis(key,species,new_code=True): params = json.loads(json_params) tempdata = temp_data.TempData(params['temp_uuid']) + def update_tempdata(loc,i,total): + tempdata.store("percent_complete",round(i*100.0/total)) + debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total))) + progress_set_func(update_tempdata) print('pheno', np.array(params['pheno_vector'])) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py index d4b3ac82..aa6b473d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py @@ -24,6 +24,16 @@ from scipy import optimize from optmatrix import matrixMult import kinship +# ---- A trick to decide on the environment: +try: + from wqflask.my_pylmm.pyLMM import chunks + from gn2 import uses +except ImportError: + sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n") + has_gn2=False + from standalone import uses + pass + def calculateKinship(W,center=False): """ W is an n x m matrix encoding SNP minor alleles. @@ -184,7 +194,7 @@ class LMM2: # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) begin = time.time() # Kva,Kve = linalg.eigh(K) - Kva,Kve = kinship.kvakve(K) + Kva,Kve = kinship.kvakve(K,uses) end = time.time() if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py index 682ba371..4c8175f7 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py @@ -24,7 +24,7 @@ def remove_missing(y,g,verbose=False): Remove missing data from matrices, make sure the genotype data has individuals as rows """ - assert(y!=None) + assert(y is not None) assert(y.shape[0] == g.shape[0]) y1 = y diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 324c4f2c..88e2a033 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -25,6 +25,7 @@ from lmm import gn2_load_redis, calculate_kinship_old from kinship import kinship, kinship_full import genotype import phenotype +from standalone import uses usage = """ python runlmm.py [options] command @@ -133,7 +134,7 @@ elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None and options.remove_missing_phenotypes: + if y is not None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T @@ -164,7 +165,7 @@ elif cmd == 'redis': assert p1==0.0897, "p1=%f" % p1 assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert round(sum(ps)) == 4070 + assert int(sum(ps)) == 4070 assert len(ps) == 8000 elif cmd == 'kinship': G = g @@ -183,7 +184,7 @@ elif cmd == 'kinship': gnt = None if options.test_kinship: - K = kinship_full(np.copy(G)) + K = kinship_full(np.copy(G),uses) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) @@ -193,7 +194,7 @@ elif cmd == 'kinship': k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(G.T) + K3 = kinship(G.T,uses) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py new file mode 100644 index 00000000..36bf8fd5 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py @@ -0,0 +1,105 @@ +# Standalone specific methods and callback handler +# +# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl) +# +# Set the log level with +# +# logging.basicConfig(level=logging.DEBUG) + +from __future__ import absolute_import, print_function, division + +import numpy as np +import sys +import logging + +# logger = logging.getLogger(__name__) +logger = logging.getLogger('lmm2') +logging.basicConfig(level=logging.DEBUG) +np.set_printoptions(precision=3,suppress=True) + +progress_location = None +progress_current = None +progress_prev_perc = None + +def progress_default_func(location,count,total): + global progress_current + value = round(count*100.0/total) + progress_current = value + +progress_func = progress_default_func + +def progress_set_func(func): + global progress_func + progress_func = func + +def progress(location, count, total): + global progress_location + global progress_prev_perc + + perc = round(count*100.0/total) + if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5): + progress_func(location, count, total) + logger.info("Progress: %s %d%%" % (location,perc)) + progress_location = location + progress_prev_perc = perc + +def mprint(msg,data): + """ + Array/matrix print function + """ + m = np.array(data) + if m.ndim == 1: + print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:]) + if m.ndim == 2: + print(msg,m.shape,"=\n[", + m[0][0:3]," ... ",m[0][-3:],"\n ", + m[1][0:3]," ... ",m[1][-3:],"\n ...\n ", + m[-2][0:3]," ... ",m[-2][-3:],"\n ", + m[-1][0:3]," ... ",m[-1][-3:],"]") + +def callbacks(): + return dict( + write = sys.stdout.write, + writeln = print, + debug = logger.debug, + info = logger.info, + warning = logger.warning, + error = logger.error, + critical = logger.critical, + progress = progress, + mprint = mprint + ) + +def uses(*funcs): + """ + Some sugar + """ + return [callbacks()[func] for func in funcs] + +# ----- Minor test cases: + +if __name__ == '__main__': + # logging.basicConfig(level=logging.DEBUG) + logging.debug("Test %i" % (1)) + d = callbacks()['debug'] + d("TEST") + wrln = callbacks()['writeln'] + wrln("Hello %i" % 34) + progress = callbacks()['progress'] + progress("I am half way",50,100) + list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15, + 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15] + mprint("list",list) + matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15], + [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]] + mprint("matrix",matrix) + ix,dx = uses("info","debug") + ix("ix") + dx("dx") |