aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/wqflask/my_pylmm/README.md35
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/__init__.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gn2.py93
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gwas.py70
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/kinship.py77
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py63
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm2.py12
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py9
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/standalone.py105
10 files changed, 326 insertions, 142 deletions
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index f6b0e72d..a84b5be2 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,21 +1,26 @@
-# RELEASE NOTES
+# Genenetwork2/pylmm RELEASE NOTES
-## 0.50-gn2-pre1 release
+## 0.50-gn2-pre2
-This is the first test release of multi-core pylmm into GN2. Both
-kinship calculation and GWAS have been made multi-threaded by
-introducing the Python multiprocessing module. Note that only
-run_other has been updated to use the new routines (so human is still
-handled the old way). I have taken care that we can still run both
-old-style and new-style LMM (through passing the 'new_code'
-boolean). This could be an option in the web server for users to
-select and test for any unexpected differences (of which there should
-be none, naturally ;).
+- Added abstractions for progress meter and info/debug statements;
+ Redis perc_complete is now updated through a lambda
-The current version can handle missing phenotypes, but as they are
-removed there is no way for GN2 to know what SNPs the P-values belong
-to. A future version will pass a SNP index to allow for missing
-phenotypes.
+## 0.50-gn2-pre1 (release)
+
+- This is the first test release of multi-core pylmm into GN2. Both
+ kinship calculation and GWAS have been made multi-threaded by
+ introducing the Python multiprocessing module. Note that only
+ run_other has been updated to use the new routines (so human is
+ still handled the old way). I have taken care that we can still run
+ both old-style and new-style LMM (through passing the 'new_code'
+ boolean). This could be an option in the web server for users to
+ select and test for any unexpected differences (of which there
+ should be none, naturally ;).
+
+- The current version can handle missing phenotypes, but as they are
+ removed there is no way for GN2 to know what SNPs the P-values
+ belong to. A future version will pass a SNP index to allow for
+ missing phenotypes.
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index c40c3221..6ab60d02 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre1"
+PYLMM_VERSION="0.50-gn2-pre2"
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
new file mode 100644
index 00000000..f30cf1e6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -0,0 +1,93 @@
+# Genenetwork2 specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logging.basicConfig(level=logging.DEBUG)
+# np.set_printoptions()
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ print(msg,m.shape,"=\n",m)
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logging.debug,
+ info = logging.info,
+ warning = logging.warning,
+ error = logging.error,
+ critical = logging.critical,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b901c0e2..8b344a90 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -19,7 +19,6 @@
import pdb
import time
-import sys
# from utility import temp_data
import lmm2
@@ -36,12 +35,10 @@ def formatResult(id,beta,betaSD,ts,ps):
return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
- # print("COMPUTE SNP",j,snp_ids,"\n")
result = []
for snp_id in snp_ids:
snp,id = snp_id
x = snp.reshape((n,1)) # all the SNPs
- # print "X=",x
# if refit:
# L.fit(X=snp,REML=REML)
ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
@@ -51,32 +48,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
q = compute_snp.q
q.put([j,result])
return j
- # PS.append(ps)
- # TS.append(ts)
- # return len(result)
- # compute.q.put(result)
- # return None
def f_init(q):
compute_snp.q = q
-def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
+def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True):
"""
- Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+ GWAS. The G matrix should be n inds (cols) x m snps (rows)
"""
+ progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
matrix_initialize()
cpu_num = mp.cpu_count()
numThreads = None # for now use all available threads
kfile2 = False
reml = restricted_max_likelihood
- sys.stderr.write(str(G.shape)+"\n")
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- # print "***** GWAS: G",G.shape,G
+ info("%s SNPs",snps)
assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
# CREATE LMM object for association
@@ -85,19 +78,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
if not refit:
- if verbose: sys.stderr.write("Computing fit for null model\n")
+ info("Computing fit for null model")
lmm2.fit() # follow GN model in run_other
- if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
-
- # outFile = "test.out"
- # out = open(outFile,'w')
- out = sys.stderr
-
- def outputResult(id,beta,betaSD,ts,ps):
- out.write(formatResult(id,beta,betaSD,ts,ps))
- def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
- # printOutHead()
+ info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
+
res = []
# Set up the pool
@@ -106,26 +90,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
p = mp.Pool(numThreads, f_init, [q])
collect = []
- # Buffers for pvalues and t-stats
- # PS = []
- # TS = []
count = 0
job = 0
jobs_running = 0
+ jobs_completed = 0
for snp in G:
snp_id = (snp,'SNPID')
count += 1
if count % 1000 == 0:
job += 1
- if verbose:
- sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+ debug("Job %d At SNP %d" % (job,count))
if numThreads == 1:
- print "Running on 1 THREAD"
+ debug("Running on 1 THREAD")
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
else:
p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
@@ -134,8 +116,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
while jobs_running > cpu_num:
try:
j,lst = q.get_nowait()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
jobs_running -= 1
except Queue.Empty:
@@ -150,24 +133,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
if numThreads==1 or count<1000 or len(collect)>0:
job += 1
- print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+ debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
res.append((j,lst))
- print "count=",count," running=",jobs_running," collect=",len(collect)
+ debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
for job in range(jobs_running):
j,lst = q.get(True,15) # time out
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
- print "Before sort",[res1[0] for res1 in res]
+ mprint("Before sort",[res1[0] for res1 in res])
res = sorted(res,key=lambda x: x[0])
- # if verbose:
- # print "res=",res[0][0:10]
- print "After sort",[res1[0] for res1 in res]
- print [len(res1[1]) for res1 in res]
+ mprint("After sort",[res1[0] for res1 in res])
+ info([len(res1[1]) for res1 in res])
ts = [item[0] for j,res1 in res for item in res1]
ps = [item[1] for j,res1 in res for item in res1]
return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 0c43587e..be12417e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,17 +28,21 @@ import time
from optmatrix import matrix_initialize, matrixMultT
-def kinship_full(G):
+def kinship_full(G,uses):
"""
Calculate the Kinship matrix using a full dot multiplication
"""
- print G.shape
+ info,mprint = uses('info','mprint')
+
+ # mprint("kinship_full G",G)
m = G.shape[0] # snps
n = G.shape[1] # inds
- sys.stderr.write(str(m)+" SNPs\n")
- assert m>n, "n should be larger than m (snps>inds)"
- m = np.dot(G.T,G)
+ info("%d SNPs",m)
+ assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
+ # m = np.dot(G.T,G)
+ m = matrixMultT(G.T)
m = m/G.shape[0]
+ # mprint("kinship_full K",m)
return m
def compute_W(job,G,n,snps,compute_size):
@@ -74,46 +78,39 @@ def f_init(q):
# Calculate the kinship matrix from G (SNPs as rows!), returns K
#
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
- numThreads = None
- if numThreads:
- numThreads = int(numThreads)
+def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False):
+ progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
matrix_initialize(useBLAS)
-
- sys.stderr.write(str(G.shape)+"\n")
+
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
+ info("%i SNPs" % (m))
assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
cpu_num = mp.cpu_count()
- print "CPU cores:",cpu_num
- print snps,computeSize
+ info("CPU cores: %i" % cpu_num)
iterations = snps/computeSize+1
- # if testing:
- # iterations = 8
- # jobs = range(0,8) # range(0,iterations)
results = []
-
K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals
completed = 0
for job in range(iterations):
- if verbose:
- sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize)))
+ info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
W = compute_W(job,G,n,snps,computeSize)
if numThreads == 1:
# Single-core
compute_matrixMult(job,W,q)
j,x = q.get()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ progress("kinship",j,iterations)
K_j = x
- # print j,K_j[:,0]
K = K + K_j
else:
# Multi-core
@@ -123,52 +120,40 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
time.sleep(0.1)
try:
j,x = q.get_nowait()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
except Queue.Empty:
pass
if numThreads == None or numThreads > 1:
- # results contains the growing result set
for job in range(len(results)-completed):
j,x = q.get(True,15)
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
K = K / float(snps)
-
- # outFile = 'runtest.kin'
- # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
- # np.savetxt(outFile,K)
-
- # if saveKvaKve:
- # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
- # Kva,Kve = linalg.eigh(K)
- # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
- # np.savetxt(outFile+".kva",Kva)
- # np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, verbose=True):
+def kvakve(K,uses):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-
+ info,mprint = uses('info','mprint')
+
+ info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
- if verbose:
- print("Kva is: ", Kva.shape, Kva)
- print("Kve is: ", Kve.shape, Kve)
+ mprint("Kva",Kva)
+ mprint("Kve",Kve)
- if sum(Kva < 1e-6):
- if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
+ if sum(Kva < 0):
+ info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
Kva[Kva < 1e-6] = 1e-6
return Kva,Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 58ff809b..8844118f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -55,13 +55,19 @@ import genotype
import phenotype
import gwas
+# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
except ImportError:
- print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
+progress,mprint,debug,info = uses('progress','mprint','debug','info')
+
#np.seterr('raise')
#def run_human(pheno_vector,
@@ -168,10 +174,7 @@ def run_human(pheno_vector,
#if count > 1000:
# break
count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- tempdata.store("percent_complete", percent_complete)
+ progress("human",count,total_snps)
#with Bench("actual association"):
ps, ts = human_association(snp,
@@ -275,7 +278,7 @@ def run_other_old(pheno_vector,
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -329,7 +332,7 @@ def run_other_new(pheno_vector,
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
with Bench("Calculate Kinship"):
- K,G = calculate_kinship(G, tempdata)
+ K,G = calculate_kinship_new(G, tempdata)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -346,6 +349,7 @@ def run_other_new(pheno_vector,
t_stats, p_values = gwas.gwas(Y,
G.T,
K,
+ uses,
restricted_max_likelihood=True,
refit=False,verbose=True)
Bench().report()
@@ -390,10 +394,10 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
- print("call genotype.normalize")
+ info("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
- print("call calculate_kinship_new")
- return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
+ info("call calculate_kinship_new")
+ return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME)
def calculate_kinship_old(genotype_matrix, temp_data=None):
"""
@@ -403,11 +407,11 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
normalizes the resulting vectors and returns the RRM matrix.
"""
- print("call calculate_kinship_old")
+ info("call calculate_kinship_old")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- print("genotype 2D matrix n (inds) is:", n)
- print("genotype 2D matrix m (snps) is:", m)
+ info("genotype 2D matrix n (inds) is: %d" % (n))
+ info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
for counter in range(m):
@@ -428,17 +432,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
continue
keep.append(counter)
genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
-
- percent_complete = int(round((counter/m)*45))
- if temp_data != None:
- temp_data.store("percent_complete", percent_complete)
+ progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
- kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
- return kinship_matrix,genotype_matrix
-
-calculate_kinship = calculate_kinship_new # alias
+ mprint("After kinship (old) genotype_matrix: ", genotype_matrix)
+ # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
+ # return kinship_matrix,genotype_matrix
+ return kinship_full(genotype_matrix.T,uses),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
@@ -464,9 +464,9 @@ def GWAS(pheno_vector,
refit - refit the variance component for each SNP
"""
- if kinship_eigen_vals == None:
+ if kinship_eigen_vals is None:
kinship_eigen_vals = []
- if kinship_eigen_vectors == None:
+ if kinship_eigen_vectors is None:
kinship_eigen_vectors = []
n = genotype_matrix.shape[0]
@@ -536,9 +536,8 @@ def GWAS(pheno_vector,
lmm_ob.fit(X=x)
ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
- percent_complete = 45 + int(round((counter/m)*55))
- temp_data.store("percent_complete", percent_complete)
-
+ progress("gwas_old",counter,m)
+
p_values.append(ps)
t_statistics.append(ts)
@@ -571,7 +570,7 @@ class LMM:
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
- if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+ if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
#x = Y != -9
@@ -595,7 +594,7 @@ class LMM:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kvakve(K)
+ Kva,Kve = kvakve(K,uses)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
@@ -664,7 +663,7 @@ class LMM:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None:
+ if X is None:
X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -814,6 +813,10 @@ def gn2_redis(key,species,new_code=True):
params = json.loads(json_params)
tempdata = temp_data.TempData(params['temp_uuid'])
+ def update_tempdata(loc,i,total):
+ tempdata.store("percent_complete",round(i*100.0/total))
+ debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
+ progress_set_func(update_tempdata)
print('pheno', np.array(params['pheno_vector']))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d4b3ac82..aa6b473d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,6 +24,16 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses
+except ImportError:
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+ has_gn2=False
+ from standalone import uses
+ pass
+
def calculateKinship(W,center=False):
"""
W is an n x m matrix encoding SNP minor alleles.
@@ -184,7 +194,7 @@ class LMM2:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kinship.kvakve(K)
+ Kva,Kve = kinship.kvakve(K,uses)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 682ba371..4c8175f7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -24,7 +24,7 @@ def remove_missing(y,g,verbose=False):
Remove missing data from matrices, make sure the genotype data has
individuals as rows
"""
- assert(y!=None)
+ assert(y is not None)
assert(y.shape[0] == g.shape[0])
y1 = y
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 324c4f2c..88e2a033 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -25,6 +25,7 @@ from lmm import gn2_load_redis, calculate_kinship_old
from kinship import kinship, kinship_full
import genotype
import phenotype
+from standalone import uses
usage = """
python runlmm.py [options] command
@@ -133,7 +134,7 @@ elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
- if y != None and options.remove_missing_phenotypes:
+ if y is not None and options.remove_missing_phenotypes:
gnt = np.array(g).T
Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
G = g.T
@@ -164,7 +165,7 @@ elif cmd == 'redis':
assert p1==0.0897, "p1=%f" % p1
assert p2==0.0405, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
- assert round(sum(ps)) == 4070
+ assert int(sum(ps)) == 4070
assert len(ps) == 8000
elif cmd == 'kinship':
G = g
@@ -183,7 +184,7 @@ elif cmd == 'kinship':
gnt = None
if options.test_kinship:
- K = kinship_full(np.copy(G))
+ K = kinship_full(np.copy(G),uses)
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
@@ -193,7 +194,7 @@ elif cmd == 'kinship':
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T)
+ K3 = kinship(G.T,uses)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
new file mode 100644
index 00000000..36bf8fd5
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -0,0 +1,105 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import numpy as np
+import sys
+import logging
+
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
+
+progress_location = None
+progress_current = None
+progress_prev_perc = None
+
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
+
+def progress(location, count, total):
+ global progress_location
+ global progress_prev_perc
+
+ perc = round(count*100.0/total)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ progress_location = location
+ progress_prev_perc = perc
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ progress = progress,
+ mprint = mprint
+ )
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")