From e6e3b12eeb3fc57b9652468304c1fd14a0a816d0 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 10:08:29 +0300
Subject: Add callback handlers
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 38 ++++++++++++++++++++++++++
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 41 ++++++++++++++++++++++++++++
2 files changed, 79 insertions(+)
create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gn2.py
create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/standalone.py
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
new file mode 100644
index 00000000..e0c6c8a7
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -0,0 +1,38 @@
+# Genenetwork2 specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import logging
+
+# logging.basicConfig(level=logging.DEBUG)
+
+def progress(location, count, total):
+ print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total)))
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logging.debug,
+ info = logging.info,
+ warning = logging.warning,
+ error = logging.error,
+ critical = logging.critical,
+ progress = progress
+ )
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
new file mode 100644
index 00000000..a806729e
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -0,0 +1,41 @@
+# Standalone specific methods and callback handler
+#
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import logging
+
+logging.basicConfig(level=logging.DEBUG)
+
+def progress(location, count, total):
+ logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total)))
+
+def callbacks():
+ return dict(
+ write = sys.stdout.write,
+ writeln = print,
+ debug = logging.debug,
+ info = logging.info,
+ warning = logging.warning,
+ error = logging.error,
+ critical = logging.critical,
+ progress = progress
+ )
+
+# ----- Minor test cases:
+
+if __name__ == '__main__':
+ # logging.basicConfig(level=logging.DEBUG)
+ logging.debug("Test %i" % (1))
+ d = callbacks()['debug']
+ d("TEST")
+ wrln = callbacks()['writeln']
+ wrln("Hello %i" % 34)
+ progress = callbacks()['progress']
+ progress("I am half way",50,100)
--
cgit v1.2.3
From 178cdbbd1a52cfcab975ab27b36e148009cc3577 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 11:05:39 +0300
Subject: Introducing callbacks
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 17 ++++++++++++--
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 17 +++++++-------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++-
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 +++++++++-
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 34 ++++++++++++++++++++++++++--
5 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index e0c6c8a7..4702c670 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -5,13 +5,25 @@
from __future__ import absolute_import, print_function, division
+import numpy as np
import sys
import logging
# logging.basicConfig(level=logging.DEBUG)
+# np.set_printoptions()
def progress(location, count, total):
- print("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total)))
+ """
+ Progress update
+ """
+ logging.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ print(msg,m.shape,"=\n",m)
def callbacks():
return dict(
@@ -22,7 +34,8 @@ def callbacks():
warning = logging.warning,
error = logging.error,
critical = logging.critical,
- progress = progress
+ progress = progress,
+ mprint = mprint
)
# ----- Minor test cases:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 0c43587e..43e7fe36 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -155,20 +155,21 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
# np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, verbose=True):
+def kvakve(K, callbacks):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- if verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
-
+ info = callbacks()['info']
+ mprint = callbacks()['mprint']
+
+ info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
- if verbose:
- print("Kva is: ", Kva.shape, Kva)
- print("Kve is: ", Kve.shape, Kve)
+ mprint("Kva",Kva)
+ mprint("Kve",Kve)
- if sum(Kva < 1e-6):
- if verbose: sys.stderr.write("Cleaning %d eigen values (Kva<0)\n" % (sum(Kva < 0)))
+ if sum(Kva < 0):
+ info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
Kva[Kva < 1e-6] = 1e-6
return Kva,Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8a24d98b..5ad644e2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -54,11 +54,14 @@ import genotype
import phenotype
import gwas
+# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import callbacks
except ImportError:
print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
+ from standalone import callbacks
pass
#np.seterr('raise')
@@ -594,7 +597,7 @@ class LMM:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kvakve(K)
+ Kva,Kve = kvakve(K,callbacks)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d4b3ac82..6aefb9d3 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,6 +24,16 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
+# A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import callbacks
+except ImportError:
+ print("WARNING: Standalone version missing the Genenetwork2 environment\n")
+ has_gn2=False
+ from standalone import callbacks
+ pass
+
def calculateKinship(W,center=False):
"""
W is an n x m matrix encoding SNP minor alleles.
@@ -184,7 +194,7 @@ class LMM2:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kinship.kvakve(K)
+ Kva,Kve = kinship.kvakve(K,callbacks)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index a806729e..bbee3cd7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -8,13 +8,29 @@
from __future__ import absolute_import, print_function, division
+import numpy as np
import sys
import logging
logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
def progress(location, count, total):
- logging.info("Progress: %s %i %i @%d%%" % (location,count,total,round(count*100.0/total)))
+ logging.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+
+def mprint(msg,data):
+ """
+ Array/matrix print function
+ """
+ m = np.array(data)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
def callbacks():
return dict(
@@ -25,7 +41,8 @@ def callbacks():
warning = logging.warning,
error = logging.error,
critical = logging.critical,
- progress = progress
+ progress = progress,
+ mprint = mprint
)
# ----- Minor test cases:
@@ -39,3 +56,16 @@ if __name__ == '__main__':
wrln("Hello %i" % 34)
progress = callbacks()['progress']
progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
--
cgit v1.2.3
From 876e80148984274dfd3b8281677c7541504feb59 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 11:18:58 +0300
Subject: Added uses as syntax sugar for callbacks
---
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 5 ++---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 +++---
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 8 ++++----
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 9 +++++++++
4 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 43e7fe36..d3792570 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -155,13 +155,12 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
# np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, callbacks):
+def kvakve(K, uses):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- info = callbacks()['info']
- mprint = callbacks()['mprint']
+ info,mprint = uses('info','mprint')
info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 5ad644e2..2076bc84 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -57,11 +57,11 @@ import gwas
# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import callbacks
+ from gn2 import uses
except ImportError:
print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
- from standalone import callbacks
+ from standalone import uses
pass
#np.seterr('raise')
@@ -597,7 +597,7 @@ class LMM:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kvakve(K,callbacks)
+ Kva,Kve = kvakve(K,uses)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index 6aefb9d3..5b93ae0d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,14 +24,14 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
-# A trick to decide on the environment:
+# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import callbacks
+ from gn2 import uses
except ImportError:
print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
- from standalone import callbacks
+ from standalone import uses
pass
def calculateKinship(W,center=False):
@@ -194,7 +194,7 @@ class LMM2:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kinship.kvakve(K,callbacks)
+ Kva,Kve = kinship.kvakve(K,uses)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index bbee3cd7..705da21f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -44,6 +44,12 @@ def callbacks():
progress = progress,
mprint = mprint
)
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
# ----- Minor test cases:
@@ -69,3 +75,6 @@ if __name__ == '__main__':
[5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
[6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
--
cgit v1.2.3
From 7f937ef3265f007c25ec2c386bc399a708bcdd5e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 11:46:06 +0300
Subject: Introduce sugar for callbacks
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 26 +++++++++++++--
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 49 +++++++++-------------------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 3 +-
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 14 ++++----
5 files changed, 50 insertions(+), 44 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index 4702c670..c71b9f22 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -37,11 +37,17 @@ def callbacks():
progress = progress,
mprint = mprint
)
-
+
+def uses(*funcs):
+ """
+ Some sugar
+ """
+ return [callbacks()[func] for func in funcs]
+
# ----- Minor test cases:
if __name__ == '__main__':
- logging.basicConfig(level=logging.DEBUG)
+ # logging.basicConfig(level=logging.DEBUG)
logging.debug("Test %i" % (1))
d = callbacks()['debug']
d("TEST")
@@ -49,3 +55,19 @@ if __name__ == '__main__':
wrln("Hello %i" % 34)
progress = callbacks()['progress']
progress("I am half way",50,100)
+ list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
+ 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
+ mprint("list",list)
+ matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
+ [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
+ mprint("matrix",matrix)
+ ix,dx = uses("info","debug")
+ ix("ix")
+ dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index d3792570..62f7be47 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -74,46 +74,39 @@ def f_init(q):
# Calculate the kinship matrix from G (SNPs as rows!), returns K
#
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
- numThreads = None
- if numThreads:
- numThreads = int(numThreads)
+def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False):
+ progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
matrix_initialize(useBLAS)
-
- sys.stderr.write(str(G.shape)+"\n")
+
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
+ info("%i SNPs" % (m))
assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
cpu_num = mp.cpu_count()
- print "CPU cores:",cpu_num
- print snps,computeSize
+ info("CPU cores: %i" % cpu_num)
iterations = snps/computeSize+1
- # if testing:
- # iterations = 8
- # jobs = range(0,8) # range(0,iterations)
results = []
-
K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals
completed = 0
for job in range(iterations):
- if verbose:
- sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*computeSize)))
+ info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
W = compute_W(job,G,n,snps,computeSize)
if numThreads == 1:
# Single-core
compute_matrixMult(job,W,q)
j,x = q.get()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ progress("kinship",j,iterations)
K_j = x
- # print j,K_j[:,0]
K = K + K_j
else:
# Multi-core
@@ -123,39 +116,27 @@ def kinship(G,computeSize=1000,numThreads=None,useBLAS=False,verbose=True):
time.sleep(0.1)
try:
j,x = q.get_nowait()
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
except Queue.Empty:
pass
if numThreads == None or numThreads > 1:
- # results contains the growing result set
for job in range(len(results)-completed):
j,x = q.get(True,15)
- if verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
K_j = x
- # print j,K_j[:,0]
K = K + K_j
completed += 1
+ progress("kinship",completed,iterations)
K = K / float(snps)
-
- # outFile = 'runtest.kin'
- # if verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
- # np.savetxt(outFile,K)
-
- # if saveKvaKve:
- # if verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
- # Kva,Kve = linalg.eigh(K)
- # if verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
- # np.savetxt(outFile+".kva",Kva)
- # np.savetxt(outFile+".kve",Kve)
return K
-def kvakve(K, uses):
+def kvakve(K,uses):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 2076bc84..5182e73c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -395,7 +395,7 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
print("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
print("call calculate_kinship_new")
- return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
+ return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME)
def calculate_kinship_old(genotype_matrix, temp_data=None):
"""
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 324c4f2c..e3e8659c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -25,6 +25,7 @@ from lmm import gn2_load_redis, calculate_kinship_old
from kinship import kinship, kinship_full
import genotype
import phenotype
+from standalone import uses
usage = """
python runlmm.py [options] command
@@ -193,7 +194,7 @@ elif cmd == 'kinship':
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T)
+ K3 = kinship(G.T,uses)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index 705da21f..538007f1 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -12,11 +12,13 @@ import numpy as np
import sys
import logging
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
logging.basicConfig(level=logging.DEBUG)
np.set_printoptions(precision=3,suppress=True)
def progress(location, count, total):
- logging.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+ logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
def mprint(msg,data):
"""
@@ -36,11 +38,11 @@ def callbacks():
return dict(
write = sys.stdout.write,
writeln = print,
- debug = logging.debug,
- info = logging.info,
- warning = logging.warning,
- error = logging.error,
- critical = logging.critical,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
progress = progress,
mprint = mprint
)
--
cgit v1.2.3
From 204805157912aebb92967241850453f07729e2f6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 12:00:01 +0300
Subject: Warning to stderr
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 5182e73c..66c952aa 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -59,9 +59,9 @@ try:
from wqflask.my_pylmm.pyLMM import chunks
from gn2 import uses
except ImportError:
- print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
from standalone import uses
+ sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
#np.seterr('raise')
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index 5b93ae0d..aa6b473d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -29,7 +29,7 @@ try:
from wqflask.my_pylmm.pyLMM import chunks
from gn2 import uses
except ImportError:
- print("WARNING: Standalone version missing the Genenetwork2 environment\n")
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
has_gn2=False
from standalone import uses
pass
--
cgit v1.2.3
From f1056b9f4128fb91fbaf738914395697aa485b2e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 12:09:21 +0300
Subject: Warning to stderr
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 ++++--
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 66c952aa..95272818 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -57,10 +57,11 @@ import gwas
# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses
+ from gn2 import uses, set_progress_storage
except ImportError:
has_gn2=False
- from standalone import uses
+ import standalone as handlers
+ from standalone import uses, set_progress_storage
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
@@ -816,6 +817,7 @@ def gn2_redis(key,species,new_code=True):
params = json.loads(json_params)
tempdata = temp_data.TempData(params['temp_uuid'])
+ set_progress_storage(tempdata)
print('kinship', np.array(params['kinship_matrix']))
print('pheno', np.array(params['pheno_vector']))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index 538007f1..e20d4092 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -17,7 +17,12 @@ logger = logging.getLogger('lmm2')
logging.basicConfig(level=logging.DEBUG)
np.set_printoptions(precision=3,suppress=True)
+def set_progress_storage(location):
+ global storage
+ storage = location
+
def progress(location, count, total):
+ storage['percentage'] = round(count*100.0)/total)
logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
def mprint(msg,data):
--
cgit v1.2.3
From 6b8321d77e915dc5aec0c272c1cb84c2af3e6191 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 12:17:59 +0300
Subject: Replace progress meter
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 7 ++++++-
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++-----------
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 2 +-
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index c71b9f22..f8033ac5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -12,11 +12,16 @@ import logging
# logging.basicConfig(level=logging.DEBUG)
# np.set_printoptions()
+def set_progress_storage(location):
+ global storage
+ storage = location
+
def progress(location, count, total):
"""
Progress update
"""
- logging.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+ storage.store("percent_complete",round(count*100.0)/total)
+ logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
def mprint(msg,data):
"""
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 95272818..eab7d91d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -65,6 +65,8 @@ except ImportError:
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
+progress,info = uses('progress','info')
+
#np.seterr('raise')
#def run_human(pheno_vector,
@@ -171,10 +173,7 @@ def run_human(pheno_vector,
#if count > 1000:
# break
count += 1
-
- percent_complete = (float(count) / total_snps) * 100
- #print("percent_complete: ", percent_complete)
- tempdata.store("percent_complete", percent_complete)
+ progress("human",count,total_snps)
#with Bench("actual association"):
ps, ts = human_association(snp,
@@ -431,10 +430,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
continue
keep.append(counter)
genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
-
- percent_complete = int(round((counter/m)*45))
- if temp_data != None:
- temp_data.store("percent_complete", percent_complete)
+ progress('kinship_old',counter,m)
genotype_matrix = genotype_matrix[:,keep]
print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
@@ -539,9 +535,8 @@ def GWAS(pheno_vector,
lmm_ob.fit(X=x)
ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
- percent_complete = 45 + int(round((counter/m)*55))
- temp_data.store("percent_complete", percent_complete)
-
+ progress("gwas_old",counter,m)
+
p_values.append(ps)
t_statistics.append(ts)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index e20d4092..b3d480c3 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -22,7 +22,7 @@ def set_progress_storage(location):
storage = location
def progress(location, count, total):
- storage['percentage'] = round(count*100.0)/total)
+ storage.store("percent_complete",round(count*100.0)/total)
logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
def mprint(msg,data):
--
cgit v1.2.3
From de84be30502af4be014fa4c0a2e7b54e51cff6f6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 12:36:03 +0300
Subject: Progress handler
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 19 ++++++++++++++-----
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 15 +++++++++++++--
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index f8033ac5..b487ea25 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -12,17 +12,26 @@ import logging
# logging.basicConfig(level=logging.DEBUG)
# np.set_printoptions()
+last_location = None
+last_progress = 0
+
def set_progress_storage(location):
global storage
storage = location
def progress(location, count, total):
- """
- Progress update
- """
- storage.store("percent_complete",round(count*100.0)/total)
- logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+ global last_location
+ global last_progress
+
+ perc = round(count*100.0/total)
+ # print(last_progress,";",perc)
+ if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5):
+ storage.store("percent_complete",perc)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ last_location = location
+ last_progress = perc
+
def mprint(msg,data):
"""
Array/matrix print function
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index b3d480c3..7cc3e871 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -17,13 +17,24 @@ logger = logging.getLogger('lmm2')
logging.basicConfig(level=logging.DEBUG)
np.set_printoptions(precision=3,suppress=True)
+last_location = None
+last_progress = 0
+
def set_progress_storage(location):
global storage
storage = location
def progress(location, count, total):
- storage.store("percent_complete",round(count*100.0)/total)
- logger.info("Progress: %s %d%%" % (location,round(count*100.0/total)))
+ global last_location
+ global last_progress
+
+ perc = round(count*100.0/total)
+ # print(last_progress,";",perc)
+ if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5):
+ storage.store("percent_complete",perc)
+ logger.info("Progress: %s %d%%" % (location,perc))
+ last_location = location
+ last_progress = perc
def mprint(msg,data):
"""
--
cgit v1.2.3
From f0653da318cac9736777495e40de6853227904ec Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 13:21:12 +0300
Subject: Cleaned up gwas.py to use uses and moved Redis call back into lmm.py
---
wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 70 +++++++++++-----------------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 10 ++--
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 31 +++++++-----
3 files changed, 52 insertions(+), 59 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index b901c0e2..8b344a90 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -19,7 +19,6 @@
import pdb
import time
-import sys
# from utility import temp_data
import lmm2
@@ -36,12 +35,10 @@ def formatResult(id,beta,betaSD,ts,ps):
return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
- # print("COMPUTE SNP",j,snp_ids,"\n")
result = []
for snp_id in snp_ids:
snp,id = snp_id
x = snp.reshape((n,1)) # all the SNPs
- # print "X=",x
# if refit:
# L.fit(X=snp,REML=REML)
ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
@@ -51,32 +48,28 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
q = compute_snp.q
q.put([j,result])
return j
- # PS.append(ps)
- # TS.append(ts)
- # return len(result)
- # compute.q.put(result)
- # return None
def f_init(q):
compute_snp.q = q
-def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
+def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True):
"""
- Execute a GWAS. The G matrix should be n inds (cols) x m snps (rows)
+ GWAS. The G matrix should be n inds (cols) x m snps (rows)
"""
+ progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
matrix_initialize()
cpu_num = mp.cpu_count()
numThreads = None # for now use all available threads
kfile2 = False
reml = restricted_max_likelihood
- sys.stderr.write(str(G.shape)+"\n")
+ mprint("G",G)
n = G.shape[1] # inds
inds = n
m = G.shape[0] # snps
snps = m
- sys.stderr.write(str(m)+" SNPs\n")
- # print "***** GWAS: G",G.shape,G
+ info("%s SNPs",snps)
assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
# CREATE LMM object for association
@@ -85,19 +78,10 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
if not refit:
- if verbose: sys.stderr.write("Computing fit for null model\n")
+ info("Computing fit for null model")
lmm2.fit() # follow GN model in run_other
- if verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (lmm2.optH,lmm2.optSigma))
-
- # outFile = "test.out"
- # out = open(outFile,'w')
- out = sys.stderr
-
- def outputResult(id,beta,betaSD,ts,ps):
- out.write(formatResult(id,beta,betaSD,ts,ps))
- def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
- # printOutHead()
+ info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
+
res = []
# Set up the pool
@@ -106,26 +90,24 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
p = mp.Pool(numThreads, f_init, [q])
collect = []
- # Buffers for pvalues and t-stats
- # PS = []
- # TS = []
count = 0
job = 0
jobs_running = 0
+ jobs_completed = 0
for snp in G:
snp_id = (snp,'SNPID')
count += 1
if count % 1000 == 0:
job += 1
- if verbose:
- sys.stderr.write("Job %d At SNP %d\n" % (job,count))
+ debug("Job %d At SNP %d" % (job,count))
if numThreads == 1:
- print "Running on 1 THREAD"
+ debug("Running on 1 THREAD")
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
else:
p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
@@ -134,8 +116,9 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
while jobs_running > cpu_num:
try:
j,lst = q.get_nowait()
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
jobs_running -= 1
except Queue.Empty:
@@ -150,24 +133,23 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
if numThreads==1 or count<1000 or len(collect)>0:
job += 1
- print "Collect final batch size %i job %i @%i: " % (len(collect), job, count)
+ debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
compute_snp(job,n,collect,lmm2,reml,q)
collect = []
j,lst = q.get()
res.append((j,lst))
- print "count=",count," running=",jobs_running," collect=",len(collect)
+ debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
for job in range(jobs_running):
j,lst = q.get(True,15) # time out
- if verbose:
- sys.stderr.write("Job "+str(j)+" finished\n")
+ debug("Job "+str(j)+" finished")
+ jobs_completed += 1
+ progress("GWAS2",jobs_completed,snps/1000)
res.append((j,lst))
- print "Before sort",[res1[0] for res1 in res]
+ mprint("Before sort",[res1[0] for res1 in res])
res = sorted(res,key=lambda x: x[0])
- # if verbose:
- # print "res=",res[0][0:10]
- print "After sort",[res1[0] for res1 in res]
- print [len(res1[1]) for res1 in res]
+ mprint("After sort",[res1[0] for res1 in res])
+ info([len(res1[1]) for res1 in res])
ts = [item[0] for j,res1 in res for item in res1]
ps = [item[1] for j,res1 in res for item in res1]
return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index eab7d91d..1e00002a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -57,11 +57,11 @@ import gwas
# ---- A trick to decide on the environment:
try:
from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses, set_progress_storage
+ from gn2 import uses, progress_set_func
except ImportError:
has_gn2=False
import standalone as handlers
- from standalone import uses, set_progress_storage
+ from standalone import uses, progress_set_func
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
@@ -348,6 +348,7 @@ def run_other_new(pheno_vector,
t_stats, p_values = gwas.gwas(Y,
G.T,
K,
+ uses,
restricted_max_likelihood=True,
refit=False,verbose=True)
Bench().report()
@@ -812,7 +813,10 @@ def gn2_redis(key,species,new_code=True):
params = json.loads(json_params)
tempdata = temp_data.TempData(params['temp_uuid'])
- set_progress_storage(tempdata)
+ def update_tempdata(loc,i,total):
+ tempdata.store("percent_complete",round(i*100.0/total))
+ debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
+ progress_set_func(update_tempdata)
print('kinship', np.array(params['kinship_matrix']))
print('pheno', np.array(params['pheno_vector']))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index 7cc3e871..36bf8fd5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -17,24 +17,31 @@ logger = logging.getLogger('lmm2')
logging.basicConfig(level=logging.DEBUG)
np.set_printoptions(precision=3,suppress=True)
-last_location = None
-last_progress = 0
+progress_location = None
+progress_current = None
+progress_prev_perc = None
-def set_progress_storage(location):
- global storage
- storage = location
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
def progress(location, count, total):
- global last_location
- global last_progress
+ global progress_location
+ global progress_prev_perc
perc = round(count*100.0/total)
- # print(last_progress,";",perc)
- if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5):
- storage.store("percent_complete",perc)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
logger.info("Progress: %s %d%%" % (location,perc))
- last_location = location
- last_progress = perc
+ progress_location = location
+ progress_prev_perc = perc
def mprint(msg,data):
"""
--
cgit v1.2.3
From 9b8a958494364fc6470cfe93f90d179e0bc7a787 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 13:23:06 +0300
Subject: Aligned gn2 handlers with standalone
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 32 +++++++++++++++++++-------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index b487ea25..f30cf1e6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -12,25 +12,31 @@ import logging
# logging.basicConfig(level=logging.DEBUG)
# np.set_printoptions()
-last_location = None
-last_progress = 0
+progress_location = None
+progress_current = None
+progress_prev_perc = None
-def set_progress_storage(location):
- global storage
- storage = location
+def progress_default_func(location,count,total):
+ global progress_current
+ value = round(count*100.0/total)
+ progress_current = value
+
+progress_func = progress_default_func
+
+def progress_set_func(func):
+ global progress_func
+ progress_func = func
def progress(location, count, total):
- global last_location
- global last_progress
+ global progress_location
+ global progress_prev_perc
perc = round(count*100.0/total)
- # print(last_progress,";",perc)
- if perc != last_progress and (location != last_location or perc > 98 or perc > last_progress + 5):
- storage.store("percent_complete",perc)
+ if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
+ progress_func(location, count, total)
logger.info("Progress: %s %d%%" % (location,perc))
- last_location = location
- last_progress = perc
-
+ progress_location = location
+ progress_prev_perc = perc
def mprint(msg,data):
"""
--
cgit v1.2.3
From 130afd633fc50cbccaf2d12e5e643eb5f8b98c6f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 13:32:21 +0300
Subject: Add uses debug
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 1e00002a..e0fc8305 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -65,7 +65,7 @@ except ImportError:
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
-progress,info = uses('progress','info')
+progress,debug,info = uses('progress','debug','info')
#np.seterr('raise')
--
cgit v1.2.3
From 803c3c56c37e448fd52fa102fdb6eef8431154cc Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Wed, 18 Mar 2015 13:36:35 +0300
Subject: Tagging 0.50-gn2-pre2
---
wqflask/wqflask/my_pylmm/README.md | 35 +++++++++++++++++-------------
wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 +-
2 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index f6b0e72d..a84b5be2 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,21 +1,26 @@
-# RELEASE NOTES
+# Genenetwork2/pylmm RELEASE NOTES
-## 0.50-gn2-pre1 release
+## 0.50-gn2-pre2
-This is the first test release of multi-core pylmm into GN2. Both
-kinship calculation and GWAS have been made multi-threaded by
-introducing the Python multiprocessing module. Note that only
-run_other has been updated to use the new routines (so human is still
-handled the old way). I have taken care that we can still run both
-old-style and new-style LMM (through passing the 'new_code'
-boolean). This could be an option in the web server for users to
-select and test for any unexpected differences (of which there should
-be none, naturally ;).
+- Added abstractions for progress meter and info/debug statements;
+ Redis percent_complete is now updated through a callback function
-The current version can handle missing phenotypes, but as they are
-removed there is no way for GN2 to know what SNPs the P-values belong
-to. A future version will pass a SNP index to allow for missing
-phenotypes.
+## 0.50-gn2-pre1 (release)
+
+- This is the first test release of multi-core pylmm into GN2. Both
+ kinship calculation and GWAS have been made multi-threaded by
+ introducing the Python multiprocessing module. Note that only
+ run_other has been updated to use the new routines (so human is
+ still handled the old way). I have taken care that we can still run
+ both old-style and new-style LMM (through passing the 'new_code'
+ boolean). This could be an option in the web server for users to
+ select and test for any unexpected differences (of which there
+ should be none, naturally ;).
+
+- The current version can handle missing phenotypes, but as they are
+ removed there is no way for GN2 to know what SNPs the P-values
+ belong to. A future version will pass a SNP index to allow for
+ missing phenotypes.
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index c40c3221..6ab60d02 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre1"
+PYLMM_VERSION="0.50-gn2-pre2"
--
cgit v1.2.3
From 8e9d7cde41800766fec835ca0c4a55c6327e05c8 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Mar 2015 11:47:10 +0300
Subject: Trying to get kinship_old back in lmm1
---
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 14 +++++++-----
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 35 ++++++++++++++---------------
wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++--
4 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 62f7be47..be12417e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,17 +28,21 @@ import time
from optmatrix import matrix_initialize, matrixMultT
-def kinship_full(G):
+def kinship_full(G,uses):
"""
Calculate the Kinship matrix using a full dot multiplication
"""
- print G.shape
+ info,mprint = uses('info','mprint')
+
+ # mprint("kinship_full G",G)
m = G.shape[0] # snps
n = G.shape[1] # inds
- sys.stderr.write(str(m)+" SNPs\n")
- assert m>n, "n should be larger than m (snps>inds)"
- m = np.dot(G.T,G)
+ info("%d SNPs",m)
+ assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
+ # m = np.dot(G.T,G)
+ m = matrixMultT(G.T)
m = m/G.shape[0]
+ # mprint("kinship_full K",m)
return m
def compute_W(job,G,n,snps,compute_size):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index e0fc8305..c040e3c2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -65,7 +65,7 @@ except ImportError:
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
-progress,debug,info = uses('progress','debug','info')
+progress,mprint,debug,info = uses('progress','mprint','debug','info')
#np.seterr('raise')
@@ -277,7 +277,7 @@ def run_other_old(pheno_vector,
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -331,7 +331,7 @@ def run_other_new(pheno_vector,
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
with Bench("Calculate Kinship"):
- K,G = calculate_kinship(G, tempdata)
+ K,G = calculate_kinship_new(G, tempdata)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -393,9 +393,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
- print("call genotype.normalize")
+ info("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
- print("call calculate_kinship_new")
+ info("call calculate_kinship_new")
return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME)
def calculate_kinship_old(genotype_matrix, temp_data=None):
@@ -406,11 +406,11 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
normalizes the resulting vectors and returns the RRM matrix.
"""
- print("call calculate_kinship_old")
+ info("call calculate_kinship_old")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- print("genotype 2D matrix n (inds) is:", n)
- print("genotype 2D matrix m (snps) is:", m)
+ info("genotype 2D matrix n (inds) is: %d" % (n))
+ info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
for counter in range(m):
@@ -431,14 +431,13 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
continue
keep.append(counter)
genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
- progress('kinship_old',counter,m)
+ progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
- kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
- return kinship_matrix,genotype_matrix
-
-calculate_kinship = calculate_kinship_new # alias
+ mprint("After kinship (old) genotype_matrix: ", genotype_matrix)
+ # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
+ # return kinship_matrix,genotype_matrix
+ return kinship_full(genotype_matrix.T,uses),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
@@ -464,9 +463,9 @@ def GWAS(pheno_vector,
refit - refit the variance component for each SNP
"""
- if kinship_eigen_vals == None:
+ if kinship_eigen_vals is None:
kinship_eigen_vals = []
- if kinship_eigen_vectors == None:
+ if kinship_eigen_vectors is None:
kinship_eigen_vectors = []
n = genotype_matrix.shape[0]
@@ -570,7 +569,7 @@ class LMM:
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
- if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+ if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
#x = Y != -9
@@ -663,7 +662,7 @@ class LMM:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None:
+ if X is None:
X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 682ba371..4c8175f7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -24,7 +24,7 @@ def remove_missing(y,g,verbose=False):
Remove missing data from matrices, make sure the genotype data has
individuals as rows
"""
- assert(y!=None)
+ assert(y is not None)
assert(y.shape[0] == g.shape[0])
y1 = y
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index e3e8659c..6a38da56 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -134,7 +134,7 @@ elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
- if y != None and options.remove_missing_phenotypes:
+ if y is not None and options.remove_missing_phenotypes:
gnt = np.array(g).T
Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
G = g.T
@@ -165,7 +165,7 @@ elif cmd == 'redis':
assert p1==0.0897, "p1=%f" % p1
assert p2==0.0405, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
- assert round(sum(ps)) == 4070
+ assert int(sum(ps)) == 4070
assert len(ps) == 8000
elif cmd == 'kinship':
G = g
--
cgit v1.2.3
From 38594c7781b587a24be14b9631a73662ee3fdc2b Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Mar 2015 12:18:03 +0300
Subject: Fall back on calculate_kinship_new again
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index c040e3c2..a649029c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -277,7 +277,7 @@ def run_other_old(pheno_vector,
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6a38da56..88e2a033 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -184,7 +184,7 @@ elif cmd == 'kinship':
gnt = None
if options.test_kinship:
- K = kinship_full(np.copy(G))
+ K = kinship_full(np.copy(G),uses)
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
--
cgit v1.2.3
From 490e0919b2757f6815a7e6c7f0cb08e55e1cd02e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 30 Mar 2015 10:32:11 +0200
Subject: Percentage complete: Add method description
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8844118f..200424ba 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -814,6 +814,9 @@ def gn2_redis(key,species,new_code=True):
tempdata = temp_data.TempData(params['temp_uuid'])
def update_tempdata(loc,i,total):
+ """
+ This is the single method that updates Redis for percentage complete!
+ """
tempdata.store("percent_complete",round(i*100.0/total))
debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
progress_set_func(update_tempdata)
--
cgit v1.2.3
From 6fc112431c0edb0ecae6cd5fa45716c349094a7f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 30 Mar 2015 11:49:43 +0200
Subject: Use of is vs == when testing None
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++--
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 200424ba..f0473f99 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -278,7 +278,7 @@ def run_other_old(pheno_vector,
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -880,7 +880,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
genotype_matrix = geno.tolist(),
- kinship_matrix= k,
+ kinship_matrix = k,
restricted_max_likelihood = True,
refit = False,
temp_uuid = "testrun_temp_uuid",
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index aa6b473d..d67e1205 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -85,7 +85,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
print("genotype matrix n is:", n)
print("genotype matrix m is:", m)
- if X0 == None:
+ if X0 is None:
X0 = np.ones((n,1))
# Remove missing values in Y and adjust associated parameters
@@ -173,7 +173,7 @@ class LMM2:
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
- if X0 == None:
+ if X0 is None:
X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
@@ -260,7 +260,7 @@ class LMM2:
REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
elif stack:
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
X = self.X0t_stack
@@ -316,7 +316,7 @@ class LMM2:
Given this optimum, the function computes the LL and associated ML solutions.
"""
- if X == None: X = self.X0t
+ if X is None: X = self.X0t
else:
#X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
@@ -340,7 +340,7 @@ class LMM2:
def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
"""
Calculates association statitics for the SNPs encoded in the vector X of size n.
- If h == None, the optimal h stored in optH is used.
+ If h is None, the optimal h stored in optH is used.
"""
if False:
@@ -358,7 +358,7 @@ class LMM2:
self.X0t_stack[:,(self.q)] = m
X = self.X0t_stack
- if h == None: h = self.optH
+ if h is None: h = self.optH
L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
q = len(beta)
--
cgit v1.2.3
From 8b88be4f48baa6cd0cc3c37a851144d5b1dc24af Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 30 Mar 2015 13:01:22 +0200
Subject: Refactoring genotype normalization
---
wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 19 ++++++++++---------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9 +++++----
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 ++
3 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index 315fd824..49f32e3a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -37,14 +37,15 @@ def normalize(ind_g):
Run for every SNP list (for one individual) and return
normalized SNP genotype values with missing data filled in
"""
- g1 = np.copy(ind_g) # avoid side effects
- x = True - np.isnan(ind_g) # Matrix of True/False
- m = ind_g[x].mean() # Global mean value
- s = np.sqrt(ind_g[x].var()) # Global stddev
- g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data
- if s == 0:
- g1 = g1 - m # Subtract the mean
+ g = np.copy(ind_g) # copy to avoid side effects
+ missing = np.isnan(g)
+ values = g[True - missing]
+ mean = values.mean() # Global mean value
+ stddev = np.sqrt(values.var()) # Global stddev
+ g[missing] = mean # Plug-in mean values for missing data
+ if stddev == 0:
+ g = g - mean # Subtract the mean
else:
- g1 = (g1 - m) / s # Normalize the deviation
- return g1
+ g = (g - mean) / stddev # Normalize the deviation
+ return g
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index f0473f99..035f31e8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -414,6 +414,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
+ mprint("G (before old normalize)",genotype_matrix)
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
#Checks if any values in column are not numbers
@@ -435,10 +436,10 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- mprint("After kinship (old) genotype_matrix: ", genotype_matrix)
- # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
- # return kinship_matrix,genotype_matrix
- return kinship_full(genotype_matrix.T,uses),genotype_matrix
+ mprint("G (after old normalize)",genotype_matrix.T)
+ kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
+ return kinship_matrix,genotype_matrix
+ # return kinship_full(genotype_matrix.T,uses),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 88e2a033..fc7a4b9d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -106,6 +106,8 @@ if options.geno:
if cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
Y = y
G = g
print "Original G",G.shape, "\n", G
--
cgit v1.2.3
From 153317412a090d5b17bc176ff7da2e61e6ec4f2c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 2 Apr 2015 09:55:42 +0200
Subject: Make the new version of genotype normalization default
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 15 ++++++++++-----
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 +++--
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 ++--
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 5 +++++
4 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index f30cf1e6..7bceb089 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -45,15 +45,20 @@ def mprint(msg,data):
m = np.array(data)
print(msg,m.shape,"=\n",m)
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
def callbacks():
return dict(
write = sys.stdout.write,
writeln = print,
- debug = logging.debug,
- info = logging.info,
- warning = logging.warning,
- error = logging.error,
- critical = logging.critical,
+ debug = logger.debug,
+ info = logger.info,
+ warning = logger.warning,
+ error = logger.error,
+ critical = logger.critical,
+ fatal = fatal,
progress = progress,
mprint = mprint
)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 035f31e8..8be3fc6f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -66,7 +66,7 @@ except ImportError:
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
pass
-progress,mprint,debug,info = uses('progress','mprint','debug','info')
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
#np.seterr('raise')
@@ -278,7 +278,7 @@ def run_other_old(pheno_vector,
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship_old(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -408,6 +408,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
"""
info("call calculate_kinship_old")
+ fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
info("genotype 2D matrix n (inds) is: %d" % (n))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index fc7a4b9d..ef0bdd7e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_old
+from lmm import gn2_load_redis, calculate_kinship_new
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -190,7 +190,7 @@ elif cmd == 'kinship':
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
+ K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None)
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
index 36bf8fd5..40b2021d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
@@ -57,6 +57,10 @@ def mprint(msg,data):
m[-2][0:3]," ... ",m[-2][-3:],"\n ",
m[-1][0:3]," ... ",m[-1][-3:],"]")
+def fatal(msg):
+ logger.critical(msg)
+ raise Exception(msg)
+
def callbacks():
return dict(
write = sys.stdout.write,
@@ -66,6 +70,7 @@ def callbacks():
warning = logger.warning,
error = logger.error,
critical = logger.critical,
+ fatal = fatal,
progress = progress,
mprint = mprint
)
--
cgit v1.2.3
From 0f132d0cc4a77e69ab593fd9c8a2d5218d083ed7 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 2 Apr 2015 10:15:49 +0200
Subject: Release 0.50-gn2
---
wqflask/wqflask/my_pylmm/README.md | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index a84b5be2..4845ec03 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,11 +1,15 @@
# Genenetwork2/pylmm RELEASE NOTES
-## 0.50-gn2-pre2
+## 0.50-gn2 (April 2nd, 2015)
+
+- Replaced the GN2 genotype normalization
+
+## 0.50-gn2-pre2 (March 18, 2015)
- Added abstractions for progress meter and info/debug statements;
Redis perc_complete is now updated through a lambda
-## 0.50-gn2-pre1 (release)
+## 0.50-gn2-pre1 (release, March 17, 2015)
- This is the first test release of multi-core pylmm into GN2. Both
kinship calculation and GWAS have been made multi-threaded by
--
cgit v1.2.3
From 43295e57621e9a08ca4cb90e95cc14a87e0d8b5e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 2 Apr 2015 12:04:14 +0200
Subject: Create test geno iterator
---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 9 +++++++--
wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 25 +++++++++++++++++++++++++
2 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ef0bdd7e..5a4bd268 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -99,11 +99,16 @@ if options.pheno:
y = tsvreader.pheno(options.pheno)
print y.shape
-if options.geno:
+if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'redis_new':
+if cmd == 'iterator':
+ print "ITERATE over SNPs"
+ def pretty(snpid,values):
+ print snpid,values
+ print tsvreader.geno_iter(options.geno,pretty)
+elif cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
if options.remove_missing_phenotypes:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..7fe75e3f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -74,3 +74,28 @@ def geno(fn):
G = np.array(G1)
return G
+def geno(fn):
+ G1 = []
+ def append(id,values):
+ G1.append(values) # <--- slow
+ geno_iter(fn,append)
+ G = np.array(G1)
+ return G
+
+def geno_iter(fn,func):
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ func(id,gs2)
--
cgit v1.2.3
From 5151bc389aa98415da9f4d49b3c279ed1380ea7d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 2 Apr 2015 12:14:43 +0200
Subject: Prepare iterator
---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 5a4bd268..036bf7d5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -104,10 +104,17 @@ if options.geno and cmd != 'iterator':
print g.shape
if cmd == 'iterator':
- print "ITERATE over SNPs"
- def pretty(snpid,values):
- print snpid,values
- print tsvreader.geno_iter(options.geno,pretty)
+ def snp_iterator(func):
+ tsvreader.geno_iter(options.geno,func)
+
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator)
+ print np.array(ps)
+ print len(ps),sum(ps)
+ # Test results
+ p1 = round(ps[0],4)
+ p2 = round(ps[-1],4)
elif cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
--
cgit v1.2.3
From b9c79ef58ff6ec4da3e65290ea802c783bb17742 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 2 Apr 2015 13:40:42 +0200
Subject: Passing in an iterator
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 33 ++++++++++++++++++++++++++++-
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 6 ++----
wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 26 ++++++++++++++++++++---
3 files changed, 57 insertions(+), 8 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 8be3fc6f..07b55726 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -875,6 +875,9 @@ def gn2_main():
gn2_redis(key,species)
def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
+ """
+ This function emulates current GN2 behaviour by pre-loading Redis
+ """
print("Loading Redis from parsed data")
if kinship == None:
k = None
@@ -896,7 +899,35 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
Redis.expire(key, 60*60)
return gn2_redis(key,species,new_code)
-
+
+def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
+ """
+ This function emulates GN2 behaviour by pre-loading Redis with
+ a SNP iterator
+ """
+ print("Loading Redis using a SNP iterator")
+ if kinship == None:
+ k = None
+ else:
+ k = kinship.tolist()
+ params = dict(pheno_vector = pheno.tolist(),
+ genotype_matrix = geno_iterator.tolist(),
+ kinship_matrix = k,
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = "testrun_temp_uuid",
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat(),
+ )
+
+ json_params = json.dumps(params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
+
+ return gn2_redis(key,species,new_code)
+
+
if __name__ == '__main__':
print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
if has_gn2:
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 036bf7d5..3b0672b4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship_new
+from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -104,11 +104,9 @@ if options.geno and cmd != 'iterator':
print g.shape
if cmd == 'iterator':
- def snp_iterator(func):
- tsvreader.geno_iter(options.geno,func)
-
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ snp_iterator = tsvreader.geno_iter(options.geno)
ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator)
print np.array(ps)
print len(ps),sum(ps)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index 7fe75e3f..27daf43f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -76,13 +76,12 @@ def geno(fn):
def geno(fn):
G1 = []
- def append(id,values):
+ for id,values in geno_iter(fn):
G1.append(values) # <--- slow
- geno_iter(fn,append)
G = np.array(G1)
return G
-def geno_iter(fn,func):
+def geno_callback(fn,func):
hab_mapper = {'A':0,'H':1,'B':2,'-':3}
pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
@@ -99,3 +98,24 @@ def geno_iter(fn,func):
gs = list(row[1])
gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
func(id,gs2)
+
+def geno_iter(fn):
+ """
+ Yield a tuple of snpid and values
+ """
+ hab_mapper = {'A':0,'H':1,'B':2,'-':3}
+ pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
+
+ print fn
+ with open(fn,'r') as tsvin:
+ assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsvin.readline()
+ tsv = csv.reader(tsvin, delimiter='\t')
+ for row in tsv:
+ id = row[0]
+ gs = list(row[1])
+ gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
+ yield (id,gs2)
--
cgit v1.2.3
From 146b4a45c28b7d3ba4bf982cfaf93eda2e71d1ea Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 10:58:53 +0200
Subject: Refactoring GN2 interface
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 34 +++++++++++++++++++---------------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 07b55726..6e22e6c9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -805,7 +805,7 @@ class LMM:
pl.title(title)
-def gn2_redis(key,species,new_code=True):
+def gwas_using_redis(key,species,new_code=True):
"""
Invoke pylmm using Redis as a container. new_code runs the new
version
@@ -861,18 +861,6 @@ def gn2_redis(key,species,new_code=True):
Redis.expire(results_key, 60*60)
return ps, ts
-# This is the main function used by Genenetwork2 (with environment)
-def gn2_main():
- parser = argparse.ArgumentParser(description='Run pyLMM')
- parser.add_argument('-k', '--key')
- parser.add_argument('-s', '--species')
-
- opts = parser.parse_args()
-
- key = opts.key
- species = opts.species
-
- gn2_redis(key,species)
def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
"""
@@ -898,7 +886,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- return gn2_redis(key,species,new_code)
+ return gwas_using_redis(key,species,new_code)
def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
"""
@@ -925,7 +913,23 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- return gn2_redis(key,species,new_code)
+ return gwas_using_redis(key,species,new_code)
+
+# This is the main function used by Genenetwork2 (with environment)
+#
+# Note that this calling route will become OBSOLETE (we should use runlmm.py
+# instead)
+def gn2_main():
+ parser = argparse.ArgumentParser(description='Run pyLMM')
+ parser.add_argument('-k', '--key')
+ parser.add_argument('-s', '--species')
+
+ opts = parser.parse_args()
+
+ key = opts.key
+ species = opts.species
+
+ gwas_using_redis(key,species)
if __name__ == '__main__':
--
cgit v1.2.3
From fabbcac393627badf0542377fc22325ae7e96f3d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 11:15:29 +0200
Subject: Passing in an iterator
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 6e22e6c9..b8650938 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -891,15 +891,21 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
"""
This function emulates GN2 behaviour by pre-loading Redis with
- a SNP iterator
+ a SNP iterator, for this it sets a key for every genotype (SNP)
"""
print("Loading Redis using a SNP iterator")
+ for i,genotypes in enumerate(geno_iterator):
+ gkey = key+'_geno_'+str(i)
+ Redis.set(gkey, genotypes)
+ Redis.expire(gkey, 60*60)
+
if kinship == None:
k = None
else:
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = geno_iterator.tolist(),
+ genotype_matrix = "iterator",
+ genotypes = i,
kinship_matrix = k,
restricted_max_likelihood = True,
refit = False,
@@ -913,7 +919,7 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- return gwas_using_redis(key,species,new_code)
+ return gwas_using_redis(key,species)
# This is the main function used by Genenetwork2 (with environment)
#
--
cgit v1.2.3
From 7d13eec7f67578aa75d8430bb5ed74a4dd825b51 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 12:10:55 +0200
Subject: Refactoring Redis use to one function
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 ++++++++++++++++++--------------
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 4 +-
2 files changed, 38 insertions(+), 31 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index b8650938..88ca6a7f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -805,6 +805,36 @@ class LMM:
pl.title(title)
+def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code):
+ """
+ Invoke pylmm using a genotype (SNP) iterator
+ """
+ info("gwas_without_redis")
+ print('pheno', y)
+
+ if species == "human" :
+ print('kinship', k )
+ ps, ts = run_human(pheno_vector = y,
+ covariate_matrix = cov,
+ plink_input_file = inputfn,
+ kinship_matrix = k,
+ refit = refit, tempdata=tempdata)
+ else:
+ print('geno', geno.shape, geno)
+
+ if new_code:
+ ps, ts = run_other_new(pheno_vector = y,
+ genotype_matrix = geno,
+ restricted_max_likelihood = reml,
+ refit = refit,
+ tempdata = tempdata)
+ else:
+ ps, ts = run_other_old(pheno_vector = y,
+ genotype_matrix = geno,
+ restricted_max_likelihood = reml,
+ refit = refit,
+ tempdata = tempdata)
+
def gwas_using_redis(key,species,new_code=True):
"""
Invoke pylmm using Redis as a container. new_code runs the new
@@ -823,33 +853,7 @@ def gwas_using_redis(key,species,new_code=True):
debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
progress_set_func(update_tempdata)
-
- print('pheno', np.array(params['pheno_vector']))
-
- if species == "human" :
- print('kinship', np.array(params['kinship_matrix']))
- ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
- covariate_matrix = np.array(params['covariate_matrix']),
- plink_input_file = params['input_file_name'],
- kinship_matrix = np.array(params['kinship_matrix']),
- refit = params['refit'],
- tempdata = tempdata)
- else:
- geno = np.array(params['genotype_matrix'])
- print('geno', geno.shape, geno)
-
- if new_code:
- ps, ts = run_other_new(pheno_vector = np.array(params['pheno_vector']),
- genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
- else:
- ps, ts = run_other_old(pheno_vector = np.array(params['pheno_vector']),
- genotype_matrix = geno,
- restricted_max_likelihood = params['restricted_max_likelihood'],
- refit = params['refit'],
- tempdata = tempdata)
+ ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
results_key = "pylmm:results:" + params['temp_uuid']
@@ -874,6 +878,8 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
params = dict(pheno_vector = pheno.tolist(),
genotype_matrix = geno.tolist(),
kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
restricted_max_likelihood = True,
refit = False,
temp_uuid = "testrun_temp_uuid",
@@ -888,7 +894,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
return gwas_using_redis(key,species,new_code)
-def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
+def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
"""
This function emulates GN2 behaviour by pre-loading Redis with
a SNP iterator, for this it sets a key for every genotype (SNP)
@@ -907,6 +913,8 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
genotype_matrix = "iterator",
genotypes = i,
kinship_matrix = k,
+ covariate_matrix = None,
+ input_file_name = None,
restricted_max_likelihood = True,
refit = False,
temp_uuid = "testrun_temp_uuid",
@@ -918,7 +926,6 @@ def gn2_iter_redis(key,species,kinship,pheno,geno_iterator):
json_params = json.dumps(params)
Redis.set(key, json_params)
Redis.expire(key, 60*60)
-
return gwas_using_redis(key,species)
# This is the main function used by Genenetwork2 (with environment)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 3b0672b4..ab698e41 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, gn2_iter_redis, calculate_kinship_new
+from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -107,7 +107,7 @@ if cmd == 'iterator':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
snp_iterator = tsvreader.geno_iter(options.geno)
- ps, ts = gn2_iter_redis('testrun_iter','other',k,y,snp_iterator)
+ ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator)
print np.array(ps)
print len(ps),sum(ps)
# Test results
--
cgit v1.2.3
From fc6f0ef9fc8d2607e70c775c51ca55f50806cc7a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 13:13:09 +0200
Subject: temp_data is no longer passed around
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 50 +++++++++++++++-----------------
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 2 +-
2 files changed, 24 insertions(+), 28 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 88ca6a7f..9e25f56d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -81,8 +81,7 @@ def run_human(pheno_vector,
covariate_matrix,
plink_input_file,
kinship_matrix,
- refit=False,
- tempdata=None):
+ refit=False):
v = np.isnan(pheno_vector)
keep = True - v
@@ -262,23 +261,19 @@ def human_association(snp,
def run_other_old(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
print("Running the original LMM engine in run_other (old)")
print("REML=",restricted_max_likelihood," REFIT=",refit)
with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix, tempdata)
+ kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -297,24 +292,19 @@ def run_other_old(pheno_vector,
genotype_matrix,
kinship_matrix,
restricted_max_likelihood=True,
- refit=False,
- temp_data=tempdata)
+ refit=False)
Bench().report()
return p_values, t_stats
def run_other_new(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
- refit=False,
- tempdata=None # <---- can not be None
- ):
+ refit=False):
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
- temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
@@ -332,7 +322,7 @@ def run_other_new(pheno_vector,
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
with Bench("Calculate Kinship"):
- K,G = calculate_kinship_new(G, tempdata)
+ K,G = calculate_kinship_new(G)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -815,25 +805,24 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code):
if species == "human" :
print('kinship', k )
ps, ts = run_human(pheno_vector = y,
- covariate_matrix = cov,
- plink_input_file = inputfn,
- kinship_matrix = k,
- refit = refit, tempdata=tempdata)
+ covariate_matrix = cov,
+ plink_input_file = inputfn,
+ kinship_matrix = k,
+ refit = refit)
else:
print('geno', geno.shape, geno)
if new_code:
ps, ts = run_other_new(pheno_vector = y,
- genotype_matrix = geno,
- restricted_max_likelihood = reml,
- refit = refit,
- tempdata = tempdata)
+ genotype_matrix = geno,
+ restricted_max_likelihood = reml,
+ refit = refit)
else:
ps, ts = run_other_old(pheno_vector = y,
genotype_matrix = geno,
restricted_max_likelihood = reml,
- refit = refit,
- tempdata = tempdata)
+ refit = refit)
+ return ps,ts
def gwas_using_redis(key,species,new_code=True):
"""
@@ -853,7 +842,14 @@ def gwas_using_redis(key,species,new_code=True):
debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
progress_set_func(update_tempdata)
- ps,ts = gwas_without_redis(species,np.array(params['kinship_matrix']),np.array(params['pheno_vector']),np.array(params['genotype_matrix']),np.array(params['covariate_matrix']),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
+ def narray(key):
+ print(key)
+ v = params[key]
+ if v is not None:
+ v = np.array(v)
+ return v
+
+ ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
results_key = "pylmm:results:" + params['temp_uuid']
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ab698e41..3801529e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -200,7 +200,7 @@ elif cmd == 'kinship':
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2,G = calculate_kinship_new(np.copy(G).T,temp_data=None)
+ K2,G = calculate_kinship_new(np.copy(G).T)
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
--
cgit v1.2.3
From 3c738e6901ecc2ec0b4c1c667f20ebe3dc186f5c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 13:17:56 +0200
Subject: Rename gwas_using_redis to gwas_with_redis
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9e25f56d..ad6375e9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -795,7 +795,7 @@ class LMM:
pl.title(title)
-def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code):
+def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code):
"""
Invoke pylmm using a genotype (SNP) iterator
"""
@@ -824,10 +824,10 @@ def gwas_without_redis(species,k,y,geno,cov,reml,refit,inputfn,new_code):
refit = refit)
return ps,ts
-def gwas_using_redis(key,species,new_code=True):
+def gwas_with_redis(key,species,new_code=True):
"""
Invoke pylmm using Redis as a container. new_code runs the new
- version
+ version. All the Redis code goes here!
"""
json_params = Redis.get(key)
@@ -849,7 +849,7 @@ def gwas_using_redis(key,species,new_code=True):
v = np.array(v)
return v
- ps,ts = gwas_without_redis(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
+ ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
results_key = "pylmm:results:" + params['temp_uuid']
@@ -888,7 +888,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- return gwas_using_redis(key,species,new_code)
+ return gwas_with_redis(key,species,new_code)
def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
"""
@@ -922,7 +922,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
json_params = json.dumps(params)
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- return gwas_using_redis(key,species)
+ return gwas_with_redis(key,species)
# This is the main function used by Genenetwork2 (with environment)
#
@@ -938,7 +938,7 @@ def gn2_main():
key = opts.key
species = opts.species
- gwas_using_redis(key,species)
+ gwas_with_redis(key,species)
if __name__ == '__main__':
--
cgit v1.2.3
From e9865707ef447b8bc23eb8c872703f156936499d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 3 Apr 2015 14:03:32 +0200
Subject: Calculate n,m from the start; add test function to runlmm.py to
run without Redis (25% faster)
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 17 ++++++++++-------
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 17 +++++++++++++++--
2 files changed, 25 insertions(+), 9 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index ad6375e9..e51742c4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -795,9 +795,9 @@ class LMM:
pl.title(title)
-def run_gwas(species,k,y,geno,cov,reml,refit,inputfn,new_code):
+def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
"""
- Invoke pylmm using a genotype (SNP) iterator
+ Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
"""
info("gwas_without_redis")
print('pheno', y)
@@ -848,8 +848,11 @@ def gwas_with_redis(key,species,new_code=True):
if v is not None:
v = np.array(v)
return v
-
- ps,ts = run_gwas(species,narray('kinship_matrix'),narray('pheno_vector'),narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
+
+ y = narray('pheno_vector')
+ n = len(y)
+ m = params['num_genotypes']
+ ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
results_key = "pylmm:results:" + params['temp_uuid']
@@ -873,6 +876,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
genotype_matrix = geno.tolist(),
+ num_genotypes = geno.shape[1],
kinship_matrix = k,
covariate_matrix = None,
input_file_name = None,
@@ -881,8 +885,7 @@ def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
temp_uuid = "testrun_temp_uuid",
# meta data
- timestamp = datetime.datetime.now().isoformat(),
- )
+ timestamp = datetime.datetime.now().isoformat())
json_params = json.dumps(params)
Redis.set(key, json_params)
@@ -907,7 +910,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
genotype_matrix = "iterator",
- genotypes = i,
+ num_genotypes = i,
kinship_matrix = k,
covariate_matrix = None,
input_file_name = None,
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 3801529e..f095bb73 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new
+from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -103,7 +103,20 @@ if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'iterator':
+if cmd == 'run':
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
+ snp_iterator = tsvreader.geno_iter(options.geno)
+ n = len(y)
+ m = g.shape[1]
+ ps, ts = run_gwas('other',n,m,k,y,g.T)
+ print np.array(ps)
+ print len(ps),sum(ps)
+ # Test results
+ p1 = round(ps[0],4)
+ p2 = round(ps[-1],4)
+
+elif cmd == 'iterator':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
snp_iterator = tsvreader.geno_iter(options.geno)
--
cgit v1.2.3
From 163fe965bc1dcb807124c1c70c965d48bf2c2688 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 4 Apr 2015 09:52:24 +0200
Subject: Consolidate tests now that they all agree for redis, redis_new and run
---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 74 +++++++++++++-------------------
1 file changed, 30 insertions(+), 44 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index f095bb73..2d02e195 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -27,6 +27,8 @@ import genotype
import phenotype
from standalone import uses
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
usage = """
python runlmm.py [options] command
@@ -103,6 +105,29 @@ if options.geno and cmd != 'iterator':
g = tsvreader.geno(options.geno)
print g.shape
+def check_results(ps,ts):
+ print np.array(ps)
+ print len(ps),sum(ps)
+ # Test results
+ p1 = round(ps[0],4)
+ p2 = round(ps[-1],4)
+ # sys.stderr.write(options.geno+"\n")
+ if options.geno == 'data/small.geno':
+ info("Validating results for "+options.geno)
+ assert p1==0.0708, "p1=%f" % p1
+ assert p2==0.1417, "p2=%f" % p2
+ if options.geno == 'data/small_na.geno':
+ info("Validating results for "+options.geno)
+ assert p1==0.0897, "p1=%f" % p1
+ assert p2==0.0405, "p2=%f" % p2
+ if options.geno == 'data/test8000.geno':
+ info("Validating results for "+options.geno)
+ # assert p1==0.8984, "p1=%f" % p1
+ # assert p2==0.9621, "p2=%f" % p2
+ assert round(sum(ps)) == 4070
+ assert len(ps) == 8000
+ info("Run completed")
+
if cmd == 'run':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
@@ -110,22 +135,13 @@ if cmd == 'run':
n = len(y)
m = g.shape[1]
ps, ts = run_gwas('other',n,m,k,y,g.T)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
-
+ check_results(ps,ts)
elif cmd == 'iterator':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
snp_iterator = tsvreader.geno_iter(options.geno)
ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
+ check_results(ps,ts)
elif cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
@@ -138,23 +154,7 @@ elif cmd == 'redis_new':
gt = G.T
G = None
ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
- if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
- if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
- if options.geno == 'data/test8000.geno':
- # assert p1==0.8984, "p1=%f" % p1
- # assert p2==0.9621, "p2=%f" % p2
- assert round(sum(ps)) == 4070
- assert len(ps) == 8000
+ check_results(ps,ts)
elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
@@ -177,21 +177,7 @@ elif cmd == 'redis':
gt = G.T
G = None
ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
- print np.array(ps)
- print len(ps),sum(ps)
- # Test results 4070.02346579
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
- sys.stderr.write(options.geno+"\n")
- if options.geno == 'data/small.geno':
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
- if options.geno == 'data/small_na.geno':
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
- if options.geno == 'data/test8000.geno':
- assert int(sum(ps)) == 4070
- assert len(ps) == 8000
+ check_results(ps,ts)
elif cmd == 'kinship':
G = g
print "Original G",G.shape, "\n", G
@@ -235,4 +221,4 @@ elif cmd == 'kinship':
assert k3==1.4352, "k3=%f" % k3
else:
- print "Doing nothing"
+ fatal("Doing nothing")
--
cgit v1.2.3
From 99fef2888f02551191cf6031c2c7222fce27e360 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 4 Apr 2015 12:33:07 +0200
Subject: Run works without transposes
---
wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 21 +++++++---
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 24 +++++++----
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 65 +++++++++++++++++++----------
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 43 ++++++++++---------
wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 35 +++++++++++++---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 21 +++++-----
6 files changed, 136 insertions(+), 73 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index 8b344a90..ae3769d4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -19,7 +19,7 @@
import pdb
import time
-# from utility import temp_data
+import sys
import lmm2
import os
@@ -31,6 +31,18 @@ from lmm2 import LMM2
import multiprocessing as mp # Multiprocessing is part of the Python stdlib
import Queue
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses
+except ImportError:
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+ has_gn2=False
+ from standalone import uses
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
+
def formatResult(id,beta,betaSD,ts,ps):
return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
@@ -52,12 +64,11 @@ def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
def f_init(q):
compute_snp.q = q
-def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True):
+def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
"""
GWAS. The G matrix should be n inds (cols) x m snps (rows)
"""
- progress,debug,info,mprint = uses('progress','debug','info','mprint')
-
+ info("In gwas.gwas")
matrix_initialize()
cpu_num = mp.cpu_count()
numThreads = None # for now use all available threads
@@ -70,7 +81,7 @@ def gwas(Y,G,K,uses,restricted_max_likelihood=True,refit=False,verbose=True):
m = G.shape[0] # snps
snps = m
info("%s SNPs",snps)
- assert snps>inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
+ assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
# CREATE LMM object for association
# if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index be12417e..1c157fd8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -28,12 +28,21 @@ import time
from optmatrix import matrix_initialize, matrixMultT
-def kinship_full(G,uses):
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
+def kinship_full(G):
"""
Calculate the Kinship matrix using a full dot multiplication
"""
- info,mprint = uses('info','mprint')
-
# mprint("kinship_full G",G)
m = G.shape[0] # snps
n = G.shape[1] # inds
@@ -78,8 +87,7 @@ def f_init(q):
# Calculate the kinship matrix from G (SNPs as rows!), returns K
#
-def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False):
- progress,debug,info,mprint = uses('progress','debug','info','mprint')
+def kinship(G,computeSize=1000,numThreads=None,useBLAS=False):
matrix_initialize(useBLAS)
@@ -89,7 +97,7 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False):
m = G.shape[0] # snps
snps = m
info("%i SNPs" % (m))
- assert snps>inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
+ assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
@@ -140,13 +148,11 @@ def kinship(G,uses,computeSize=1000,numThreads=None,useBLAS=False):
K = K / float(snps)
return K
-def kvakve(K,uses):
+def kvakve(K):
"""
Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
of small values < 1e-6 (notably smaller than zero)
"""
- info,mprint = uses('info','mprint')
-
info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
Kva,Kve = linalg.eigh(K)
mprint("Kva",Kva)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index e51742c4..82bd7f0b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -64,7 +64,6 @@ except ImportError:
import standalone as handlers
from standalone import uses, progress_set_func
sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
- pass
progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
@@ -296,8 +295,8 @@ def run_other_old(pheno_vector,
Bench().report()
return p_values, t_stats
-def run_other_new(pheno_vector,
- genotype_matrix,
+def run_other_new(n,m,pheno_vector,
+ geno,
restricted_max_likelihood=True,
refit=False):
@@ -312,8 +311,7 @@ def run_other_new(pheno_vector,
print("REML=",restricted_max_likelihood," REFIT=",refit)
# Adjust phenotypes
- Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
- print("Removed missing phenotypes",Y.shape)
+ n,Y,keep = phenotype.remove_missing_new(n,pheno_vector)
# if options.maf_normalization:
# G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
@@ -321,8 +319,9 @@ def run_other_new(pheno_vector,
# if not options.skip_genotype_normalization:
# G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+ geno = geno[:,keep]
with Bench("Calculate Kinship"):
- K,G = calculate_kinship_new(G)
+ K,G = calculate_kinship_new(geno)
print("kinship_matrix: ", pf(K))
print("kinship_matrix.shape: ", pf(K.shape))
@@ -337,9 +336,8 @@ def run_other_new(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = gwas.gwas(Y,
- G.T,
+ G,
K,
- uses,
restricted_max_likelihood=True,
refit=False,verbose=True)
Bench().report()
@@ -378,18 +376,30 @@ def matrixMult(A,B):
return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
+def calculate_kinship_new(genotype_matrix):
+ """
+ Call the new kinship calculation where genotype_matrix contains
+ inds (columns) by snps (rows).
+ """
+ assert type(genotype_matrix) is np.ndarray
+ info("call genotype.normalize")
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix)
+ mprint("G",genotype_matrix)
+ info("call calculate_kinship_new")
+ return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME)
-def calculate_kinship_new(genotype_matrix, temp_data=None):
+def calculate_kinship_iter(geno):
"""
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
+ assert type(genotype_matrix) is iter
info("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
info("call calculate_kinship_new")
- return kinship(G.T,uses),G # G gets transposed, we'll turn this into an iterator (FIXME)
+ return kinship(G)
-def calculate_kinship_old(genotype_matrix, temp_data=None):
+def calculate_kinship_old(genotype_matrix):
"""
genotype_matrix is an n x m matrix encoding SNP minor alleles.
@@ -430,7 +440,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
mprint("G (after old normalize)",genotype_matrix.T)
kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix,genotype_matrix
- # return kinship_full(genotype_matrix.T,uses),genotype_matrix
+ # return kinship_full(genotype_matrix.T),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
@@ -586,7 +596,7 @@ class LMM:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kvakve(K,uses)
+ Kva,Kve = kvakve(K)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
@@ -794,12 +804,11 @@ class LMM:
pl.ylabel("Probability of data")
pl.title(title)
-
def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
"""
Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
"""
- info("gwas_without_redis")
+ info("run_gwas")
print('pheno', y)
if species == "human" :
@@ -813,8 +822,8 @@ def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,ne
print('geno', geno.shape, geno)
if new_code:
- ps, ts = run_other_new(pheno_vector = y,
- genotype_matrix = geno,
+ ps, ts = run_other_new(n,m,pheno_vector = y,
+ geno = geno,
restricted_max_likelihood = reml,
refit = refit)
else:
@@ -849,10 +858,20 @@ def gwas_with_redis(key,species,new_code=True):
v = np.array(v)
return v
+ def narrayT(key):
+ m = narray(key)
+ if m is not None:
+ return m.T
+ return m
+
+ # We are transposing before we enter run_gwas - this should happen on the webserver
+ # side (or when reading data from file)
+ k = narray('kinship_matrix')
+ g = narrayT('genotype_matrix')
y = narray('pheno_vector')
n = len(y)
m = params['num_genotypes']
- ps,ts = run_gwas(species,n,m,narray('kinship_matrix'),y,narray('genotype_matrix'),narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
+ ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
results_key = "pylmm:results:" + params['temp_uuid']
@@ -864,19 +883,19 @@ def gwas_with_redis(key,species,new_code=True):
Redis.expire(results_key, 60*60)
return ps, ts
-
def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
"""
- This function emulates current GN2 behaviour by pre-loading Redis
+ This function emulates current GN2 behaviour by pre-loading Redis (note the input
+ genotype is transposed to emulate GN2 (FIXME!)
"""
- print("Loading Redis from parsed data")
+ info("Loading Redis from parsed data")
if kinship == None:
k = None
else:
k = kinship.tolist()
params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = geno.tolist(),
- num_genotypes = geno.shape[1],
+ genotype_matrix = geno.T.tolist(),
+ num_genotypes = geno.shape[0],
kinship_matrix = k,
covariate_matrix = None,
input_file_name = None,
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d67e1205..358bf27e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -32,7 +32,6 @@ except ImportError:
sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
has_gn2=False
from standalone import uses
- pass
def calculateKinship(W,center=False):
"""
@@ -149,28 +148,32 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
class LMM2:
- """
- This is a simple version of EMMA/fastLMM.
- The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
- and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
- There are three model parameters: heritability (h), covariate coefficients (beta) and the total
- phenotypic variance (sigma).
- Heritability as defined here is the proportion of the total variance (sigma) that is attributed to
- the kinship matrix.
-
- For simplicity, we assume that everything being input is a numpy array.
- If this is not the case, the module may throw an error as conversion from list to numpy array
- is not done consistently.
+ """This is a simple version of EMMA/fastLMM.
+
+ The main purpose of this module is to take a phenotype vector (Y),
+ a set of covariates (X) and a kinship matrix (K) and to optimize
+ this model by finding the maximum-likelihood estimates for the
+ model parameters. There are three model parameters: heritability
+ (h), covariate coefficients (beta) and the total phenotypic
+ variance (sigma). Heritability as defined here is the proportion
+ of the total variance (sigma) that is attributed to the kinship
+ matrix.
+
+ For simplicity, we assume that everything being input is a numpy
+ array. If this is not the case, the module may throw an error as
+ conversion from list to numpy array is not done consistently.
"""
def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
- """
- The constructor takes a phenotype vector or array Y of size n.
- It takes a kinship matrix K of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
- If they are not provided, the constructor will calculate them.
- X0 is an optional covariate matrix of size n x q, where there are q covariates.
- When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+ """The constructor takes a phenotype vector or array Y of size n. It
+ takes a kinship matrix K of size n x n. Kva and Kve can be
+ computed as Kva,Kve = linalg.eigh(K) and cached. If they are
+ not provided, the constructor will calculate them. X0 is an
+ optional covariate matrix of size n x q, where there are q
+ covariates. When this parameter is not provided, the
+ constructor will set X0 to an n x 1 matrix of all ones to
+ represent a mean effect.
"""
if X0 is None:
@@ -194,7 +197,7 @@ class LMM2:
# if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
# Kva,Kve = linalg.eigh(K)
- Kva,Kve = kinship.kvakve(K,uses)
+ Kva,Kve = kinship.kvakve(K)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
index 4c8175f7..7b652515 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -19,22 +19,47 @@
import sys
import numpy as np
-def remove_missing(y,g,verbose=False):
+# ---- A trick to decide on the environment:
+try:
+ from wqflask.my_pylmm.pyLMM import chunks
+ from gn2 import uses, progress_set_func
+except ImportError:
+ has_gn2=False
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+
+progress,debug,info,mprint = uses('progress','debug','info','mprint')
+
+def remove_missing(n,y,g):
"""
Remove missing data from matrices, make sure the genotype data has
individuals as rows
"""
assert(y is not None)
- assert(y.shape[0] == g.shape[0])
+ assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape)
y1 = y
g1 = g
v = np.isnan(y)
keep = True - v
if v.sum():
- if verbose:
- sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+ info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
y1 = y[keep]
g1 = g[keep,:]
- return y1,g1,keep
+ n = y1.shape[0]
+ return n,y1,g1,keep
+
+def remove_missing_new(n,y):
+ """
+ Remove missing data. Returns new n,y,keep
+ """
+ assert(y is not None)
+ y1 = y
+ v = np.isnan(y)
+ keep = True - v
+ if v.sum():
+ info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum()))
+ y1 = y[keep]
+ n = y1.shape[0]
+ return n,y1,keep
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 2d02e195..d248dee2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -131,16 +131,15 @@ def check_results(ps,ts):
if cmd == 'run':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
- snp_iterator = tsvreader.geno_iter(options.geno)
n = len(y)
m = g.shape[1]
- ps, ts = run_gwas('other',n,m,k,y,g.T)
+ ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP
check_results(ps,ts)
elif cmd == 'iterator':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
- snp_iterator = tsvreader.geno_iter(options.geno)
- ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,snp_iterator)
+ geno_iterator = tsvreader.geno_iter(options.geno)
+ ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator)
check_results(ps,ts)
elif cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
@@ -150,10 +149,9 @@ elif cmd == 'redis_new':
Y = y
G = g
print "Original G",G.shape, "\n", G
-
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+ # gt = G.T
+ # G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True)
check_results(ps,ts)
elif cmd == 'redis':
# Emulating the redis setup of GN2
@@ -174,9 +172,10 @@ elif cmd == 'redis':
g = None
gnt = None
- gt = G.T
- G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
+ # gt = G.T
+ # G = None
+ mprint("G",G)
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
check_results(ps,ts)
elif cmd == 'kinship':
G = g
--
cgit v1.2.3
From 49f5eb3e825c953bc7f6da87460ccfe9b891d493 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 4 Apr 2015 13:01:44 +0200
Subject: Fixing transpose issues
---
wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 1 -
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 26 ++++++++++++--------------
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index ae3769d4..247a8729 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -36,7 +36,6 @@ try:
from wqflask.my_pylmm.pyLMM import chunks
from gn2 import uses
except ImportError:
- sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
has_gn2=False
from standalone import uses
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 82bd7f0b..6f03eaf7 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -288,7 +288,7 @@ def run_other_old(pheno_vector,
with Bench("Doing GWAS"):
t_stats, p_values = GWAS(pheno_vector,
- genotype_matrix,
+ genotype_matrix.T,
kinship_matrix,
restricted_max_likelihood=True,
refit=False)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index d248dee2..44d5c0f4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -108,26 +108,25 @@ if options.geno and cmd != 'iterator':
def check_results(ps,ts):
print np.array(ps)
print len(ps),sum(ps)
- # Test results
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
- # sys.stderr.write(options.geno+"\n")
if options.geno == 'data/small.geno':
info("Validating results for "+options.geno)
- assert p1==0.0708, "p1=%f" % p1
- assert p2==0.1417, "p2=%f" % p2
+ assert p1==0.7387, "p1=%f" % p1
+ assert p2==0.7387, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
info("Validating results for "+options.geno)
- assert p1==0.0897, "p1=%f" % p1
- assert p2==0.0405, "p2=%f" % p2
+ assert p1==0.062, "p1=%f" % p1
+ assert p2==0.062, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
info("Validating results for "+options.geno)
- # assert p1==0.8984, "p1=%f" % p1
- # assert p2==0.9621, "p2=%f" % p2
assert round(sum(ps)) == 4070
assert len(ps) == 8000
info("Run completed")
-
+
+if y is not None:
+ n = y.shape[0]
+
if cmd == 'run':
if options.remove_missing_phenotypes:
raise Exception('Can not use --remove-missing-phenotypes with LMM2')
@@ -159,7 +158,7 @@ elif cmd == 'redis':
print "Original G",G.shape, "\n", G
if y is not None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,gnt)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
else:
@@ -174,7 +173,6 @@ elif cmd == 'redis':
# gt = G.T
# G = None
- mprint("G",G)
ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
check_results(ps,ts)
elif cmd == 'kinship':
@@ -182,7 +180,7 @@ elif cmd == 'kinship':
print "Original G",G.shape, "\n", G
if y != None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ n,Y,g,keep = phenotype.remove_missing(n,y,g.T)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
if options.maf_normalization:
@@ -194,7 +192,7 @@ elif cmd == 'kinship':
gnt = None
if options.test_kinship:
- K = kinship_full(np.copy(G),uses)
+ K = kinship_full(np.copy(G))
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
@@ -204,7 +202,7 @@ elif cmd == 'kinship':
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T,uses)
+ K3 = kinship(G.T)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
--
cgit v1.2.3
From 17f453e50ebac657d9f3096811d92bedc9bfc064 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 4 Apr 2015 13:15:48 +0200
Subject: Regression tests
---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 44d5c0f4..52c3c80a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -196,20 +196,20 @@ elif cmd == 'kinship':
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2,G = calculate_kinship_new(np.copy(G).T)
+ K2,G = calculate_kinship_new(np.copy(G))
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(G.T)
+ K3 = kinship(G)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
if options.geno == 'data/small.geno':
- assert k1==0.8, "k1=%f" % k1
- assert k2==0.7939, "k2=%f" % k2
- assert k3==0.7939, "k3=%f" % k3
+ assert k1==0.8333, "k1=%f" % k1
+ assert k2==0.9375, "k2=%f" % k2
+ assert k3==0.9375, "k3=%f" % k3
if options.geno == 'data/small_na.geno':
assert k1==0.8333, "k1=%f" % k1
assert k2==0.7172, "k2=%f" % k2
--
cgit v1.2.3
From 102523493e2f8a7660c63f117f1d8dfd009eff02 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 13 Apr 2015 08:14:43 +0000
Subject: Improved assertion message
---
wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
index b4027fa3..b24ffe8f 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
@@ -56,7 +56,8 @@ def geno(fn):
print fn
with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Genotype format version 1.0")
+ line = tsvin.readline().strip()
+ assert line == "# Genotype format version 1.0", line
tsvin.readline()
tsvin.readline()
tsvin.readline()
--
cgit v1.2.3
From 6cef9c3b27d92383f89a432f6fa0e9fd16107f66 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 13 Apr 2015 10:15:59 +0200
Subject: Added examples for convertlmm.py
---
wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
index 3b6b5d70..4312fed0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
@@ -1,5 +1,5 @@
-# This is a converter for common LMM formats, so as to keep complexity
-# outside the main routines.
+# This is a converter for common LMM formats, so as to keep file
+# reader complexity outside the main routines.
# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
#
@@ -31,6 +31,12 @@ python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--phen
Convert files for runlmm.py processing. Writes to stdout by default.
try --help for more information
+
+Examples:
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno
+
+ python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno
"""
# if len(args) == 0:
--
cgit v1.2.3
From 85ccb971687fda00538b248722454ea2aa514e27 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 13 Apr 2015 10:16:11 +0200
Subject: Started on INSTALL information
---
INSTALL.md | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
create mode 100644 INSTALL.md
diff --git a/INSTALL.md b/INSTALL.md
new file mode 100644
index 00000000..26eacc3a
--- /dev/null
+++ b/INSTALL.md
@@ -0,0 +1,26 @@
+# INSTALL Genenetwork2 (GN2)
+
+## Fetch GN2 from github
+
+Clone the repository (currently ~800Mb) to local
+
+ git clone git@github.com:genenetwork2/genenetwork2.git
+
+## Dependencies
+
+GN2 requires
+
+* redis
+* mysql
+
+## Required python modules
+
+Install the following python modules:
+
+* Flask
+* pyyaml
+* redis
+* qtlreaper
+* numarray
+* pp
+* Flask-SQLAlchemy
--
cgit v1.2.3
From 25bb886b733362edea657c72e7d29172b7e22755 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 17 Apr 2015 14:33:26 +0200
Subject: INSTALL
---
INSTALL.md | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/INSTALL.md b/INSTALL.md
index 26eacc3a..84b3d37c 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -24,3 +24,41 @@ Install the following python modules:
* numarray
* pp
* Flask-SQLAlchemy
+
+## Set up local file settings.py
+
+```python
+LOGFILE = """/tmp/flask_gn_log"""
+
+#This is needed because Flask turns key errors into a
+#400 bad request response with no exception/log
+TRAP_BAD_REQUEST_ERRORS = True
+
+DB_URI = """mysql://gn2:password@localhost/db_webqtl"""
+SQLALCHEMY_DATABASE_URI = 'mysql://gn2:password@localhost/db_webqtl'
+
+# http://pythonhosted.org/Flask-Security/configuration.html
+SECURITY_CONFIRMABLE = True
+SECURITY_TRACKABLE = True
+SECURITY_REGISTERABLE = True
+SECURITY_RECOVERABLE = True
+
+SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org"
+SECURITY_POST_LOGIN_VIEW = "/thank_you"
+SQLALCHEMY_POOL_RECYCLE = 3600
+
+SERVER_PORT = 5051
+
+SECRET_HMAC_CODE = '*'
+```
+
+```sh
+ export WQFLASK_SETTINGS=$HOME/settings.py
+ source /home/pjotr/ve27/bin/activate
+ cd genenetwork2/wqflask
+ python ./runserver.py
+
+ or
+
+ python ./secure_server.py
+```
--
cgit v1.2.3
From bb8e466e00c622f7b28209378c1871a1d8469572 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 17 Apr 2015 14:41:40 +0200
Subject: spacing
---
INSTALL.md | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/INSTALL.md b/INSTALL.md
index 84b3d37c..38d15090 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -53,12 +53,12 @@ SECRET_HMAC_CODE = '*'
```
```sh
- export WQFLASK_SETTINGS=$HOME/settings.py
- source /home/pjotr/ve27/bin/activate
- cd genenetwork2/wqflask
- python ./runserver.py
+export WQFLASK_SETTINGS=$HOME/settings.py
+source /home/pjotr/ve27/bin/activate
+cd genenetwork2/wqflask
+python ./runserver.py
- or
+or
- python ./secure_server.py
+python ./secure_server.py
```
--
cgit v1.2.3
From 0929b16a5183538811260aef5c37f7406c302026 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 08:54:07 +0200
Subject: Use reduced outputter for GN2 logs too
---
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index 7bceb089..40b2021d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -1,7 +1,10 @@
-# Genenetwork2 specific methods and callback handler
+# Standalone specific methods and callback handler
#
# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
from __future__ import absolute_import, print_function, division
@@ -9,8 +12,10 @@ import numpy as np
import sys
import logging
-# logging.basicConfig(level=logging.DEBUG)
-# np.set_printoptions()
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
progress_location = None
progress_current = None
@@ -37,13 +42,20 @@ def progress(location, count, total):
logger.info("Progress: %s %d%%" % (location,perc))
progress_location = location
progress_prev_perc = perc
-
+
def mprint(msg,data):
"""
Array/matrix print function
"""
m = np.array(data)
- print(msg,m.shape,"=\n",m)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
def fatal(msg):
logger.critical(msg)
@@ -68,7 +80,7 @@ def uses(*funcs):
Some sugar
"""
return [callbacks()[func] for func in funcs]
-
+
# ----- Minor test cases:
if __name__ == '__main__':
--
cgit v1.2.3
From a1d8f68d5428a4ceec9a2d9a771b000ecabec5e6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 09:36:00 +0000
Subject: pylmm: fix integration problems
---
wqflask/runserver.py | 6 ++---
.../wqflask/marker_regression/marker_regression.py | 14 +++++-----
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 30 +++++++++++++++-------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 19 ++++++++------
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 18 ++++++++++---
5 files changed, 56 insertions(+), 31 deletions(-)
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 9d5686a9..fadae6bf 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -20,9 +20,9 @@ from wqflask import app
import logging
#from themodule import TheHandlerYouWant
-file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
-file_handler.setLevel(logging.DEBUG)
-app.logger.addHandler(file_handler)
+# file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
+# file_handler.setLevel(logging.DEBUG)
+# app.logger.addHandler(file_handler)
import logging_tree
logging_tree.printout()
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7708356b..ae3e062f 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -40,6 +40,7 @@ from utility import temp_data
from utility.benchmark import Bench
+PYLMM_COMMAND= 'python /home/pjotr/izip/git/opensource/python/gn2/wqflask/wqflask/my_pylmm/pyLMM/lmm.py'
class MarkerRegression(object):
@@ -272,7 +273,7 @@ class MarkerRegression(object):
""")
def run_rqtl_geno(self):
- print("Calling R/qtl from python")
+ print("Calling R/qtl")
self.geno_to_rqtl_function()
@@ -655,8 +656,7 @@ class MarkerRegression(object):
Redis.set(key, json_params)
Redis.expire(key, 60*60)
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "other")
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other")
os.system(command)
@@ -713,8 +713,8 @@ class MarkerRegression(object):
# "refit": False,
# "temp_data": tempdata}
- print("genotype_matrix:", str(genotype_matrix.tolist()))
- print("pheno_vector:", str(pheno_vector.tolist()))
+ # print("genotype_matrix:", str(genotype_matrix.tolist()))
+ # print("pheno_vector:", str(pheno_vector.tolist()))
params = dict(pheno_vector = pheno_vector.tolist(),
genotype_matrix = genotype_matrix.tolist(),
@@ -732,7 +732,7 @@ class MarkerRegression(object):
Redis.expire(key, 60*60)
print("before printing command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND + ' --key {} --species {}'.format(key,
"other")
print("command is:", command)
print("after printing command")
@@ -806,7 +806,7 @@ class MarkerRegression(object):
print("Before creating the command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,
"human")
print("command is:", command)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
index 7bceb089..b128bfab 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
@@ -1,7 +1,10 @@
-# Genenetwork2 specific methods and callback handler
+# Standalone specific methods and callback handler
#
# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
#
+# Set the log level with
+#
+# logging.basicConfig(level=logging.DEBUG)
from __future__ import absolute_import, print_function, division
@@ -9,10 +12,12 @@ import numpy as np
import sys
import logging
-# logging.basicConfig(level=logging.DEBUG)
-# np.set_printoptions()
+# logger = logging.getLogger(__name__)
+logger = logging.getLogger('lmm2')
+logging.basicConfig(level=logging.DEBUG)
+np.set_printoptions(precision=3,suppress=True)
-progress_location = None
+progress_location = None
progress_current = None
progress_prev_perc = None
@@ -20,30 +25,37 @@ def progress_default_func(location,count,total):
global progress_current
value = round(count*100.0/total)
progress_current = value
-
+
progress_func = progress_default_func
def progress_set_func(func):
global progress_func
progress_func = func
-
+
def progress(location, count, total):
global progress_location
global progress_prev_perc
-
+
perc = round(count*100.0/total)
if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
progress_func(location, count, total)
logger.info("Progress: %s %d%%" % (location,perc))
progress_location = location
progress_prev_perc = perc
-
+
def mprint(msg,data):
"""
Array/matrix print function
"""
m = np.array(data)
- print(msg,m.shape,"=\n",m)
+ if m.ndim == 1:
+ print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
+ if m.ndim == 2:
+ print(msg,m.shape,"=\n[",
+ m[0][0:3]," ... ",m[0][-3:],"\n ",
+ m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
+ m[-2][0:3]," ... ",m[-2][-3:],"\n ",
+ m[-1][0:3]," ... ",m[-1][-3:],"]")
def fatal(msg):
logger.critical(msg)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index b2067b27..6fff5f1d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -42,25 +42,27 @@ from redis import Redis
Redis = Redis()
import sys
-sys.path.append("/home/zas1024/gene/wqflask/")
-
-has_gn2=True
from utility.benchmark import Bench
from utility import temp_data
-sys.path.append("/home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/")
-
from kinship import kinship, kinship_full, kvakve
import genotype
import phenotype
import gwas
+has_gn2=True
+sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
+
# ---- A trick to decide on the environment:
try:
- from wqflask.my_pylmm.pyLMM import chunks
+ sys.stderr.write("INFO: trying loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment\n")
from gn2 import uses, progress_set_func
except ImportError:
+ # Failed to load gn2
has_gn2=False
import standalone as handlers
from standalone import uses, progress_set_func
@@ -856,7 +858,8 @@ def gwas_with_redis(key,species,new_code=True):
print(key)
v = params[key]
if v is not None:
- v = np.array(v)
+ v = np.array(v).astype(np.float)
+ print(v)
return v
def narrayT(key):
@@ -969,6 +972,6 @@ if __name__ == '__main__':
if has_gn2:
gn2_main()
else:
- print("Run from runlmm.py instead")
+ fatal("Run from runlmm.py instead")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index 358bf27e..c65843ec 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -24,14 +24,24 @@ from scipy import optimize
from optmatrix import matrixMult
import kinship
+sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n")
+sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
+
# ---- A trick to decide on the environment:
try:
- from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses
+ sys.stderr.write("INFO: trying loading module\n")
+ import utility.formatting # this is never used, just to check the environment
+ sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
+ from gn2 import uses, progress_set_func
except ImportError:
- sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+ # Failed to load gn2
has_gn2=False
- from standalone import uses
+ import standalone as handlers
+ from standalone import uses, progress_set_func
+ sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
+
+progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
+
def calculateKinship(W,center=False):
"""
--
cgit v1.2.3
From 02660b9406a97943d4c33946250fc3f08b80c556 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 09:40:57 +0000
Subject: pylmm: fix integration problems
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 6fff5f1d..618f8332 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -854,22 +854,23 @@ def gwas_with_redis(key,species,new_code=True):
debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
progress_set_func(update_tempdata)
- def narray(key):
- print(key)
- v = params[key]
+ def narray(t):
+ info("Type is "+t)
+ v = params[t]
if v is not None:
v = np.array(v).astype(np.float)
print(v)
return v
- def narrayT(key):
- m = narray(key)
+ def narrayT(t):
+ m = narray(t)
if m is not None:
return m.T
return m
# We are transposing before we enter run_gwas - this should happen on the webserver
# side (or when reading data from file)
+ print(params)
k = narray('kinship_matrix')
g = narrayT('genotype_matrix')
y = narray('pheno_vector')
--
cgit v1.2.3
From 8706319923b3830a4d8cd63fd9a3f6b9a2b04563 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 10:58:26 +0000
Subject: Fix NA to float transforms
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 37 +++++++++++++++++++++++++----------
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 618f8332..5b06c9ae 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -856,30 +856,47 @@ def gwas_with_redis(key,species,new_code=True):
def narray(t):
info("Type is "+t)
- v = params[t]
+ v = params.get(t)
if v is not None:
- v = np.array(v).astype(np.float)
- print(v)
+ # Note input values can be array of string or float
+ v1 = [x if x != 'NA' else 'nan' for x in v]
+ v = np.array(v1).astype(np.float)
return v
- def narrayT(t):
- m = narray(t)
+ def marray(t):
+ info("Type is "+t)
+ v = params.get(t)
+ if v is not None:
+ m = []
+ for r in v:
+ # Note input values can be array of string or float
+ r1 = [x if x != 'NA' else 'nan' for x in r]
+ m.append(np.array(r1).astype(np.float))
+ return np.array(m)
+ return np.array(v)
+
+ def marrayT(t):
+ m = marray(t)
if m is not None:
return m.T
return m
# We are transposing before we enter run_gwas - this should happen on the webserver
# side (or when reading data from file)
- print(params)
- k = narray('kinship_matrix')
- g = narrayT('genotype_matrix')
+ k = marray('kinship_matrix')
+ g = marrayT('genotype_matrix')
+ mprint("geno",g)
y = narray('pheno_vector')
n = len(y)
- m = params['num_genotypes']
- ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params['input_file_name'],new_code)
+ m = params.get('num_genotypes')
+ if m is None:
+ m = g.shape[0]
+ info("m=%d,n=%d" % (m,n))
+ ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code)
results_key = "pylmm:results:" + params['temp_uuid']
+ # fatal(results_key)
json_results = json.dumps(dict(p_values = ps,
t_stats = ts))
--
cgit v1.2.3
From ced6f0c49c155a2ab47adfe93578d4718504566b Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 11:09:20 +0000
Subject: Disable some print statements - will introduce debug levels soon
---
wqflask/wqflask/marker_regression/marker_regression.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index ae3e062f..c80bba8e 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -128,7 +128,7 @@ class MarkerRegression(object):
#Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
self.qtl_results = []
for qtl in self.filtered_markers:
- print("lod score is:", qtl['lod_score'])
+ # print("lod score is:", qtl['lod_score'])
if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
print("changing to X")
self.json_data['chr'].append("X")
@@ -145,7 +145,7 @@ class MarkerRegression(object):
self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
- print("json_data:", self.json_data)
+ # print("json_data:", self.json_data)
self.js_data = dict(
@@ -745,7 +745,7 @@ class MarkerRegression(object):
json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
results = json.loads(json_results[1])
p_values = [float(result) for result in results['p_values']]
- print("p_values:", p_values)
+ print("p_values:", p_values[:10])
#p_values = self.trim_results(p_values)
t_stats = results['t_stats']
--
cgit v1.2.3
From 1f6386cbddfd02d8abbd4e9bcb502c06be6864d1 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 18 Apr 2015 12:41:07 +0000
Subject: Show first 40 LOD scores
---
wqflask/wqflask/marker_regression/marker_regression.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index c80bba8e..fba34b99 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -127,8 +127,9 @@ class MarkerRegression(object):
#Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
self.qtl_results = []
- for qtl in self.filtered_markers:
- # print("lod score is:", qtl['lod_score'])
+ for index,qtl in enumerate(self.filtered_markers):
+ if index<40:
+ print("lod score is:", qtl['lod_score'])
if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
print("changing to X")
self.json_data['chr'].append("X")
--
cgit v1.2.3
From 3736bd0044ddee68180a06809847af7542951743 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 09:46:15 +0200
Subject: INSTALL info
---
INSTALL.md | 15 ++++++++++++---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++--
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/INSTALL.md b/INSTALL.md
index 38d15090..afe22678 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -1,5 +1,10 @@
# INSTALL Genenetwork2 (GN2)
+## Use a Docker image
+
+A Docker image can be generated from
+[here](https://github.com/lomereiter/gn2-docker).
+
## Fetch GN2 from github
Clone the repository (currently ~800Mb) to local
@@ -10,12 +15,14 @@ Clone the repository (currently ~800Mb) to local
GN2 requires
+* python
* redis
* mysql
## Required python modules
-Install the following python modules:
+Install the following python modules (it is probably wise to use a local
+Python with environment for this)
* Flask
* pyyaml
@@ -53,12 +60,14 @@ SECRET_HMAC_CODE = '*'
```
```sh
+# Use a working copy of python
+export python=$HOME/ve27/bin/python
export WQFLASK_SETTINGS=$HOME/settings.py
source /home/pjotr/ve27/bin/activate
cd genenetwork2/wqflask
-python ./runserver.py
+$python ./runserver.py
or
-python ./secure_server.py
+$python ./secure_server.py
```
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index b2067b27..98bbead8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -856,7 +856,7 @@ def gwas_with_redis(key,species,new_code=True):
print(key)
v = params[key]
if v is not None:
- v = np.array(v)
+ v = np.array(v).astype(np.float)
return v
def narrayT(key):
@@ -969,6 +969,6 @@ if __name__ == '__main__':
if has_gn2:
gn2_main()
else:
- print("Run from runlmm.py instead")
+ fatal("Run from runlmm.py instead")
--
cgit v1.2.3
From 5dad3ceb4acd652dd28d183f784005479089aa8a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 10:55:42 +0200
Subject: Restore logger in runserver
---
wqflask/runserver.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index fadae6bf..20d79218 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -19,10 +19,9 @@ from wqflask import app
#_log.addHandler(_ch)
import logging
-#from themodule import TheHandlerYouWant
-# file_handler = logging.FileHandler("/tmp/flask_gn_log_danny_unsecure")
-# file_handler.setLevel(logging.DEBUG)
-# app.logger.addHandler(file_handler)
+file_handler = logging.FileHandler(app.config['LOGFILE'])
+file_handler.setLevel(logging.DEBUG)
+app.logger.addHandler(file_handler)
import logging_tree
logging_tree.printout()
--
cgit v1.2.3
From 78dde9ccb4c24ea900b7a6d64ef392ec30ac89ea Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 10:58:43 +0200
Subject: INSTALL: refer to information in ./misc
---
INSTALL.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/INSTALL.md b/INSTALL.md
index afe22678..a971ff78 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -71,3 +71,7 @@ or
$python ./secure_server.py
```
+
+## Other information
+
+Check also the ./misc/ directory for settings
\ No newline at end of file
--
cgit v1.2.3
From 93f663a2e865484cb6f476fb7a0fa2415410e4fd Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 11:01:56 +0200
Subject: runserver: set port
---
wqflask/runserver.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 20d79218..4ae91e64 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -27,7 +27,7 @@ import logging_tree
logging_tree.printout()
app.run(host='0.0.0.0',
- port=5003,
+ port=app.config['SERVER_PORT'],
use_debugger=False,
threaded=True,
use_reloader=True)
--
cgit v1.2.3
From 240c2db33b70b7d10a6bdd18e043fc0aa6766715 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 11:04:31 +0200
Subject: Output config
---
wqflask/runserver.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 4ae91e64..5a76d1e2 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -18,6 +18,8 @@ from wqflask import app
#_ch = logging.StreamHandler()
#_log.addHandler(_ch)
+print app.config
+
import logging
file_handler = logging.FileHandler(app.config['LOGFILE'])
file_handler.setLevel(logging.DEBUG)
--
cgit v1.2.3
From e7cbe10d754e1e334746fc43a01e8b9fa3a666c0 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 11:37:32 +0200
Subject: pylmm: Copied benchmark (preparing for a module)
---
wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 44 +++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100755 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
new file mode 100755
index 00000000..6c6c9f88
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
@@ -0,0 +1,44 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+class Bench(object):
+ entries = collections.OrderedDict()
+
+ def __init__(self, name=None):
+ self.name = name
+
+ def __enter__(self):
+ if self.name:
+ print("Starting benchmark: %s" % (self.name))
+ else:
+ print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+ self.start_time = time.time()
+
+ def __exit__(self, type, value, traceback):
+ if self.name:
+ name = self.name
+ else:
+ name = "That"
+
+ time_taken = time.time() - self.start_time
+ print(" %s took: %f seconds" % (name, (time_taken)))
+
+ if self.name:
+ Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken
+
+
+ @classmethod
+ def report(cls):
+ total_time = sum((time_taken for time_taken in cls.entries.itervalues()))
+ print("\nTiming report\n")
+ for name, time_taken in cls.entries.iteritems():
+ percent = int(round((time_taken/total_time) * 100))
+ print("[{}%] {}: {}".format(percent, name, time_taken))
+ print()
+
+ def reset(cls):
+ """Reset the entries"""
+ cls.entries = collections.OrderedDict()
--
cgit v1.2.3
From ac57e839c8ffb52f65e74a9064e4adeb15c76b49 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 11:54:42 +0200
Subject: pylmm: module loading
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 38 ++++++++++++++++------------------
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 2 +-
2 files changed, 19 insertions(+), 21 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 5b06c9ae..135ba1f4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,48 +19,46 @@ from __future__ import absolute_import, print_function, division
import sys
import time
-import argparse
+# import argparse
import uuid
import numpy as np
from scipy import linalg
from scipy import optimize
from scipy import stats
-import pdb
+# import pdb
-import simplejson as json
-
-import gzip
-import zlib
+# import gzip
+# import zlib
import datetime
-import cPickle as pickle
-import simplejson as json
-
+# import cPickle as pickle
from pprint import pformat as pf
-from redis import Redis
-Redis = Redis()
-
-import sys
-
-from utility.benchmark import Bench
-from utility import temp_data
-
+# pylmm imports
from kinship import kinship, kinship_full, kvakve
import genotype
import phenotype
import gwas
+from benchmark import Bench
+
+# The following imports are for exchanging data with the webserver
+import simplejson as json
+from redis import Redis
+Redis = Redis()
+from utility import temp_data
+
+has_gn2=None
-has_gn2=True
-sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
+# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
# ---- A trick to decide on the environment:
try:
- sys.stderr.write("INFO: trying loading module\n")
+ sys.stderr.write("INFO: lmm try loading module\n")
import utility.formatting # this is never used, just to check the environment
sys.stderr.write("INFO: This is a genenetwork2 environment\n")
from gn2 import uses, progress_set_func
+ has_gn2=True
except ImportError:
# Failed to load gn2
has_gn2=False
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index c65843ec..d871d8d2 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -29,7 +29,7 @@ sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
# ---- A trick to decide on the environment:
try:
- sys.stderr.write("INFO: trying loading module\n")
+ sys.stderr.write("INFO: lmm2 try loading module\n")
import utility.formatting # this is never used, just to check the environment
sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
from gn2 import uses, progress_set_func
--
cgit v1.2.3
From e3adbf898dd537688339c8af1b59ac440aef3848 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 12:05:38 +0200
Subject: pylmm: move temp_data local
---
wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 0
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/my_pylmm/pyLMM/temp_data.py | 25 +++++++++++++++++++++++++
3 files changed, 26 insertions(+), 1 deletion(-)
mode change 100755 => 100644 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
create mode 100644 wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 135ba1f4..2d9ca812 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -45,7 +45,7 @@ from benchmark import Bench
import simplejson as json
from redis import Redis
Redis = Redis()
-from utility import temp_data
+import temp_data
has_gn2=None
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
new file mode 100644
index 00000000..004d45c6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
@@ -0,0 +1,25 @@
+from __future__ import print_function, division, absolute_import
+from redis import Redis
+
+import simplejson as json
+
+class TempData(object):
+
+ def __init__(self, temp_uuid):
+ self.temp_uuid = temp_uuid
+ self.redis = Redis()
+ self.key = "tempdata:{}".format(self.temp_uuid)
+
+ def store(self, field, value):
+ self.redis.hset(self.key, field, value)
+ self.redis.expire(self.key, 60*15) # Expire in 15 minutes
+
+ def get_all(self):
+ return self.redis.hgetall(self.key)
+
+
+if __name__ == "__main__":
+ redis = Redis()
+ for key in redis.keys():
+ for field in redis.hkeys(key):
+ print("{}.{}={}".format(key, field, redis.hget(key, field)))
--
cgit v1.2.3
From 3c1e043dd63fe2d65a0bd44764867254b13aba32 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 12:28:58 +0200
Subject: pylmm: auto add to pythonpath
---
INSTALL.md | 4 ++--
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 6 ++++++
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 8 ++++++++
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/INSTALL.md b/INSTALL.md
index a971ff78..9f28ac28 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -16,8 +16,8 @@ Clone the repository (currently ~800Mb) to local
GN2 requires
* python
-* redis
-* mysql
+* redis-server
+* mysql-server
## Required python modules
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 2d9ca812..4e35a4ac 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -34,6 +34,12 @@ import datetime
# import cPickle as pickle
from pprint import pformat as pf
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+
# pylmm imports
from kinship import kinship, kinship_full, kvakve
import genotype
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 52c3c80a..6b241cd6 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,6 +21,14 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
+
+# Add local dir to PYTHONPATH
+import os
+cwd = os.path.dirname(__file__)
+if sys.path[0] != cwd:
+ sys.path.insert(1,cwd)
+
+# pylmm modules
from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
from kinship import kinship, kinship_full
import genotype
--
cgit v1.2.3
From 6e4a01fbdf2ac72230346d6f474edaa56a623bfe Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 10:40:15 +0000
Subject: pylmm: can now be called from GN2 without path set
---
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 4e35a4ac..2a0c7fdc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -19,7 +19,6 @@ from __future__ import absolute_import, print_function, division
import sys
import time
-# import argparse
import uuid
import numpy as np
@@ -977,6 +976,7 @@ def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
# Note that this calling route will become OBSOLETE (we should use runlmm.py
# instead)
def gn2_main():
+ import argparse
parser = argparse.ArgumentParser(description='Run pyLMM')
parser.add_argument('-k', '--key')
parser.add_argument('-s', '--species')
@@ -991,9 +991,5 @@ def gn2_main():
if __name__ == '__main__':
print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
- if has_gn2:
- gn2_main()
- else:
- fatal("Run from runlmm.py instead")
-
+ gn2_main()
--
cgit v1.2.3
From 1b71fc64719ba5c5b56d23aad9d9dc45bc2898a9 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 12:57:10 +0200
Subject: pylmm: Release
---
wqflask/wqflask/my_pylmm/README.md | 7 +++++++
wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 2 +-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/wqflask/wqflask/my_pylmm/README.md b/wqflask/wqflask/my_pylmm/README.md
index 4845ec03..b844c845 100644
--- a/wqflask/wqflask/my_pylmm/README.md
+++ b/wqflask/wqflask/my_pylmm/README.md
@@ -1,5 +1,12 @@
# Genenetwork2/pylmm RELEASE NOTES
+## 0.51-gn2 (April 19, 2015)
+
+- Improved GN2 integration
+- Less matrix transposes
+- Able to run pylmm standalone without Redis again (still requires
+ the modules)
+
## 0.50-gn2 (April 2nd, 2015)
- Replaced the GN2 genotype normalization
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
index 6ab60d02..f33c4e74 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
@@ -1 +1 @@
-PYLMM_VERSION="0.50-gn2-pre2"
+PYLMM_VERSION="0.51-gn2"
--
cgit v1.2.3
From 561ad00c82f440aefd5dbaf741d927cb63a37e0f Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 18:57:56 +0200
Subject: Pick up PYLMM_PATH from environment or setting.py
---
wqflask/wqflask/marker_regression/marker_regression.py | 10 +++++++++-
wqflask/wqflask/model.py | 2 +-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index fba34b99..8d3eba48 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -25,6 +25,7 @@ from redis import Redis
Redis = Redis()
from flask import Flask, g
+from wqflask import app
from base.trait import GeneralTrait
from base import data_set
@@ -40,7 +41,14 @@ from utility import temp_data
from utility.benchmark import Bench
-PYLMM_COMMAND= 'python /home/pjotr/izip/git/opensource/python/gn2/wqflask/wqflask/my_pylmm/pyLMM/lmm.py'
+import os
+if os.environ['PYLMM_PATH'] is None:
+ PYLMM_PATH=app.config['PYLMM_PATH']
+ if PYLMM_PATH is None:
+ PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
+if !os.path.isfile(PYLMM_PATH+'lmm.py'):
+ raise 'PYLMM_PATH unknown or faulty'
+PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
class MarkerRegression(object):
diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py
index fa8c1aab..042cb8df 100755
--- a/wqflask/wqflask/model.py
+++ b/wqflask/wqflask/model.py
@@ -194,4 +194,4 @@ def display_collapsible(number):
def user_uuid():
"""Unique cookie for a user"""
user_uuid = request.cookies.get('user_uuid')
-
\ No newline at end of file
+
--
cgit v1.2.3
From 4f4ceddafa1d172515b2ef24658e5cf39730e6c6 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 19 Apr 2015 17:03:10 +0000
Subject: Pick up PYLMM_PATH from environment or setting.py
---
wqflask/wqflask/marker_regression/marker_regression.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 8d3eba48..67e1df0d 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -42,11 +42,11 @@ from utility import temp_data
from utility.benchmark import Bench
import os
-if os.environ['PYLMM_PATH'] is None:
- PYLMM_PATH=app.config['PYLMM_PATH']
+if os.environ.get('PYLMM_PATH') is None:
+ PYLMM_PATH=app.config.get('PYLMM_PATH')
if PYLMM_PATH is None:
PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
-if !os.path.isfile(PYLMM_PATH+'lmm.py'):
+if not os.path.isfile(PYLMM_PATH+'/lmm.py'):
raise 'PYLMM_PATH unknown or faulty'
PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
--
cgit v1.2.3
From 85a335df1fe499bc00b7feabc4f301b7a56b2b85 Mon Sep 17 00:00:00 2001
From: pjotrp
Date: Mon, 11 May 2015 16:52:10 -0500
Subject: pylmm has moved out of the GN2 source tree to
https://github.com/genenetwork/pylmm_gn2
---
wqflask/wqflask/my_pylmm/pyLMM/__init__.py | 1 -
wqflask/wqflask/my_pylmm/pyLMM/benchmark.py | 44 --
wqflask/wqflask/my_pylmm/pyLMM/chunks.py | 96 ---
wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py | 184 -----
wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 51 --
wqflask/wqflask/my_pylmm/pyLMM/gn2.py | 110 ---
wqflask/wqflask/my_pylmm/pyLMM/gwas.py | 165 -----
wqflask/wqflask/my_pylmm/pyLMM/input.py | 267 -------
wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 168 -----
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 995 ---------------------------
wqflask/wqflask/my_pylmm/pyLMM/lmm2.py | 433 ------------
wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py | 55 --
wqflask/wqflask/my_pylmm/pyLMM/phenotype.py | 65 --
wqflask/wqflask/my_pylmm/pyLMM/plink.py | 107 ---
wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 229 ------
wqflask/wqflask/my_pylmm/pyLMM/standalone.py | 110 ---
wqflask/wqflask/my_pylmm/pyLMM/temp_data.py | 25 -
wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py | 122 ----
18 files changed, 3227 deletions(-)
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/__init__.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/chunks.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/genotype.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gn2.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/gwas.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/input.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/kinship.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/plink.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/standalone.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
delete mode 100644 wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py b/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
deleted file mode 100644
index f33c4e74..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-PYLMM_VERSION="0.51-gn2"
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py b/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
deleted file mode 100644
index 6c6c9f88..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/benchmark.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import print_function, division, absolute_import
-
-import collections
-import inspect
-import time
-
-class Bench(object):
- entries = collections.OrderedDict()
-
- def __init__(self, name=None):
- self.name = name
-
- def __enter__(self):
- if self.name:
- print("Starting benchmark: %s" % (self.name))
- else:
- print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
- self.start_time = time.time()
-
- def __exit__(self, type, value, traceback):
- if self.name:
- name = self.name
- else:
- name = "That"
-
- time_taken = time.time() - self.start_time
- print(" %s took: %f seconds" % (name, (time_taken)))
-
- if self.name:
- Bench.entries[self.name] = Bench.entries.get(self.name, 0) + time_taken
-
-
- @classmethod
- def report(cls):
- total_time = sum((time_taken for time_taken in cls.entries.itervalues()))
- print("\nTiming report\n")
- for name, time_taken in cls.entries.iteritems():
- percent = int(round((time_taken/total_time) * 100))
- print("[{}%] {}: {}".format(percent, name, time_taken))
- print()
-
- def reset(cls):
- """Reset the entries"""
- cls.entries = collections.OrderedDict()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py b/wqflask/wqflask/my_pylmm/pyLMM/chunks.py
deleted file mode 100644
index 9565fb96..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/chunks.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-import math
-import time
-
-
-def divide_into_chunks(the_list, number_chunks):
- """Divides a list into approximately number_chunks smaller lists
-
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
- [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
- >>>
-
- """
- length = len(the_list)
-
- if length == 0:
- return [[]]
-
- if length <= number_chunks:
- number_chunks = length
-
- chunksize = int(math.ceil(length / number_chunks))
-
- chunks = []
- for counter in range(0, length, chunksize):
- chunks.append(the_list[counter:counter+chunksize])
-
- return chunks
-
-def _confirm_chunk(original, result):
- all_chunked = []
- for chunk in result:
- all_chunked.extend(chunk)
- print("length of all chunked:", len(all_chunked))
- assert original == all_chunked, "You didn't chunk right"
-
-
-def _chunk_test(divide_func):
- import random
- random.seed(7)
-
- number_exact = 0
- total_amount_off = 0
-
- for test in range(1, 1001):
- print("\n\ntest:", test)
- number_chunks = random.randint(1, 20)
- number_elements = random.randint(0, 100)
- the_list = list(range(1, number_elements))
- result = divide_func(the_list, number_chunks)
-
- print("Dividing list of length {} into approximately {} chunks - got {} chunks".format(
- len(the_list), number_chunks, len(result)))
- print("result:", result)
-
- _confirm_chunk(the_list, result)
-
- amount_off = abs(number_chunks - len(result))
- if amount_off == 0:
- number_exact += 1
- else:
- total_amount_off += amount_off
-
-
- print("\n{} exact out of {} [Total amount off: {}]".format(number_exact,
- test,
- total_amount_off))
- assert number_exact == 558
- assert total_amount_off == 1580
- return number_exact, total_amount_off
-
-
-def _main():
- info = dict()
- #funcs = (("sam", sam_divide_into_chunks), ("zach", zach_divide_into_chunks))
- funcs = (("only one", divide_into_chunks),)
- for name, func in funcs:
- start = time.time()
- number_exact, total_amount_off = _chunk_test(func)
- took = time.time() - start
- info[name] = dict(number_exact=number_exact,
- total_amount_off=total_amount_off,
- took=took)
-
- print("info is:", info)
-
-if __name__ == '__main__':
- _main()
- print("\nConfirming doctests...")
- import doctest
- doctest.testmod()
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
deleted file mode 100644
index 4312fed0..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/convertlmm.py
+++ /dev/null
@@ -1,184 +0,0 @@
-# This is a converter for common LMM formats, so as to keep file
-# reader complexity outside the main routines.
-
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-from __future__ import print_function
-from optparse import OptionParser
-import sys
-import os
-import numpy as np
-# from lmm import LMM, run_other
-# import input
-import plink
-
-usage = """
-python convertlmm.py [--plink] [--prefix out_basename] [--kinship kfile] [--pheno pname] [--geno gname]
-
- Convert files for runlmm.py processing. Writes to stdout by default.
-
- try --help for more information
-
-Examples:
-
- python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos > test.pheno
-
- python ./my_pylmm/pyLMM/convertlmm.py --plink --pheno data/test_snps.132k.clean.noX.fake.phenos --geno data/test_snps.132k.clean.noX > test.geno
-"""
-
-# if len(args) == 0:
-# print usage
-# sys.exit(1)
-
-option_parser = OptionParser(usage=usage)
-option_parser.add_option("--kinship", dest="kinship",
- help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program")
-option_parser.add_option("--pheno", dest="pheno",
- help="Parse a phenotype file (use with --plink only)")
-option_parser.add_option("--geno", dest="geno",
- help="Parse a genotype file (use with --plink only)")
-option_parser.add_option("--plink", dest="plink", action="store_true", default=False,
- help="Parse PLINK style")
-# option_parser.add_option("--kinship",action="store_false", dest="kinship", default=True,
-# help="Parse a kinship file. This is an nxn plain text file and can be computed with the pylmmKinship program.")
-option_parser.add_option("--prefix", dest="prefix",
- help="Output prefix for output file(s)")
-option_parser.add_option("-q", "--quiet",
- action="store_false", dest="verbose", default=True,
- help="don't print status messages to stdout")
-option_parser.add_option("-v", "--verbose",
- action="store_true", dest="verbose", default=False,
- help="Print extra info")
-
-(options, args) = option_parser.parse_args()
-
-writer = None
-num_inds = None
-snp_names = []
-ind_names = []
-
-def msg(s):
- sys.stderr.write("INFO: ")
- sys.stderr.write(s)
- sys.stderr.write("\n")
-
-def wr(s):
- if writer:
- writer.write(s)
- else:
- sys.stdout.write(s)
-
-def wrln(s):
- wr(s)
- wr("\n")
-
-
-if options.pheno:
- if not options.plink:
- raise Exception("Use --plink switch")
- # Because plink does not track size we need to read the whole thing first
- msg("Converting pheno "+options.pheno)
- phenos = []
- count = 0
- count_pheno = None
- for line in open(options.pheno,'r'):
- count += 1
- list = line.split()
- pcount = len(list)-2
- assert(pcount > 0)
- if count_pheno == None:
- count_pheno = pcount
- assert(count_pheno == pcount)
- row = [list[0]]+list[2:]
- phenos.append(row)
-
- writer = None
- if options.prefix:
- writer = open(options.prefix+".pheno","w")
- wrln("# Phenotype format version 1.0")
- wrln("# Individuals = "+str(count))
- wrln("# Phenotypes = "+str(count_pheno))
- for i in range(count_pheno):
- wr("\t"+str(i+1))
- wr("\n")
- for i in range(count):
- wr("\t".join(phenos[i]))
- wr("\n")
- num_inds = count
- msg(str(count)+" pheno lines written")
-
-if options.kinship:
- is_header = True
- count = 0
- msg("Converting kinship "+options.kinship)
- writer = None
- if options.prefix:
- writer = open(options.prefix+".kin","w")
- for line in open(options.kinship,'r'):
- count += 1
- if is_header:
- size = len(line.split())
- wrln("# Kinship format version 1.0")
- wrln("# Size="+str(size))
- for i in range(size):
- wr("\t"+str(i+1))
- wr("\n")
- is_header = False
- wr(str(count))
- wr("\t")
- wr("\t".join(line.split()))
- wr("\n")
- num_inds = count
- msg(str(count)+" kinship lines written")
-
-if options.geno:
- msg("Converting geno "+options.geno+'.bed')
- if not options.plink:
- raise Exception("Use --plink switch")
- if not num_inds:
- raise Exception("Can not figure out the number of individuals, use --pheno or --kinship")
- bim_snps = plink.readbim(options.geno+'.bim')
- num_snps = len(bim_snps)
- writer = None
- if options.prefix:
- writer = open(options.prefix+".geno","w")
- wrln("# Genotype format version 1.0")
- wrln("# Individuals = "+str(num_inds))
- wrln("# SNPs = "+str(num_snps))
- wrln("# Encoding = HAB")
- for i in range(num_inds):
- wr("\t"+str(i+1))
- wr("\n")
-
- m = []
- def out(i,x):
- # wr(str(i)+"\t")
- # wr("\t".join(x))
- # wr("\n")
- m.append(x)
-
- snps = plink.readbed(options.geno+'.bed',num_inds, ('A','H','B','-'), out)
-
- msg("Write transposed genotype matrix")
- for g in range(num_snps):
- wr(bim_snps[g][1]+"\t")
- for i in range(num_inds):
- wr(m[g][i])
- wr("\n")
-
- msg(str(count)+" geno lines written (with "+str(snps)+" snps)")
-
-msg("Converting done")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
deleted file mode 100644
index 49f32e3a..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Genotype routines
-
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-import numpy as np
-from collections import Counter
-import operator
-
-def replace_missing_with_MAF(snp_g):
- """
- Replace the missing genotype with the minor allele frequency (MAF)
- in the snp row. It is rather slow!
- """
- cnt = Counter(snp_g)
- tuples = sorted(cnt.items(), key=operator.itemgetter(1))
- l2 = [t for t in tuples if not np.isnan(t[0])]
- maf = l2[0][0]
- res = np.array([maf if np.isnan(snp) else snp for snp in snp_g])
- return res
-
-def normalize(ind_g):
- """
- Run for every SNP list (for one individual) and return
- normalized SNP genotype values with missing data filled in
- """
- g = np.copy(ind_g) # copy to avoid side effects
- missing = np.isnan(g)
- values = g[True - missing]
- mean = values.mean() # Global mean value
- stddev = np.sqrt(values.var()) # Global stddev
- g[missing] = mean # Plug-in mean values for missing data
- if stddev == 0:
- g = g - mean # Subtract the mean
- else:
- g = (g - mean) / stddev # Normalize the deviation
- return g
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py b/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
deleted file mode 100644
index 821195c8..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/gn2.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Standalone specific methods and callback handler
-#
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# Set the log level with
-#
-# logging.basicConfig(level=logging.DEBUG)
-
-from __future__ import absolute_import, print_function, division
-
-import numpy as np
-import sys
-import logging
-
-# logger = logging.getLogger(__name__)
-logger = logging.getLogger('lmm2')
-logging.basicConfig(level=logging.DEBUG)
-np.set_printoptions(precision=3,suppress=True)
-
-progress_location = None
-progress_current = None
-progress_prev_perc = None
-
-def progress_default_func(location,count,total):
- global progress_current
- value = round(count*100.0/total)
- progress_current = value
-
-progress_func = progress_default_func
-
-def progress_set_func(func):
- global progress_func
- progress_func = func
-
-def progress(location, count, total):
- global progress_location
- global progress_prev_perc
-
- perc = round(count*100.0/total)
- if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
- progress_func(location, count, total)
- logger.info("Progress: %s %d%%" % (location,perc))
- progress_location = location
- progress_prev_perc = perc
-
-def mprint(msg,data):
- """
- Array/matrix print function
- """
- m = np.array(data)
- if m.ndim == 1:
- print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
- if m.ndim == 2:
- print(msg,m.shape,"=\n[",
- m[0][0:3]," ... ",m[0][-3:],"\n ",
- m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
- m[-2][0:3]," ... ",m[-2][-3:],"\n ",
- m[-1][0:3]," ... ",m[-1][-3:],"]")
-
-def fatal(msg):
- logger.critical(msg)
- raise Exception(msg)
-
-def callbacks():
- return dict(
- write = sys.stdout.write,
- writeln = print,
- debug = logger.debug,
- info = logger.info,
- warning = logger.warning,
- error = logger.error,
- critical = logger.critical,
- fatal = fatal,
- progress = progress,
- mprint = mprint
- )
-
-def uses(*funcs):
- """
- Some sugar
- """
- return [callbacks()[func] for func in funcs]
-
-# ----- Minor test cases:
-
-if __name__ == '__main__':
- # logging.basicConfig(level=logging.DEBUG)
- logging.debug("Test %i" % (1))
- d = callbacks()['debug']
- d("TEST")
- wrln = callbacks()['writeln']
- wrln("Hello %i" % 34)
- progress = callbacks()['progress']
- progress("I am half way",50,100)
- list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
- mprint("list",list)
- matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
- mprint("matrix",matrix)
- ix,dx = uses("info","debug")
- ix("ix")
- dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
deleted file mode 100644
index 247a8729..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ /dev/null
@@ -1,165 +0,0 @@
-# pylmm-based GWAS calculation
-#
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-#!/usr/bin/python
-
-import pdb
-import time
-import sys
-import lmm2
-
-import os
-import numpy as np
-import input
-from optmatrix import matrix_initialize
-from lmm2 import LMM2
-
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue
-
-# ---- A trick to decide on the environment:
-try:
- from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses
-except ImportError:
- has_gn2=False
- from standalone import uses
-
-progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
-
-
-def formatResult(id,beta,betaSD,ts,ps):
- return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
-
-def compute_snp(j,n,snp_ids,lmm2,REML,q = None):
- result = []
- for snp_id in snp_ids:
- snp,id = snp_id
- x = snp.reshape((n,1)) # all the SNPs
- # if refit:
- # L.fit(X=snp,REML=REML)
- ts,ps,beta,betaVar = lmm2.association(x,REML=REML,returnBeta=True)
- # result.append(formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps))
- result.append( (ts,ps) )
- if not q:
- q = compute_snp.q
- q.put([j,result])
- return j
-
-def f_init(q):
- compute_snp.q = q
-
-def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
- """
- GWAS. The G matrix should be n inds (cols) x m snps (rows)
- """
- info("In gwas.gwas")
- matrix_initialize()
- cpu_num = mp.cpu_count()
- numThreads = None # for now use all available threads
- kfile2 = False
- reml = restricted_max_likelihood
-
- mprint("G",G)
- n = G.shape[1] # inds
- inds = n
- m = G.shape[0] # snps
- snps = m
- info("%s SNPs",snps)
- assert snps>=inds, "snps should be larger than inds (snps=%d,inds=%d)" % (snps,inds)
-
- # CREATE LMM object for association
- # if not kfile2: L = LMM(Y,K,Kva,Kve,X0,verbose=verbose)
- # else: L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=verbose,K2=K2)
-
- lmm2 = LMM2(Y,K) # ,Kva,Kve,X0,verbose=verbose)
- if not refit:
- info("Computing fit for null model")
- lmm2.fit() # follow GN model in run_other
- info("heritability=%0.3f, sigma=%0.3f" % (lmm2.optH,lmm2.optSigma))
-
- res = []
-
- # Set up the pool
- # mp.set_start_method('spawn')
- q = mp.Queue()
- p = mp.Pool(numThreads, f_init, [q])
- collect = []
-
- count = 0
- job = 0
- jobs_running = 0
- jobs_completed = 0
- for snp in G:
- snp_id = (snp,'SNPID')
- count += 1
- if count % 1000 == 0:
- job += 1
- debug("Job %d At SNP %d" % (job,count))
- if numThreads == 1:
- debug("Running on 1 THREAD")
- compute_snp(job,n,collect,lmm2,reml,q)
- collect = []
- j,lst = q.get()
- debug("Job "+str(j)+" finished")
- jobs_completed += 1
- progress("GWAS2",jobs_completed,snps/1000)
- res.append((j,lst))
- else:
- p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
- jobs_running += 1
- collect = []
- while jobs_running > cpu_num:
- try:
- j,lst = q.get_nowait()
- debug("Job "+str(j)+" finished")
- jobs_completed += 1
- progress("GWAS2",jobs_completed,snps/1000)
- res.append((j,lst))
- jobs_running -= 1
- except Queue.Empty:
- time.sleep(0.1)
- pass
- if jobs_running > cpu_num*2:
- time.sleep(1.0)
- else:
- break
-
- collect.append(snp_id)
-
- if numThreads==1 or count<1000 or len(collect)>0:
- job += 1
- debug("Collect final batch size %i job %i @%i: " % (len(collect), job, count))
- compute_snp(job,n,collect,lmm2,reml,q)
- collect = []
- j,lst = q.get()
- res.append((j,lst))
- debug("count=%i running=%i collect=%i" % (count,jobs_running,len(collect)))
- for job in range(jobs_running):
- j,lst = q.get(True,15) # time out
- debug("Job "+str(j)+" finished")
- jobs_completed += 1
- progress("GWAS2",jobs_completed,snps/1000)
- res.append((j,lst))
-
- mprint("Before sort",[res1[0] for res1 in res])
- res = sorted(res,key=lambda x: x[0])
- mprint("After sort",[res1[0] for res1 in res])
- info([len(res1[1]) for res1 in res])
- ts = [item[0] for j,res1 in res for item in res1]
- ps = [item[1] for j,res1 in res for item in res1]
- return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
deleted file mode 100644
index 7063fedf..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ /dev/null
@@ -1,267 +0,0 @@
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-#The program is free for academic use. Please contact Nick Furlotte
-# if you are interested in using the software for
-#commercial purposes.
-
-#The software must not be modified and distributed without prior
-#permission of the author.
-
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import os
-import sys
-import numpy as np
-import struct
-import pdb
-
-class plink:
- def __init__(self,fbase,kFile=None,phenoFile=None,type='b',normGenotype=True,readKFile=False):
- self.fbase = fbase
- self.type = type
- self.indivs = self.getIndivs(self.fbase,type)
- self.kFile = kFile
- self.phenos = None
- self.normGenotype = normGenotype
- self.phenoFile = phenoFile
- # Originally I was using the fastLMM style that has indiv IDs embedded.
- # NOW I want to use this module to just read SNPs so I'm allowing
- # the programmer to turn off the kinship reading.
- self.readKFile = readKFile
-
- if self.kFile:
- self.K = self.readKinship(self.kFile)
- elif os.path.isfile("%s.kin" % fbase):
- self.kFile = "%s.kin" %fbase
- if self.readKFile:
- self.K = self.readKinship(self.kFile)
- else:
- self.kFile = None
- self.K = None
-
- self.getPhenos(self.phenoFile)
-
- self.fhandle = None
- self.snpFileHandle = None
-
- def __del__(self):
- if self.fhandle: self.fhandle.close()
- if self.snpFileHandle: self.snpFileHandle.close()
-
- def getSNPIterator(self):
- if not self.type == 'b':
- sys.stderr.write("Have only implemented this for binary plink files (bed)\n")
- return
-
- # get the number of snps
- file = self.fbase + '.bim'
- i = 0
- f = open(file,'r')
- for line in f: i += 1
- f.close()
- self.numSNPs = i
- self.have_read = 0
- self.snpFileHandle = open(file,'r')
-
- self.BytestoRead = self.N / 4 + (self.N % 4 and 1 or 0)
- self._formatStr = 'c'*self.BytestoRead
-
- file = self.fbase + '.bed'
- self.fhandle = open(file,'rb')
-
- magicNumber = self.fhandle.read(2)
- order = self.fhandle.read(1)
- if not order == '\x01':
- sys.stderr.write("This is not in SNP major order - you did not handle this case\n")
- raise StopIteration
-
- return self
-
- def __iter__(self):
- return self.getSNPIterator()
-
- def next(self):
- if self.have_read == self.numSNPs:
- raise StopIteration
- X = self.fhandle.read(self.BytestoRead)
- XX = [bin(ord(x)) for x in struct.unpack(self._formatStr,X)]
- self.have_read += 1
- return self.formatBinaryGenotypes(XX,self.normGenotype),self.snpFileHandle.readline().strip().split()[1]
-
- def formatBinaryGenotypes(self,X,norm=True):
- D = { \
- '00': 0.0, \
- '10': 0.5, \
- '11': 1.0, \
- '01': np.nan \
- }
-
- D_tped = { \
- '00': '1 1', \
- '10': '1 2', \
- '11': '2 2', \
- '01': '0 0' \
- }
-
- #D = D_tped
-
- G = []
- for x in X:
- if not len(x) == 10:
- xx = x[2:]
- x = '0b' + '0'*(8 - len(xx)) + xx
- a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4])
- L = [D[y] for y in [a,b,c,d]]
- G += L
- # only take the leading values because whatever is left should be null
- G = G[:self.N]
- G = np.array(G)
- if norm:
- G = self.normalizeGenotype(G)
- return G
-
- def normalizeGenotype(self,G):
- # print "Before",G
- # print G.shape
- print "call input.normalizeGenotype"
- raise "This should not be used"
- x = True - np.isnan(G)
- m = G[x].mean()
- s = np.sqrt(G[x].var())
- G[np.isnan(G)] = m
- if s == 0: G = G - m
- else: G = (G - m) / s
- # print "After",G
- return G
-
- def getPhenos(self,phenoFile=None):
- if not phenoFile:
- self.phenoFile = phenoFile = self.fbase+".phenos"
- if not os.path.isfile(phenoFile):
- sys.stderr.write("Could not find phenotype file: %s\n" % (phenoFile))
- return
- f = open(phenoFile,'r')
- keys = []
- P = []
- for line in f:
- v = line.strip().split()
- keys.append((v[0],v[1]))
- P.append([(x == 'NA' or x == '-9') and np.nan or float(x) for x in v[2:]])
- f.close()
- P = np.array(P)
-
- # reorder to match self.indivs
- D = {}
- L = []
- for i in range(len(keys)):
- D[keys[i]] = i
- for i in range(len(self.indivs)):
- if not D.has_key(self.indivs[i]):
- continue
- L.append(D[self.indivs[i]])
- P = P[L,:]
-
- self.phenos = P
- return P
-
- def getIndivs(self,base,type='b'):
- if type == 't':
- famFile = "%s.tfam" % base
- else:
- famFile = "%s.fam" % base
- keys = []
- i = 0
- f = open(famFile,'r')
- for line in f:
- v = line.strip().split()
- famId = v[0]
- indivId = v[1]
- k = (famId.strip(),indivId.strip())
- keys.append(k)
- i += 1
- f.close()
-
- self.N = len(keys)
- sys.stderr.write("Read %d individuals from %s\n" % (self.N, famFile))
-
- return keys
-
- def readKinship(self,kFile):
- # Assume the fastLMM style
- # This will read in the kinship matrix and then reorder it
- # according to self.indivs - additionally throwing out individuals
- # that are not in both sets
- if self.indivs == None or len(self.indivs) == 0:
- sys.stderr.write("Did not read any individuals so can't load kinship\n")
- return
-
- sys.stderr.write("Reading kinship matrix from %s\n" % (kFile) )
-
- f = open(kFile,'r')
- # read indivs
- v = f.readline().strip().split("\t")[1:]
- keys = [tuple(y.split()) for y in v]
- D = {}
- for i in range(len(keys)): D[keys[i]] = i
-
- # read matrix
- K = []
- for line in f:
- K.append([float(x) for x in line.strip().split("\t")[1:]])
- f.close()
- K = np.array(K)
-
- # reorder to match self.indivs
- L = []
- KK = []
- X = []
- for i in range(len(self.indivs)):
- if not D.has_key(self.indivs[i]):
- X.append(self.indivs[i])
- else:
- KK.append(self.indivs[i])
- L.append(D[self.indivs[i]])
- K = K[L,:][:,L]
- self.indivs = KK
- self.indivs_removed = X
- if len(self.indivs_removed):
- sys.stderr.write("Removed %d individuals that did not appear in Kinship\n" % (len(self.indivs_removed)))
- return K
-
- def getCovariates(self,covFile=None):
- if not os.path.isfile(covFile):
- sys.stderr.write("Could not find covariate file: %s\n" % (phenoFile))
- return
- f = open(covFile,'r')
- keys = []
- P = []
- for line in f:
- v = line.strip().split()
- keys.append((v[0],v[1]))
- P.append([x == 'NA' and np.nan or float(x) for x in v[2:]])
- f.close()
- P = np.array(P)
-
- # reorder to match self.indivs
- D = {}
- L = []
- for i in range(len(keys)):
- D[keys[i]] = i
- for i in range(len(self.indivs)):
- if not D.has_key(self.indivs[i]): continue
- L.append(D[self.indivs[i]])
- P = P[L,:]
-
- return P
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
deleted file mode 100644
index 1c157fd8..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ /dev/null
@@ -1,168 +0,0 @@
-# pylmm kinship calculation
-#
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-# env PYTHONPATH=$pylmm_lib_path:./lib python $pylmm_lib_path/runlmm.py --pheno test.pheno --geno test9000.geno kinship --test
-
-import sys
-import os
-import numpy as np
-from scipy import linalg
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue
-import time
-
-from optmatrix import matrix_initialize, matrixMultT
-
-# ---- A trick to decide on the environment:
-try:
- from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses, progress_set_func
-except ImportError:
- has_gn2=False
- import standalone as handlers
- from standalone import uses, progress_set_func
-
-progress,debug,info,mprint = uses('progress','debug','info','mprint')
-
-def kinship_full(G):
- """
- Calculate the Kinship matrix using a full dot multiplication
- """
- # mprint("kinship_full G",G)
- m = G.shape[0] # snps
- n = G.shape[1] # inds
- info("%d SNPs",m)
- assert m>n, "n should be larger than m (%d snps > %d inds)" % (m,n)
- # m = np.dot(G.T,G)
- m = matrixMultT(G.T)
- m = m/G.shape[0]
- # mprint("kinship_full K",m)
- return m
-
-def compute_W(job,G,n,snps,compute_size):
- """
- Read 1000 SNPs at a time into matrix and return the result
- """
- m = compute_size
- W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
- for j in range(0,compute_size):
- pos = job*m + j # real position
- if pos >= snps:
- W = W[:,range(0,j)]
- break
- snp = G[job*compute_size+j]
- if snp.var() == 0:
- continue
- W[:,j] = snp # set row to list of SNPs
- return W
-
-def compute_matrixMult(job,W,q = None):
- """
- Compute Kinship(W)*j
-
- For every set of SNPs matrixMult is used to multiply matrices T(W)*W
- """
- res = matrixMultT(W)
- if not q: q=compute_matrixMult.q
- q.put([job,res])
- return job
-
-def f_init(q):
- compute_matrixMult.q = q
-
-# Calculate the kinship matrix from G (SNPs as rows!), returns K
-#
-def kinship(G,computeSize=1000,numThreads=None,useBLAS=False):
-
- matrix_initialize(useBLAS)
-
- mprint("G",G)
- n = G.shape[1] # inds
- inds = n
- m = G.shape[0] # snps
- snps = m
- info("%i SNPs" % (m))
- assert snps>=inds, "snps should be larger than inds (%i snps, %i inds)" % (snps,inds)
-
- q = mp.Queue()
- p = mp.Pool(numThreads, f_init, [q])
- cpu_num = mp.cpu_count()
- info("CPU cores: %i" % cpu_num)
- iterations = snps/computeSize+1
-
- results = []
- K = np.zeros((n,n)) # The Kinship matrix has dimension individuals x individuals
-
- completed = 0
- for job in range(iterations):
- info("Processing job %d first %d SNPs" % (job, ((job+1)*computeSize)))
- W = compute_W(job,G,n,snps,computeSize)
- if numThreads == 1:
- # Single-core
- compute_matrixMult(job,W,q)
- j,x = q.get()
- debug("Job "+str(j)+" finished")
- progress("kinship",j,iterations)
- K_j = x
- K = K + K_j
- else:
- # Multi-core
- results.append(p.apply_async(compute_matrixMult, (job,W)))
- # Do we have a result?
- while (len(results)-completed>cpu_num*2):
- time.sleep(0.1)
- try:
- j,x = q.get_nowait()
- debug("Job "+str(j)+" finished")
- K_j = x
- K = K + K_j
- completed += 1
- progress("kinship",completed,iterations)
- except Queue.Empty:
- pass
-
- if numThreads == None or numThreads > 1:
- for job in range(len(results)-completed):
- j,x = q.get(True,15)
- debug("Job "+str(j)+" finished")
- K_j = x
- K = K + K_j
- completed += 1
- progress("kinship",completed,iterations)
-
- K = K / float(snps)
- return K
-
-def kvakve(K):
- """
- Obtain eigendecomposition for K and return Kva,Kve where Kva is cleaned
- of small values < 1e-6 (notably smaller than zero)
- """
- info("Obtaining eigendecomposition for %dx%d matrix" % (K.shape[0],K.shape[1]) )
- Kva,Kve = linalg.eigh(K)
- mprint("Kva",Kva)
- mprint("Kve",Kve)
-
- if sum(Kva < 0):
- info("Cleaning %d eigen values (Kva<0)" % (sum(Kva < 0)))
- Kva[Kva < 1e-6] = 1e-6
- return Kva,Kve
-
-
-
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
deleted file mode 100644
index 2a0c7fdc..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ /dev/null
@@ -1,995 +0,0 @@
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-from __future__ import absolute_import, print_function, division
-
-import sys
-import time
-import uuid
-
-import numpy as np
-from scipy import linalg
-from scipy import optimize
-from scipy import stats
-# import pdb
-
-# import gzip
-# import zlib
-import datetime
-# import cPickle as pickle
-from pprint import pformat as pf
-
-# Add local dir to PYTHONPATH
-import os
-cwd = os.path.dirname(__file__)
-if sys.path[0] != cwd:
- sys.path.insert(1,cwd)
-
-# pylmm imports
-from kinship import kinship, kinship_full, kvakve
-import genotype
-import phenotype
-import gwas
-from benchmark import Bench
-
-# The following imports are for exchanging data with the webserver
-import simplejson as json
-from redis import Redis
-Redis = Redis()
-import temp_data
-
-has_gn2=None
-
-# sys.stderr.write("INFO: pylmm system path is "+":".join(sys.path)+"\n")
-sys.stderr.write("INFO: pylmm file is "+__file__+"\n")
-
-# ---- A trick to decide on the environment:
-try:
- sys.stderr.write("INFO: lmm try loading module\n")
- import utility.formatting # this is never used, just to check the environment
- sys.stderr.write("INFO: This is a genenetwork2 environment\n")
- from gn2 import uses, progress_set_func
- has_gn2=True
-except ImportError:
- # Failed to load gn2
- has_gn2=False
- import standalone as handlers
- from standalone import uses, progress_set_func
- sys.stderr.write("WARNING: LMM standalone version missing the Genenetwork2 environment\n")
-
-progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
-
-#np.seterr('raise')
-
-#def run_human(pheno_vector,
-# covariate_matrix,
-# plink_input_file,
-# kinship_matrix,
-# refit=False,
-# loading_progress=None):
-
-def run_human(pheno_vector,
- covariate_matrix,
- plink_input_file,
- kinship_matrix,
- refit=False):
-
- v = np.isnan(pheno_vector)
- keep = True - v
- keep = keep.reshape((len(keep),))
-
- identifier = str(uuid.uuid4())
-
- #print("pheno_vector: ", pf(pheno_vector))
- #print("kinship_matrix: ", pf(kinship_matrix))
- #print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
-
- #lmm_vars = pickle.dumps(dict(
- # pheno_vector = pheno_vector,
- # covariate_matrix = covariate_matrix,
- # kinship_matrix = kinship_matrix
- #))
- #Redis.hset(identifier, "lmm_vars", lmm_vars)
- #Redis.expire(identifier, 60*60)
-
- if v.sum():
- pheno_vector = pheno_vector[keep]
- print("pheno_vector shape is now: ", pf(pheno_vector.shape))
- covariate_matrix = covariate_matrix[keep,:]
- print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
- print("keep is: ", pf(keep.shape))
- kinship_matrix = kinship_matrix[keep,:][:,keep]
-
- print("kinship_matrix:", pf(kinship_matrix))
-
- n = kinship_matrix.shape[0]
- print("n is:", n)
- lmm_ob = LMM(pheno_vector,
- kinship_matrix,
- covariate_matrix)
- lmm_ob.fit()
-
-
- # Buffers for pvalues and t-stats
- p_values = []
- t_stats = []
-
- #print("input_file: ", plink_input_file)
-
- with Bench("Opening and loading pickle file"):
- with gzip.open(plink_input_file, "rb") as input_file:
- data = pickle.load(input_file)
-
- plink_input = data['plink_input']
-
- #plink_input.getSNPIterator()
- with Bench("Calculating numSNPs"):
- total_snps = data['numSNPs']
-
- with Bench("snp iterator loop"):
- count = 0
-
- with Bench("Create list of inputs"):
- inputs = list(plink_input)
-
- with Bench("Divide into chunks"):
- results = chunks.divide_into_chunks(inputs, 64)
-
- result_store = []
-
- key = "plink_inputs"
-
- # Todo: Delete below line when done testing
- Redis.delete(key)
-
- timestamp = datetime.datetime.utcnow().isoformat()
-
- # Pickle chunks of input SNPs (from Plink interator) and compress them
- #print("Starting adding loop")
- for part, result in enumerate(results):
- #data = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
- holder = pickle.dumps(dict(
- identifier = identifier,
- part = part,
- timestamp = timestamp,
- result = result
- ), pickle.HIGHEST_PROTOCOL)
-
- #print("Adding:", part)
- Redis.rpush(key, zlib.compress(holder))
- #print("End adding loop")
- #print("***** Added to {} queue *****".format(key))
- for snp, this_id in plink_input:
- #with Bench("part before association"):
- #if count > 1000:
- # break
- count += 1
- progress("human",count,total_snps)
-
- #with Bench("actual association"):
- ps, ts = human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit)
-
- #with Bench("after association"):
- p_values.append(ps)
- t_stats.append(ts)
-
- return p_values, t_stats
-
-
-#class HumanAssociation(object):
-# def __init__(self):
-#
-
-def human_association(snp,
- n,
- keep,
- lmm_ob,
- pheno_vector,
- covariate_matrix,
- kinship_matrix,
- refit):
-
- x = snp[keep].reshape((n,1))
- #x[[1,50,100,200,3000],:] = np.nan
- v = np.isnan(x).reshape((-1,))
-
- # Check SNPs for missing values
- if v.sum():
- keeps = True - v
- xs = x[keeps,:]
- # If no variation at this snp or all genotypes missing
- if keeps.sum() <= 1 or xs.var() <= 1e-6:
- return np.nan, np.nan
- #p_values.append(np.nan)
- #t_stats.append(np.nan)
- #continue
-
- # Its ok to center the genotype - I used options.normalizeGenotype to
- # force the removal of missing genotypes as opposed to replacing them with MAF.
-
- #if not options.normalizeGenotype:
- # xs = (xs - xs.mean()) / np.sqrt(xs.var())
-
- filtered_pheno = pheno_vector[keeps]
- filtered_covariate_matrix = covariate_matrix[keeps,:]
-
- print("kinship_matrix shape is: ", pf(kinship_matrix.shape))
- print("keeps is: ", pf(keeps.shape))
- filtered_kinship_matrix = kinship_matrix[keeps,:][:,keeps]
- filtered_lmm_ob = lmm.LMM(filtered_pheno,filtered_kinship_matrix,X0=filtered_covariate_matrix)
- if refit:
- filtered_lmm_ob.fit(X=xs)
- else:
- #try:
- filtered_lmm_ob.fit()
- #except: pdb.set_trace()
- ts,ps,beta,betaVar = Ls.association(xs,returnBeta=True)
- else:
- if x.var() == 0:
- return np.nan, np.nan
- #p_values.append(np.nan)
- #t_stats.append(np.nan)
- #continue
- if refit:
- lmm_ob.fit(X=x)
- ts, ps, beta, betaVar = lmm_ob.association(x)
- return ps, ts
-
-
-#def run(pheno_vector,
-# genotype_matrix,
-# restricted_max_likelihood=True,
-# refit=False,
-# temp_data=None):
-
-def run_other_old(pheno_vector,
- genotype_matrix,
- restricted_max_likelihood=True,
- refit=False):
-
- """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
-
- restricted_max_likelihood -- whether to use restricted max likelihood; True or False
- refit -- whether to refit the variance component for each marker
-
- """
-
- print("Running the original LMM engine in run_other (old)")
- print("REML=",restricted_max_likelihood," REFIT=",refit)
- with Bench("Calculate Kinship"):
- kinship_matrix,genotype_matrix = calculate_kinship_new(genotype_matrix)
-
- print("kinship_matrix: ", pf(kinship_matrix))
- print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
-
- # with Bench("Create LMM object"):
- # lmm_ob = LMM(pheno_vector, kinship_matrix)
-
- # with Bench("LMM_ob fitting"):
- # lmm_ob.fit()
-
- print("run_other_old genotype_matrix: ", genotype_matrix.shape)
- print(genotype_matrix)
-
- with Bench("Doing GWAS"):
- t_stats, p_values = GWAS(pheno_vector,
- genotype_matrix.T,
- kinship_matrix,
- restricted_max_likelihood=True,
- refit=False)
- Bench().report()
- return p_values, t_stats
-
-def run_other_new(n,m,pheno_vector,
- geno,
- restricted_max_likelihood=True,
- refit=False):
-
- """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
-
- restricted_max_likelihood -- whether to use restricted max likelihood; True or False
- refit -- whether to refit the variance component for each marker
-
- """
-
- print("Running the new LMM2 engine in run_other_new")
- print("REML=",restricted_max_likelihood," REFIT=",refit)
-
- # Adjust phenotypes
- n,Y,keep = phenotype.remove_missing_new(n,pheno_vector)
-
- # if options.maf_normalization:
- # G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
- # print "MAF replacements: \n",G
- # if not options.skip_genotype_normalization:
- # G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
-
- geno = geno[:,keep]
- with Bench("Calculate Kinship"):
- K,G = calculate_kinship_new(geno)
-
- print("kinship_matrix: ", pf(K))
- print("kinship_matrix.shape: ", pf(K.shape))
-
- # with Bench("Create LMM object"):
- # lmm_ob = lmm2.LMM2(Y,K)
- # with Bench("LMM_ob fitting"):
- # lmm_ob.fit()
-
- print("run_other_new genotype_matrix: ", G.shape)
- print(G)
-
- with Bench("Doing GWAS"):
- t_stats, p_values = gwas.gwas(Y,
- G,
- K,
- restricted_max_likelihood=True,
- refit=False,verbose=True)
- Bench().report()
- return p_values, t_stats
-
-# def matrixMult(A,B):
-# return np.dot(A,B)
-
-def matrixMult(A,B):
-
- # If there is no fblas then we will revert to np.dot()
-
- try:
- linalg.fblas
- except AttributeError:
- return np.dot(A,B)
-
- #print("A is:", pf(A.shape))
- #print("B is:", pf(B.shape))
-
- # If the matrices are in Fortran order then the computations will be faster
- # when using dgemm. Otherwise, the function will copy the matrix and that takes time.
- if not A.flags['F_CONTIGUOUS']:
- AA = A.T
- transA = True
- else:
- AA = A
- transA = False
-
- if not B.flags['F_CONTIGUOUS']:
- BB = B.T
- transB = True
- else:
- BB = B
- transB = False
-
- return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-
-def calculate_kinship_new(genotype_matrix):
- """
- Call the new kinship calculation where genotype_matrix contains
- inds (columns) by snps (rows).
- """
- assert type(genotype_matrix) is np.ndarray
- info("call genotype.normalize")
- G = np.apply_along_axis( genotype.normalize, axis=1, arr=genotype_matrix)
- mprint("G",genotype_matrix)
- info("call calculate_kinship_new")
- return kinship(G),G # G gets transposed, we'll turn this into an iterator (FIXME)
-
-def calculate_kinship_iter(geno):
- """
- Call the new kinship calculation where genotype_matrix contains
- inds (columns) by snps (rows).
- """
- assert type(genotype_matrix) is iter
- info("call genotype.normalize")
- G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
- info("call calculate_kinship_new")
- return kinship(G)
-
-def calculate_kinship_old(genotype_matrix):
- """
- genotype_matrix is an n x m matrix encoding SNP minor alleles.
-
- This function takes a matrix oF SNPs, imputes missing values with the maf,
- normalizes the resulting vectors and returns the RRM matrix.
-
- """
- info("call calculate_kinship_old")
- fatal("THE FUNCTION calculate_kinship_old IS OBSOLETE, use calculate_kinship_new instead - see Genotype Normalization Problem on Pjotr's blog")
- n = genotype_matrix.shape[0]
- m = genotype_matrix.shape[1]
- info("genotype 2D matrix n (inds) is: %d" % (n))
- info("genotype 2D matrix m (snps) is: %d" % (m))
- assert m>n, "n should be larger than m (snps>inds)"
- keep = []
- mprint("G (before old normalize)",genotype_matrix)
- for counter in range(m):
- #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
- #Checks if any values in column are not numbers
- not_number = np.isnan(genotype_matrix[:,counter])
-
- #Gets vector of values for column (no values in vector if not all values in col are numbers)
- marker_values = genotype_matrix[True - not_number, counter]
- #print("marker_values is:", pf(marker_values))
-
- #Gets mean of values in vector
- values_mean = marker_values.mean()
-
- genotype_matrix[not_number,counter] = values_mean
- vr = genotype_matrix[:,counter].var()
- if vr == 0:
- continue
- keep.append(counter)
- genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
- progress('kinship_old normalize genotype',counter,m)
-
- genotype_matrix = genotype_matrix[:,keep]
- mprint("G (after old normalize)",genotype_matrix.T)
- kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
- return kinship_matrix,genotype_matrix
- # return kinship_full(genotype_matrix.T),genotype_matrix
-
-def GWAS(pheno_vector,
- genotype_matrix,
- kinship_matrix,
- kinship_eigen_vals=None,
- kinship_eigen_vectors=None,
- covariate_matrix=None,
- restricted_max_likelihood=True,
- refit=False,
- temp_data=None):
- """
- Performs a basic GWAS scan using the LMM. This function
- uses the LMM module to assess association at each SNP and
- does some simple cleanup, such as removing missing individuals
- per SNP and re-computing the eigen-decomp
-
- pheno_vector - n x 1 phenotype vector
- genotype_matrix - n x m SNP matrix
- kinship_matrix - n x n kinship matrix
- kinship_eigen_vals, kinship_eigen_vectors = linalg.eigh(K) - or the eigen vectors and values for K
- covariate_matrix - n x q covariate matrix
- restricted_max_likelihood - use restricted maximum likelihood
- refit - refit the variance component for each SNP
-
- """
- if kinship_eigen_vals is None:
- kinship_eigen_vals = []
- if kinship_eigen_vectors is None:
- kinship_eigen_vectors = []
-
- n = genotype_matrix.shape[0]
- m = genotype_matrix.shape[1]
-
- if covariate_matrix == None:
- covariate_matrix = np.ones((n,1))
-
- # Remove missing values in pheno_vector and adjust associated parameters
- v = np.isnan(pheno_vector)
- if v.sum():
- keep = True - v
- print(pheno_vector.shape,pheno_vector)
- print(keep.shape,keep)
- pheno_vector = pheno_vector[keep]
- #genotype_matrix = genotype_matrix[keep,:]
- #covariate_matrix = covariate_matrix[keep,:]
- #kinship_matrix = kinship_matrix[keep,:][:,keep]
- kinship_eigen_vals = []
- kinship_eigen_vectors = []
-
- lmm_ob = LMM(pheno_vector,
- kinship_matrix,
- kinship_eigen_vals,
- kinship_eigen_vectors,
- covariate_matrix)
- if not refit:
- lmm_ob.fit()
-
- p_values = []
- t_statistics = []
-
- n = genotype_matrix.shape[0]
- m = genotype_matrix.shape[1]
-
- for counter in range(m):
- x = genotype_matrix[:,counter].reshape((n, 1))
- v = np.isnan(x).reshape((-1,))
- if v.sum():
- keep = True - v
- xs = x[keep,:]
- if xs.var() == 0:
- p_values.append(0)
- t_statistics.append(np.nan)
- continue
-
- print(genotype_matrix.shape,pheno_vector.shape,keep.shape)
-
- pheno_vector = pheno_vector[keep]
- covariate_matrix = covariate_matrix[keep,:]
- kinship_matrix = kinship_matrix[keep,:][:,keep]
- lmm_ob_2 = LMM(pheno_vector,
- kinship_matrix,
- X0=covariate_matrix)
- if refit:
- lmm_ob_2.fit(X=xs)
- else:
- lmm_ob_2.fit()
- ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood)
- else:
- if x.var() == 0:
- p_values.append(0)
- t_statistics.append(np.nan)
- continue
-
- if refit:
- lmm_ob.fit(X=x)
- ts, ps, beta, betaVar = lmm_ob.association(x, REML=restricted_max_likelihood)
-
- progress("gwas_old",counter,m)
-
- p_values.append(ps)
- t_statistics.append(ts)
-
- return t_statistics, p_values
-
-
-class LMM:
-
- """
- This is a simple version of EMMA/fastLMM.
- The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K)
- and to optimize this model by finding the maximum-likelihood estimates for the model parameters.
- There are three model parameters: heritability (h), covariate coefficients (beta) and the total
- phenotypic variance (sigma).
- Heritability as defined here is the proportion of the total variance (sigma) that is attributed to
- the kinship matrix.
-
- For simplicity, we assume that everything being input is a numpy array.
- If this is not the case, the module may throw an error as conversion from list to numpy array
- is not done consistently.
-
- """
- def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True):
-
- """
- The constructor takes a phenotype vector or array of size n.
- It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
- If they are not provided, the constructor will calculate them.
- X0 is an optional covariate matrix of size n x q, where there are q covariates.
- When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
- """
-
- if X0 is None: X0 = np.ones(len(Y)).reshape(len(Y),1)
- self.verbose = verbose
-
- #x = Y != -9
- x = True - np.isnan(Y)
- #pdb.set_trace()
- if not x.sum() == len(Y):
- print("Removing %d missing values from Y\n" % ((True - x).sum()))
- if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum()))
- Y = Y[x]
- print("x: ", len(x))
- print("K: ", K.shape)
- #K = K[x,:][:,x]
- X0 = X0[x,:]
- Kva = []
- Kve = []
- self.nonmissing = x
-
- print("this K is:", K.shape, pf(K))
-
- if len(Kva) == 0 or len(Kve) == 0:
- # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
- begin = time.time()
- # Kva,Kve = linalg.eigh(K)
- Kva,Kve = kvakve(K)
- end = time.time()
- if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
- print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
-
- self.K = K
- self.Kva = Kva
- self.Kve = Kve
- print("self.Kva is: ", self.Kva.shape, pf(self.Kva))
- print("self.Kve is: ", self.Kve.shape, pf(self.Kve))
- self.Y = Y
- self.X0 = X0
- self.N = self.K.shape[0]
-
- # ----> Below moved to kinship.kvakve(K)
- # if sum(self.Kva < 1e-6):
- # if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0)))
- # self.Kva[self.Kva < 1e-6] = 1e-6
-
- self.transform()
-
- def transform(self):
-
- """
- Computes a transformation on the phenotype vector and the covariate matrix.
- The transformation is obtained by left multiplying each parameter by the transpose of the
- eigenvector matrix of K (the kinship).
- """
-
- self.Yt = matrixMult(self.Kve.T, self.Y)
- self.X0t = matrixMult(self.Kve.T, self.X0)
- self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))])
- self.q = self.X0t.shape[1]
-
- def getMLSoln(self,h,X):
-
- """
- Obtains the maximum-likelihood estimates for the covariate coefficients (beta),
- the total variance of the trait (sigma) and also passes intermediates that can
- be utilized in other functions. The input parameter h is a value between 0 and 1 and represents
- the heritability or the proportion of the total variance attributed to genetics. The X is the
- covariate matrix.
- """
-
- S = 1.0/(h*self.Kva + (1.0 - h))
- Xt = X.T*S
- XX = matrixMult(Xt,X)
- XX_i = linalg.inv(XX)
- beta = matrixMult(matrixMult(XX_i,Xt),self.Yt)
- Yt = self.Yt - matrixMult(X,beta)
- Q = np.dot(Yt.T*S,Yt)
- sigma = Q * 1.0 / (float(self.N) - float(X.shape[1]))
- return beta,sigma,Q,XX_i,XX
-
- def LL_brent(self,h,X=None,REML=False):
- #brent will not be bounded by the specified bracket.
- # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search.
- if h < 0: return 1e6
- return -self.LL(h,X,stack=False,REML=REML)[0]
-
- def LL(self,h,X=None,stack=True,REML=False):
-
- """
- Computes the log-likelihood for a given heritability (h). If X==None, then the
- default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with
- the input X. If stack is false, then X is used in place of X0t in the LL calculation.
- REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
- """
-
- if X is None:
- X = self.X0t
- elif stack:
- self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
- X = self.X0t_stack
-
- n = float(self.N)
- q = float(X.shape[1])
- beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X)
- LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q)
- LL = -0.5 * LL
-
- if REML:
- LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX))
- LL = LL + 0.5*LL_REML_part
-
- return LL,beta,sigma,XX_i
-
- def getMax(self,H, X=None,REML=False):
-
- """
- Helper functions for .fit(...).
- This function takes a set of LLs computed over a grid and finds possible regions
- containing a maximum. Within these regions, a Brent search is performed to find the
- optimum.
-
- """
- n = len(self.LLs)
- HOpt = []
- for i in range(1,n-2):
- if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]:
- HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1])))
- if np.isnan(HOpt[-1][0]):
- HOpt[-1][0] = [self.LLs[i-1]]
-
- if len(HOpt) > 1:
- if self.verbose:
- sys.stderr.write("NOTE: Found multiple optima. Returning first...\n")
- return HOpt[0]
- elif len(HOpt) == 1:
- return HOpt[0]
- elif self.LLs[0] > self.LLs[n-1]:
- return H[0]
- else:
- return H[n-1]
-
- def fit(self,X=None,ngrids=100,REML=True):
-
- """
- Finds the maximum-likelihood solution for the heritability (h) given the current parameters.
- X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as
- the covariate matrix.
-
- This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum.
- Given this optimum, the function computes the LL and associated ML solutions.
- """
-
- if X == None:
- X = self.X0t
- else:
- #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
- self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
- X = self.X0t_stack
-
- H = np.array(range(ngrids)) / float(ngrids)
- L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H])
- self.LLs = L
-
- hmax = self.getMax(H,X,REML)
- L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML)
-
- self.H = H
- self.optH = hmax
- self.optLL = L
- self.optBeta = beta
- self.optSigma = sigma
-
- return hmax,beta,sigma,L
-
- def association(self,X, h = None, stack=True,REML=True, returnBeta=True):
-
- """
- Calculates association statitics for the SNPs encoded in the vector X of size n.
- If h == None, the optimal h stored in optH is used.
-
- """
- if stack:
- #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
- self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
- X = self.X0t_stack
-
- if h == None:
- h = self.optH
-
- L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
- q = len(beta)
- ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q)
-
- if returnBeta:
- return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma
- return ts,ps
-
- def tstat(self,beta,var,sigma,q):
-
- """
- Calculates a t-statistic and associated p-value given the estimate of beta and its standard error.
- This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test.
- """
-
- ts = beta / np.sqrt(var * sigma)
- ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q))
- if not len(ts) == 1 or not len(ps) == 1:
- print("ts=",ts)
- print("ps=",ps)
- raise Exception("Something bad happened :(")
- return ts.sum(),ps.sum()
-
- def plotFit(self,color='b-',title=''):
-
- """
- Simple function to visualize the likelihood space. It takes the LLs
- calcualted over a grid and normalizes them by subtracting off the mean and exponentiating.
- The resulting "probabilities" are normalized to one and plotted against heritability.
- This can be seen as an approximation to the posterior distribuiton of heritability.
-
- For diagnostic purposes this lets you see if there is one distinct maximum or multiple
- and what the variance of the parameter looks like.
- """
- import matplotlib.pyplot as pl
-
- mx = self.LLs.max()
- p = np.exp(self.LLs - mx)
- p = p/p.sum()
-
- pl.plot(self.H,p,color)
- pl.xlabel("Heritability")
- pl.ylabel("Probability of data")
- pl.title(title)
-
-def run_gwas(species,n,m,k,y,geno,cov=None,reml=True,refit=False,inputfn=None,new_code=True):
- """
- Invoke pylmm using genotype as a matrix or as a (SNP) iterator.
- """
- info("run_gwas")
- print('pheno', y)
-
- if species == "human" :
- print('kinship', k )
- ps, ts = run_human(pheno_vector = y,
- covariate_matrix = cov,
- plink_input_file = inputfn,
- kinship_matrix = k,
- refit = refit)
- else:
- print('geno', geno.shape, geno)
-
- if new_code:
- ps, ts = run_other_new(n,m,pheno_vector = y,
- geno = geno,
- restricted_max_likelihood = reml,
- refit = refit)
- else:
- ps, ts = run_other_old(pheno_vector = y,
- genotype_matrix = geno,
- restricted_max_likelihood = reml,
- refit = refit)
- return ps,ts
-
-def gwas_with_redis(key,species,new_code=True):
- """
- Invoke pylmm using Redis as a container. new_code runs the new
- version. All the Redis code goes here!
- """
- json_params = Redis.get(key)
-
- params = json.loads(json_params)
-
- tempdata = temp_data.TempData(params['temp_uuid'])
- def update_tempdata(loc,i,total):
- """
- This is the single method that updates Redis for percentage complete!
- """
- tempdata.store("percent_complete",round(i*100.0/total))
- debug("Updating REDIS percent_complete=%d" % (round(i*100.0/total)))
- progress_set_func(update_tempdata)
-
- def narray(t):
- info("Type is "+t)
- v = params.get(t)
- if v is not None:
- # Note input values can be array of string or float
- v1 = [x if x != 'NA' else 'nan' for x in v]
- v = np.array(v1).astype(np.float)
- return v
-
- def marray(t):
- info("Type is "+t)
- v = params.get(t)
- if v is not None:
- m = []
- for r in v:
- # Note input values can be array of string or float
- r1 = [x if x != 'NA' else 'nan' for x in r]
- m.append(np.array(r1).astype(np.float))
- return np.array(m)
- return np.array(v)
-
- def marrayT(t):
- m = marray(t)
- if m is not None:
- return m.T
- return m
-
- # We are transposing before we enter run_gwas - this should happen on the webserver
- # side (or when reading data from file)
- k = marray('kinship_matrix')
- g = marrayT('genotype_matrix')
- mprint("geno",g)
- y = narray('pheno_vector')
- n = len(y)
- m = params.get('num_genotypes')
- if m is None:
- m = g.shape[0]
- info("m=%d,n=%d" % (m,n))
- ps,ts = run_gwas(species,n,m,k,y,g,narray('covariate_matrix'),params['restricted_max_likelihood'],params['refit'],params.get('input_file_name'),new_code)
-
- results_key = "pylmm:results:" + params['temp_uuid']
-
- # fatal(results_key)
- json_results = json.dumps(dict(p_values = ps,
- t_stats = ts))
-
- #Pushing json_results into a list where it is the only item because blpop needs a list
- Redis.rpush(results_key, json_results)
- Redis.expire(results_key, 60*60)
- return ps, ts
-
-def gn2_load_redis(key,species,kinship,pheno,geno,new_code=True):
- """
- This function emulates current GN2 behaviour by pre-loading Redis (note the input
- genotype is transposed to emulate GN2 (FIXME!)
- """
- info("Loading Redis from parsed data")
- if kinship == None:
- k = None
- else:
- k = kinship.tolist()
- params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = geno.T.tolist(),
- num_genotypes = geno.shape[0],
- kinship_matrix = k,
- covariate_matrix = None,
- input_file_name = None,
- restricted_max_likelihood = True,
- refit = False,
- temp_uuid = "testrun_temp_uuid",
-
- # meta data
- timestamp = datetime.datetime.now().isoformat())
-
- json_params = json.dumps(params)
- Redis.set(key, json_params)
- Redis.expire(key, 60*60)
-
- return gwas_with_redis(key,species,new_code)
-
-def gn2_load_redis_iter(key,species,kinship,pheno,geno_iterator):
- """
- This function emulates GN2 behaviour by pre-loading Redis with
- a SNP iterator, for this it sets a key for every genotype (SNP)
- """
- print("Loading Redis using a SNP iterator")
- for i,genotypes in enumerate(geno_iterator):
- gkey = key+'_geno_'+str(i)
- Redis.set(gkey, genotypes)
- Redis.expire(gkey, 60*60)
-
- if kinship == None:
- k = None
- else:
- k = kinship.tolist()
- params = dict(pheno_vector = pheno.tolist(),
- genotype_matrix = "iterator",
- num_genotypes = i,
- kinship_matrix = k,
- covariate_matrix = None,
- input_file_name = None,
- restricted_max_likelihood = True,
- refit = False,
- temp_uuid = "testrun_temp_uuid",
-
- # meta data
- timestamp = datetime.datetime.now().isoformat(),
- )
-
- json_params = json.dumps(params)
- Redis.set(key, json_params)
- Redis.expire(key, 60*60)
- return gwas_with_redis(key,species)
-
-# This is the main function used by Genenetwork2 (with environment)
-#
-# Note that this calling route will become OBSOLETE (we should use runlmm.py
-# instead)
-def gn2_main():
- import argparse
- parser = argparse.ArgumentParser(description='Run pyLMM')
- parser.add_argument('-k', '--key')
- parser.add_argument('-s', '--species')
-
- opts = parser.parse_args()
-
- key = opts.key
- species = opts.species
-
- gwas_with_redis(key,species)
-
-
-if __name__ == '__main__':
- print("WARNING: Calling pylmm from lmm.py will become OBSOLETE, use runlmm.py instead!")
- gn2_main()
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
deleted file mode 100644
index d871d8d2..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ /dev/null
@@ -1,433 +0,0 @@
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013,2014 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-import sys
-import time
-import numpy as np
-from scipy.linalg import eigh, inv, det
-import scipy.stats as stats # t-tests
-from scipy import optimize
-from optmatrix import matrixMult
-import kinship
-
-sys.stderr.write("INFO: pylmm (lmm2) system path is "+":".join(sys.path)+"\n")
-sys.stderr.write("INFO: pylmm (lmm2) file is "+__file__+"\n")
-
-# ---- A trick to decide on the environment:
-try:
- sys.stderr.write("INFO: lmm2 try loading module\n")
- import utility.formatting # this is never used, just to check the environment
- sys.stderr.write("INFO: This is a genenetwork2 environment (lmm2)\n")
- from gn2 import uses, progress_set_func
-except ImportError:
- # Failed to load gn2
- has_gn2=False
- import standalone as handlers
- from standalone import uses, progress_set_func
- sys.stderr.write("WARNING: LMM2 standalone version missing the Genenetwork2 environment\n")
-
-progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
-
-
-def calculateKinship(W,center=False):
- """
- W is an n x m matrix encoding SNP minor alleles.
-
- This function takes a matrix oF SNPs, imputes missing values with the maf,
- normalizes the resulting vectors and returns the RRM matrix.
- """
- n = W.shape[0]
- m = W.shape[1]
- keep = []
- for i in range(m):
- mn = W[True - np.isnan(W[:,i]),i].mean()
- W[np.isnan(W[:,i]),i] = mn
- vr = W[:,i].var()
- if vr == 0: continue
-
- keep.append(i)
- W[:,i] = (W[:,i] - mn) / np.sqrt(vr)
-
- W = W[:,keep]
- K = matrixMult(W,W.T) * 1.0/float(m)
- if center:
- P = np.diag(np.repeat(1,n)) - 1/float(n) * np.ones((n,n))
- S = np.trace(matrixMult(matrixMult(P,K),P))
- K_n = (n - 1)*K / S
- return K_n
- return K
-
-def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
- """
-
- Performs a basic GWAS scan using the LMM. This function
- uses the LMM module to assess association at each SNP and
- does some simple cleanup, such as removing missing individuals
- per SNP and re-computing the eigen-decomp
-
- Y - n x 1 phenotype vector
- X - n x m SNP matrix (genotype matrix)
- K - n x n kinship matrix
- Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K
- X0 - n x q covariate matrix
- REML - use restricted maximum likelihood
- refit - refit the variance component for each SNP
-
- """
- n = X.shape[0]
- m = X.shape[1]
- prins("Initialize GWAS")
- print("genotype matrix n is:", n)
- print("genotype matrix m is:", m)
-
- if X0 is None:
- X0 = np.ones((n,1))
-
- # Remove missing values in Y and adjust associated parameters
- v = np.isnan(Y)
- if v.sum():
- keep = True - v
- keep = keep.reshape((-1,))
- Y = Y[keep]
- X = X[keep,:]
- X0 = X0[keep,:]
- K = K[keep,:][:,keep]
- Kva = []
- Kve = []
-
- if len(Y) == 0:
- return np.ones(m)*np.nan,np.ones(m)*np.nan
-
- L = LMM(Y,K,Kva,Kve,X0)
- if not refit: L.fit()
-
- PS = []
- TS = []
-
- n = X.shape[0]
- m = X.shape[1]
-
- for i in range(m):
- x = X[:,i].reshape((n,1))
- v = np.isnan(x).reshape((-1,))
- if v.sum():
- keep = True - v
- xs = x[keep,:]
- if xs.var() == 0:
- PS.append(np.nan)
- TS.append(np.nan)
- continue
-
- Ys = Y[keep]
- X0s = X0[keep,:]
- Ks = K[keep,:][:,keep]
- Ls = LMM(Ys,Ks,X0=X0s)
- if refit:
- Ls.fit(X=xs)
- else:
- Ls.fit()
- ts,ps = Ls.association(xs,REML=REML)
- else:
- if x.var() == 0:
- PS.append(np.nan)
- TS.append(np.nan)
- continue
-
- if refit:
- L.fit(X=x)
- ts,ps = L.association(x,REML=REML)
-
- PS.append(ps)
- TS.append(ts)
-
- return TS,PS
-
-class LMM2:
-
- """This is a simple version of EMMA/fastLMM.
-
- The main purpose of this module is to take a phenotype vector (Y),
- a set of covariates (X) and a kinship matrix (K) and to optimize
- this model by finding the maximum-likelihood estimates for the
- model parameters. There are three model parameters: heritability
- (h), covariate coefficients (beta) and the total phenotypic
- variance (sigma). Heritability as defined here is the proportion
- of the total variance (sigma) that is attributed to the kinship
- matrix.
-
- For simplicity, we assume that everything being input is a numpy
- array. If this is not the case, the module may throw an error as
- conversion from list to numpy array is not done consistently.
-
- """
- def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False):
-
- """The constructor takes a phenotype vector or array Y of size n. It
- takes a kinship matrix K of size n x n. Kva and Kve can be
- computed as Kva,Kve = linalg.eigh(K) and cached. If they are
- not provided, the constructor will calculate them. X0 is an
- optional covariate matrix of size n x q, where there are q
- covariates. When this parameter is not provided, the
- constructor will set X0 to an n x 1 matrix of all ones to
- represent a mean effect.
- """
-
- if X0 is None:
- X0 = np.ones(len(Y)).reshape(len(Y),1)
- self.verbose = verbose
-
- x = True - np.isnan(Y)
- x = x.reshape(-1,)
- if not x.sum() == len(Y):
- if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum()))
- Y = Y[x]
- K = K[x,:][:,x]
- X0 = X0[x,:]
- Kva = []
- Kve = []
- self.nonmissing = x
-
- print("this K is:", K.shape, K)
-
- if len(Kva) == 0 or len(Kve) == 0:
- # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
- begin = time.time()
- # Kva,Kve = linalg.eigh(K)
- Kva,Kve = kinship.kvakve(K)
- end = time.time()
- if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
- print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
-
- self.K = K
- self.Kva = Kva
- self.Kve = Kve
- self.N = self.K.shape[0]
- self.Y = Y.reshape((self.N,1))
- self.X0 = X0
-
- if sum(self.Kva < 1e-6):
- if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0)))
- self.Kva[self.Kva < 1e-6] = 1e-6
-
- self.transform()
-
- def transform(self):
-
- """
- Computes a transformation on the phenotype vector and the covariate matrix.
- The transformation is obtained by left multiplying each parameter by the transpose of the
- eigenvector matrix of K (the kinship).
- """
-
- self.Yt = matrixMult(self.Kve.T, self.Y)
- self.X0t = matrixMult(self.Kve.T, self.X0)
- self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))])
- self.q = self.X0t.shape[1]
-
- def getMLSoln(self,h,X):
-
- """
- Obtains the maximum-likelihood estimates for the covariate coefficients (beta),
- the total variance of the trait (sigma) and also passes intermediates that can
- be utilized in other functions. The input parameter h is a value between 0 and 1 and represents
- the heritability or the proportion of the total variance attributed to genetics. The X is the
- covariate matrix.
- """
-
- S = 1.0/(h*self.Kva + (1.0 - h))
- Xt = X.T*S
- XX = matrixMult(Xt,X)
- XX_i = inv(XX)
- beta = matrixMult(matrixMult(XX_i,Xt),self.Yt)
- Yt = self.Yt - matrixMult(X,beta)
- Q = np.dot(Yt.T*S,Yt)
- sigma = Q * 1.0 / (float(self.N) - float(X.shape[1]))
- return beta,sigma,Q,XX_i,XX
-
- def LL_brent(self,h,X=None,REML=False):
- #brent will not be bounded by the specified bracket.
- # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search.
- if h < 0: return 1e6
- return -self.LL(h,X,stack=False,REML=REML)[0]
-
- def LL(self,h,X=None,stack=True,REML=False):
-
- """
- Computes the log-likelihood for a given heritability (h). If X==None, then the
- default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with
- the input X. If stack is false, then X is used in place of X0t in the LL calculation.
- REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True.
- """
-
- if X is None: X = self.X0t
- elif stack:
- self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
- X = self.X0t_stack
-
- n = float(self.N)
- q = float(X.shape[1])
- beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X)
- LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q)
- LL = -0.5 * LL
-
- if REML:
- LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(det(matrixMult(X.T,X))) - np.log(det(XX))
- LL = LL + 0.5*LL_REML_part
-
-
- LL = LL.sum()
- return LL,beta,sigma,XX_i
-
- def getMax(self,H, X=None,REML=False):
-
- """
- Helper functions for .fit(...).
- This function takes a set of LLs computed over a grid and finds possible regions
- containing a maximum. Within these regions, a Brent search is performed to find the
- optimum.
-
- """
- n = len(self.LLs)
- HOpt = []
- for i in range(1,n-2):
- if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]:
- HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1])))
- if np.isnan(HOpt[-1]): HOpt[-1] = H[i-1]
- #if np.isnan(HOpt[-1]): HOpt[-1] = self.LLs[i-1]
- #if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]]
-
- if len(HOpt) > 1:
- if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n")
- return HOpt[0]
- elif len(HOpt) == 1: return HOpt[0]
- elif self.LLs[0] > self.LLs[n-1]: return H[0]
- else: return H[n-1]
-
-
- def fit(self,X=None,ngrids=100,REML=True):
-
- """
- Finds the maximum-likelihood solution for the heritability (h) given the current parameters.
- X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as
- the covariate matrix.
-
- This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum.
- Given this optimum, the function computes the LL and associated ML solutions.
- """
-
- if X is None: X = self.X0t
- else:
- #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
- self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0]
- X = self.X0t_stack
-
- H = np.array(range(ngrids)) / float(ngrids)
- L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H])
- self.LLs = L
-
- hmax = self.getMax(H,X,REML)
- L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML)
-
- self.H = H
- self.optH = hmax.sum()
- self.optLL = L
- self.optBeta = beta
- self.optSigma = sigma.sum()
-
- return hmax,beta,sigma,L
-
- def association(self,X,h=None,stack=True,REML=True,returnBeta=False):
- """
- Calculates association statitics for the SNPs encoded in the vector X of size n.
- If h is None, the optimal h stored in optH is used.
-
- """
- if False:
- print "X=",X
- print "h=",h
- print "q=",self.q
- print "self.Kve=",self.Kve
- print "X0t_stack=",self.X0t_stack.shape,self.X0t_stack
-
- if stack:
- # X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)])
- m = matrixMult(self.Kve.T,X)
- # print "m=",m
- m = m[:,0]
- self.X0t_stack[:,(self.q)] = m
- X = self.X0t_stack
-
- if h is None: h = self.optH
-
- L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML)
- q = len(beta)
- ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q)
-
- if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma
- return ts,ps
-
- def tstat(self,beta,var,sigma,q,log=False):
-
- """
- Calculates a t-statistic and associated p-value given the estimate of beta and its standard error.
- This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test.
- """
-
- ts = beta / np.sqrt(var * sigma)
- #ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q))
- # sf == survival function - this is more accurate -- could also use logsf if the precision is not good enough
- if log:
- ps = 2.0 + (stats.t.logsf(np.abs(ts), self.N-q))
- else:
- ps = 2.0*(stats.t.sf(np.abs(ts), self.N-q))
- if not len(ts) == 1 or not len(ps) == 1:
- raise Exception("Something bad happened :(")
- return ts.sum(),ps.sum()
-
- def plotFit(self,color='b-',title=''):
-
- """
- Simple function to visualize the likelihood space. It takes the LLs
- calcualted over a grid and normalizes them by subtracting off the mean and exponentiating.
- The resulting "probabilities" are normalized to one and plotted against heritability.
- This can be seen as an approximation to the posterior distribuiton of heritability.
-
- For diagnostic purposes this lets you see if there is one distinct maximum or multiple
- and what the variance of the parameter looks like.
- """
- import matplotlib.pyplot as pl
-
- mx = self.LLs.max()
- p = np.exp(self.LLs - mx)
- p = p/p.sum()
-
- pl.plot(self.H,p,color)
- pl.xlabel("Heritability")
- pl.ylabel("Probability of data")
- pl.title(title)
-
- def meanAndVar(self):
-
- mx = self.LLs.max()
- p = np.exp(self.LLs - mx)
- p = p/p.sum()
-
- mn = (self.H * p).sum()
- vx = ((self.H - mn)**2 * p).sum()
-
- return mn,vx
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
deleted file mode 100644
index 5c71db6a..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import sys
-import time
-import numpy as np
-from numpy.distutils.system_info import get_info;
-from scipy import linalg
-from scipy import optimize
-from scipy import stats
-
-useNumpy = None
-hasBLAS = None
-
-def matrix_initialize(useBLAS=True):
- global useNumpy # module based variable
- if useBLAS and useNumpy == None:
- print get_info('blas_opt')
- try:
- linalg.fblas
- sys.stderr.write("INFO: using linalg.fblas\n")
- useNumpy = False
- hasBLAS = True
- except AttributeError:
- sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n")
- useNumpy = True
- else:
- sys.stderr.write("INFO: using numpy.dot\n")
- useNumpy=True
-
-def matrixMult(A,B):
- global useNumpy # module based variable
-
- if useNumpy:
- return np.dot(A,B)
-
- # If the matrices are in Fortran order then the computations will be faster
- # when using dgemm. Otherwise, the function will copy the matrix and that takes time.
- if not A.flags['F_CONTIGUOUS']:
- AA = A.T
- transA = True
- else:
- AA = A
- transA = False
-
- if not B.flags['F_CONTIGUOUS']:
- BB = B.T
- transB = True
- else:
- BB = B
- transB = False
-
- return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-
-def matrixMultT(M):
- # res = np.dot(W,W.T)
- # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False)
- return matrixMult(M,M.T)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
deleted file mode 100644
index 7b652515..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# Phenotype routines
-
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-import sys
-import numpy as np
-
-# ---- A trick to decide on the environment:
-try:
- from wqflask.my_pylmm.pyLMM import chunks
- from gn2 import uses, progress_set_func
-except ImportError:
- has_gn2=False
- import standalone as handlers
- from standalone import uses, progress_set_func
-
-progress,debug,info,mprint = uses('progress','debug','info','mprint')
-
-def remove_missing(n,y,g):
- """
- Remove missing data from matrices, make sure the genotype data has
- individuals as rows
- """
- assert(y is not None)
- assert y.shape[0] == g.shape[0],"y (n) %d, g (n,m) %s" % (y.shape[0],g.shape)
-
- y1 = y
- g1 = g
- v = np.isnan(y)
- keep = True - v
- if v.sum():
- info("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
- y1 = y[keep]
- g1 = g[keep,:]
- n = y1.shape[0]
- return n,y1,g1,keep
-
-def remove_missing_new(n,y):
- """
- Remove missing data. Returns new n,y,keep
- """
- assert(y is not None)
- y1 = y
- v = np.isnan(y)
- keep = True - v
- if v.sum():
- info("runlmm.py: Cleaning the phenotype vector by removing %d individuals" % (v.sum()))
- y1 = y[keep]
- n = y1.shape[0]
- return n,y1,keep
-
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/plink.py b/wqflask/wqflask/my_pylmm/pyLMM/plink.py
deleted file mode 100644
index 7bd2df91..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/plink.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Plink module
-#
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-# Some of the BED file parsing came from pylmm:
-# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-# According to the PLINK information
-
-# Parse a textual BIM file and return the contents as a list of tuples
-#
-# Extended variant information file accompanying a .bed binary genotype table.
-#
-# A text file with no header line, and one line per variant with the following six fields:
-#
-# Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name
-# Variant identifier
-# Position in morgans or centimorgans (safe to use dummy value of '0')
-# Base-pair coordinate (normally 1-based, but 0 ok; limited to 231-2)
-# Allele 1 (corresponding to clear bits in .bed; usually minor)
-# Allele 2 (corresponding to set bits in .bed; usually major)
-#
-# Allele codes can contain more than one character. Variants with negative bp coordinates are ignored by PLINK. Example
-#
-# 1 mm37-1-3125499 0 3125499 1 2
-# 1 mm37-1-3125701 0 3125701 1 2
-# 1 mm37-1-3187481 0 3187481 1 2
-
-import struct
-# import numpy as np
-
-def readbim(fn):
- res = []
- for line in open(fn):
- list = line.split()
- if len([True for e in list if e == 'nan']) == 0:
- res.append( (list[0],list[1],int(list[2]),int(list[3]),int(list[4]),int(list[5])) )
- else:
- res.append( (list[0],list[1],list[2],float('nan'),float('nan'),float('nan')) )
- return res
-
-# .bed (PLINK binary biallelic genotype table)
-#
-# Primary representation of genotype calls at biallelic variants. Must
-# be accompanied by .bim and .fam files. Basically contains num SNP
-# blocks containing IND (compressed 4 IND into a byte)
-#
-# Since it is a biallelic format it supports for every individual
-# whether the first allele is homozygous (b00), the second allele is
-# homozygous (b11), it is heterozygous (b10) or that it is missing
-# (b01).
-
-# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bed
-# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#fam
-# http://pngu.mgh.harvard.edu/~purcell/plink2/formats.html#bim
-
-def readbed(fn,inds,encoding,func=None):
-
- # For every SNP block fetch the individual genotypes using values
- # 0.0 and 1.0 for homozygous and 0.5 for heterozygous alleles
- def fetchGenotypes(X):
- # D = { \
- # '00': 0.0, \
- # '10': 0.5, \
- # '11': 1.0, \
- # '01': float('nan') \
- # }
-
- Didx = { '00': 0, '10': 1, '11': 2, '01': 3 }
- G = []
- for x in X:
- if not len(x) == 10:
- xx = x[2:]
- x = '0b' + '0'*(8 - len(xx)) + xx
- a,b,c,d = (x[8:],x[6:8],x[4:6],x[2:4])
- L = [encoding[Didx[y]] for y in [a,b,c,d]]
- G += L
- G = G[:inds]
- # G = np.array(G)
- return G
-
- bytes = inds / 4 + (inds % 4 and 1 or 0)
- format = 'c'*bytes
- count = 0
- with open(fn,'rb') as f:
- magic = f.read(3)
- assert( ":".join("{:02x}".format(ord(c)) for c in magic) == "6c:1b:01")
- while True:
- count += 1
- X = f.read(bytes)
- if not X:
- return(count-1)
- XX = [bin(ord(x)) for x in struct.unpack(format,X)]
- xs = fetchGenotypes(XX)
- func(count,xs)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
deleted file mode 100644
index 6b241cd6..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# This is the LMM runner that calls the possible methods using command line
-# switches. It acts as a multiplexer where all the invocation complexity
-# is kept outside the main LMM routines.
-#
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-from optparse import OptionParser
-import sys
-import tsvreader
-import numpy as np
-
-# Add local dir to PYTHONPATH
-import os
-cwd = os.path.dirname(__file__)
-if sys.path[0] != cwd:
- sys.path.insert(1,cwd)
-
-# pylmm modules
-from lmm import gn2_load_redis, gn2_load_redis_iter, calculate_kinship_new, run_gwas
-from kinship import kinship, kinship_full
-import genotype
-import phenotype
-from standalone import uses
-
-progress,mprint,debug,info,fatal = uses('progress','mprint','debug','info','fatal')
-
-usage = """
-python runlmm.py [options] command
-
- runlmm.py processing multiplexer reads standardised input formats
- and calls the different routines (writes to stdout)
-
- Current commands are:
-
- parse : only parse input files
- redis : use Redis to call into GN2
- kinship : calculate (new) kinship matrix
-
- try --help for more information
-"""
-
-
-parser = OptionParser(usage=usage)
-# parser.add_option("-f", "--file", dest="input file",
-# help="In", metavar="FILE")
-parser.add_option("--kinship",dest="kinship",
- help="Kinship file format 1.0")
-parser.add_option("--pheno",dest="pheno",
- help="Phenotype file format 1.0")
-parser.add_option("--geno",dest="geno",
- help="Genotype file format 1.0")
-parser.add_option("--maf-normalization",
- action="store_true", dest="maf_normalization", default=False,
- help="Apply MAF genotype normalization")
-parser.add_option("--genotype-normalization",
- action="store_true", dest="genotype_normalization", default=False,
- help="Force genotype normalization")
-parser.add_option("--remove-missing-phenotypes",
- action="store_true", dest="remove_missing_phenotypes", default=False,
- help="Remove missing phenotypes")
-parser.add_option("-q", "--quiet",
- action="store_false", dest="verbose", default=True,
- help="don't print status messages to stdout")
-parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication")
-parser.add_option("-t", "--threads",
- type="int", dest="numThreads",
- help="Threads to use")
-parser.add_option("--saveKvaKve",
- action="store_true", dest="saveKvaKve", default=False,
- help="Testing mode")
-parser.add_option("--test",
- action="store_true", dest="testing", default=False,
- help="Testing mode")
-parser.add_option("--test-kinship",
- action="store_true", dest="test_kinship", default=False,
- help="Testing mode for Kinship calculation")
-
-(options, args) = parser.parse_args()
-
-if len(args) != 1:
- print usage
- sys.exit(1)
-
-cmd = args[0]
-print "Command: ",cmd
-
-k = None
-y = None
-g = None
-
-if options.kinship:
- k = tsvreader.kinship(options.kinship)
- print k.shape
-
-if options.pheno:
- y = tsvreader.pheno(options.pheno)
- print y.shape
-
-if options.geno and cmd != 'iterator':
- g = tsvreader.geno(options.geno)
- print g.shape
-
-def check_results(ps,ts):
- print np.array(ps)
- print len(ps),sum(ps)
- p1 = round(ps[0],4)
- p2 = round(ps[-1],4)
- if options.geno == 'data/small.geno':
- info("Validating results for "+options.geno)
- assert p1==0.7387, "p1=%f" % p1
- assert p2==0.7387, "p2=%f" % p2
- if options.geno == 'data/small_na.geno':
- info("Validating results for "+options.geno)
- assert p1==0.062, "p1=%f" % p1
- assert p2==0.062, "p2=%f" % p2
- if options.geno == 'data/test8000.geno':
- info("Validating results for "+options.geno)
- assert round(sum(ps)) == 4070
- assert len(ps) == 8000
- info("Run completed")
-
-if y is not None:
- n = y.shape[0]
-
-if cmd == 'run':
- if options.remove_missing_phenotypes:
- raise Exception('Can not use --remove-missing-phenotypes with LMM2')
- n = len(y)
- m = g.shape[1]
- ps, ts = run_gwas('other',n,m,k,y,g) # <--- pass in geno by SNP
- check_results(ps,ts)
-elif cmd == 'iterator':
- if options.remove_missing_phenotypes:
- raise Exception('Can not use --remove-missing-phenotypes with LMM2')
- geno_iterator = tsvreader.geno_iter(options.geno)
- ps, ts = gn2_load_redis_iter('testrun_iter','other',k,y,geno_iterator)
- check_results(ps,ts)
-elif cmd == 'redis_new':
- # The main difference between redis_new and redis is that missing
- # phenotypes are handled by the first
- if options.remove_missing_phenotypes:
- raise Exception('Can not use --remove-missing-phenotypes with LMM2')
- Y = y
- G = g
- print "Original G",G.shape, "\n", G
- # gt = G.T
- # G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,G,new_code=True)
- check_results(ps,ts)
-elif cmd == 'redis':
- # Emulating the redis setup of GN2
- G = g
- print "Original G",G.shape, "\n", G
- if y is not None and options.remove_missing_phenotypes:
- gnt = np.array(g).T
- n,Y,g,keep = phenotype.remove_missing(n,y,gnt)
- G = g.T
- print "Removed missing phenotypes",G.shape, "\n", G
- else:
- Y = y
- if options.maf_normalization:
- G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
- print "MAF replacements: \n",G
- if options.genotype_normalization:
- G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
- g = None
- gnt = None
-
- # gt = G.T
- # G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,G, new_code=False)
- check_results(ps,ts)
-elif cmd == 'kinship':
- G = g
- print "Original G",G.shape, "\n", G
- if y != None and options.remove_missing_phenotypes:
- gnt = np.array(g).T
- n,Y,g,keep = phenotype.remove_missing(n,y,g.T)
- G = g.T
- print "Removed missing phenotypes",G.shape, "\n", G
- if options.maf_normalization:
- G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
- print "MAF replacements: \n",G
- if options.genotype_normalization:
- G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
- g = None
- gnt = None
-
- if options.test_kinship:
- K = kinship_full(np.copy(G))
- print "Genotype",G.shape, "\n", G
- print "first Kinship method",K.shape,"\n",K
- k1 = round(K[0][0],4)
- K2,G = calculate_kinship_new(np.copy(G))
- print "Genotype",G.shape, "\n", G
- print "GN2 Kinship method",K2.shape,"\n",K2
- k2 = round(K2[0][0],4)
-
- print "Genotype",G.shape, "\n", G
- K3 = kinship(G)
- print "third Kinship method",K3.shape,"\n",K3
- sys.stderr.write(options.geno+"\n")
- k3 = round(K3[0][0],4)
- if options.geno == 'data/small.geno':
- assert k1==0.8333, "k1=%f" % k1
- assert k2==0.9375, "k2=%f" % k2
- assert k3==0.9375, "k3=%f" % k3
- if options.geno == 'data/small_na.geno':
- assert k1==0.8333, "k1=%f" % k1
- assert k2==0.7172, "k2=%f" % k2
- assert k3==0.7172, "k3=%f" % k3
- if options.geno == 'data/test8000.geno':
- assert k3==1.4352, "k3=%f" % k3
-
-else:
- fatal("Doing nothing")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py b/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
deleted file mode 100644
index 40b2021d..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/standalone.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Standalone specific methods and callback handler
-#
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# Set the log level with
-#
-# logging.basicConfig(level=logging.DEBUG)
-
-from __future__ import absolute_import, print_function, division
-
-import numpy as np
-import sys
-import logging
-
-# logger = logging.getLogger(__name__)
-logger = logging.getLogger('lmm2')
-logging.basicConfig(level=logging.DEBUG)
-np.set_printoptions(precision=3,suppress=True)
-
-progress_location = None
-progress_current = None
-progress_prev_perc = None
-
-def progress_default_func(location,count,total):
- global progress_current
- value = round(count*100.0/total)
- progress_current = value
-
-progress_func = progress_default_func
-
-def progress_set_func(func):
- global progress_func
- progress_func = func
-
-def progress(location, count, total):
- global progress_location
- global progress_prev_perc
-
- perc = round(count*100.0/total)
- if perc != progress_prev_perc and (location != progress_location or perc > 98 or perc > progress_prev_perc + 5):
- progress_func(location, count, total)
- logger.info("Progress: %s %d%%" % (location,perc))
- progress_location = location
- progress_prev_perc = perc
-
-def mprint(msg,data):
- """
- Array/matrix print function
- """
- m = np.array(data)
- if m.ndim == 1:
- print(msg,m.shape,"=\n",m[0:3]," ... ",m[-3:])
- if m.ndim == 2:
- print(msg,m.shape,"=\n[",
- m[0][0:3]," ... ",m[0][-3:],"\n ",
- m[1][0:3]," ... ",m[1][-3:],"\n ...\n ",
- m[-2][0:3]," ... ",m[-2][-3:],"\n ",
- m[-1][0:3]," ... ",m[-1][-3:],"]")
-
-def fatal(msg):
- logger.critical(msg)
- raise Exception(msg)
-
-def callbacks():
- return dict(
- write = sys.stdout.write,
- writeln = print,
- debug = logger.debug,
- info = logger.info,
- warning = logger.warning,
- error = logger.error,
- critical = logger.critical,
- fatal = fatal,
- progress = progress,
- mprint = mprint
- )
-
-def uses(*funcs):
- """
- Some sugar
- """
- return [callbacks()[func] for func in funcs]
-
-# ----- Minor test cases:
-
-if __name__ == '__main__':
- # logging.basicConfig(level=logging.DEBUG)
- logging.debug("Test %i" % (1))
- d = callbacks()['debug']
- d("TEST")
- wrln = callbacks()['writeln']
- wrln("Hello %i" % 34)
- progress = callbacks()['progress']
- progress("I am half way",50,100)
- list = [0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15,
- 0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]
- mprint("list",list)
- matrix = [[1,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [2,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [3,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [4,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [5,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15],
- [6,0.5,0.6096595 , -0.31559815, -0.52793285, 1.16573418e-15]]
- mprint("matrix",matrix)
- ix,dx = uses("info","debug")
- ix("ix")
- dx("dx")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py b/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
deleted file mode 100644
index 004d45c6..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/temp_data.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from __future__ import print_function, division, absolute_import
-from redis import Redis
-
-import simplejson as json
-
-class TempData(object):
-
- def __init__(self, temp_uuid):
- self.temp_uuid = temp_uuid
- self.redis = Redis()
- self.key = "tempdata:{}".format(self.temp_uuid)
-
- def store(self, field, value):
- self.redis.hset(self.key, field, value)
- self.redis.expire(self.key, 60*15) # Expire in 15 minutes
-
- def get_all(self):
- return self.redis.hgetall(self.key)
-
-
-if __name__ == "__main__":
- redis = Redis()
- for key in redis.keys():
- for field in redis.hkeys(key):
- print("{}.{}={}".format(key, field, redis.hget(key, field)))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py b/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
deleted file mode 100644
index 66b34ee2..00000000
--- a/wqflask/wqflask/my_pylmm/pyLMM/tsvreader.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Standard file readers
-#
-# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-import sys
-import os
-import numpy as np
-import csv
-
-def kinship(fn):
- K1 = []
- print fn
- with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Kinship format version 1.0")
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- ns = np.genfromtxt(row[1:])
- K1.append(ns) # <--- slow
- K = np.array(K1)
- return K
-
-def pheno(fn):
- Y1 = []
- print fn
- with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Phenotype format version 1.0")
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- ns = np.genfromtxt(row[1:])
- Y1.append(ns) # <--- slow
- Y = np.array(Y1)
- return Y
-
-def geno(fn):
- G1 = []
- hab_mapper = {'A':0,'H':1,'B':2,'-':3}
- pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
-
- print fn
- with open(fn,'r') as tsvin:
- line = tsvin.readline().strip()
- assert line == "# Genotype format version 1.0", line
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- # print(row)
- id = row[0]
- gs = list(row[1])
- # print id,gs
- gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
- # print id,gs2
- # ns = np.genfromtxt(row[1:])
- G1.append(gs2) # <--- slow
- G = np.array(G1)
- return G
-
-def geno(fn):
- G1 = []
- for id,values in geno_iter(fn):
- G1.append(values) # <--- slow
- G = np.array(G1)
- return G
-
-def geno_callback(fn,func):
- hab_mapper = {'A':0,'H':1,'B':2,'-':3}
- pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
-
- print fn
- with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Genotype format version 1.0")
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- id = row[0]
- gs = list(row[1])
- gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
- func(id,gs2)
-
-def geno_iter(fn):
- """
- Yield a tuple of snpid and values
- """
- hab_mapper = {'A':0,'H':1,'B':2,'-':3}
- pylmm_mapper = [ 0.0, 0.5, 1.0, float('nan') ]
-
- print fn
- with open(fn,'r') as tsvin:
- assert(tsvin.readline().strip() == "# Genotype format version 1.0")
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- id = row[0]
- gs = list(row[1])
- gs2 = [pylmm_mapper[hab_mapper[g]] for g in gs]
- yield (id,gs2)
--
cgit v1.2.3
From eef63adae30c1547f4c4189eb59a18d190c3aa08 Mon Sep 17 00:00:00 2001
From: pjotrp
Date: Mon, 11 May 2015 17:03:42 -0500
Subject: Moving pylmm out of the tree
---
wqflask/base/data_set.py | 2 +-
wqflask/utility/chunks.py | 96 ++++
wqflask/wqflask/heatmap/heatmap.py | 635 +++++++++++----------
.../wqflask/marker_regression/marker_regression.py | 4 +-
4 files changed, 417 insertions(+), 320 deletions(-)
create mode 100644 wqflask/utility/chunks.py
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 489bd374..9f805fc3 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -42,7 +42,7 @@ from base import species
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
-from wqflask.my_pylmm.pyLMM import chunks
+from wqflask.utility import chunks
from maintenance import get_group_samplelists
diff --git a/wqflask/utility/chunks.py b/wqflask/utility/chunks.py
new file mode 100644
index 00000000..9565fb96
--- /dev/null
+++ b/wqflask/utility/chunks.py
@@ -0,0 +1,96 @@
+from __future__ import absolute_import, print_function, division
+
+import math
+import time
+
+
+def divide_into_chunks(the_list, number_chunks):
+ """Divides a list into approximately number_chunks smaller lists
+
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
+ [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
+ [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
+ [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
+ >>>
+
+ """
+ length = len(the_list)
+
+ if length == 0:
+ return [[]]
+
+ if length <= number_chunks:
+ number_chunks = length
+
+ chunksize = int(math.ceil(length / number_chunks))
+
+ chunks = []
+ for counter in range(0, length, chunksize):
+ chunks.append(the_list[counter:counter+chunksize])
+
+ return chunks
+
+def _confirm_chunk(original, result):
+ all_chunked = []
+ for chunk in result:
+ all_chunked.extend(chunk)
+ print("length of all chunked:", len(all_chunked))
+ assert original == all_chunked, "You didn't chunk right"
+
+
+def _chunk_test(divide_func):
+ import random
+ random.seed(7)
+
+ number_exact = 0
+ total_amount_off = 0
+
+ for test in range(1, 1001):
+ print("\n\ntest:", test)
+ number_chunks = random.randint(1, 20)
+ number_elements = random.randint(0, 100)
+ the_list = list(range(1, number_elements))
+ result = divide_func(the_list, number_chunks)
+
+ print("Dividing list of length {} into approximately {} chunks - got {} chunks".format(
+ len(the_list), number_chunks, len(result)))
+ print("result:", result)
+
+ _confirm_chunk(the_list, result)
+
+ amount_off = abs(number_chunks - len(result))
+ if amount_off == 0:
+ number_exact += 1
+ else:
+ total_amount_off += amount_off
+
+
+ print("\n{} exact out of {} [Total amount off: {}]".format(number_exact,
+ test,
+ total_amount_off))
+ assert number_exact == 558
+ assert total_amount_off == 1580
+ return number_exact, total_amount_off
+
+
+def _main():
+ info = dict()
+ #funcs = (("sam", sam_divide_into_chunks), ("zach", zach_divide_into_chunks))
+ funcs = (("only one", divide_into_chunks),)
+ for name, func in funcs:
+ start = time.time()
+ number_exact, total_amount_off = _chunk_test(func)
+ took = time.time() - start
+ info[name] = dict(number_exact=number_exact,
+ total_amount_off=total_amount_off,
+ took=took)
+
+ print("info is:", info)
+
+if __name__ == '__main__':
+ _main()
+ print("\nConfirming doctests...")
+ import doctest
+ doctest.testmod()
diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py
index 9b6b1b69..035736fd 100644
--- a/wqflask/wqflask/heatmap/heatmap.py
+++ b/wqflask/wqflask/heatmap/heatmap.py
@@ -1,317 +1,318 @@
-from __future__ import absolute_import, print_function, division
-
-import sys
-sys.path.append(".")
-
-import gc
-import string
-import cPickle
-import os
-import datetime
-import time
-import pp
-import math
-import collections
-import resource
-
-import scipy
-import numpy as np
-from scipy import linalg
-
-from pprint import pformat as pf
-
-from htmlgen import HTMLgen2 as HT
-import reaper
-
-from base.trait import GeneralTrait
-from base import data_set
-from base import species
-from base import webqtlConfig
-from utility import webqtlUtil
-from wqflask.my_pylmm.data import prep_data
-from wqflask.my_pylmm.pyLMM import lmm
-from wqflask.my_pylmm.pyLMM import input
-from utility import helper_functions
-from utility import Plot, Bunch
-from utility import temp_data
-
-from MySQLdb import escape_string as escape
-
-import cPickle as pickle
-import simplejson as json
-
-from pprint import pformat as pf
-
-from redis import Redis
-Redis = Redis()
-
-from flask import Flask, g
-
-class Heatmap(object):
-
- def __init__(self, start_vars, temp_uuid):
-
- trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
-
- helper_functions.get_trait_db_obs(self, trait_db_list)
-
- self.temp_uuid = temp_uuid
- self.num_permutations = 5000
- self.dataset = self.trait_list[0][1]
-
- self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure
-
- self.all_sample_list = []
- self.traits = []
-
- chrnames = []
- self.species = species.TheSpecies(dataset=self.trait_list[0][1])
- for key in self.species.chromosomes.chromosomes.keys():
- chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
-
- for trait_db in self.trait_list:
-
- this_trait = trait_db[0]
- self.traits.append(this_trait.name)
- this_sample_data = this_trait.data
-
- for sample in this_sample_data:
- if sample not in self.all_sample_list:
- self.all_sample_list.append(sample)
-
- self.sample_data = []
- for trait_db in self.trait_list:
- this_trait = trait_db[0]
- this_sample_data = this_trait.data
-
- #self.sample_data[this_trait.name] = []
- this_trait_vals = []
- for sample in self.all_sample_list:
- if sample in this_sample_data:
- this_trait_vals.append(this_sample_data[sample].value)
- #self.sample_data[this_trait.name].append(this_sample_data[sample].value)
- else:
- this_trait_vals.append('')
- #self.sample_data[this_trait.name].append('')
- self.sample_data.append(this_trait_vals)
-
- self.gen_reaper_results()
- #self.gen_pylmm_results()
-
- #chrnames = []
- lodnames = []
- chr_pos = []
- pos = []
- markernames = []
-
- for trait in self.trait_results.keys():
- lodnames.append(trait)
-
- for marker in self.dataset.group.markers.markers:
- #if marker['chr'] not in chrnames:
- # chr_ob = [marker['chr'], "filler"]
- # chrnames.append(chr_ob)
- chr_pos.append(marker['chr'])
- pos.append(marker['Mb'])
- markernames.append(marker['name'])
-
- self.json_data['chrnames'] = chrnames
- self.json_data['lodnames'] = lodnames
- self.json_data['chr'] = chr_pos
- self.json_data['pos'] = pos
- self.json_data['markernames'] = markernames
-
- for trait in self.trait_results:
- self.json_data[trait] = self.trait_results[trait]
-
- self.js_data = dict(
- json_data = self.json_data
- )
-
- print("self.js_data:", self.js_data)
-
-
- def gen_reaper_results(self):
- self.trait_results = {}
- for trait_db in self.trait_list:
- self.dataset.group.get_markers()
- this_trait = trait_db[0]
- #this_db = trait_db[1]
- genotype = self.dataset.group.read_genotype_file()
- samples, values, variances = this_trait.export_informative()
-
- trimmed_samples = []
- trimmed_values = []
- for i in range(0, len(samples)):
- if samples[i] in self.dataset.group.samplelist:
- trimmed_samples.append(samples[i])
- trimmed_values.append(values[i])
-
- self.lrs_array = genotype.permutation(strains = trimmed_samples,
- trait = trimmed_values,
- nperm= self.num_permutations)
-
- #self.suggestive = self.lrs_array[int(self.num_permutations*0.37-1)]
- #self.significant = self.lrs_array[int(self.num_permutations*0.95-1)]
-
- reaper_results = genotype.regression(strains = trimmed_samples,
- trait = trimmed_values)
-
-
- lrs_values = [float(qtl.lrs) for qtl in reaper_results]
- print("lrs_values:", lrs_values)
- #self.dataset.group.markers.add_pvalues(p_values)
-
- self.trait_results[this_trait.name] = []
- for qtl in reaper_results:
- if qtl.additive > 0:
- self.trait_results[this_trait.name].append(-float(qtl.lrs))
- else:
- self.trait_results[this_trait.name].append(float(qtl.lrs))
- #for lrs in lrs_values:
- # if
- # self.trait_results[this_trait.name].append(lrs)
-
-
- #this_db_samples = self.dataset.group.samplelist
- #this_sample_data = this_trait.data
- ##print("this_sample_data", this_sample_data)
- #this_trait_vals = []
- #for index, sample in enumerate(this_db_samples):
- # if sample in this_sample_data:
- # sample_value = this_sample_data[sample].value
- # this_trait_vals.append(sample_value)
- # else:
- # this_trait_vals.append("x")
-
- #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals])
-
- #key = "pylmm:input:" + str(self.temp_uuid)
- #print("key is:", pf(key))
-
- #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
- #no_val_samples = self.identify_empty_samples(this_trait_vals)
- #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
-
- #genotype_matrix = np.array(trimmed_genotype_data).T
-
- #print("genotype_matrix:", str(genotype_matrix.tolist()))
- #print("pheno_vector:", str(pheno_vector.tolist()))
-
- #params = dict(pheno_vector = pheno_vector.tolist(),
- # genotype_matrix = genotype_matrix.tolist(),
- # restricted_max_likelihood = True,
- # refit = False,
- # temp_uuid = str(self.temp_uuid),
- #
- # # meta data
- # timestamp = datetime.datetime.now().isoformat(),
- # )
- #
- #json_params = json.dumps(params)
- ##print("json_params:", json_params)
- #Redis.set(key, json_params)
- #Redis.expire(key, 60*60)
- #print("before printing command")
- #
- #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- # "other")
- #print("command is:", command)
- #print("after printing command")
- #
- #os.system(command)
- #
- #json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60)
-
- def gen_pylmm_results(self):
- self.trait_results = {}
- for trait_db in self.trait_list:
- this_trait = trait_db[0]
- #this_db = trait_db[1]
- self.dataset.group.get_markers()
-
- this_db_samples = self.dataset.group.samplelist
- this_sample_data = this_trait.data
- #print("this_sample_data", this_sample_data)
- this_trait_vals = []
- for index, sample in enumerate(this_db_samples):
- if sample in this_sample_data:
- sample_value = this_sample_data[sample].value
- this_trait_vals.append(sample_value)
- else:
- this_trait_vals.append("x")
-
- pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals])
-
- key = "pylmm:input:" + str(self.temp_uuid)
- #print("key is:", pf(key))
-
- genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
- no_val_samples = self.identify_empty_samples(this_trait_vals)
- trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
-
- genotype_matrix = np.array(trimmed_genotype_data).T
-
- #print("genotype_matrix:", str(genotype_matrix.tolist()))
- #print("pheno_vector:", str(pheno_vector.tolist()))
-
- params = dict(pheno_vector = pheno_vector.tolist(),
- genotype_matrix = genotype_matrix.tolist(),
- restricted_max_likelihood = True,
- refit = False,
- temp_uuid = str(self.temp_uuid),
-
- # meta data
- timestamp = datetime.datetime.now().isoformat(),
- )
-
- json_params = json.dumps(params)
- #print("json_params:", json_params)
- Redis.set(key, json_params)
- Redis.expire(key, 60*60)
- print("before printing command")
-
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "other")
- print("command is:", command)
- print("after printing command")
-
- os.system(command)
-
- json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60)
- results = json.loads(json_results[1])
- p_values = [float(result) for result in results['p_values']]
- #print("p_values:", p_values)
- self.dataset.group.markers.add_pvalues(p_values)
-
- self.trait_results[this_trait.name] = []
- for marker in self.dataset.group.markers.markers:
- self.trait_results[this_trait.name].append(marker['lod_score'])
-
-
- def identify_empty_samples(self, values):
- no_val_samples = []
- for sample_count, val in enumerate(values):
- if val == "x":
- no_val_samples.append(sample_count)
- return no_val_samples
-
- def trim_genotypes(self, genotype_data, no_value_samples):
- trimmed_genotype_data = []
- for marker in genotype_data:
- new_genotypes = []
- for item_count, genotype in enumerate(marker):
- if item_count in no_value_samples:
- continue
- try:
- genotype = float(genotype)
- except ValueError:
- genotype = np.nan
- pass
- new_genotypes.append(genotype)
- trimmed_genotype_data.append(new_genotypes)
- return trimmed_genotype_data
-
-
\ No newline at end of file
+from __future__ import absolute_import, print_function, division
+
+import sys
+sys.path.append(".")
+
+import gc
+import string
+import cPickle
+import os
+import datetime
+import time
+import pp
+import math
+import collections
+import resource
+
+import scipy
+import numpy as np
+from scipy import linalg
+
+from pprint import pformat as pf
+
+from htmlgen import HTMLgen2 as HT
+import reaper
+
+from base.trait import GeneralTrait
+from base import data_set
+from base import species
+from base import webqtlConfig
+from utility import webqtlUtil
+from wqflask.my_pylmm.data import prep_data
+# from wqflask.my_pylmm.pyLMM import lmm
+# from wqflask.my_pylmm.pyLMM import input
+from utility import helper_functions
+from utility import Plot, Bunch
+from utility import temp_data
+
+from MySQLdb import escape_string as escape
+
+import cPickle as pickle
+import simplejson as json
+
+from pprint import pformat as pf
+
+from redis import Redis
+Redis = Redis()
+
+from flask import Flask, g
+
+class Heatmap(object):
+
+ def __init__(self, start_vars, temp_uuid):
+
+ trait_db_list = [trait.strip() for trait in start_vars['trait_list'].split(',')]
+
+ helper_functions.get_trait_db_obs(self, trait_db_list)
+
+ self.temp_uuid = temp_uuid
+ self.num_permutations = 5000
+ self.dataset = self.trait_list[0][1]
+
+ self.json_data = {} #The dictionary that will be used to create the json object that contains all the data needed to create the figure
+
+ self.all_sample_list = []
+ self.traits = []
+
+ chrnames = []
+ self.species = species.TheSpecies(dataset=self.trait_list[0][1])
+ for key in self.species.chromosomes.chromosomes.keys():
+ chrnames.append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
+
+ for trait_db in self.trait_list:
+
+ this_trait = trait_db[0]
+ self.traits.append(this_trait.name)
+ this_sample_data = this_trait.data
+
+ for sample in this_sample_data:
+ if sample not in self.all_sample_list:
+ self.all_sample_list.append(sample)
+
+ self.sample_data = []
+ for trait_db in self.trait_list:
+ this_trait = trait_db[0]
+ this_sample_data = this_trait.data
+
+ #self.sample_data[this_trait.name] = []
+ this_trait_vals = []
+ for sample in self.all_sample_list:
+ if sample in this_sample_data:
+ this_trait_vals.append(this_sample_data[sample].value)
+ #self.sample_data[this_trait.name].append(this_sample_data[sample].value)
+ else:
+ this_trait_vals.append('')
+ #self.sample_data[this_trait.name].append('')
+ self.sample_data.append(this_trait_vals)
+
+ self.gen_reaper_results()
+ #self.gen_pylmm_results()
+
+ #chrnames = []
+ lodnames = []
+ chr_pos = []
+ pos = []
+ markernames = []
+
+ for trait in self.trait_results.keys():
+ lodnames.append(trait)
+
+ for marker in self.dataset.group.markers.markers:
+ #if marker['chr'] not in chrnames:
+ # chr_ob = [marker['chr'], "filler"]
+ # chrnames.append(chr_ob)
+ chr_pos.append(marker['chr'])
+ pos.append(marker['Mb'])
+ markernames.append(marker['name'])
+
+ self.json_data['chrnames'] = chrnames
+ self.json_data['lodnames'] = lodnames
+ self.json_data['chr'] = chr_pos
+ self.json_data['pos'] = pos
+ self.json_data['markernames'] = markernames
+
+ for trait in self.trait_results:
+ self.json_data[trait] = self.trait_results[trait]
+
+ self.js_data = dict(
+ json_data = self.json_data
+ )
+
+ print("self.js_data:", self.js_data)
+
+
+ def gen_reaper_results(self):
+ self.trait_results = {}
+ for trait_db in self.trait_list:
+ self.dataset.group.get_markers()
+ this_trait = trait_db[0]
+ #this_db = trait_db[1]
+ genotype = self.dataset.group.read_genotype_file()
+ samples, values, variances = this_trait.export_informative()
+
+ trimmed_samples = []
+ trimmed_values = []
+ for i in range(0, len(samples)):
+ if samples[i] in self.dataset.group.samplelist:
+ trimmed_samples.append(samples[i])
+ trimmed_values.append(values[i])
+
+ self.lrs_array = genotype.permutation(strains = trimmed_samples,
+ trait = trimmed_values,
+ nperm= self.num_permutations)
+
+ #self.suggestive = self.lrs_array[int(self.num_permutations*0.37-1)]
+ #self.significant = self.lrs_array[int(self.num_permutations*0.95-1)]
+
+ reaper_results = genotype.regression(strains = trimmed_samples,
+ trait = trimmed_values)
+
+
+ lrs_values = [float(qtl.lrs) for qtl in reaper_results]
+ print("lrs_values:", lrs_values)
+ #self.dataset.group.markers.add_pvalues(p_values)
+
+ self.trait_results[this_trait.name] = []
+ for qtl in reaper_results:
+ if qtl.additive > 0:
+ self.trait_results[this_trait.name].append(-float(qtl.lrs))
+ else:
+ self.trait_results[this_trait.name].append(float(qtl.lrs))
+ #for lrs in lrs_values:
+ # if
+ # self.trait_results[this_trait.name].append(lrs)
+
+
+ #this_db_samples = self.dataset.group.samplelist
+ #this_sample_data = this_trait.data
+ ##print("this_sample_data", this_sample_data)
+ #this_trait_vals = []
+ #for index, sample in enumerate(this_db_samples):
+ # if sample in this_sample_data:
+ # sample_value = this_sample_data[sample].value
+ # this_trait_vals.append(sample_value)
+ # else:
+ # this_trait_vals.append("x")
+
+ #pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals])
+
+ #key = "pylmm:input:" + str(self.temp_uuid)
+ #print("key is:", pf(key))
+
+ #genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+
+ #no_val_samples = self.identify_empty_samples(this_trait_vals)
+ #trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+
+ #genotype_matrix = np.array(trimmed_genotype_data).T
+
+ #print("genotype_matrix:", str(genotype_matrix.tolist()))
+ #print("pheno_vector:", str(pheno_vector.tolist()))
+
+ #params = dict(pheno_vector = pheno_vector.tolist(),
+ # genotype_matrix = genotype_matrix.tolist(),
+ # restricted_max_likelihood = True,
+ # refit = False,
+ # temp_uuid = str(self.temp_uuid),
+ #
+ # # meta data
+ # timestamp = datetime.datetime.now().isoformat(),
+ # )
+ #
+ #json_params = json.dumps(params)
+ ##print("json_params:", json_params)
+ #Redis.set(key, json_params)
+ #Redis.expire(key, 60*60)
+ #print("before printing command")
+ #
+ #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ # "other")
+ #print("command is:", command)
+ #print("after printing command")
+ #
+ #os.system(command)
+ #
+ #json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60)
+
+ def gen_pylmm_results(self):
+ # This function is NOT used. If it is, we should use a shared function with marker_regression.py
+ self.trait_results = {}
+ for trait_db in self.trait_list:
+ this_trait = trait_db[0]
+ #this_db = trait_db[1]
+ self.dataset.group.get_markers()
+
+ this_db_samples = self.dataset.group.samplelist
+ this_sample_data = this_trait.data
+ #print("this_sample_data", this_sample_data)
+ this_trait_vals = []
+ for index, sample in enumerate(this_db_samples):
+ if sample in this_sample_data:
+ sample_value = this_sample_data[sample].value
+ this_trait_vals.append(sample_value)
+ else:
+ this_trait_vals.append("x")
+
+ pheno_vector = np.array([val == "x" and np.nan or float(val) for val in this_trait_vals])
+
+ key = "pylmm:input:" + str(self.temp_uuid)
+ #print("key is:", pf(key))
+
+ genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
+
+ no_val_samples = self.identify_empty_samples(this_trait_vals)
+ trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+
+ genotype_matrix = np.array(trimmed_genotype_data).T
+
+ #print("genotype_matrix:", str(genotype_matrix.tolist()))
+ #print("pheno_vector:", str(pheno_vector.tolist()))
+
+ params = dict(pheno_vector = pheno_vector.tolist(),
+ genotype_matrix = genotype_matrix.tolist(),
+ restricted_max_likelihood = True,
+ refit = False,
+ temp_uuid = str(self.temp_uuid),
+
+ # meta data
+ timestamp = datetime.datetime.now().isoformat(),
+ )
+
+ json_params = json.dumps(params)
+ #print("json_params:", json_params)
+ Redis.set(key, json_params)
+ Redis.expire(key, 60*60)
+ print("before printing command")
+
+ command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ "other")
+ print("command is:", command)
+ print("after printing command")
+
+ os.system(command)
+
+ json_results = Redis.blpop("pylmm:results:" + str(self.temp_uuid), 45*60)
+ results = json.loads(json_results[1])
+ p_values = [float(result) for result in results['p_values']]
+ #print("p_values:", p_values)
+ self.dataset.group.markers.add_pvalues(p_values)
+
+ self.trait_results[this_trait.name] = []
+ for marker in self.dataset.group.markers.markers:
+ self.trait_results[this_trait.name].append(marker['lod_score'])
+
+
+ def identify_empty_samples(self, values):
+ no_val_samples = []
+ for sample_count, val in enumerate(values):
+ if val == "x":
+ no_val_samples.append(sample_count)
+ return no_val_samples
+
+ def trim_genotypes(self, genotype_data, no_value_samples):
+ trimmed_genotype_data = []
+ for marker in genotype_data:
+ new_genotypes = []
+ for item_count, genotype in enumerate(marker):
+ if item_count in no_value_samples:
+ continue
+ try:
+ genotype = float(genotype)
+ except ValueError:
+ genotype = np.nan
+ pass
+ new_genotypes.append(genotype)
+ trimmed_genotype_data.append(new_genotypes)
+ return trimmed_genotype_data
+
+
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 49521bd6..c5fab4ee 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -37,8 +37,8 @@ from utility import webqtlUtil
from wqflask.marker_regression import gemma_mapping
#from wqflask.marker_regression import rqtl_mapping
from wqflask.my_pylmm.data import prep_data
-from wqflask.my_pylmm.pyLMM import lmm
-from wqflask.my_pylmm.pyLMM import input
+# from wqflask.my_pylmm.pyLMM import lmm
+# from wqflask.my_pylmm.pyLMM import input
from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
--
cgit v1.2.3
From db5072e285e2579a7195d8007587236d5ce9757d Mon Sep 17 00:00:00 2001
From: pjotrp
Date: Mon, 11 May 2015 17:04:43 -0500
Subject: Remove outdated code
---
.../marker_regression/marker_regression_old.py | 576 ---------------------
1 file changed, 576 deletions(-)
delete mode 100644 wqflask/wqflask/marker_regression/marker_regression_old.py
diff --git a/wqflask/wqflask/marker_regression/marker_regression_old.py b/wqflask/wqflask/marker_regression/marker_regression_old.py
deleted file mode 100644
index 36331250..00000000
--- a/wqflask/wqflask/marker_regression/marker_regression_old.py
+++ /dev/null
@@ -1,576 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-from base.trait import GeneralTrait
-from base import data_set #import create_dataset
-
-from pprint import pformat as pf
-
-import string
-import sys
-import datetime
-import os
-import collections
-import uuid
-
-import numpy as np
-from scipy import linalg
-
-import cPickle as pickle
-
-import simplejson as json
-
-from redis import Redis
-Redis = Redis()
-
-from flask import Flask, g
-
-from base.trait import GeneralTrait
-from base import data_set
-from base import species
-from base import webqtlConfig
-from utility import webqtlUtil
-from wqflask.my_pylmm.data import prep_data
-from wqflask.my_pylmm.pyLMM import lmm
-from wqflask.my_pylmm.pyLMM import input
-from utility import helper_functions
-from utility import Plot, Bunch
-from utility import temp_data
-
-from utility.benchmark import Bench
-
-
-class MarkerRegression(object):
-
- def __init__(self, start_vars, temp_uuid):
-
- helper_functions.get_species_dataset_trait(self, start_vars)
-
- #tempdata = temp_data.TempData(temp_uuid)
-
- self.samples = [] # Want only ones with values
- self.vals = []
-
- for sample in self.dataset.group.samplelist:
- value = start_vars['value:' + sample]
- self.samples.append(str(sample))
- self.vals.append(value)
-
- self.mapping_method = start_vars['method']
- self.maf = start_vars['maf'] # Minor allele frequency
- print("self.maf:", self.maf)
-
- self.dataset.group.get_markers()
- if self.mapping_method == "gemma":
- qtl_results = self.run_gemma()
- elif self.mapping_method == "plink":
- qtl_results = self.run_plink()
- #print("qtl_results:", pf(qtl_results))
- elif self.mapping_method == "pylmm":
- print("RUNNING PYLMM")
- #self.qtl_results = self.gen_data(tempdata)
- qtl_results = self.gen_data(str(temp_uuid))
- else:
- print("RUNNING NOTHING")
-
- self.lod_cutoff = 2
- self.filtered_markers = []
- for marker in qtl_results:
- if marker['chr'] > 0:
- self.filtered_markers.append(marker)
-
- #Get chromosome lengths for drawing the manhattan plot
- chromosome_mb_lengths = {}
- for key in self.species.chromosomes.chromosomes.keys():
- chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
-
- self.js_data = dict(
- this_trait = self.this_trait.name,
- data_set = self.dataset.name,
- maf = self.maf,
- chromosomes = chromosome_mb_lengths,
- qtl_results = self.filtered_markers,
- )
-
- def run_gemma(self):
- """Generates p-values for each marker using GEMMA"""
-
- #filename = webqtlUtil.genRandStr("{}_{}_".format(self.dataset.group.name, self.this_trait.name))
- self.gen_pheno_txt_file()
-
- os.chdir("/home/zas1024/gene/web/gemma")
-
- gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (
- self.dataset.group.name,
- self.dataset.group.name,
- self.dataset.group.name)
- print("gemma_command:" + gemma_command)
-
- os.system(gemma_command)
-
- included_markers, p_values = self.parse_gemma_output()
-
- self.dataset.group.get_specified_markers(markers = included_markers)
-
- #for marker in self.dataset.group.markers.markers:
- # if marker['name'] not in included_markers:
- # print("marker:", marker)
- # self.dataset.group.markers.markers.remove(marker)
- # #del self.dataset.group.markers.markers[marker]
-
- self.dataset.group.markers.add_pvalues(p_values)
-
- return self.dataset.group.markers.markers
-
-
- def parse_gemma_output(self):
- included_markers = []
- p_values = []
- with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:
- for line in output_file:
- if line.startswith("chr"):
- continue
- else:
- included_markers.append(line.split("\t")[1])
- p_values.append(float(line.split("\t")[10]))
- #p_values[line.split("\t")[1]] = float(line.split("\t")[10])
- print("p_values: ", p_values)
- return included_markers, p_values
-
- def gen_pheno_txt_file(self):
- """Generates phenotype file for GEMMA"""
-
- #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile:
- # for sample, i in enumerate(self.samples):
- # print("sample:" + str(i))
- # print("self.vals[i]:" + str(self.vals[sample]))
- # outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n")
-
- with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile:
- for i, sample in enumerate(self.samples):
- outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n")
-
- #def gen_plink_for_gemma(self, filename):
- #
- # make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH,
- # webqtlConfig.HTMLPATH,
- # self.dataset.group.name,
- # webqtlConfig.TMPDIR,
- # filename,
- # webqtlConfig.TMPDIR,
- # filename)
- #
- #
-
- def run_plink(self):
-
- os.chdir("/home/zas1024/plink")
-
- plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
-
- self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
-
- plink_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)
-
- os.system(plink_command)
-
- count, p_values = self.parse_plink_output(plink_output_filename)
- #gemma_command = './gemma -bfile %s -k output_%s.cXX.txt -lmm 1 -o %s_output' % (
- # self.dataset.group.name,
- # self.dataset.group.name,
- # self.dataset.group.name)
- #print("gemma_command:" + gemma_command)
- #
- #os.system(gemma_command)
- #
- #included_markers, p_values = self.parse_gemma_output()
- #
- #self.dataset.group.get_specified_markers(markers = included_markers)
-
- #for marker in self.dataset.group.markers.markers:
- # if marker['name'] not in included_markers:
- # print("marker:", marker)
- # self.dataset.group.markers.markers.remove(marker)
- # #del self.dataset.group.markers.markers[marker]
-
- print("p_values:", pf(p_values))
-
- self.dataset.group.markers.add_pvalues(p_values)
-
- return self.dataset.group.markers.markers
-
-
- def gen_pheno_txt_file_plink(self, pheno_filename = ''):
- ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
- header = 'FID\tIID\t%s\n' % self.this_trait.name
- output_file.write(header)
-
- new_value_list = []
-
- #if valueDict does not include some strain, value will be set to -9999 as missing value
- for i, sample in enumerate(ped_sample_list):
- try:
- value = self.vals[i]
- value = str(value).replace('value=','')
- value = value.strip()
- except:
- value = -9999
-
- new_value_list.append(value)
-
-
- new_line = ''
- for i, sample in enumerate(ped_sample_list):
- j = i+1
- value = new_value_list[i]
- new_line += '%s\t%s\t%s\n'%(sample, sample, value)
-
- if j%1000 == 0:
- output_file.write(newLine)
- new_line = ''
-
- if new_line:
- output_file.write(new_line)
-
- output_file.close()
-
- # get strain name from ped file in order
- def get_samples_from_ped_file(self):
-
- os.chdir("/home/zas1024/plink")
-
- ped_file= open("{}.ped".format(self.dataset.group.name),"r")
- line = ped_file.readline()
- sample_list=[]
-
- while line:
- lineList = string.split(string.strip(line), '\t')
- lineList = map(string.strip, lineList)
-
- sample_name = lineList[0]
- sample_list.append(sample_name)
-
- line = ped_file.readline()
-
- return sample_list
-
- ################################################################
- # Generate Chr list, Chr OrderId and Retrieve Length Information
- ################################################################
- #def getChrNameOrderIdLength(self,RISet=''):
- # try:
- # query = """
- # Select
- # Chr_Length.Name,Chr_Length.OrderId,Length from Chr_Length, InbredSet
- # where
- # Chr_Length.SpeciesId = InbredSet.SpeciesId AND
- # InbredSet.Name = '%s'
- # Order by OrderId
- # """ % (self.dataset.group.name)
- # results =g.db.execute(query).fetchall()
- # ChrList=[]
- # ChrLengthMbList=[]
- # ChrNameOrderIdDict={}
- # ChrOrderIdNameDict={}
- #
- # for item in results:
- # ChrList.append(item[0])
- # ChrNameOrderIdDict[item[0]]=item[1] # key is chr name, value is orderId
- # ChrOrderIdNameDict[item[1]]=item[0] # key is orderId, value is chr name
- # ChrLengthMbList.append(item[2])
- #
- # except:
- # ChrList=[]
- # ChrNameOrderIdDict={}
- # ChrLengthMbList=[]
- #
- # return ChrList,ChrNameOrderIdDict,ChrOrderIdNameDict,ChrLengthMbList
-
-
- def parse_plink_output(self, output_filename):
- plink_results={}
-
- threshold_p_value = 0.01
-
- result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb")
-
- header_line = result_fp.readline()# read header line
- line = result_fp.readline()
-
- value_list = [] # initialize value list, this list will include snp, bp and pvalue info
- p_value_dict = {}
- count = 0
-
- while line:
- #convert line from str to list
- line_list = self.build_line_list(line=line)
-
- # only keep the records whose chromosome name is in db
- if self.species.chromosomes.chromosomes.has_key(int(line_list[0])) and line_list[-1] and line_list[-1].strip()!='NA':
-
- chr_name = self.species.chromosomes.chromosomes[int(line_list[0])]
- snp = line_list[1]
- BP = line_list[2]
- p_value = float(line_list[-1])
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- p_value_dict[snp] = p_value
-
- if plink_results.has_key(chr_name):
- value_list = plink_results[chr_name]
-
- # pvalue range is [0,1]
- if threshold_p_value >=0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- plink_results[chr_name] = value_list
- value_list = []
- else:
- if threshold_p_value >= 0 and threshold_p_value <= 1:
- if p_value < threshold_p_value:
- value_list.append((snp, BP, p_value))
- count += 1
-
- if value_list:
- plink_results[chr_name] = value_list
-
- value_list=[]
-
- line = result_fp.readline()
- else:
- line = result_fp.readline()
-
- #if p_value_list:
- # min_p_value = min(p_value_list)
- #else:
- # min_p_value = 0
-
- return count, p_value_dict
-
- ######################################################
- # input: line: str,one line read from file
- # function: convert line from str to list;
- # output: lineList list
- #######################################################
- def build_line_list(self, line=None):
-
- line_list = string.split(string.strip(line),' ')# irregular number of whitespaces between columns
- line_list = [item for item in line_list if item <>'']
- line_list = map(string.strip, line_list)
-
- return line_list
-
- #def gen_data(self, tempdata):
- def gen_data(self, temp_uuid):
- """Generates p-values for each marker"""
-
- pheno_vector = np.array([val == "x" and np.nan or float(val) for val in self.vals])
-
- #lmm_uuid = str(uuid.uuid4())
-
- key = "pylmm:input:" + temp_uuid
- print("key is:", pf(key))
- #with Bench("Loading cache"):
- # result = Redis.get(key)
-
- if self.dataset.group.species == "human":
- p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
- #p_values = self.trim_results(p_values)
-
- else:
- print("NOW CWD IS:", os.getcwd())
- genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
- no_val_samples = self.identify_empty_samples()
- trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
-
- genotype_matrix = np.array(trimmed_genotype_data).T
-
- #print("pheno_vector: ", pf(pheno_vector))
- #print("genotype_matrix: ", pf(genotype_matrix))
- #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
-
- #params = {"pheno_vector": pheno_vector,
- # "genotype_matrix": genotype_matrix,
- # "restricted_max_likelihood": True,
- # "refit": False,
- # "temp_data": tempdata}
-
- params = dict(pheno_vector = pheno_vector.tolist(),
- genotype_matrix = genotype_matrix.tolist(),
- restricted_max_likelihood = True,
- refit = False,
- temp_uuid = temp_uuid,
-
- # meta data
- timestamp = datetime.datetime.now().isoformat(),
- )
-
- json_params = json.dumps(params)
- #print("json_params:", json_params)
- Redis.set(key, json_params)
- Redis.expire(key, 60*60)
- print("before printing command")
-
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "other")
- print("command is:", command)
- print("after printing command")
-
- os.system(command)
-
- #t_stats, p_values = lmm.run(key)
- #lmm.run(key)
-
- json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
- results = json.loads(json_results[1])
- p_values = [float(result) for result in results['p_values']]
- print("p_values:", p_values)
- #p_values = self.trim_results(p_values)
- t_stats = results['t_stats']
-
- #t_stats, p_values = lmm.run(
- # pheno_vector,
- # genotype_matrix,
- # restricted_max_likelihood=True,
- # refit=False,
- # temp_data=tempdata
- #)
- #print("p_values:", p_values)
-
- self.dataset.group.markers.add_pvalues(p_values)
-
- #self.get_lod_score_cutoff()
-
- return self.dataset.group.markers.markers
-
- def trim_results(self, p_values):
- print("len_p_values:", len(p_values))
- if len(p_values) > 500:
- p_values.sort(reverse=True)
- trimmed_values = p_values[:500]
-
- return trimmed_values
-
- #def gen_human_results(self, pheno_vector, tempdata):
- def gen_human_results(self, pheno_vector, key, temp_uuid):
- file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
-
- plink_input = input.plink(file_base, type='b')
- input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
-
- pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
- covariate_matrix = np.ones((pheno_vector.shape[0],1))
- kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
- kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
-
- print("Before creating params")
-
- params = dict(pheno_vector = pheno_vector.tolist(),
- covariate_matrix = covariate_matrix.tolist(),
- input_file_name = input_file_name,
- kinship_matrix = kinship_matrix.tolist(),
- refit = False,
- temp_uuid = temp_uuid,
-
- # meta data
- timestamp = datetime.datetime.now().isoformat(),
- )
-
- print("After creating params")
-
- json_params = json.dumps(params)
- Redis.set(key, json_params)
- Redis.expire(key, 60*60)
-
- print("Before creating the command")
-
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
- "human")
-
- print("command is:", command)
-
- os.system(command)
-
- json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
- results = json.loads(json_results[1])
- t_stats = results['t_stats']
- p_values = results['p_values']
-
-
- #p_values, t_stats = lmm.run_human(key)
-
- #p_values, t_stats = lmm.run_human(
- # pheno_vector,
- # covariate_matrix,
- # input_file_name,
- # kinship_matrix,
- # loading_progress=tempdata
- # )
-
- return p_values, t_stats
-
- def get_lod_score_cutoff(self):
- print("INSIDE GET LOD CUTOFF")
- high_qtl_count = 0
- for marker in self.dataset.group.markers.markers:
- if marker['lod_score'] > 1:
- high_qtl_count += 1
-
- if high_qtl_count > 1000:
- return 1
- else:
- return 0
-
- def identify_empty_samples(self):
- no_val_samples = []
- for sample_count, val in enumerate(self.vals):
- if val == "x":
- no_val_samples.append(sample_count)
- return no_val_samples
-
- def trim_genotypes(self, genotype_data, no_value_samples):
- trimmed_genotype_data = []
- for marker in genotype_data:
- new_genotypes = []
- for item_count, genotype in enumerate(marker):
- if item_count in no_value_samples:
- continue
- try:
- genotype = float(genotype)
- except ValueError:
- genotype = np.nan
- pass
- new_genotypes.append(genotype)
- trimmed_genotype_data.append(new_genotypes)
- return trimmed_genotype_data
-
-def create_snp_iterator_file(group):
- plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
- plink_input = input.plink(plink_file_base, type='b')
-
- data = dict(plink_input = list(plink_input),
- numSNPs = plink_input.numSNPs)
-
- #input_dict = {}
- #
- #input_dict['plink_input'] = list(plink_input)
- #input_dict['numSNPs'] = plink_input.numSNPs
- #
-
- snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz")
-
- with gzip.open(snp_file_base, "wb") as fh:
- pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
-
-#if __name__ == '__main__':
-# import cPickle as pickle
-# import gzip
-# create_snp_iterator_file("HLC")
-
-if __name__ == '__main__':
- import cPickle as pickle
- import gzip
- create_snp_iterator_file("HLC")
--
cgit v1.2.3
From 246a9ef2d345d6704b97c96d047d06dca13eece0 Mon Sep 17 00:00:00 2001
From: pjotrp
Date: Mon, 11 May 2015 17:08:00 -0500
Subject: More pylmm dependencies disabled
---
wqflask/wqflask/interval_mapping/interval_mapping.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/wqflask/wqflask/interval_mapping/interval_mapping.py b/wqflask/wqflask/interval_mapping/interval_mapping.py
index 5511826a..1cd3fc80 100755
--- a/wqflask/wqflask/interval_mapping/interval_mapping.py
+++ b/wqflask/wqflask/interval_mapping/interval_mapping.py
@@ -24,9 +24,9 @@ from base import data_set
from base import species
from base import webqtlConfig
from utility import webqtlUtil
-from wqflask.my_pylmm.data import prep_data
-from wqflask.my_pylmm.pyLMM import lmm
-from wqflask.my_pylmm.pyLMM import input
+# from wqflask.my_pylmm.data import prep_data
+# from wqflask.my_pylmm.pyLMM import lmm
+# from wqflask.my_pylmm.pyLMM import input
from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
--
cgit v1.2.3
From bc98e46fc910357ea3aeca5950e94e38d9584f9e Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 11 May 2015 22:44:18 +0000
Subject: Moved chunks
---
wqflask/base/data_set.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 9f805fc3..1cd57b4b 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -42,7 +42,7 @@ from base import species
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
-from wqflask.utility import chunks
+from utility import chunks
from maintenance import get_group_samplelists
--
cgit v1.2.3
From 06f7ff5ee29c1899a04b7f538564c8a34f43280b Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 11 May 2015 22:55:41 +0000
Subject: Show PYLMM_PATH on error
---
wqflask/wqflask/marker_regression/marker_regression.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index c5fab4ee..76d05bd8 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -51,7 +51,7 @@ if os.environ.get('PYLMM_PATH') is None:
if PYLMM_PATH is None:
PYLMM_PATH=os.environ['HOME']+'/gene/wqflask/wqflask/my_pylmm/pyLMM'
if not os.path.isfile(PYLMM_PATH+'/lmm.py'):
- raise 'PYLMM_PATH unknown or faulty'
+ raise Exception('PYLMM_PATH '+PYLMM_PATH+' unknown or faulty')
PYLMM_COMMAND= 'python '+PYLMM_PATH+'/lmm.py'
class MarkerRegression(object):
--
cgit v1.2.3