aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2015-03-17 13:10:49 +0300
committerPjotr Prins2015-03-17 13:10:49 +0300
commit73ed2f67bd0478473b887902ae96caaa0fca58d4 (patch)
tree3e47d798abc3a30a2486a3533f067bc2db04d365
parent46170b2741d006db89ce69128feff51c6e0e7752 (diff)
downloadgenenetwork2-73ed2f67bd0478473b887902ae96caaa0fca58d4.tar.gz
GWAS: one result is missing
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/gwas.py33
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/input.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py10
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm2.py17
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py35
5 files changed, 53 insertions, 44 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
index 2a472717..20083bde 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/gwas.py
@@ -66,7 +66,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
"""
matrix_initialize()
cpu_num = mp.cpu_count()
- numThreads = None
+ numThreads = None # for now use all available threads
kfile2 = False
reml = restricted_max_likelihood
@@ -110,10 +110,7 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
# PS = []
# TS = []
count = 0
-
- completed = 0
- last_j = 0
- # for snp_id in G:
+ jobs_running = 0
for snp in G:
snp_id = (snp,'SNPID')
count += 1
@@ -129,13 +126,12 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
j,lst = q.get()
if verbose:
sys.stderr.write("Job "+str(j)+" finished\n")
- # for line in lines:
- # out.write(line)
res.append(lst)
else:
p.apply_async(compute_snp,(job,n,collect,lmm2,reml))
+ jobs_running += 1
collect = []
- while job > completed:
+ while jobs_running:
try:
j,lst = q.get_nowait()
if verbose:
@@ -143,34 +139,33 @@ def gwas(Y,G,K,restricted_max_likelihood=True,refit=False,verbose=True):
# for line in lines:
# out.write(line)
res.append(lst)
- completed += 1
+ jobs_running -= 1
except Queue.Empty:
pass
- if job > completed + cpu_num*2:
- time.sleep(0.1)
+ if jobs_running + cpu_num*2 > 0:
+ time.sleep(1.0)
else:
- if job >= completed:
+ if jobs_running > 0:
break
collect.append(snp_id)
- if numThreads==1:
+ if numThreads==1 or count<1000:
print "Running on 1 THREAD"
compute_snp(count/1000,n,collect,lmm2,reml,q)
j,lst = q.get()
- # for line in lines:
- # out.write(line)
res.append(lst)
else:
- for job in range(int(count/1000)-completed):
+ print "count=",count," running=",jobs_running," collect=",len(collect)
+ for job in range(jobs_running):
j,lst = q.get(True,15) # time out
if verbose:
sys.stderr.write("Job "+str(j)+" finished\n")
res.append(lst)
- # print res
+ if verbose:
+ print "res=",res[0][0:10]
+ print [len(res1) for res1 in res]
ts = [item[0] for res1 in res for item in res1]
ps = [item[1] for res1 in res for item in res1]
- # ts = [item[1] for item in res]
- # print ps
return ts,ps
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index f7b556a5..7063fedf 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -135,6 +135,8 @@ class plink:
def normalizeGenotype(self,G):
# print "Before",G
# print G.shape
+ print "call input.normalizeGenotype"
+ raise "This should not be used"
x = True - np.isnan(G)
m = G[x].mean()
s = np.sqrt(G[x].var())
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 2014ffb8..8a24d98b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -285,7 +285,7 @@ def run_other_old(pheno_vector,
# with Bench("LMM_ob fitting"):
# lmm_ob.fit()
- print("genotype_matrix: ", genotype_matrix.shape)
+ print("run_other_old genotype_matrix: ", genotype_matrix.shape)
print(genotype_matrix)
with Bench("Doing GWAS"):
@@ -320,6 +320,7 @@ def run_other_new(pheno_vector,
# Adjust phenotypes
Y,G,keep = phenotype.remove_missing(pheno_vector,genotype_matrix,verbose=True)
print("Removed missing phenotypes",Y.shape)
+
# if options.maf_normalization:
# G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
# print "MAF replacements: \n",G
@@ -337,7 +338,7 @@ def run_other_new(pheno_vector,
# with Bench("LMM_ob fitting"):
# lmm_ob.fit()
- print("genotype_matrix: ", G.shape)
+ print("run_other_new genotype_matrix: ", G.shape)
print(G)
with Bench("Doing GWAS"):
@@ -388,7 +389,9 @@ def calculate_kinship_new(genotype_matrix, temp_data=None):
Call the new kinship calculation where genotype_matrix contains
inds (columns) by snps (rows).
"""
+ print("call genotype.normalize")
G = np.apply_along_axis( genotype.normalize, axis=0, arr=genotype_matrix)
+ print("call calculate_kinship_new")
return kinship(G.T),G # G gets transposed, we'll turn this into an iterator (FIXME)
def calculate_kinship_old(genotype_matrix, temp_data=None):
@@ -399,6 +402,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
normalizes the resulting vectors and returns the RRM matrix.
"""
+ print("call calculate_kinship_old")
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
print("genotype 2D matrix n (inds) is:", n)
@@ -429,7 +433,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
temp_data.store("percent_complete", percent_complete)
genotype_matrix = genotype_matrix[:,keep]
- print("genotype_matrix: ", pf(genotype_matrix))
+ print("After kinship (old) genotype_matrix: ", pf(genotype_matrix))
kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix,genotype_matrix
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
index d50b0111..d4b3ac82 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm2.py
@@ -22,6 +22,7 @@ from scipy.linalg import eigh, inv, det
import scipy.stats as stats # t-tests
from scipy import optimize
from optmatrix import matrixMult
+import kinship
def calculateKinship(W,center=False):
"""
@@ -177,13 +178,17 @@ class LMM2:
Kve = []
self.nonmissing = x
- if len(Kva) == 0 or len(Kve) == 0:
- if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
- begin = time.time()
- Kva,Kve = eigh(K)
- end = time.time()
- if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+ print("this K is:", K.shape, K)
+ if len(Kva) == 0 or len(Kve) == 0:
+ # if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
+ begin = time.time()
+ # Kva,Kve = linalg.eigh(K)
+ Kva,Kve = kinship.kvakve(K)
+ end = time.time()
+ if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
+ print("sum(Kva),sum(Kve)=",sum(Kva),sum(Kve))
+
self.K = K
self.Kva = Kva
self.Kve = Kve
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 1af5a443..708c9185 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -54,9 +54,9 @@ parser.add_option("--geno",dest="geno",
parser.add_option("--maf-normalization",
action="store_true", dest="maf_normalization", default=False,
help="Apply MAF genotype normalization")
-parser.add_option("--skip-genotype-normalization",
- action="store_true", dest="skip_genotype_normalization", default=False,
- help="Skip genotype normalization")
+parser.add_option("--genotype-normalization",
+ action="store_true", dest="genotype_normalization", default=False,
+ help="Force genotype normalization")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
@@ -100,7 +100,8 @@ if options.geno:
print g.shape
if cmd == 'redis_new':
- # Emulating the redis setup of GN2
+ # The main difference between redis_new and redis is that missing
+ # phenotypes are handled by the first
Y = y
G = g
print "Original G",G.shape, "\n", G
@@ -109,7 +110,7 @@ if cmd == 'redis_new':
G = None
ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
print np.array(ps)
- print sum(ps)
+ print len(ps),sum(ps)
# Test results
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
@@ -118,12 +119,14 @@ if cmd == 'redis_new':
assert p1==0.0708, "p1=%f" % p1
assert p2==0.1417, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
- assert p1==0.0958, "p1=%f" % p1
- assert p2==0.0435, "p2=%f" % p2
+ assert p1==0.0897, "p1=%f" % p1
+ assert p2==0.0405, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
assert p1==0.8984, "p1=%f" % p1
- assert p2==0.9623, "p2=%f" % p2
-if cmd == 'redis':
+ assert p2==0.9620, "p2=%f" % p2
+ assert sum(ps) == 4070.02346579
+ assert len(ps) == 8000
+elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
@@ -135,7 +138,7 @@ if cmd == 'redis':
if options.maf_normalization:
G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
print "MAF replacements: \n",G
- if not options.skip_genotype_normalization:
+ if options.genotype_normalization:
G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
g = None
gnt = None
@@ -144,8 +147,8 @@ if cmd == 'redis':
G = None
ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
print np.array(ps)
- print sum(ps)
- # Test results
+ print len(ps),sum(ps)
+ # Test results 4070.02346579
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
sys.stderr.write(options.geno+"\n")
@@ -153,11 +156,11 @@ if cmd == 'redis':
assert p1==0.0708, "p1=%f" % p1
assert p2==0.1417, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
- assert p1==0.0958, "p1=%f" % p1
- assert p2==0.0435, "p2=%f" % p2
+ assert p1==0.0897, "p1=%f" % p1
+ assert p2==0.0405, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
assert p1==0.8984, "p1=%f" % p1
- assert p2==0.9623, "p2=%f" % p2
+ assert p2==0.8827, "p2=%f" % p2
elif cmd == 'kinship':
G = g
print "Original G",G.shape, "\n", G
@@ -169,7 +172,7 @@ elif cmd == 'kinship':
if options.maf_normalization:
G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
print "MAF replacements: \n",G
- if not options.skip_genotype_normalization:
+ if options.genotype_normalization:
G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
g = None
gnt = None