about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pjotr Prins 2015-03-14 10:21:25 +0300
committer Pjotr Prins 2015-03-14 10:21:25 +0300
commit 5dfb2fdc0a739d86fc1b6c0230d43dcb05428092 (patch)
tree 17e95d679199d47a15ec917f59fe845aafcdaae0
parent b7d7068aa8134b9d2a4a0de11c56b12beee1d0e0 (diff)
download genenetwork2-5dfb2fdc0a739d86fc1b6c0230d43dcb05428092.tar.gz
Testing kinship works on small G
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/kinship.py 9
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py 29
2 files changed, 26 insertions, 12 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 353784aa..61da68fc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -27,16 +27,17 @@ import Queue
from optmatrix import matrix_initialize, matrixMultT
-def compute_W(job,G,n,compute_size):
+def compute_W(job,G,n,snps,compute_size):
"""
Read 1000 SNPs at a time into matrix and return the result
"""
W = np.ones((n,compute_size)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
for j in range(0,compute_size):
row = job*compute_size + j
- if row >= compute_size:
+ if row >= compute_size or row>=snps:
W = W[:,range(0,j)]
break
+ # print job,compute_size,j
snp = G[job*compute_size+j]
# print snp.shape,snp
if snp.var() == 0:
@@ -79,6 +80,7 @@ def kinship(G,options):
m = G.shape[0] # snps
snps = m
sys.stderr.write(str(m)+" SNPs\n")
+ assert m>n, "n should be larger than m (snps>inds)"
q = mp.Queue()
p = mp.Pool(numThreads, f_init, [q])
@@ -95,7 +97,7 @@ def kinship(G,options):
for job in range(iterations):
if options.verbose:
sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
- W = compute_W(job,G,n,options.computeSize)
+ W = compute_W(job,G,n,snps,options.computeSize)
if numThreads == 1:
compute_matrixMult(job,W,q)
j,x = q.get()
@@ -124,7 +126,6 @@ def kinship(G,options):
# print j,K_j[:,0]
K = K + K_j
- print "kiship.kinship: ",K.shape,K
K = K / float(snps)
outFile = 'runtest.kin'
if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 627cc7a4..35f6e9a9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -51,6 +51,9 @@ parser.add_option("--pheno",dest="pheno",
help="Phenotype file format 1.0")
parser.add_option("--geno",dest="geno",
help="Genotype file format 1.0")
+parser.add_option("--skip-genotype-normalization",
+ action="store_true", dest="skip_genotype_normalization", default=False,
+ help="Skip genotype normalization")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
@@ -96,8 +99,11 @@ if cmd == 'redis':
for ind_g in g:
gn.append( normalizeGenotype(ind_g) )
gnt = np.array(gn).T
- Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
- print "G",G.shape,G
+ if y:
+ Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
+ print "G",G.shape,G
+ else:
+ G = gnt
ps, ts = gn2_load_redis('testrun','other',k,Y,G,options.testing)
print np.array(ps)
print round(ps[0],4)
@@ -108,17 +114,24 @@ elif cmd == 'kinship':
gn = []
for ind_g in g:
if len(gn)>=8000: break
- gn.append( normalizeGenotype(ind_g) )
- K = kinship_full(np.array(gn),options)
- print "first Kinship method",K.shape,K
- K = kinship(np.array(gn),options)
- print "second Kinship method",K.shape,K
+ if options.skip_genotype_normalization:
+ gn.append(ind_g)
+ else:
+ gn.append( normalizeGenotype(ind_g) )
+ G = np.array(gn)
+ print G.shape, "\n", G
+ K = kinship_full(G,options)
+ print "first Kinship method",K.shape,"\n",K
+ K2 = calculate_kinship(np.copy(G.T),None,options)
+ print "GN2 Kinship method",K2.shape,"\n",K2
+ K3 = kinship(G,options)
+ print "third Kinship method",K3.shape,"\n",K3
sys.exit(1)
gnt = np.array(gn).T
Y,g = removeMissingPhenotypes(y,gnt,options.verbose)
G = g
print G.shape,G
- K = calculate_kinship(np.copy(G),None,options)
+ K = calculate_kinship(np.copy(G),temp_data=None,is_testing=options.testing)
print G.shape,G
print "first Kinship method",K.shape,K
K = kinship(G.T,options)