From 7759c755cb8d525ba3739864021921888bdf47e9 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 14 Mar 2015 16:17:03 +0300 Subject: Parallel K calculation is fixed --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 33 +++++++++++++++---------------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 5 ++++- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 8caa753b..a5497a77 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -26,20 +26,31 @@ import Queue from optmatrix import matrix_initialize, matrixMultT +def kinship_full(G): + """ + Calculate the Kinship matrix using a full dot multiplication + """ + print G.shape + m = G.shape[0] # snps + n = G.shape[1] # inds + sys.stderr.write(str(m)+" SNPs\n") + assert m>n, "n should be larger than m (snps>inds)" + m = np.dot(G.T,G) + m = m/G.shape[0] + return m def compute_W(job,G,n,snps,compute_size): """ Read 1000 SNPs at a time into matrix and return the result """ - W = np.ones((n,compute_size)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs) + m = compute_size + W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs) for j in range(0,compute_size): - row = job*compute_size + j - if row >= compute_size or row>=snps: + pos = job*m + j # real position + if pos >= snps: W = W[:,range(0,j)] break - # print job,compute_size,j snp = G[job*compute_size+j] - # print snp.shape,snp if snp.var() == 0: continue W[:,j] = snp # set row to list of SNPs @@ -59,18 +70,6 @@ def compute_matrixMult(job,W,q = None): def f_init(q): compute_matrixMult.q = q -def kinship_full(G): - """ - Calculate the Kinship matrix using a full dot multiplication - """ - print G.shape - m = G.shape[0] # snps - n = G.shape[1] # inds - sys.stderr.write(str(m)+" SNPs\n") - assert m>n, "n should be larger than m (snps>inds)" - m = np.dot(G.T,G) - m = m/G.shape[0] - return m # Calculate the kinship matrix from G (SNPs as rows!), returns K # diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 547ac7f1..a322f988 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -70,6 +70,9 @@ parser.add_option("--saveKvaKve", parser.add_option("--test", action="store_true", dest="testing", default=False, help="Testing mode") +parser.add_option("--test-kinship", + action="store_true", dest="test_kinship", default=False, + help="Testing mode for Kinship calculation") (options, args) = parser.parse_args() @@ -140,7 +143,7 @@ elif cmd == 'kinship': if not options.skip_genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - if False: + if options.test_kinship: K = kinship_full(G) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K -- cgit v1.2.3