From 5dfb2fdc0a739d86fc1b6c0230d43dcb05428092 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 14 Mar 2015 10:21:25 +0300 Subject: Testing kinship works on small G --- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 9 +++++---- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 29 +++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 353784aa..61da68fc 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -27,16 +27,17 @@ import Queue from optmatrix import matrix_initialize, matrixMultT -def compute_W(job,G,n,compute_size): +def compute_W(job,G,n,snps,compute_size): """ Read 1000 SNPs at a time into matrix and return the result """ W = np.ones((n,compute_size)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs) for j in range(0,compute_size): row = job*compute_size + j - if row >= compute_size: + if row >= compute_size or row>=snps: W = W[:,range(0,j)] break + # print job,compute_size,j snp = G[job*compute_size+j] # print snp.shape,snp if snp.var() == 0: @@ -79,6 +80,7 @@ def kinship(G,options): m = G.shape[0] # snps snps = m sys.stderr.write(str(m)+" SNPs\n") + assert m>n, "n should be larger than m (snps>inds)" q = mp.Queue() p = mp.Pool(numThreads, f_init, [q]) @@ -95,7 +97,7 @@ def kinship(G,options): for job in range(iterations): if options.verbose: sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize))) - W = compute_W(job,G,n,options.computeSize) + W = compute_W(job,G,n,snps,options.computeSize) if numThreads == 1: compute_matrixMult(job,W,q) j,x = q.get() @@ -124,7 +126,6 @@ def kinship(G,options): # print j,K_j[:,0] K = K + K_j - print "kiship.kinship: ",K.shape,K K = K / float(snps) outFile = 'runtest.kin' if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 627cc7a4..35f6e9a9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -51,6 +51,9 @@ parser.add_option("--pheno",dest="pheno", help="Phenotype file format 1.0") parser.add_option("--geno",dest="geno", help="Genotype file format 1.0") +parser.add_option("--skip-genotype-normalization", + action="store_true", dest="skip_genotype_normalization", default=False, + help="Skip genotype normalization") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") @@ -96,8 +99,11 @@ if cmd == 'redis': for ind_g in g: gn.append( normalizeGenotype(ind_g) ) gnt = np.array(gn).T - Y,G = removeMissingPhenotypes(y,gnt,options.verbose) - print "G",G.shape,G + if y: + Y,G = removeMissingPhenotypes(y,gnt,options.verbose) + print "G",G.shape,G + else: + G = gnt ps, ts = gn2_load_redis('testrun','other',k,Y,G,options.testing) print np.array(ps) print round(ps[0],4) @@ -108,17 +114,24 @@ elif cmd == 'kinship': gn = [] for ind_g in g: if len(gn)>=8000: break - gn.append( normalizeGenotype(ind_g) ) - K = kinship_full(np.array(gn),options) - print "first Kinship method",K.shape,K - K = kinship(np.array(gn),options) - print "second Kinship method",K.shape,K + if options.skip_genotype_normalization: + gn.append(ind_g) + else: + gn.append( normalizeGenotype(ind_g) ) + G = np.array(gn) + print G.shape, "\n", G + K = kinship_full(G,options) + print "first Kinship method",K.shape,"\n",K + K2 = calculate_kinship(np.copy(G.T),None,options) + print "GN2 Kinship method",K2.shape,"\n",K2 + K3 = kinship(G,options) + print "third Kinship method",K3.shape,"\n",K3 sys.exit(1) gnt = np.array(gn).T Y,g = removeMissingPhenotypes(y,gnt,options.verbose) G = g print G.shape,G - K = calculate_kinship(np.copy(G),None,options) + K = calculate_kinship(np.copy(G),temp_data=None,is_testing=options.testing) print G.shape,G print "first Kinship method",K.shape,K K = kinship(G.T,options) -- cgit v1.2.3