From 5dfb2fdc0a739d86fc1b6c0230d43dcb05428092 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 14 Mar 2015 10:21:25 +0300
Subject: Testing kinship works on small G

---
 wqflask/wqflask/my_pylmm/pyLMM/kinship.py |  9 +++++----
 wqflask/wqflask/my_pylmm/pyLMM/runlmm.py  | 29 +++++++++++++++++++++--------
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
index 353784aa..61da68fc 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py
@@ -27,16 +27,17 @@ import Queue
 from optmatrix import matrix_initialize, matrixMultT
 
 
-def compute_W(job,G,n,compute_size):
+def compute_W(job,G,n,snps,compute_size):
    """
    Read 1000 SNPs at a time into matrix and return the result
    """
    W = np.ones((n,compute_size)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
    for j in range(0,compute_size):
       row = job*compute_size + j
-      if row >= compute_size:
+      if row >= compute_size or row>=snps:
          W = W[:,range(0,j)]
          break
+      # print job,compute_size,j
       snp = G[job*compute_size+j]
       # print snp.shape,snp
       if snp.var() == 0:
@@ -79,6 +80,7 @@ def kinship(G,options):
     m = G.shape[0] # snps
     snps = m
     sys.stderr.write(str(m)+" SNPs\n")
+    assert m>n, "m should be larger than n (snps>inds)"
 
     q = mp.Queue()
     p = mp.Pool(numThreads, f_init, [q])
@@ -95,7 +97,7 @@ def kinship(G,options):
     for job in range(iterations):
        if options.verbose:
           sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
-       W = compute_W(job,G,n,options.computeSize)
+       W = compute_W(job,G,n,snps,options.computeSize)
        if numThreads == 1:
           compute_matrixMult(job,W,q)
           j,x = q.get()
@@ -124,7 +126,6 @@ def kinship(G,options):
           # print j,K_j[:,0]
           K = K + K_j
 
-    print "kiship.kinship: ",K.shape,K
     K = K / float(snps)
     outFile = 'runtest.kin'
     if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 627cc7a4..35f6e9a9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -51,6 +51,9 @@ parser.add_option("--pheno",dest="pheno",
                   help="Phenotype file format 1.0")
 parser.add_option("--geno",dest="geno",
                   help="Genotype file format 1.0")
+parser.add_option("--skip-genotype-normalization",
+                  action="store_true", dest="skip_genotype_normalization", default=False,
+                  help="Skip genotype normalization")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -96,8 +99,11 @@ if cmd == 'redis':
     for ind_g in g:
         gn.append( normalizeGenotype(ind_g) )
     gnt = np.array(gn).T
-    Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
-    print "G",G.shape,G
+    if y:
+        Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
+        print "G",G.shape,G
+    else:
+        G = gnt
     ps, ts = gn2_load_redis('testrun','other',k,Y,G,options.testing)
     print np.array(ps)
     print round(ps[0],4)
@@ -108,17 +114,24 @@ elif cmd == 'kinship':
     gn = []
     for ind_g in g:
         if len(gn)>=8000: break
-        gn.append( normalizeGenotype(ind_g) )
-    K = kinship_full(np.array(gn),options)
-    print "first Kinship method",K.shape,K
-    K = kinship(np.array(gn),options)
-    print "second Kinship method",K.shape,K
+        if options.skip_genotype_normalization:
+          gn.append(ind_g)
+        else:
+            gn.append( normalizeGenotype(ind_g) )
+    G = np.array(gn)
+    print G.shape, "\n", G
+    K = kinship_full(G,options)
+    print "first Kinship method",K.shape,"\n",K
+    K2 = calculate_kinship(np.copy(G.T),None,options)
+    print "GN2 Kinship method",K2.shape,"\n",K2
+    K3 = kinship(G,options)
+    print "third Kinship method",K3.shape,"\n",K3
     sys.exit(1)
     gnt = np.array(gn).T
     Y,g = removeMissingPhenotypes(y,gnt,options.verbose)
     G = g
     print G.shape,G
-    K = calculate_kinship(np.copy(G),None,options)
+    K = calculate_kinship(np.copy(G),temp_data=None,is_testing=options.testing)
     print G.shape,G
     print "first Kinship method",K.shape,K
     K = kinship(G.T,options)
-- 
cgit 1.4.1