about summary refs log tree commit diff
path: root/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py75
1 files changed, 54 insertions, 21 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6bb79856..324c4f2c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
 import sys
 import tsvreader
 import numpy as np
-from lmm import gn2_load_redis, calculate_kinship
+from lmm import gn2_load_redis, calculate_kinship_old
 from kinship import kinship, kinship_full
 import genotype
 import phenotype
@@ -54,9 +54,12 @@ parser.add_option("--geno",dest="geno",
 parser.add_option("--maf-normalization",
                   action="store_true", dest="maf_normalization", default=False,
                   help="Apply MAF genotype normalization")
-parser.add_option("--skip-genotype-normalization",
-                  action="store_true", dest="skip_genotype_normalization", default=False,
-                  help="Skip genotype normalization")
+parser.add_option("--genotype-normalization",
+                  action="store_true", dest="genotype_normalization", default=False,
+                  help="Force genotype normalization")
+parser.add_option("--remove-missing-phenotypes",
+                  action="store_true", dest="remove_missing_phenotypes", default=False,
+                  help="Remove missing phenotypes")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
@@ -99,28 +102,58 @@ if options.geno:
     g = tsvreader.geno(options.geno)
     print g.shape
 
-if cmd == 'redis':
+if cmd == 'redis_new':
+    # The main difference between redis_new and redis is that missing
+    # phenotypes are handled by the first
+    Y = y
+    G = g
+    print "Original G",G.shape, "\n", G
+
+    gt = G.T
+    G = None
+    ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+    print np.array(ps)
+    print len(ps),sum(ps)
+    # Test results
+    p1 = round(ps[0],4)
+    p2 = round(ps[-1],4)
+    sys.stderr.write(options.geno+"\n")
+    if options.geno == 'data/small.geno':
+        assert p1==0.0708, "p1=%f" % p1
+        assert p2==0.1417, "p2=%f" % p2
+    if options.geno == 'data/small_na.geno':
+        assert p1==0.0897, "p1=%f" % p1
+        assert p2==0.0405, "p2=%f" % p2
+    if options.geno == 'data/test8000.geno':
+        # assert p1==0.8984, "p1=%f" % p1
+        # assert p2==0.9621, "p2=%f" % p2
+        assert round(sum(ps)) == 4070
+        assert len(ps) == 8000
+elif cmd == 'redis':
     # Emulating the redis setup of GN2
     G = g
     print "Original G",G.shape, "\n", G
-    if y != None:
+    if y != None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
-        Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+        Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
         G = g.T
         print "Removed missing phenotypes",G.shape, "\n", G
+    else:
+        Y = y
     if options.maf_normalization:
         G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
         print "MAF replacements: \n",G
-    if not options.skip_genotype_normalization:
+    if options.genotype_normalization:
         G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
     g = None
     gnt = None
 
     gt = G.T
     G = None
-    ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
+    ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
     print np.array(ps)
-    # Test results
+    print len(ps),sum(ps)
+    # Test results 4070.02346579
     p1 = round(ps[0],4)
     p2 = round(ps[-1],4)
     sys.stderr.write(options.geno+"\n")
@@ -128,15 +161,15 @@ if cmd == 'redis':
         assert p1==0.0708, "p1=%f" % p1
         assert p2==0.1417, "p2=%f" % p2
     if options.geno == 'data/small_na.geno':
-        assert p1==0.0958, "p1=%f" % p1
-        assert p2==0.0435, "p2=%f" % p2
+        assert p1==0.0897, "p1=%f" % p1
+        assert p2==0.0405, "p2=%f" % p2
     if options.geno == 'data/test8000.geno':
-        assert p1==0.8984, "p1=%f" % p1
-        assert p2==0.9623, "p2=%f" % p2
+        assert round(sum(ps)) == 4070
+        assert len(ps) == 8000
 elif cmd == 'kinship':
     G = g
     print "Original G",G.shape, "\n", G
-    if y != None:
+    if y != None and options.remove_missing_phenotypes:
         gnt = np.array(g).T
         Y,g = phenotype.remove_missing(y,g.T,options.verbose)
         G = g.T
@@ -144,32 +177,32 @@ elif cmd == 'kinship':
     if options.maf_normalization:
         G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
         print "MAF replacements: \n",G
-    if not options.skip_genotype_normalization:
+    if options.genotype_normalization:
         G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
     g = None
     gnt = None
 
     if options.test_kinship:
-        K = kinship_full(G)
+        K = kinship_full(np.copy(G))
         print "Genotype",G.shape, "\n", G
         print "first Kinship method",K.shape,"\n",K
         k1 = round(K[0][0],4)
-        K2 = calculate_kinship(np.copy(G.T),temp_data=None)
+        K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
         print "Genotype",G.shape, "\n", G
         print "GN2 Kinship method",K2.shape,"\n",K2
         k2 = round(K2[0][0],4)
     
     print "Genotype",G.shape, "\n", G
-    K3 = kinship(np.copy(G),options)
+    K3 = kinship(G.T)
     print "third Kinship method",K3.shape,"\n",K3
     sys.stderr.write(options.geno+"\n")
     k3 = round(K3[0][0],4)
     if options.geno == 'data/small.geno':
-        assert k1==0.7939, "k1=%f" % k1
+        assert k1==0.8, "k1=%f" % k1
         assert k2==0.7939, "k2=%f" % k2
         assert k3==0.7939, "k3=%f" % k3
     if options.geno == 'data/small_na.geno':
-        assert k1==0.7172, "k1=%f" % k1
+        assert k1==0.8333, "k1=%f" % k1
         assert k2==0.7172, "k2=%f" % k2
         assert k3==0.7172, "k3=%f" % k3
     if options.geno == 'data/test8000.geno':