1 files changed, 100 insertions, 43 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 738268be..469ba6c9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,18 +21,22 @@ from optparse import OptionParser
 import sys
 import tsvreader
 import numpy as np
-from lmm import gn2_load_redis
+from lmm import gn2_load_redis, calculate_kinship
+from kinship import kinship, kinship_full
+import genotype
+import phenotype
 
 usage = """
 python runlmm.py [options] command
 
   runlmm.py processing multiplexer reads standardised input formats
-  and calls the different routines
+  and calls the different routines (writes to stdout)
 
   Current commands are:
 
     parse        : only parse input files
     redis        : use Redis to call into GN2
+    kinship      : calculate (new) kinship matrix
 
   try --help for more information
 """
@@ -47,12 +51,28 @@ parser.add_option("--pheno",dest="pheno",
                   help="Phenotype file format 1.0")
 parser.add_option("--geno",dest="geno",
                   help="Genotype file format 1.0")
+parser.add_option("--maf-normalization",
+                  action="store_true", dest="maf_normalization", default=False,
+                  help="Apply MAF genotype normalization")
+parser.add_option("--skip-genotype-normalization",
+                  action="store_true", dest="skip_genotype_normalization", default=False,
+                  help="Skip genotype normalization")
 parser.add_option("-q", "--quiet",
                   action="store_false", dest="verbose", default=True,
                   help="don't print status messages to stdout")
+parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication")
+parser.add_option("-t", "--threads",
+                  type="int", dest="numThreads", 
+                  help="Threads to use")
+parser.add_option("--saveKvaKve",
+                  action="store_true", dest="saveKvaKve", default=False,
+                  help="Testing mode")
 parser.add_option("--test",
                   action="store_true", dest="testing", default=False,
                   help="Testing mode")
+parser.add_option("--test-kinship",
+                  action="store_true", dest="test_kinship", default=False,
+                  help="Testing mode for Kinship calculation")
 
 (options, args) = parser.parse_args()
 
@@ -63,6 +83,10 @@ if len(args) != 1:
 cmd = args[0]
 print "Command: ",cmd
 
+k = None
+y = None
+g = None
+
 if options.kinship:
     k = tsvreader.kinship(options.kinship)
     print k.shape
@@ -75,46 +99,79 @@ if options.geno:
     g = tsvreader.geno(options.geno)
     print g.shape
 
-def normalizeGenotype(G):
-    # Run for every SNP list (num individuals)
-    x = True - np.isnan(G)  # Matrix of True/False
-    m = G[x].mean()         # Global mean value
-    # print m
-    s = np.sqrt(G[x].var()) # Global stddev
-    # print s
-    G[np.isnan(G)] = m      # Plug-in mean values for missing
-    if s == 0:
-        G = G - m           # Subtract the mean
-    else:
-        G = (G - m) / s     # Normalize the deviation
-    return G
-
-# g = g.reshape((1, -1))[0]
-print("Before",g)
-gn = []
-for snp in g:
-    gn.append( normalizeGenotype(snp) )
-
-gn = g
-gn = np.array(gn)
-print("After1",gn)
-gnT = gn.T
-print("After",gnT)
-# G = gnT
-G = gnT
-print "G shape",G.shape
-# sys.exit(1)
-# assert(G[0,0]==-2.25726341)
-
-# Remove individuals with missing phenotypes
-v = np.isnan(y)
-keep = True - v
-if v.sum():
-   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
-   y  = y[keep]
-   G = G[keep,:]
-   k = k[keep,:][:,keep]
-
 if cmd == 'redis':
-    ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
+    # Emulating the redis setup of GN2
+    G = g
+    print "Original G",G.shape, "\n", G
+    if y != None:
+        gnt = np.array(g).T
+        Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+        G = g.T
+        print "Removed missing phenotypes",G.shape, "\n", G
+    if options.maf_normalization:
+        G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+        print "MAF replacements: \n",G
+    if not options.skip_genotype_normalization:
+        G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+    g = None
+    gnt = None
+
+    ps, ts = gn2_load_redis('testrun','other',k,Y,G.T)
     print np.array(ps)
+    # Test results
+    p1 = round(ps[0],4)
+    p2 = round(ps[-1],4)
+    sys.stderr.write(options.geno+"\n")
+    if options.geno == 'data/small.geno':
+        assert p1==0.0708, "p1=%f" % p1
+        assert p2==0.1417, "p2=%f" % p2
+    if options.geno == 'data/small_na.geno':
+        assert p1==0.0958, "p1=%f" % p1
+        assert p2==0.0435, "p2=%f" % p2
+    if options.geno == 'data/test8000.geno':
+        assert p1==0.8984, "p1=%f" % p1
+        assert p2==0.9623, "p2=%f" % p2
+elif cmd == 'kinship':
+    G = g
+    print "Original G",G.shape, "\n", G
+    if y != None:
+        gnt = np.array(g).T
+        Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+        G = g.T
+        print "Removed missing phenotypes",G.shape, "\n", G
+    if options.maf_normalization:
+        G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+        print "MAF replacements: \n",G
+    if not options.skip_genotype_normalization:
+        G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+    g = None
+    gnt = None
+
+    if options.test_kinship:
+        K = kinship_full(G)
+        print "Genotype",G.shape, "\n", G
+        print "first Kinship method",K.shape,"\n",K
+        k1 = round(K[0][0],4)
+        K2 = calculate_kinship(np.copy(G.T),temp_data=None)
+        print "Genotype",G.shape, "\n", G
+        print "GN2 Kinship method",K2.shape,"\n",K2
+        k2 = round(K2[0][0],4)
+    
+    print "Genotype",G.shape, "\n", G
+    K3 = kinship(np.copy(G),options)
+    print "third Kinship method",K3.shape,"\n",K3
+    sys.stderr.write(options.geno+"\n")
+    k3 = round(K3[0][0],4)
+    if options.geno == 'data/small.geno':
+        assert k1==0.7939, "k1=%f" % k1
+        assert k2==0.7939, "k2=%f" % k2
+        assert k3==0.7939, "k3=%f" % k3
+    if options.geno == 'data/small_na.geno':
+        assert k1==0.7172, "k1=%f" % k1
+        assert k2==0.7172, "k2=%f" % k2
+        assert k3==0.7172, "k3=%f" % k3
+    if options.geno == 'data/test8000.geno':
+        assert k3==1.4352, "k3=%f" % k3
+
+else:
+    print "Doing nothing"