Normalisation

author: Pjotr Prins 2015-03-10 17:18:26 +0300
committer: Pjotr Prins 2015-03-10 17:18:26 +0300
commit: 6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be (patch)
tree: a062e6cfc6d22507821d38f55324cef045b7da1e /wqflask
parent: f116f59923ebe3fc52c27fac8fd571a70e323a68 (diff)
download: genenetwork2-6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be.tar.gz
3 files changed, 34 insertions, 13 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index fdf02e42..f7b556a5 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -133,13 +133,15 @@ class plink:
         return G
     
     def normalizeGenotype(self,G):
+        # print "Before",G
+        # print G.shape
         x = True - np.isnan(G)
         m = G[x].mean()
         s = np.sqrt(G[x].var())
         G[np.isnan(G)] = m
         if s == 0: G = G - m
         else: G = (G - m) / s
-     
+        # print "After",G
         return G
     
     def getPhenos(self,phenoFile=None):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index c2271611..ab19bf08 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -333,8 +333,8 @@ def calculate_kinship(genotype_matrix, temp_data, is_testing=False):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    print("genotype matrix n is:", n)
-    print("genotype matrix m is:", m)
+    print("genotype 2D matrix n (inds) is:", n)
+    print("genotype 2D matrix m (snps) is:", m)
     keep = []
     for counter in range(m):
         if is_testing and counter>8:
@@ -730,6 +730,7 @@ def gn2_redis(key,species):
     print('kinship', np.array(params['kinship_matrix']))
     print('pheno', np.array(params['pheno_vector']))
     print('geno', np.array(params['genotype_matrix']))
+    # sys.exit(1)
     
     if species == "human" :
         ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6db1bdbc..907a6835 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -73,15 +73,33 @@ if options.geno:
     print g.shape
 
 def normalizeGenotype(G):
-    x = True - np.isnan(G)
-    m = G[x].mean()
-    s = np.sqrt(G[x].var())
-    G[np.isnan(G)] = m
-    if s == 0: G = G - m
-    else: G = (G - m) / s
+    # Run for every SNP list (num individuals)
+    x = True - np.isnan(G)  # Matrix of True/False
+    m = G[x].mean()         # Global mean value
+    print m
+    s = np.sqrt(G[x].var()) # Global stddev
+    print s
+    G[np.isnan(G)] = m      # Plug-in mean values for missing
+    if s == 0:
+        G = G - m           # Subtract the mean
+    else:
+        G = (G - m) / s     # Normalize the deviation
     return G
 
-gT = normalizeGenotype(g.T)
+# g = g.reshape((1, -1))[0]
+print("Before",g)
+gn = []
+for snp in g:
+    gn.append( normalizeGenotype(snp) )
+
+gn = np.array(gn)
+print("After1",gn)
+gnT = gn.T
+print("After",gnT)
+# G = gnT
+G = gnT
+print "G shape",G.shape
+# assert(G[0,0]==-2.25726341)
 
 # Remove individuals with missing phenotypes
 v = np.isnan(y)
@@ -89,9 +107,9 @@ keep = True - v
 if v.sum():
    if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
    y  = y[keep]
-   gT = gT[keep,:]
+   G = G[keep,:]
    k = k[keep,:][:,keep]
 
 if cmd == 'redis':
-    ps, ts = gn2_load_redis('testrun','other',k,y,gT)
-    print ps[0:10],ps[-10:]
+    ps, ts = gn2_load_redis('testrun','other',k,y,G)
+    print np.array(ps)
author	Pjotr Prins	2015-03-10 17:18:26 +0300
committer	Pjotr Prins	2015-03-10 17:18:26 +0300
commit	6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be (patch)
tree	a062e6cfc6d22507821d38f55324cef045b7da1e /wqflask
parent	f116f59923ebe3fc52c27fac8fd571a70e323a68 (diff)
download	genenetwork2-6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be.tar.gz