about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2015-03-13 11:48:58 +0300
committerPjotr Prins2015-03-13 11:48:58 +0300
commit7c7ace427c8dfa81d3448250d9697b4e00418d34 (patch)
treeb3646dd70619ba4be9099406fe0f4c57fec32c43
parent7ec5303a6eca758236103e191f574bb5480a0ba6 (diff)
downloadgenenetwork2-7c7ace427c8dfa81d3448250d9697b4e00418d34.tar.gz
& Refactoring
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py36
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py40
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py47
3 files changed, 85 insertions, 38 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
new file mode 100644
index 00000000..19b0957b
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -0,0 +1,36 @@
+# Genotype routines
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import numpy as np
+
+def normalizeGenotype(g):
+    """
+    Run for every SNP list (for one individual) and return
+    normalized SNP genotype values with missing data filled in
+    """
+    g1 = np.copy(g)          # avoid side effects
+    x = True - np.isnan(g)   # Matrix of True/False
+    m = g[x].mean()          # Global mean value
+    s = np.sqrt(g[x].var())  # Global stddev
+    g1[np.isnan(g)] = m      # Plug-in mean values for missing data
+    if s == 0:
+        g1 = g1 - m          # Subtract the mean
+    else:
+        g1 = (g1 - m) / s    # Normalize the deviation
+    return g1
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
new file mode 100644
index 00000000..c22da761
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -0,0 +1,40 @@
+# Phenotype routines
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+# Copyright (C) 2015  Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import numpy as np
+
+def removeMissingPhenotypes(y,g,verbose=False):
+    """
+    Remove missing data from matrices, make sure the genotype data has
+    individuals as rows
+    """
+    assert(y!=None)
+    assert(y.shape[0] == g.shape[0])
+
+    y1 = y
+    g1 = g
+    v = np.isnan(y)
+    keep = True - v
+    if v.sum():
+        if verbose: 
+            sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+        y1 = y[keep]
+        g1 = g[keep,:]
+    return y1,g1
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 94533ecb..16e88419 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -23,6 +23,8 @@ import tsvreader
 import numpy as np
 from lmm import gn2_load_redis
 from kinship import kinship
+from genotype import normalizeGenotype
+from phenotype import removeMissingPhenotypes
 
 usage = """
 python runlmm.py [options] command
@@ -87,47 +89,16 @@ if options.pheno:
 if options.geno:
     g = tsvreader.geno(options.geno)
     print g.shape
-    
-def normalizeGenotype(g1):
-    g = np.copy(g1)         # avoid side effects
-    # Run for every SNP list (num individuals)
-    x = True - np.isnan(g)  # Matrix of True/False
-    m = g[x].mean()         # Global mean value
-    # print m
-    s = np.sqrt(g[x].var()) # Global stddev
-    # print s
-    g[np.isnan(g)] = m      # Plug-in mean values for missing
-    if s == 0:
-        g = g - m           # Subtract the mean
-    else:
-        g = (g - m) / s     # Normalize the deviation
-    return g
-
-gn = []
-for snp in g:
-    gn.append( normalizeGenotype(snp) )
-
-print("Before",g)
-
-gT = np.array(gn).T
-print("After",gT)
-G = np.array(gT)
-print "G shape",G.shape
-
-# Remove individuals with missing phenotypes
-if y != None:
-    v = np.isnan(y)
-    keep = True - v
-    if v.sum():
-       if options.verbose: sys.stderr.write("runlmm.py: Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
-       y = y[keep]
-       G = G[keep,:]
-       if k != None: k = k[keep,:][:,keep]
 
 if cmd == 'redis':
     # Emulating the redis setup of GN2
-    print G
-    ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
+    gn = []
+    for ind_g in g:
+        gn.append( normalizeGenotype(ind_g) )
+    gnt = np.array(gn).T
+    Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
+    print "G",G.shape,G
+    ps, ts = gn2_load_redis('testrun','other',np.array(k),Y,G,options.testing)
     print np.array(ps)
     print round(ps[0],4)
     assert(options.testing and round(ps[0],4)==0.7262)