aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py36
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/phenotype.py40
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py47
3 files changed, 85 insertions, 38 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
new file mode 100644
index 00000000..19b0957b
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -0,0 +1,36 @@
+# Genotype routines
+
+# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import numpy as np
+
+def normalizeGenotype(g):
+ """
+ Run for every SNP list (for one individual) and return
+ normalized SNP genotype values with missing data filled in
+ """
+ g1 = np.copy(g) # avoid side effects
+ x = True - np.isnan(g) # Matrix of True/False
+ m = g[x].mean() # Global mean value
+ s = np.sqrt(g[x].var()) # Global stddev
+ g1[np.isnan(g)] = m # Plug-in mean values for missing data
+ if s == 0:
+ g1 = g1 - m # Subtract the mean
+ else:
+ g1 = (g1 - m) / s # Normalize the deviation
+ return g1
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
new file mode 100644
index 00000000..c22da761
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/phenotype.py
@@ -0,0 +1,40 @@
+# Phenotype routines
+
+# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com)
+# Copyright (C) 2015 Pjotr Prins (pjotr.prins@thebird.nl)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+import numpy as np
+
+def removeMissingPhenotypes(y,g,verbose=False):
+ """
+ Remove missing data from matrices, make sure the genotype data has
+ individuals as rows
+ """
+ assert(y!=None)
+ assert(y.shape[0] == g.shape[0])
+
+ y1 = y
+ g1 = g
+ v = np.isnan(y)
+ keep = True - v
+ if v.sum():
+ if verbose:
+ sys.stderr.write("runlmm.py: Cleaning the phenotype vector and genotype matrix by removing %d individuals...\n" % (v.sum()))
+ y1 = y[keep]
+ g1 = g[keep,:]
+ return y1,g1
+
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 94533ecb..16e88419 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -23,6 +23,8 @@ import tsvreader
import numpy as np
from lmm import gn2_load_redis
from kinship import kinship
+from genotype import normalizeGenotype
+from phenotype import removeMissingPhenotypes
usage = """
python runlmm.py [options] command
@@ -87,47 +89,16 @@ if options.pheno:
if options.geno:
g = tsvreader.geno(options.geno)
print g.shape
-
-def normalizeGenotype(g1):
- g = np.copy(g1) # avoid side effects
- # Run for every SNP list (num individuals)
- x = True - np.isnan(g) # Matrix of True/False
- m = g[x].mean() # Global mean value
- # print m
- s = np.sqrt(g[x].var()) # Global stddev
- # print s
- g[np.isnan(g)] = m # Plug-in mean values for missing
- if s == 0:
- g = g - m # Subtract the mean
- else:
- g = (g - m) / s # Normalize the deviation
- return g
-
-gn = []
-for snp in g:
- gn.append( normalizeGenotype(snp) )
-
-print("Before",g)
-
-gT = np.array(gn).T
-print("After",gT)
-G = np.array(gT)
-print "G shape",G.shape
-
-# Remove individuals with missing phenotypes
-if y != None:
- v = np.isnan(y)
- keep = True - v
- if v.sum():
- if options.verbose: sys.stderr.write("runlmm.py: Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
- y = y[keep]
- G = G[keep,:]
- if k != None: k = k[keep,:][:,keep]
if cmd == 'redis':
# Emulating the redis setup of GN2
- print G
- ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
+ gn = []
+ for ind_g in g:
+ gn.append( normalizeGenotype(ind_g) )
+ gnt = np.array(gn).T
+ Y,G = removeMissingPhenotypes(y,gnt,options.verbose)
+ print "G",G.shape,G
+ ps, ts = gn2_load_redis('testrun','other',np.array(k),Y,G,options.testing)
print np.array(ps)
print round(ps[0],4)
assert(options.testing and round(ps[0],4)==0.7262)