aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorPjotr Prins2015-03-30 13:01:22 +0200
committerPjotr Prins2015-03-30 13:01:22 +0200
commit8b88be4f48baa6cd0cc3c37a851144d5b1dc24af (patch)
treecb35abd3dd57a93aa02b10eaf3cac01c46d41ff7 /wqflask
parent6fc112431c0edb0ecae6cd5fa45716c349094a7f (diff)
downloadgenenetwork2-8b88be4f48baa6cd0cc3c37a851144d5b1dc24af.tar.gz
Refactoring genotype normalization
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/genotype.py19
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py9
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py2
3 files changed, 17 insertions, 13 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index 315fd824..49f32e3a 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -37,14 +37,15 @@ def normalize(ind_g):
Run for every SNP list (for one individual) and return
normalized SNP genotype values with missing data filled in
"""
- g1 = np.copy(ind_g) # avoid side effects
- x = True - np.isnan(ind_g) # Matrix of True/False
- m = ind_g[x].mean() # Global mean value
- s = np.sqrt(ind_g[x].var()) # Global stddev
- g1[np.isnan(ind_g)] = m # Plug-in mean values for missing data
- if s == 0:
- g1 = g1 - m # Subtract the mean
+ g = np.copy(ind_g) # copy to avoid side effects
+ missing = np.isnan(g)
+ values = g[True - missing]
+ mean = values.mean() # Global mean value
+ stddev = np.sqrt(values.var()) # Global stddev
+ g[missing] = mean # Plug-in mean values for missing data
+ if stddev == 0:
+ g = g - mean # Subtract the mean
else:
- g1 = (g1 - m) / s # Normalize the deviation
- return g1
+ g = (g - mean) / stddev # Normalize the deviation
+ return g
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index f0473f99..035f31e8 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -414,6 +414,7 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
info("genotype 2D matrix m (snps) is: %d" % (m))
assert m>n, "n should be larger than m (snps>inds)"
keep = []
+ mprint("G (before old normalize)",genotype_matrix)
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
#Checks if any values in column are not numbers
@@ -435,10 +436,10 @@ def calculate_kinship_old(genotype_matrix, temp_data=None):
progress('kinship_old normalize genotype',counter,m)
genotype_matrix = genotype_matrix[:,keep]
- mprint("After kinship (old) genotype_matrix: ", genotype_matrix)
- # kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
- # return kinship_matrix,genotype_matrix
- return kinship_full(genotype_matrix.T,uses),genotype_matrix
+ mprint("G (after old normalize)",genotype_matrix.T)
+ kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
+ return kinship_matrix,genotype_matrix
+ # return kinship_full(genotype_matrix.T,uses),genotype_matrix
def GWAS(pheno_vector,
genotype_matrix,
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 88e2a033..fc7a4b9d 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -106,6 +106,8 @@ if options.geno:
if cmd == 'redis_new':
# The main difference between redis_new and redis is that missing
# phenotypes are handled by the first
+ if options.remove_missing_phenotypes:
+ raise Exception('Can not use --remove-missing-phenotypes with LMM2')
Y = y
G = g
print "Original G",G.shape, "\n", G