about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pjotr Prins 2015-03-14 11:28:36 +0300
committer Pjotr Prins 2015-03-14 11:28:36 +0300
commit 6e6cae20d29de14ab1d0a5dc8e38ebb9162aa639 (patch)
tree 296fe3fb008ef6d1c38ea2429b750575dcc2a6cd
parent 30cc25c26263f10aa19548c70a18178f2b3ca59e (diff)
download genenetwork2-6e6cae20d29de14ab1d0a5dc8e38ebb9162aa639.tar.gz
More MAF
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/genotype.py 15
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py 16
2 files changed, 12 insertions, 19 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
index e2457f6b..f5d9ee8c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py
@@ -18,20 +18,19 @@
import numpy as np
from collections import Counter
+import operator
def replace_missing_with_MAF(snp_g):
"""
Replace the missing genotype with the minor allele frequency (MAF)
in the snp row
"""
- g1 = np.copy(snp_g)
- cnt = Counter(g1)
- print cnt
- min_val = min(cnt.itervalues())
- print "min_val=",min_val
- l = [k for k, v in cnt.iteritems() if v == min_val and not np.isnan(k)]
- print "l=",l[0]
- return [l[0] if np.isnan(snp) else snp for snp in g1]
+ cnt = Counter(snp_g)
+ tuples = sorted(cnt.items(), key=operator.itemgetter(1))
+ l2 = [t for t in tuples if not np.isnan(t[0])]
+ maf = l2[0][0]
+ res = np.array([maf if np.isnan(snp) else snp for snp in snp_g])
+ return res
def normalize(ind_g):
"""
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ffe25fcf..0b8830d4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -115,18 +115,12 @@ if cmd == 'redis':
assert(options.testing and round(ps[-1],4)==0.3461)
elif cmd == 'kinship':
G = g
- print G.shape, "\n", G
+ print "Original G",G.shape, "\n", G
if options.maf_normalization:
- g1 = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
- print "MAF: ",g1
- sys.exit()
- for ind_g in g:
- if len(gn)>=8000: break
- if options.skip_genotype_normalization:
- gn.append(ind_g)
- else:
- gn.append( genotype.normalize(ind_g) )
- G = np.array(gn)
+ G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+ print "MAF replacements: \n",G
+ if not options.skip_genotype_normalization:
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
print G.shape, "\n", G
K = kinship_full(G,options)