From 6e6cae20d29de14ab1d0a5dc8e38ebb9162aa639 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 14 Mar 2015 11:28:36 +0300 Subject: More MAF --- wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 15 +++++++-------- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 16 +++++----------- 2 files changed, 12 insertions(+), 19 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py index e2457f6b..f5d9ee8c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py @@ -18,20 +18,19 @@ import numpy as np from collections import Counter +import operator def replace_missing_with_MAF(snp_g): """ Replace the missing genotype with the minor allele frequency (MAF) in the snp row """ - g1 = np.copy(snp_g) - cnt = Counter(g1) - print cnt - min_val = min(cnt.itervalues()) - print "min_val=",min_val - l = [k for k, v in cnt.iteritems() if v == min_val and not np.isnan(k)] - print "l=",l[0] - return [l[0] if np.isnan(snp) else snp for snp in g1] + cnt = Counter(snp_g) + tuples = sorted(cnt.items(), key=operator.itemgetter(1)) + l2 = [t for t in tuples if not np.isnan(t[0])] + maf = l2[0][0] + res = np.array([maf if np.isnan(snp) else snp for snp in snp_g]) + return res def normalize(ind_g): """ diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index ffe25fcf..0b8830d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -115,18 +115,12 @@ if cmd == 'redis': assert(options.testing and round(ps[-1],4)==0.3461) elif cmd == 'kinship': G = g - print G.shape, "\n", G + print "Original G",G.shape, "\n", G if options.maf_normalization: - g1 = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) - print "MAF: ",g1 - sys.exit() - for ind_g in g: - if len(gn)>=8000: break - if options.skip_genotype_normalization: - gn.append(ind_g) - else: - gn.append( genotype.normalize(ind_g) ) - G = np.array(gn) + G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) + print "MAF replacements: \n",G + if not options.skip_genotype_normalization: + G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) print G.shape, "\n", G K = kinship_full(G,options) -- cgit v1.2.3