From 2c6d1fcba1138415ecb3ca447e09d06d660af0db Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 14 Mar 2015 12:14:53 +0300 Subject: Working on kinship: GN2 and simple matrix multiplication agree --- wqflask/wqflask/my_pylmm/pyLMM/genotype.py | 2 +- wqflask/wqflask/my_pylmm/pyLMM/kinship.py | 4 ++++ wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 4 ++-- wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 23 +++++++++++------------ 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py index f5d9ee8c..315fd824 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/genotype.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/genotype.py @@ -23,7 +23,7 @@ import operator def replace_missing_with_MAF(snp_g): """ Replace the missing genotype with the minor allele frequency (MAF) - in the snp row + in the snp row. It is rather slow! """ cnt = Counter(snp_g) tuples = sorted(cnt.items(), key=operator.itemgetter(1)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py index 61da68fc..c1750e1d 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/kinship.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/kinship.py @@ -61,6 +61,10 @@ def f_init(q): def kinship_full(G,options): print G.shape + m = G.shape[0] # snps + n = G.shape[1] # inds + sys.stderr.write(str(m)+" SNPs\n") + assert m>n, "n should be larger than m (snps>inds)" m = np.dot(G.T,G) m = m/G.shape[0] return m diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 36c3602f..7bf77be5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -340,8 +340,8 @@ def calculate_kinship(genotype_matrix, temp_data=None, is_testing=False): print("genotype 2D matrix m (snps) is:", m) keep = [] for counter in range(m): - if is_testing and counter>8: - break + # if is_testing and counter>8: + # break #print("type of genotype_matrix[:,counter]:", 
pf(genotype_matrix[:,counter])) #Checks if any values in column are not numbers not_number = np.isnan(genotype_matrix[:,counter]) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 0b8830d4..80478368 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -116,28 +116,27 @@ if cmd == 'redis': elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G + if y: + gnt = np.array(gn).T + Y,g = phenotype.remove_missing(y,g.T,options.verbose) + G = g.T + print "Removed missing phenotypes",G.shape, "\n", G if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G if not options.skip_genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) - print G.shape, "\n", G K = kinship_full(G,options) + print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K - K2 = calculate_kinship(np.copy(G.T),None,options) + K2 = calculate_kinship(np.copy(G.T),temp_data=None,is_testing=options.testing) + print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 - K3 = kinship(G,options) + + print "Genotype",G.shape, "\n", G + K3 = kinship(np.copy(G),options) print "third Kinship method",K3.shape,"\n",K3 - sys.exit(1) - gnt = np.array(gn).T - Y,g = remove_missing_phenotypes(y,gnt,options.verbose) - G = g - print G.shape,G - K = calculate_kinship(np.copy(G),temp_data=None,is_testing=options.testing) - print G.shape,G - print "first Kinship method",K.shape,K - K = kinship(G.T,options) assert(K[0][0]==1.28) else: print "Doing nothing" -- cgit v1.2.3