diff options
author | Pjotr Prins | 2015-03-10 17:18:26 +0300 |
---|---|---|
committer | Pjotr Prins | 2015-03-10 17:18:26 +0300 |
commit | 6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be (patch) | |
tree | a062e6cfc6d22507821d38f55324cef045b7da1e /wqflask | |
parent | f116f59923ebe3fc52c27fac8fd571a70e323a68 (diff) | |
download | genenetwork2-6c165a4b9ec848eae0baeb4e4e3382bde4e9c1be.tar.gz |
Normalisation
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/input.py | 4 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 5 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 38 |
3 files changed, 34 insertions, 13 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py index fdf02e42..f7b556a5 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/input.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py @@ -133,13 +133,15 @@ class plink: return G def normalizeGenotype(self,G): + # print "Before",G + # print G.shape x = True - np.isnan(G) m = G[x].mean() s = np.sqrt(G[x].var()) G[np.isnan(G)] = m if s == 0: G = G - m else: G = (G - m) / s - + # print "After",G return G def getPhenos(self,phenoFile=None): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index c2271611..ab19bf08 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -333,8 +333,8 @@ def calculate_kinship(genotype_matrix, temp_data, is_testing=False): """ n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - print("genotype matrix n is:", n) - print("genotype matrix m is:", m) + print("genotype 2D matrix n (inds) is:", n) + print("genotype 2D matrix m (snps) is:", m) keep = [] for counter in range(m): if is_testing and counter>8: @@ -730,6 +730,7 @@ def gn2_redis(key,species): print('kinship', np.array(params['kinship_matrix'])) print('pheno', np.array(params['pheno_vector'])) print('geno', np.array(params['genotype_matrix'])) + # sys.exit(1) if species == "human" : ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']), diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 6db1bdbc..907a6835 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -73,15 +73,33 @@ if options.geno: print g.shape def normalizeGenotype(G): - x = True - np.isnan(G) - m = G[x].mean() - s = np.sqrt(G[x].var()) - G[np.isnan(G)] = m - if s == 0: G = G - m - else: G = (G - m) / s + # Run for every SNP list (num individuals) + x = True - np.isnan(G) # Matrix of True/False + m = G[x].mean() # Global mean value + print m + s = np.sqrt(G[x].var()) # Global stddev + print s + G[np.isnan(G)] = m # Plug-in mean values for missing + if s == 0: + G = G - m # Subtract the mean + else: + G = (G - m) / s # Normalize the deviation return G -gT = normalizeGenotype(g.T) +# g = g.reshape((1, -1))[0] +print("Before",g) +gn = [] +for snp in g: + gn.append( normalizeGenotype(snp) ) + +gn = np.array(gn) +print("After1",gn) +gnT = gn.T +print("After",gnT) +# G = gnT +G = gnT +print "G shape",G.shape +# assert(G[0,0]==-2.25726341) # Remove individuals with missing phenotypes v = np.isnan(y) @@ -89,9 +107,9 @@ keep = True - v if v.sum(): if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum())) y = y[keep] - gT = gT[keep,:] + G = G[keep,:] k = k[keep,:][:,keep] if cmd == 'redis': - ps, ts = gn2_load_redis('testrun','other',k,y,gT) - print ps[0:10],ps[-10:] + ps, ts = gn2_load_redis('testrun','other',k,y,G) + print np.array(ps) |