aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py75
1 files changed, 54 insertions, 21 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 6bb79856..324c4f2c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,7 +21,7 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis, calculate_kinship
+from lmm import gn2_load_redis, calculate_kinship_old
from kinship import kinship, kinship_full
import genotype
import phenotype
@@ -54,9 +54,12 @@ parser.add_option("--geno",dest="geno",
parser.add_option("--maf-normalization",
action="store_true", dest="maf_normalization", default=False,
help="Apply MAF genotype normalization")
-parser.add_option("--skip-genotype-normalization",
- action="store_true", dest="skip_genotype_normalization", default=False,
- help="Skip genotype normalization")
+parser.add_option("--genotype-normalization",
+ action="store_true", dest="genotype_normalization", default=False,
+ help="Force genotype normalization")
+parser.add_option("--remove-missing-phenotypes",
+ action="store_true", dest="remove_missing_phenotypes", default=False,
+ help="Remove missing phenotypes")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
@@ -99,28 +102,58 @@ if options.geno:
g = tsvreader.geno(options.geno)
print g.shape
-if cmd == 'redis':
+if cmd == 'redis_new':
+ # The main difference between redis_new and redis is that missing
+ # phenotypes are handled by the first
+ Y = y
+ G = g
+ print "Original G",G.shape, "\n", G
+
+ gt = G.T
+ G = None
+ ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True)
+ print np.array(ps)
+ print len(ps),sum(ps)
+ # Test results
+ p1 = round(ps[0],4)
+ p2 = round(ps[-1],4)
+ sys.stderr.write(options.geno+"\n")
+ if options.geno == 'data/small.geno':
+ assert p1==0.0708, "p1=%f" % p1
+ assert p2==0.1417, "p2=%f" % p2
+ if options.geno == 'data/small_na.geno':
+ assert p1==0.0897, "p1=%f" % p1
+ assert p2==0.0405, "p2=%f" % p2
+ if options.geno == 'data/test8000.geno':
+ # assert p1==0.8984, "p1=%f" % p1
+ # assert p2==0.9621, "p2=%f" % p2
+ assert round(sum(ps)) == 4070
+ assert len(ps) == 8000
+elif cmd == 'redis':
# Emulating the redis setup of GN2
G = g
print "Original G",G.shape, "\n", G
- if y != None:
+ if y != None and options.remove_missing_phenotypes:
gnt = np.array(g).T
- Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose)
G = g.T
print "Removed missing phenotypes",G.shape, "\n", G
+ else:
+ Y = y
if options.maf_normalization:
G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
print "MAF replacements: \n",G
- if not options.skip_genotype_normalization:
+ if options.genotype_normalization:
G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
g = None
gnt = None
gt = G.T
G = None
- ps, ts = gn2_load_redis('testrun','other',k,Y,gt)
+ ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False)
print np.array(ps)
- # Test results
+ print len(ps),sum(ps)
+ # Test results 4070.02346579
p1 = round(ps[0],4)
p2 = round(ps[-1],4)
sys.stderr.write(options.geno+"\n")
@@ -128,15 +161,15 @@ if cmd == 'redis':
assert p1==0.0708, "p1=%f" % p1
assert p2==0.1417, "p2=%f" % p2
if options.geno == 'data/small_na.geno':
- assert p1==0.0958, "p1=%f" % p1
- assert p2==0.0435, "p2=%f" % p2
+ assert p1==0.0897, "p1=%f" % p1
+ assert p2==0.0405, "p2=%f" % p2
if options.geno == 'data/test8000.geno':
- assert p1==0.8984, "p1=%f" % p1
- assert p2==0.9623, "p2=%f" % p2
+ assert round(sum(ps)) == 4070
+ assert len(ps) == 8000
elif cmd == 'kinship':
G = g
print "Original G",G.shape, "\n", G
- if y != None:
+ if y != None and options.remove_missing_phenotypes:
gnt = np.array(g).T
Y,g = phenotype.remove_missing(y,g.T,options.verbose)
G = g.T
@@ -144,32 +177,32 @@ elif cmd == 'kinship':
if options.maf_normalization:
G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
print "MAF replacements: \n",G
- if not options.skip_genotype_normalization:
+ if options.genotype_normalization:
G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
g = None
gnt = None
if options.test_kinship:
- K = kinship_full(G)
+ K = kinship_full(np.copy(G))
print "Genotype",G.shape, "\n", G
print "first Kinship method",K.shape,"\n",K
k1 = round(K[0][0],4)
- K2 = calculate_kinship(np.copy(G.T),temp_data=None)
+ K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None)
print "Genotype",G.shape, "\n", G
print "GN2 Kinship method",K2.shape,"\n",K2
k2 = round(K2[0][0],4)
print "Genotype",G.shape, "\n", G
- K3 = kinship(np.copy(G),options)
+ K3 = kinship(G.T)
print "third Kinship method",K3.shape,"\n",K3
sys.stderr.write(options.geno+"\n")
k3 = round(K3[0][0],4)
if options.geno == 'data/small.geno':
- assert k1==0.7939, "k1=%f" % k1
+ assert k1==0.8, "k1=%f" % k1
assert k2==0.7939, "k2=%f" % k2
assert k3==0.7939, "k3=%f" % k3
if options.geno == 'data/small_na.geno':
- assert k1==0.7172, "k1=%f" % k1
+ assert k1==0.8333, "k1=%f" % k1
assert k2==0.7172, "k2=%f" % k2
assert k3==0.7172, "k3=%f" % k3
if options.geno == 'data/test8000.geno':