diff options
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 75 |
1 files changed, 54 insertions, 21 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 6bb79856..324c4f2c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,7 +21,7 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis, calculate_kinship +from lmm import gn2_load_redis, calculate_kinship_old from kinship import kinship, kinship_full import genotype import phenotype @@ -54,9 +54,12 @@ parser.add_option("--geno",dest="geno", parser.add_option("--maf-normalization", action="store_true", dest="maf_normalization", default=False, help="Apply MAF genotype normalization") -parser.add_option("--skip-genotype-normalization", - action="store_true", dest="skip_genotype_normalization", default=False, - help="Skip genotype normalization") +parser.add_option("--genotype-normalization", + action="store_true", dest="genotype_normalization", default=False, + help="Force genotype normalization") +parser.add_option("--remove-missing-phenotypes", + action="store_true", dest="remove_missing_phenotypes", default=False, + help="Remove missing phenotypes") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") @@ -99,28 +102,58 @@ if options.geno: g = tsvreader.geno(options.geno) print g.shape -if cmd == 'redis': +if cmd == 'redis_new': + # The main difference between redis_new and redis is that missing + # phenotypes are handled by the first + Y = y + G = g + print "Original G",G.shape, "\n", G + + gt = G.T + G = None + ps, ts = gn2_load_redis('testrun','other',k,Y,gt,new_code=True) + print np.array(ps) + print len(ps),sum(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + sys.stderr.write(options.geno+"\n") + if options.geno == 'data/small.geno': + assert p1==0.0708, "p1=%f" % p1 + assert p2==0.1417, "p2=%f" % p2 + if options.geno == 'data/small_na.geno': + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 + if options.geno == 'data/test8000.geno': + # assert p1==0.8984, "p1=%f" % p1 + # assert p2==0.9621, "p2=%f" % p2 + assert round(sum(ps)) == 4070 + assert len(ps) == 8000 +elif cmd == 'redis': # Emulating the redis setup of GN2 G = g print "Original G",G.shape, "\n", G - if y != None: + if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T - Y,g = phenotype.remove_missing(y,g.T,options.verbose) + Y,g,keep = phenotype.remove_missing(y,g.T,options.verbose) G = g.T print "Removed missing phenotypes",G.shape, "\n", G + else: + Y = y if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G - if not options.skip_genotype_normalization: + if options.genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) g = None gnt = None gt = G.T G = None - ps, ts = gn2_load_redis('testrun','other',k,Y,gt) + ps, ts = gn2_load_redis('testrun','other',k,Y,gt, new_code=False) print np.array(ps) - # Test results + print len(ps),sum(ps) + # Test results 4070.02346579 p1 = round(ps[0],4) p2 = round(ps[-1],4) sys.stderr.write(options.geno+"\n") @@ -128,15 +161,15 @@ if cmd == 'redis': assert p1==0.0708, "p1=%f" % p1 assert p2==0.1417, "p2=%f" % p2 if options.geno == 'data/small_na.geno': - assert p1==0.0958, "p1=%f" % p1 - assert p2==0.0435, "p2=%f" % p2 + assert p1==0.0897, "p1=%f" % p1 + assert p2==0.0405, "p2=%f" % p2 if options.geno == 'data/test8000.geno': - assert p1==0.8984, "p1=%f" % p1 - assert p2==0.9623, "p2=%f" % p2 + assert round(sum(ps)) == 4070 + assert len(ps) == 8000 elif cmd == 'kinship': G = g print "Original G",G.shape, "\n", G - if y != None: + if y != None and options.remove_missing_phenotypes: gnt = np.array(g).T Y,g = phenotype.remove_missing(y,g.T,options.verbose) G = g.T @@ -144,32 +177,32 @@ elif cmd == 'kinship': if options.maf_normalization: G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) print "MAF replacements: \n",G - if not options.skip_genotype_normalization: + if options.genotype_normalization: G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) g = None gnt = None if options.test_kinship: - K = kinship_full(G) + K = kinship_full(np.copy(G)) print "Genotype",G.shape, "\n", G print "first Kinship method",K.shape,"\n",K k1 = round(K[0][0],4) - K2 = calculate_kinship(np.copy(G.T),temp_data=None) + K2,G = calculate_kinship_old(np.copy(G).T,temp_data=None) print "Genotype",G.shape, "\n", G print "GN2 Kinship method",K2.shape,"\n",K2 k2 = round(K2[0][0],4) print "Genotype",G.shape, "\n", G - K3 = kinship(np.copy(G),options) + K3 = kinship(G.T) print "third Kinship method",K3.shape,"\n",K3 sys.stderr.write(options.geno+"\n") k3 = round(K3[0][0],4) if options.geno == 'data/small.geno': - assert k1==0.7939, "k1=%f" % k1 + assert k1==0.8, "k1=%f" % k1 assert k2==0.7939, "k2=%f" % k2 assert k3==0.7939, "k3=%f" % k3 if options.geno == 'data/small_na.geno': - assert k1==0.7172, "k1=%f" % k1 + assert k1==0.8333, "k1=%f" % k1 assert k2==0.7172, "k2=%f" % k2 assert k3==0.7172, "k3=%f" % k3 if options.geno == 'data/test8000.geno': |