diff options
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/runlmm.py | 143 |
1 files changed, 100 insertions, 43 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py index 738268be..469ba6c9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py @@ -21,18 +21,22 @@ from optparse import OptionParser import sys import tsvreader import numpy as np -from lmm import gn2_load_redis +from lmm import gn2_load_redis, calculate_kinship +from kinship import kinship, kinship_full +import genotype +import phenotype usage = """ python runlmm.py [options] command runlmm.py processing multiplexer reads standardised input formats - and calls the different routines + and calls the different routines (writes to stdout) Current commands are: parse : only parse input files redis : use Redis to call into GN2 + kinship : calculate (new) kinship matrix try --help for more information """ @@ -47,12 +51,28 @@ parser.add_option("--pheno",dest="pheno", help="Phenotype file format 1.0") parser.add_option("--geno",dest="geno", help="Genotype file format 1.0") +parser.add_option("--maf-normalization", + action="store_true", dest="maf_normalization", default=False, + help="Apply MAF genotype normalization") +parser.add_option("--skip-genotype-normalization", + action="store_true", dest="skip_genotype_normalization", default=False, + help="Skip genotype normalization") parser.add_option("-q", "--quiet", action="store_false", dest="verbose", default=True, help="don't print status messages to stdout") +parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication") +parser.add_option("-t", "--threads", + type="int", dest="numThreads", + help="Threads to use") +parser.add_option("--saveKvaKve", + action="store_true", dest="saveKvaKve", default=False, + help="Testing mode") parser.add_option("--test", action="store_true", dest="testing", default=False, help="Testing mode") +parser.add_option("--test-kinship", + action="store_true", dest="test_kinship", default=False, + help="Testing mode for Kinship calculation") (options, args) = parser.parse_args() @@ -63,6 +83,10 @@ if len(args) != 1: cmd = args[0] print "Command: ",cmd +k = None +y = None +g = None + if options.kinship: k = tsvreader.kinship(options.kinship) print k.shape @@ -75,46 +99,79 @@ if options.geno: g = tsvreader.geno(options.geno) print g.shape -def normalizeGenotype(G): - # Run for every SNP list (num individuals) - x = True - np.isnan(G) # Matrix of True/False - m = G[x].mean() # Global mean value - # print m - s = np.sqrt(G[x].var()) # Global stddev - # print s - G[np.isnan(G)] = m # Plug-in mean values for missing - if s == 0: - G = G - m # Subtract the mean - else: - G = (G - m) / s # Normalize the deviation - return G - -# g = g.reshape((1, -1))[0] -print("Before",g) -gn = [] -for snp in g: - gn.append( normalizeGenotype(snp) ) - -gn = g -gn = np.array(gn) -print("After1",gn) -gnT = gn.T -print("After",gnT) -# G = gnT -G = gnT -print "G shape",G.shape -# sys.exit(1) -# assert(G[0,0]==-2.25726341) - -# Remove individuals with missing phenotypes -v = np.isnan(y) -keep = True - v -if v.sum(): - if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum())) - y = y[keep] - G = G[keep,:] - k = k[keep,:][:,keep] - if cmd == 'redis': - ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing) + # Emulating the redis setup of GN2 + G = g + print "Original G",G.shape, "\n", G + if y != None: + gnt = np.array(g).T + Y,g = phenotype.remove_missing(y,g.T,options.verbose) + G = g.T + print "Removed missing phenotypes",G.shape, "\n", G + if options.maf_normalization: + G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) + print "MAF replacements: \n",G + if not options.skip_genotype_normalization: + G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + g = None + gnt = None + + ps, ts = gn2_load_redis('testrun','other',k,Y,G.T) print np.array(ps) + # Test results + p1 = round(ps[0],4) + p2 = round(ps[-1],4) + sys.stderr.write(options.geno+"\n") + if options.geno == 'data/small.geno': + assert p1==0.0708, "p1=%f" % p1 + assert p2==0.1417, "p2=%f" % p2 + if options.geno == 'data/small_na.geno': + assert p1==0.0958, "p1=%f" % p1 + assert p2==0.0435, "p2=%f" % p2 + if options.geno == 'data/test8000.geno': + assert p1==0.8984, "p1=%f" % p1 + assert p2==0.9623, "p2=%f" % p2 +elif cmd == 'kinship': + G = g + print "Original G",G.shape, "\n", G + if y != None: + gnt = np.array(g).T + Y,g = phenotype.remove_missing(y,g.T,options.verbose) + G = g.T + print "Removed missing phenotypes",G.shape, "\n", G + if options.maf_normalization: + G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g ) + print "MAF replacements: \n",G + if not options.skip_genotype_normalization: + G = np.apply_along_axis( genotype.normalize, axis=1, arr=G) + g = None + gnt = None + + if options.test_kinship: + K = kinship_full(G) + print "Genotype",G.shape, "\n", G + print "first Kinship method",K.shape,"\n",K + k1 = round(K[0][0],4) + K2 = calculate_kinship(np.copy(G.T),temp_data=None) + print "Genotype",G.shape, "\n", G + print "GN2 Kinship method",K2.shape,"\n",K2 + k2 = round(K2[0][0],4) + + print "Genotype",G.shape, "\n", G + K3 = kinship(np.copy(G),options) + print "third Kinship method",K3.shape,"\n",K3 + sys.stderr.write(options.geno+"\n") + k3 = round(K3[0][0],4) + if options.geno == 'data/small.geno': + assert k1==0.7939, "k1=%f" % k1 + assert k2==0.7939, "k2=%f" % k2 + assert k3==0.7939, "k3=%f" % k3 + if options.geno == 'data/small_na.geno': + assert k1==0.7172, "k1=%f" % k1 + assert k2==0.7172, "k2=%f" % k2 + assert k3==0.7172, "k3=%f" % k3 + if options.geno == 'data/test8000.geno': + assert k3==1.4352, "k3=%f" % k3 + +else: + print "Doing nothing" |