aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py143
1 files changed, 100 insertions, 43 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index 738268be..469ba6c9 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -21,18 +21,22 @@ from optparse import OptionParser
import sys
import tsvreader
import numpy as np
-from lmm import gn2_load_redis
+from lmm import gn2_load_redis, calculate_kinship
+from kinship import kinship, kinship_full
+import genotype
+import phenotype
usage = """
python runlmm.py [options] command
runlmm.py processing multiplexer reads standardised input formats
- and calls the different routines
+ and calls the different routines (writes to stdout)
Current commands are:
parse : only parse input files
redis : use Redis to call into GN2
+ kinship : calculate (new) kinship matrix
try --help for more information
"""
@@ -47,12 +51,28 @@ parser.add_option("--pheno",dest="pheno",
help="Phenotype file format 1.0")
parser.add_option("--geno",dest="geno",
help="Genotype file format 1.0")
+parser.add_option("--maf-normalization",
+ action="store_true", dest="maf_normalization", default=False,
+ help="Apply MAF genotype normalization")
+parser.add_option("--skip-genotype-normalization",
+ action="store_true", dest="skip_genotype_normalization", default=False,
+ help="Skip genotype normalization")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
+parser.add_option("--blas", action="store_true", default=False, dest="useBLAS", help="Use BLAS instead of numpy matrix multiplication")
+parser.add_option("-t", "--threads",
+ type="int", dest="numThreads",
+ help="Threads to use")
+parser.add_option("--saveKvaKve",
+ action="store_true", dest="saveKvaKve", default=False,
+ help="Testing mode")
parser.add_option("--test",
action="store_true", dest="testing", default=False,
help="Testing mode")
+parser.add_option("--test-kinship",
+ action="store_true", dest="test_kinship", default=False,
+ help="Testing mode for Kinship calculation")
(options, args) = parser.parse_args()
@@ -63,6 +83,10 @@ if len(args) != 1:
cmd = args[0]
print "Command: ",cmd
+k = None
+y = None
+g = None
+
if options.kinship:
k = tsvreader.kinship(options.kinship)
print k.shape
@@ -75,46 +99,79 @@ if options.geno:
g = tsvreader.geno(options.geno)
print g.shape
-def normalizeGenotype(G):
- # Run for every SNP list (num individuals)
- x = True - np.isnan(G) # Matrix of True/False
- m = G[x].mean() # Global mean value
- # print m
- s = np.sqrt(G[x].var()) # Global stddev
- # print s
- G[np.isnan(G)] = m # Plug-in mean values for missing
- if s == 0:
- G = G - m # Subtract the mean
- else:
- G = (G - m) / s # Normalize the deviation
- return G
-
-# g = g.reshape((1, -1))[0]
-print("Before",g)
-gn = []
-for snp in g:
- gn.append( normalizeGenotype(snp) )
-
-gn = g
-gn = np.array(gn)
-print("After1",gn)
-gnT = gn.T
-print("After",gnT)
-# G = gnT
-G = gnT
-print "G shape",G.shape
-# sys.exit(1)
-# assert(G[0,0]==-2.25726341)
-
-# Remove individuals with missing phenotypes
-v = np.isnan(y)
-keep = True - v
-if v.sum():
- if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
- y = y[keep]
- G = G[keep,:]
- k = k[keep,:][:,keep]
-
if cmd == 'redis':
- ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
+ # Emulating the redis setup of GN2
+ G = g
+ print "Original G",G.shape, "\n", G
+ if y != None:
+ gnt = np.array(g).T
+ Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ G = g.T
+ print "Removed missing phenotypes",G.shape, "\n", G
+ if options.maf_normalization:
+ G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+ print "MAF replacements: \n",G
+ if not options.skip_genotype_normalization:
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+ g = None
+ gnt = None
+
+ ps, ts = gn2_load_redis('testrun','other',k,Y,G.T)
print np.array(ps)
+ # Test results
+ p1 = round(ps[0],4)
+ p2 = round(ps[-1],4)
+ sys.stderr.write(options.geno+"\n")
+ if options.geno == 'data/small.geno':
+ assert p1==0.0708, "p1=%f" % p1
+ assert p2==0.1417, "p2=%f" % p2
+ if options.geno == 'data/small_na.geno':
+ assert p1==0.0958, "p1=%f" % p1
+ assert p2==0.0435, "p2=%f" % p2
+ if options.geno == 'data/test8000.geno':
+ assert p1==0.8984, "p1=%f" % p1
+ assert p2==0.9623, "p2=%f" % p2
+elif cmd == 'kinship':
+ G = g
+ print "Original G",G.shape, "\n", G
+ if y != None:
+ gnt = np.array(g).T
+ Y,g = phenotype.remove_missing(y,g.T,options.verbose)
+ G = g.T
+ print "Removed missing phenotypes",G.shape, "\n", G
+ if options.maf_normalization:
+ G = np.apply_along_axis( genotype.replace_missing_with_MAF, axis=0, arr=g )
+ print "MAF replacements: \n",G
+ if not options.skip_genotype_normalization:
+ G = np.apply_along_axis( genotype.normalize, axis=1, arr=G)
+ g = None
+ gnt = None
+
+ if options.test_kinship:
+ K = kinship_full(G)
+ print "Genotype",G.shape, "\n", G
+ print "first Kinship method",K.shape,"\n",K
+ k1 = round(K[0][0],4)
+ K2 = calculate_kinship(np.copy(G.T),temp_data=None)
+ print "Genotype",G.shape, "\n", G
+ print "GN2 Kinship method",K2.shape,"\n",K2
+ k2 = round(K2[0][0],4)
+
+ print "Genotype",G.shape, "\n", G
+ K3 = kinship(np.copy(G),options)
+ print "third Kinship method",K3.shape,"\n",K3
+ sys.stderr.write(options.geno+"\n")
+ k3 = round(K3[0][0],4)
+ if options.geno == 'data/small.geno':
+ assert k1==0.7939, "k1=%f" % k1
+ assert k2==0.7939, "k2=%f" % k2
+ assert k3==0.7939, "k3=%f" % k3
+ if options.geno == 'data/small_na.geno':
+ assert k1==0.7172, "k1=%f" % k1
+ assert k2==0.7172, "k2=%f" % k2
+ assert k3==0.7172, "k3=%f" % k3
+ if options.geno == 'data/test8000.geno':
+ assert k3==1.4352, "k3=%f" % k3
+
+else:
+ print "Doing nothing"