aboutsummaryrefslogtreecommitdiff
path: root/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/wqflask/my_pylmm/pyLMM/runlmm.py')
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/runlmm.py92
1 files changed, 68 insertions, 24 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
index ff53b4ea..738268be 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/runlmm.py
@@ -19,32 +19,40 @@
from optparse import OptionParser
import sys
-import os
+import tsvreader
import numpy as np
-# from lmm import LMM, run_other
-import csv
-
+from lmm import gn2_load_redis
usage = """
python runlmm.py [options] command
- runlmm.py processing multiplexer reads standard input types and calls the routines
+ runlmm.py processing multiplexer reads standardised input formats
+ and calls the different routines
Current commands are:
parse : only parse input files
+ redis : use Redis to call into GN2
try --help for more information
"""
+
parser = OptionParser(usage=usage)
# parser.add_option("-f", "--file", dest="input file",
# help="In", metavar="FILE")
parser.add_option("--kinship",dest="kinship",
- help="Kinship file format")
+ help="Kinship file format 1.0")
+parser.add_option("--pheno",dest="pheno",
+ help="Phenotype file format 1.0")
+parser.add_option("--geno",dest="geno",
+ help="Genotype file format 1.0")
parser.add_option("-q", "--quiet",
action="store_false", dest="verbose", default=True,
help="don't print status messages to stdout")
+parser.add_option("--test",
+ action="store_true", dest="testing", default=False,
+ help="Testing mode")
(options, args) = parser.parse_args()
@@ -55,22 +63,58 @@ if len(args) != 1:
cmd = args[0]
print "Command: ",cmd
-
if options.kinship:
- K1 = []
- print options.kinship
- with open(options.kinship,'r') as tsvin:
- if tsvin.readline().strip() != "# Kinship format version 1.0":
- raise Exception("Expecting Kinship format version 1.0")
- tsvin.readline()
- tsvin.readline()
- tsv = csv.reader(tsvin, delimiter='\t')
- for row in tsv:
- ns = np.genfromtxt(row[1:])
- K1.append(ns) # <--- slow
- K = np.array(K1)
-
-if cmd == 'parse':
- pass
-else:
- raise Exception("Unknown command")
+ k = tsvreader.kinship(options.kinship)
+ print k.shape
+
+if options.pheno:
+ y = tsvreader.pheno(options.pheno)
+ print y.shape
+
+if options.geno:
+ g = tsvreader.geno(options.geno)
+ print g.shape
+
+def normalizeGenotype(G):
+ # Run for every SNP list (num individuals)
+ x = True - np.isnan(G) # Matrix of True/False
+ m = G[x].mean() # Global mean value
+ # print m
+ s = np.sqrt(G[x].var()) # Global stddev
+ # print s
+ G[np.isnan(G)] = m # Plug-in mean values for missing
+ if s == 0:
+ G = G - m # Subtract the mean
+ else:
+ G = (G - m) / s # Normalize the deviation
+ return G
+
+# g = g.reshape((1, -1))[0]
+print("Before",g)
+gn = []
+for snp in g:
+ gn.append( normalizeGenotype(snp) )
+
+gn = g
+gn = np.array(gn)
+print("After1",gn)
+gnT = gn.T
+print("After",gnT)
+# G = gnT
+G = gnT
+print "G shape",G.shape
+# sys.exit(1)
+# assert(G[0,0]==-2.25726341)
+
+# Remove individuals with missing phenotypes
+v = np.isnan(y)
+keep = True - v
+if v.sum():
+ if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
+ y = y[keep]
+ G = G[keep,:]
+ k = k[keep,:][:,keep]
+
+if cmd == 'redis':
+ ps, ts = gn2_load_redis('testrun','other',np.array(k),y,G,options.testing)
+ print np.array(ps)