From 61b8ae229687612b4c9d4a0b089a9ef0ef47ff2c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 20 Feb 2015 14:40:55 +0300
Subject: Moved pylmm scripts that are not used by GN2

---
 wqflask/wqflask/my_pylmm/example.py              |  58 ----
 wqflask/wqflask/my_pylmm/pylmmGWAS.py            | 346 -----------------------
 wqflask/wqflask/my_pylmm/pylmmKinship.py         | 186 ------------
 wqflask/wqflask/my_pylmm/run_pylmm.py            |  77 -----
 wqflask/wqflask/my_pylmm/scripts/example.py      |  58 ++++
 wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py    | 346 +++++++++++++++++++++++
 wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py | 186 ++++++++++++
 wqflask/wqflask/my_pylmm/scripts/run_pylmm.py    |  77 +++++
 8 files changed, 667 insertions(+), 667 deletions(-)
 delete mode 100755 wqflask/wqflask/my_pylmm/example.py
 delete mode 100755 wqflask/wqflask/my_pylmm/pylmmGWAS.py
 delete mode 100755 wqflask/wqflask/my_pylmm/pylmmKinship.py
 delete mode 100755 wqflask/wqflask/my_pylmm/run_pylmm.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/example.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
 create mode 100755 wqflask/wqflask/my_pylmm/scripts/run_pylmm.py

diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py
deleted file mode 100755
index 8b30debd..00000000
--- a/wqflask/wqflask/my_pylmm/example.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/python
-
-from __future__ import absolute_import, print_function, division
-
-import sys
-import time
-
-import numpy as np
-from pyLMM import lmm
-
-from pprint import pformat as pf
-
-
-Y = np.genfromtxt('data/mdp.exprs.1.new')
-print("exprs is:", pf(Y.shape))
-
-# Loading npdump and first 1000 snps for speed
-#K = np.load('data/hmdp.liver.K.npdump')
-#snps = np.load('data/hmdp.liver.snps.1000.npdump').T
-
-# These three lines will load all SNPs (from npdump or from txt) and 
-# calculate the kinship
-snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
-print("snps is:", pf(snps.shape))
-#snps = snps[~np.isnan(snps).all(axis=1)]
-#print ("snps is now:", pf(snps))
-np.savetxt("/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.trimmed", snps, fmt='%s', delimiter=' ')
-#snps = np.load('data/hmdp.liver.snps.npdump').T
-K = lmm.calculateKinship(snps)
-#print("K is:", pf(K))
-#print("Y is:", pf(Y.shape))
-
-# Instantiate a LMM object for the phentoype Y and fit the null model
-L = lmm.LMM(Y,K)
-L.fit()
-
-# Manually calculate the association at one SNP
-X = snps[:,0]
-X[np.isnan(X)] = X[True - np.isnan(X)].mean() # Fill missing with MAF
-X = X.reshape(len(X),1)
-if X.var() == 0: ts,ps = (np.nan,np.nan)
-else: ts,ps = L.association(X)
-
-# If I want to refit the variance component
-L.fit(X=X)
-ts,ps = L.association(X)
-
-# If I want to do a genome-wide scan over the 1000 SNPs.
-# This call will use REML (REML = False means use ML).
-# It will also refit the variance components for each SNP.
-# Setting refit = False will cause the program to fit the model once
-# and hold those variance component estimates for each SNP.
-begin = time.time()
-TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=False)
-print("TS is:", pf(TS))
-print("PS is:", pf(PS))
-end = time.time()
-sys.stderr.write("Total time for 1000 SNPs: %0.3f\n" % (end- begin))
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/pylmmGWAS.py
deleted file mode 100755
index 74b58dd6..00000000
--- a/wqflask/wqflask/my_pylmm/pylmmGWAS.py
+++ /dev/null
@@ -1,346 +0,0 @@
-#!/usr/bin/python
-
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-#The program is free for academic use. Please contact Nick Furlotte
-#<nick.furlotte@gmail.com> if you are interested in using the software for
-#commercial purposes.
-
-#The software must not be modified and distributed without prior
-#permission of the author.
-
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import pdb
-import time
-import sys
-
-def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
-
-def formatResult(id,beta,betaSD,ts,ps):
-   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
-   
-def outputResult(id,beta,betaSD,ts,ps):
-   out.write(formatResult(id,beta,betaSD,ts,ps))
-
-from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --kfile kinshipFile --[tfile | bfile] plinkFileBase outfileBase
-
-This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.  
-The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.  
-
-Basic usage:
-
-      python pylmmGWAS.py -v --bfile plinkFile --kfile preComputedKinship.kin --phenofile plinkFormattedPhenotypeFile resultFile
-
-	    """
-parser = OptionParser(usage=usage)
-
-basicGroup = OptionGroup(parser, "Basic Options")
-advancedGroup = OptionGroup(parser, "Advanced Options")
-experimentalGroup = OptionGroup(parser, "Experimental Options")
-
-#basicGroup.add_option("--pfile", dest="pfile",
-#                  help="The base for a PLINK ped file")
-basicGroup.add_option("--tfile", dest="tfile",
-                  help="The base for a PLINK tped file")
-basicGroup.add_option("--bfile", dest="bfile",
-                  help="The base for a PLINK binary bed file")
-basicGroup.add_option("--phenofile", dest="phenoFile", default=None,
-                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  This file should be in plink format. ")
-
-# EMMA Options
-basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the columns and snps on the rows.")
-basicGroup.add_option("--emmaPHENO", dest="emmaPheno", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each phenotype as one row.")
-basicGroup.add_option("--emmaCOV", dest="emmaCov", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each covariate as one row.")
-
-basicGroup.add_option("--kfile", dest="kfile",
-                  help="The location of a kinship file.  This is an nxn plain text file and can be computed with the pylmmKinship program.")
-basicGroup.add_option("--covfile", dest="covfile",
-                  help="The location of a covariate file file.  This is a plink formatted covariate file.")
-basicGroup.add_option("-p", type="int", dest="pheno", help="The phenotype index to be used in association.", default=0)
-
-
-
-advancedGroup.add_option("--removeMissingGenotypes",
-                  action="store_false", dest="normalizeGenotype", default=True,
-                  help="By default the program replaces missing genotypes with the minor allele frequency.  This option overrides that behavior making the program remove missing individuals.  NOTE: This can increase running time due to the need to recompute the eigendecomposition for each SNP with missing values.")
-advancedGroup.add_option("--refit",
-                  action="store_true", dest="refit", default=False,
-                  help="Refit the variance components at each SNP (default is to lock in the variance components under the null).")
-
-advancedGroup.add_option("--REML",
-                  action="store_true", dest="REML", default=False,
-                  help="Use restricted maximum-likelihood (REML) (default is maximum-likelihood).")
-#advancedGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
-advancedGroup.add_option("--eigen", dest="eigenfile",
-                  help="The location of the precomputed eigendecomposition for the kinship file.  These can be computed with pylmmKinship.py.")
-advancedGroup.add_option("--noMean", dest="noMean", default=False,action="store_true",
-                  help="This option only applies when --cofile is used.  When covfile is provided, the program will automatically add a global mean covariate to the model unless this option is specified.")
-
-basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
-
-advancedGroup.add_option("-v", "--verbose",
-                  action="store_true", dest="verbose", default=False,
-                  help="Print extra info")
-
-# Experimental Group Options
-experimentalGroup.add_option("--kfile2", dest="kfile2", 
-                  help="The location of a second kinship file.  This file has the same format as the first kinship.  This might be used if you want to correct for another form of confounding.")
-
-parser.add_option_group(basicGroup)
-parser.add_option_group(advancedGroup)
-parser.add_option_group(experimentalGroup)
-
-(options, args) = parser.parse_args()
-
-import sys
-import os
-import numpy as np
-from scipy import linalg
-from pylmm.lmm import LMM
-from pylmm import input
-
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue 
-
-if len(args) != 1:  
-   parser.print_help()
-   sys.exit()
-
-outFile = args[0]
-
-if not options.tfile and not options.bfile and not options.emmaFile: 
-#if not options.pfile and not options.tfile and not options.bfile: 
-   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an EMMA formatted file (--emmaSNP).")
-if not options.kfile:
-   parser.error("Please provide a pre-computed kinship file")
-
-numThreads = None
-if options.numThreads:
-   numThreads = int(options.numThreads)
-
-# READING PLINK input
-if options.verbose: sys.stderr.write("Reading SNP input...\n")
-if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-#elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.emmaFile: IN = input.plink(options.emmaFile,type='emma', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-else: parser.error("You must provide at least one PLINK input file base")
-
-if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos') and not os.path.isfile(options.emmaPheno):
-   parser.error("No .pheno file exist for %s.  Please provide a phenotype file using the --phenofile or --emmaPHENO argument." % (options.phenoFile or IN.fbase + '.phenos'))
-
-# Read the emma phenotype file if provided.
-# Format should be rows are phenotypes and columns are individuals.
-if options.emmaPheno:
-   f = open(options.emmaPheno,'r')
-   P = []
-   for line in f:
-      v = line.strip().split()
-      p = []
-      for x in v:
-	 try:
-	    p.append(float(x))
-	 except: p.append(np.nan)
-      P.append(p)
-   f.close()
-   IN.phenos = np.array(P).T
-
-# READING Covariate File
-if options.covfile: 
-   if options.verbose: 
-      sys.stderr.write("Reading covariate file...\n")
-   P = IN.getCovariates(options.covfile) 
-   if options.noMean: 
-      X0 = P
-   else: 
-      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
-elif options.emmaCov:
-   if options.verbose: 
-      sys.stderr.write("Reading covariate file...\n")
-   P = IN.getCovariatesEMMA(options.emmaCov) 
-   if options.noMean: 
-      X0 = P
-   else: 
-      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
-else: 
-   X0 = np.ones((IN.phenos.shape[0],1))
-
-if np.isnan(X0).sum(): 
-      parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
-
-# READING Kinship - major bottleneck for large datasets
-if options.verbose: sys.stderr.write("Reading kinship...\n")
-begin = time.time()
-# This method seems to be the fastest and works if you already know the size of the matrix
-if options.kfile[-3:] == '.gz':
-   import gzip
-   f = gzip.open(options.kfile,'r')
-   F = f.read() # might exhaust mem if the file is huge
-   K = np.fromstring(F,sep=' ') # Assume that space separated
-   f.close()
-else: 
-   K = np.fromfile(open(options.kfile,'r'),sep=" ")
-K.resize((len(IN.indivs),len(IN.indivs)))
-end = time.time()
-# Other slower ways
-#K = np.loadtxt(options.kfile)
-#K = np.genfromtxt(options.kfile)
-if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
-
-if options.kfile2:
-   if options.verbose: sys.stderr.write("Reading second kinship...\n")
-   begin = time.time()
-   # This method seems to be the fastest and works if you already know the size of the matrix
-   if options.kfile2[-3:] == '.gz':
-      import gzip
-      f = gzip.open(options.kfile2,'r')
-      F = f.read() # might exhaust mem if the file is huge
-      K2 = np.fromstring(F,sep=' ') # Assume that space separated
-      f.close()
-   else: K2 = np.fromfile(open(options.kfile2,'r'),sep=" ")
-   K2.resize((len(IN.indivs),len(IN.indivs)))
-   end = time.time()
-   if options.verbose: sys.stderr.write("Read the %d x %d second kinship matrix in %0.3fs \n" % (K2.shape[0],K2.shape[1],end-begin))
-
-# PROCESS the phenotype data -- Remove missing phenotype values
-# Keep will now index into the "full" data to select what we keep (either everything or a subset of non missing data
-Y = IN.phenos[:,options.pheno]
-v = np.isnan(Y)
-keep = True - v
-if v.sum():
-   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
-   Y = Y[keep]
-   X0 = X0[keep,:]
-   K = K[keep,:][:,keep]
-   if options.kfile2: K2 = K2[keep,:][:,keep]
-   Kva = []
-   Kve = []
-
-# Only load the decomposition if we did not remove individuals.
-# Otherwise it would not be correct and we would have to compute it again.
-if not v.sum() and options.eigenfile:
-   if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
-   Kva = np.load(options.eigenfile + ".Kva")
-   Kve = np.load(options.eigenfile + ".Kve")
-else: 
-   Kva = []
-   Kve = []
-
-# CREATE LMM object for association
-n = K.shape[0]
-if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
-else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
-
-# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
-if not options.refit: 
-   if options.verbose: sys.stderr.write("Computing fit for null model\n")
-   L.fit()
-   if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
-   if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
-
-def compute_snp(collect):
-   snp = collect[0]
-   id = collect[1]
-   # result = []
-   # Check SNPs for missing values
-   x = snp[keep].reshape((n,1))  # all the SNPs
-   v = np.isnan(x).reshape((-1,))
-   if v.sum():
-      keeps = True - v
-      xs = x[keeps,:]
-      if keeps.sum() <= 1 or xs.var() <= 1e-6: 
-         # PS.append(np.nan)
-         # TS.append(np.nan)
-         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
-         # continue
-         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
-
-      # Its ok to center the genotype -  I used options.normalizeGenotype to 
-      # force the removal of missing genotypes as opposed to replacing them with MAF.
-      if not options.normalizeGenotype:
-         xs = (xs - xs.mean()) / np.sqrt(xs.var())
-      Ys = Y[keeps]
-      X0s = X0[keeps,:]
-      Ks = K[keeps,:][:,keeps]
-      if options.kfile2:
-         K2s = K2[keeps,:][:,keeps]
-         Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=options.verbose,K2=K2s)
-      else:
-         Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
-      if options.refit:
-        Ls.fit(X=xs,REML=options.REML)
-      else:
-         #try:
-         Ls.fit(REML=options.REML)
-         #except: pdb.set_trace()
-      ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
-   else:
-      if x.var() == 0: 
-         # PS.append(np.nan)
-         # TS.append(np.nan)
-         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
-         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
-         # continue
-
-      if options.refit:
-         L.fit(X=x,REML=options.REML)
-      # This is where it happens
-      ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
-   
-   return formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
-   # PS.append(ps)
-   # TS.append(ts)
-   # return len(result)
-   # compute.q.put(result)
-   # return None
-
-def f_init(q):
-   compute_snp.q = q
-
-# Set up the pool
-# mp.set_start_method('spawn')
-q = mp.Queue()
-p = mp.Pool(numThreads, f_init, [q])
-collect = []
-   
-# Buffers for pvalues and t-stats
-# PS = []
-# TS = []
-count = 0
-out = open(outFile,'w')
-printOutHead()
-
-for snp_id in IN:
-   count += 1
-   if count % 1000 == 0:
-      if options.verbose:
-         sys.stderr.write("At SNP %d\n" % count)
-      # if count>8000 :
-      #    break         # for testing only
-   if count % 100 == 0:
-      for line in p.imap(compute_snp,collect):
-         out.write(line)
-      collect = []
-      
-   collect.append(snp_id)
-for line in p.imap(compute_snp,collect):
-   out.write(line)
-
-
diff --git a/wqflask/wqflask/my_pylmm/pylmmKinship.py b/wqflask/wqflask/my_pylmm/pylmmKinship.py
deleted file mode 100755
index c9a73ece..00000000
--- a/wqflask/wqflask/my_pylmm/pylmmKinship.py
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/usr/bin/python
-
-# pylmm is a python-based linear mixed-model solver with applications to GWAS
-
-# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
-
-#The program is free for academic use. Please contact Nick Furlotte
-#<nick.furlotte@gmail.com> if you are interested in using the software for
-#commercial purposes.
-
-#The software must not be modified and distributed without prior
-#permission of the author.
-
-#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import sys
-import pdb
-
-from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --[tfile | bfile] plinkFileBase outfile
-
-"""
-
-parser = OptionParser(usage=usage)
-
-basicGroup = OptionGroup(parser, "Basic Options")
-#advancedGroup = OptionGroup(parser, "Advanced Options")
-
-#basicGroup.add_option("--pfile", dest="pfile",
-#                  help="The base for a PLINK ped file")
-basicGroup.add_option("--tfile", dest="tfile",
-                  help="The base for a PLINK tped file")
-basicGroup.add_option("--bfile", dest="bfile",
-                  help="The base for a PLINK binary ped file")
-basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
-                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the rows and snps on the columns.")
-basicGroup.add_option("--emmaNumSNPs", dest="numSNPs", type="int", default=0,
-		     help="When providing the emmaSNP file you need to specify how many snps are in the file")
-
-basicGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
-basicGroup.add_option("-n", default=1000,dest="computeSize", type="int", help="The maximum number of SNPs to read into memory at once (default 1000).  This is important when there is a large number of SNPs, because memory could be an issue.")
-basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
-
-basicGroup.add_option("-v", "--verbose",
-                  action="store_true", dest="verbose", default=False,
-                  help="Print extra info")
-
-parser.add_option_group(basicGroup)
-#parser.add_option_group(advancedGroup)
-
-(options, args) = parser.parse_args()
-if len(args) != 1: 
-   parser.print_help()
-   sys.exit()
-
-outFile = args[0]
-
-import sys
-import os
-import numpy as np
-from scipy import linalg
-from pylmm.lmm import calculateKinship
-from pylmm import input
-import multiprocessing as mp # Multiprocessing is part of the Python stdlib
-import Queue 
-
-if not options.tfile and not options.bfile and not options.emmaFile: 
-   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
-
-numThreads = None
-if options.numThreads:
-   numThreads = int(options.numThreads)
-   
-if options.verbose: sys.stderr.write("Reading PLINK input...\n")
-if options.bfile: IN = input.plink(options.bfile,type='b')
-elif options.tfile: IN = input.plink(options.tfile,type='t')
-#elif options.pfile: IN = input.plink(options.pfile,type='p')
-elif options.emmaFile: 
-   if not options.numSNPs: parser.error("You must provide the number of SNPs when specifying an emma formatted file.")
-   IN = input.plink(options.emmaFile,type='emma')
-else: parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
-
-def compute_W(job):
-   """
-   Read 1000 SNPs at a time into matrix and return the result
-   """
-   n = len(IN.indivs)
-   m = options.computeSize
-   W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
-   for j in range(0,options.computeSize):
-      row = job*m + j
-      if row >= IN.numSNPs:
-         W = W[:,range(0,j)]
-         break
-      snp,id = IN.next()
-      if snp.var() == 0:
-         continue
-      W[:,j] = snp  # set row to list of SNPs
-   return W
-
-def compute_dgemm(job,W):
-   """
-   Compute Kinship(W)*j
-
-   For every set of SNPs dgemm is used to multiply matrices T(W)*W
-   """
-   res = None
-   try:
-      res = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False)
-   except AttributeError:
-      res = np.dot(W,W.T) 
-   compute_dgemm.q.put([job,res])
-   return job
-
-def f_init(q):
-    compute_dgemm.q = q
-
-n = len(IN.indivs)
-# m = options.computeSize
-# jobsize=m
-
-IN.getSNPIterator()
-# Annoying hack to get around the fact that it is expensive to determine the number of SNPs in an emma file
-if options.emmaFile: IN.numSNPs = options.numSNPs
-# i = 0
-
-# mp.set_start_method('spawn')
-q = mp.Queue()
-p = mp.Pool(numThreads, f_init, [q])
-iterations = IN.numSNPs/options.computeSize+1
-# jobs = range(0,8) # range(0,iterations)
-
-results = []
-
-K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
-
-completed = 0
-
-# for job in range(8):
-for job in range(iterations):
-   if options.verbose:
-      sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
-   W = compute_W(job)
-   results.append(p.apply_async(compute_dgemm, (job,W)))   
-   # Do we have a result?
-   try: 
-     j,x = q.get_nowait()
-     if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
-     K_j = x
-     # print j,K_j[:,0]
-     K = K + K_j
-     completed += 1
-   except Queue.Empty:
-     pass
-
-for job in range(len(results)-completed):
-   j,x = q.get()
-   if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
-   K_j = x
-   # print j,K_j[:,0]
-   K = K + K_j
-        
-K = K / float(IN.numSNPs)
-if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
-np.savetxt(outFile,K)
-
-if options.saveEig:
-   if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
-   Kva,Kve = linalg.eigh(K)
-   if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
-   np.savetxt(outFile+".kva",Kva)
-   np.savetxt(outFile+".kve",Kve)
-      
-
-
-
diff --git a/wqflask/wqflask/my_pylmm/run_pylmm.py b/wqflask/wqflask/my_pylmm/run_pylmm.py
deleted file mode 100755
index 0c96d986..00000000
--- a/wqflask/wqflask/my_pylmm/run_pylmm.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from __future__ import absolute_import, print_function, division
-
-from base import data_set
-from base.species import TheSpecies
-    
-    def run(dataset_name, vals, temp_uuid):
-        """Generates p-values for each marker"""
-
-        tempdata = temp_data.TempData(temp_uuid)
-        
-        dataset = data_set.create_dataset(dataset_name)
-        species = TheSpecies(dataset=dataset)
-
-        samples = [] # Want only ones with values
-        vals = vals
-
-        for sample in dataset.group.samplelist:
-            samples.append(str(sample))
-            
-        gen_data(dataset, vals, tempdata)
-
-
-    def gen_data(dataset, vals)
-        dataset.group.get_markers()
-
-        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in vals])
-
-        if dataset.group.species == "human":
-            p_values, t_stats = gen_human_results(pheno_vector, tempdata)
-        else:
-            genotype_data = [marker['genotypes'] for marker in dataset.group.markers.markers]
-            
-            no_val_samples = self.identify_empty_samples()
-            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
-            
-            genotype_matrix = np.array(trimmed_genotype_data).T
-            
-            #print("pheno_vector: ", pf(pheno_vector))
-            #print("genotype_matrix: ", pf(genotype_matrix))
-            #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
-            
-            t_stats, p_values = lmm.run(
-                pheno_vector,
-                genotype_matrix,
-                restricted_max_likelihood=True,
-                refit=False,
-                temp_data=tempdata
-            )
-            #print("p_values:", p_values)
-        
-        self.dataset.group.markers.add_pvalues(p_values)
-        return self.dataset.group.markers.markers
-
-
-    def gen_human_results(self, pheno_vector, tempdata):
-        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
-
-        plink_input = input.plink(file_base, type='b')
-        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
-
-        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
-        covariate_matrix = np.ones((pheno_vector.shape[0],1))
-        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
-        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
-
-        p_values, t_stats = lmm.run_human(
-                pheno_vector,
-                covariate_matrix,
-                input_file_name,
-                kinship_matrix,
-                loading_progress=tempdata
-            )
-
-        return p_values, t_stats
-    
-if __name__ == '__main__':
-    run(dataset_name, vals, temp_uuid)
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/scripts/example.py b/wqflask/wqflask/my_pylmm/scripts/example.py
new file mode 100755
index 00000000..8b30debd
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/example.py
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+from __future__ import absolute_import, print_function, division
+
+import sys
+import time
+
+import numpy as np
+from pyLMM import lmm
+
+from pprint import pformat as pf
+
+
+Y = np.genfromtxt('data/mdp.exprs.1.new')
+print("exprs is:", pf(Y.shape))
+
+# Loading npdump and first 1000 snps for speed
+#K = np.load('data/hmdp.liver.K.npdump')
+#snps = np.load('data/hmdp.liver.snps.1000.npdump').T
+
+# These three lines will load all SNPs (from npdump or from txt) and 
+# calculate the kinship
+snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
+print("snps is:", pf(snps.shape))
+#snps = snps[~np.isnan(snps).all(axis=1)]
+#print ("snps is now:", pf(snps))
+np.savetxt("/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.trimmed", snps, fmt='%s', delimiter=' ')
+#snps = np.load('data/hmdp.liver.snps.npdump').T
+K = lmm.calculateKinship(snps)
+#print("K is:", pf(K))
+#print("Y is:", pf(Y.shape))
+
+# Instantiate a LMM object for the phentoype Y and fit the null model
+L = lmm.LMM(Y,K)
+L.fit()
+
+# Manually calculate the association at one SNP
+X = snps[:,0]
+X[np.isnan(X)] = X[True - np.isnan(X)].mean() # Fill missing with MAF
+X = X.reshape(len(X),1)
+if X.var() == 0: ts,ps = (np.nan,np.nan)
+else: ts,ps = L.association(X)
+
+# If I want to refit the variance component
+L.fit(X=X)
+ts,ps = L.association(X)
+
+# If I want to do a genome-wide scan over the 1000 SNPs.
+# This call will use REML (REML = False means use ML).
+# It will also refit the variance components for each SNP.
+# Setting refit = False will cause the program to fit the model once
+# and hold those variance component estimates for each SNP.
+begin = time.time()
+TS,PS = lmm.GWAS(Y,snps,K,REML=True,refit=False)
+print("TS is:", pf(TS))
+print("PS is:", pf(PS))
+end = time.time()
+sys.stderr.write("Total time for 1000 SNPs: %0.3f\n" % (end- begin))
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
new file mode 100755
index 00000000..74b58dd6
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/pylmmGWAS.py
@@ -0,0 +1,346 @@
+#!/usr/bin/python
+
+# pylmm is a python-based linear mixed-model solver with applications to GWAS
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+
+#The program is free for academic use. Please contact Nick Furlotte
+#<nick.furlotte@gmail.com> if you are interested in using the software for
+#commercial purposes.
+
+#The software must not be modified and distributed without prior
+#permission of the author.
+
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pdb
+import time
+import sys
+
+def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
+
+def formatResult(id,beta,betaSD,ts,ps):
+   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
+   
+def outputResult(id,beta,betaSD,ts,ps):
+   out.write(formatResult(id,beta,betaSD,ts,ps))
+
+from optparse import OptionParser,OptionGroup
+usage = """usage: %prog [options] --kfile kinshipFile --[tfile | bfile] plinkFileBase outfileBase
+
+This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.  
+The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.  
+
+Basic usage:
+
+      python pylmmGWAS.py -v --bfile plinkFile --kfile preComputedKinship.kin --phenofile plinkFormattedPhenotypeFile resultFile
+
+	    """
+parser = OptionParser(usage=usage)
+
+basicGroup = OptionGroup(parser, "Basic Options")
+advancedGroup = OptionGroup(parser, "Advanced Options")
+experimentalGroup = OptionGroup(parser, "Experimental Options")
+
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
+basicGroup.add_option("--tfile", dest="tfile",
+                  help="The base for a PLINK tped file")
+basicGroup.add_option("--bfile", dest="bfile",
+                  help="The base for a PLINK binary bed file")
+basicGroup.add_option("--phenofile", dest="phenoFile", default=None,
+                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  This file should be in plink format. ")
+
+# EMMA Options
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the columns and snps on the rows.")
+basicGroup.add_option("--emmaPHENO", dest="emmaPheno", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each phenotype as one row.")
+basicGroup.add_option("--emmaCOV", dest="emmaCov", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each covariate as one row.")
+
+basicGroup.add_option("--kfile", dest="kfile",
+                  help="The location of a kinship file.  This is an nxn plain text file and can be computed with the pylmmKinship program.")
+basicGroup.add_option("--covfile", dest="covfile",
+                  help="The location of a covariate file file.  This is a plink formatted covariate file.")
+basicGroup.add_option("-p", type="int", dest="pheno", help="The phenotype index to be used in association.", default=0)
+
+
+
+advancedGroup.add_option("--removeMissingGenotypes",
+                  action="store_false", dest="normalizeGenotype", default=True,
+                  help="By default the program replaces missing genotypes with the minor allele frequency.  This option overrides that behavior making the program remove missing individuals.  NOTE: This can increase running time due to the need to recompute the eigendecomposition for each SNP with missing values.")
+advancedGroup.add_option("--refit",
+                  action="store_true", dest="refit", default=False,
+                  help="Refit the variance components at each SNP (default is to lock in the variance components under the null).")
+
+advancedGroup.add_option("--REML",
+                  action="store_true", dest="REML", default=False,
+                  help="Use restricted maximum-likelihood (REML) (default is maximum-likelihood).")
+#advancedGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
+advancedGroup.add_option("--eigen", dest="eigenfile",
+                  help="The location of the precomputed eigendecomposition for the kinship file.  These can be computed with pylmmKinship.py.")
+advancedGroup.add_option("--noMean", dest="noMean", default=False,action="store_true",
+                  help="This option only applies when --cofile is used.  When covfile is provided, the program will automatically add a global mean covariate to the model unless this option is specified.")
+
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
+
+advancedGroup.add_option("-v", "--verbose",
+                  action="store_true", dest="verbose", default=False,
+                  help="Print extra info")
+
+# Experimental Group Options
+experimentalGroup.add_option("--kfile2", dest="kfile2", 
+                  help="The location of a second kinship file.  This file has the same format as the first kinship.  This might be used if you want to correct for another form of confounding.")
+
+parser.add_option_group(basicGroup)
+parser.add_option_group(advancedGroup)
+parser.add_option_group(experimentalGroup)
+
+(options, args) = parser.parse_args()
+
+import sys
+import os
+import numpy as np
+from scipy import linalg
+from pylmm.lmm import LMM
+from pylmm import input
+
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if len(args) != 1:  
+   parser.print_help()
+   sys.exit()
+
+outFile = args[0]
+
+if not options.tfile and not options.bfile and not options.emmaFile: 
+#if not options.pfile and not options.tfile and not options.bfile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an EMMA formatted file (--emmaSNP).")
+if not options.kfile:
+   parser.error("Please provide a pre-computed kinship file")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
+
+# READING PLINK input
+if options.verbose: sys.stderr.write("Reading SNP input...\n")
+if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+#elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.emmaFile: IN = input.plink(options.emmaFile,type='emma', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+else: parser.error("You must provide at least one PLINK input file base")
+
+if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos') and not os.path.isfile(options.emmaPheno):
+   parser.error("No .pheno file exist for %s.  Please provide a phenotype file using the --phenofile or --emmaPHENO argument." % (options.phenoFile or IN.fbase + '.phenos'))
+
+# Read the emma phenotype file if provided.
+# Format should be rows are phenotypes and columns are individuals.
+if options.emmaPheno:
+   f = open(options.emmaPheno,'r')
+   P = []
+   for line in f:
+      v = line.strip().split()
+      p = []
+      for x in v:
+	 try:
+	    p.append(float(x))
+	 except: p.append(np.nan)
+      P.append(p)
+   f.close()
+   IN.phenos = np.array(P).T
+
+# READING Covariate File
+if options.covfile: 
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariates(options.covfile) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+elif options.emmaCov:
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariatesEMMA(options.emmaCov) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+else: 
+   X0 = np.ones((IN.phenos.shape[0],1))
+
+if np.isnan(X0).sum(): 
+      parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
+
+# READING Kinship - major bottleneck for large datasets
+if options.verbose: sys.stderr.write("Reading kinship...\n")
+begin = time.time()
+# This method seems to be the fastest and works if you already know the size of the matrix
+if options.kfile[-3:] == '.gz':
+   import gzip
+   f = gzip.open(options.kfile,'r')
+   F = f.read() # might exhaust mem if the file is huge
+   K = np.fromstring(F,sep=' ') # Assume that space separated
+   f.close()
+else: 
+   K = np.fromfile(open(options.kfile,'r'),sep=" ")
+K.resize((len(IN.indivs),len(IN.indivs)))
+end = time.time()
+# Other slower ways
+#K = np.loadtxt(options.kfile)
+#K = np.genfromtxt(options.kfile)
+if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
+
+if options.kfile2:
+   if options.verbose: sys.stderr.write("Reading second kinship...\n")
+   begin = time.time()
+   # This method seems to be the fastest and works if you already know the size of the matrix
+   if options.kfile2[-3:] == '.gz':
+      import gzip
+      f = gzip.open(options.kfile2,'r')
+      F = f.read() # might exhaust mem if the file is huge
+      K2 = np.fromstring(F,sep=' ') # Assume that space separated
+      f.close()
+   else: K2 = np.fromfile(open(options.kfile2,'r'),sep=" ")
+   K2.resize((len(IN.indivs),len(IN.indivs)))
+   end = time.time()
+   if options.verbose: sys.stderr.write("Read the %d x %d second kinship matrix in %0.3fs \n" % (K2.shape[0],K2.shape[1],end-begin))
+
+# PROCESS the phenotype data -- Remove missing phenotype values
+# Keep will now index into the "full" data to select what we keep (either everything or a subset of non missing data
+Y = IN.phenos[:,options.pheno]
+v = np.isnan(Y)
+keep = True - v
+if v.sum():
+   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
+   Y = Y[keep]
+   X0 = X0[keep,:]
+   K = K[keep,:][:,keep]
+   if options.kfile2: K2 = K2[keep,:][:,keep]
+   Kva = []
+   Kve = []
+
+# Only load the decomposition if we did not remove individuals.
+# Otherwise it would not be correct and we would have to compute it again.
+if not v.sum() and options.eigenfile:
+   if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
+   Kva = np.load(options.eigenfile + ".Kva")
+   Kve = np.load(options.eigenfile + ".Kve")
+else: 
+   Kva = []
+   Kve = []
+
+# CREATE LMM object for association
+n = K.shape[0]
+if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
+else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
+
+# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
+if not options.refit: 
+   if options.verbose: sys.stderr.write("Computing fit for null model\n")
+   L.fit()
+   if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
+   if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
+
+def compute_snp(collect):
+   snp = collect[0]
+   id = collect[1]
+   # result = []
+   # Check SNPs for missing values
+   x = snp[keep].reshape((n,1))  # all the SNPs
+   v = np.isnan(x).reshape((-1,))
+   if v.sum():
+      keeps = True - v
+      xs = x[keeps,:]
+      if keeps.sum() <= 1 or xs.var() <= 1e-6: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+         # continue
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+
+      # Its ok to center the genotype -  I used options.normalizeGenotype to 
+      # force the removal of missing genotypes as opposed to replacing them with MAF.
+      if not options.normalizeGenotype:
+         xs = (xs - xs.mean()) / np.sqrt(xs.var())
+      Ys = Y[keeps]
+      X0s = X0[keeps,:]
+      Ks = K[keeps,:][:,keeps]
+      if options.kfile2:
+         K2s = K2[keeps,:][:,keeps]
+         Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=options.verbose,K2=K2s)
+      else:
+         Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
+      if options.refit:
+        Ls.fit(X=xs,REML=options.REML)
+      else:
+         #try:
+         Ls.fit(REML=options.REML)
+         #except: pdb.set_trace()
+      ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
+   else:
+      if x.var() == 0: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+         # continue
+
+      if options.refit:
+         L.fit(X=x,REML=options.REML)
+      # This is where it happens
+      ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
+   
+   return formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
+   # PS.append(ps)
+   # TS.append(ts)
+   # return len(result)
+   # compute.q.put(result)
+   # return None
+
+def f_init(q):
+   compute_snp.q = q
+
+# Set up the pool
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+collect = []
+   
+# Buffers for pvalues and t-stats
+# PS = []
+# TS = []
+count = 0
+out = open(outFile,'w')
+printOutHead()
+
+for snp_id in IN:
+   count += 1
+   if count % 1000 == 0:
+      if options.verbose:
+         sys.stderr.write("At SNP %d\n" % count)
+      # if count>8000 :
+      #    break         # for testing only
+   if count % 100 == 0:
+      for line in p.imap(compute_snp,collect):
+         out.write(line)
+      collect = []
+      
+   collect.append(snp_id)
+for line in p.imap(compute_snp,collect):
+   out.write(line)
+
+
diff --git a/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
new file mode 100755
index 00000000..c9a73ece
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/pylmmKinship.py
@@ -0,0 +1,186 @@
+#!/usr/bin/python
+
+# pylmm is a python-based linear mixed-model solver with applications to GWAS
+
+# Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
+
+#The program is free for academic use. Please contact Nick Furlotte
+#<nick.furlotte@gmail.com> if you are interested in using the software for
+#commercial purposes.
+
+#The software must not be modified and distributed without prior
+#permission of the author.
+
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import pdb
+
+from optparse import OptionParser,OptionGroup
+usage = """usage: %prog [options] --[tfile | bfile] plinkFileBase outfile
+
+"""
+
+parser = OptionParser(usage=usage)
+
+basicGroup = OptionGroup(parser, "Basic Options")
+#advancedGroup = OptionGroup(parser, "Advanced Options")
+
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
+basicGroup.add_option("--tfile", dest="tfile",
+                  help="The base for a PLINK tped file")
+basicGroup.add_option("--bfile", dest="bfile",
+                  help="The base for a PLINK binary ped file")
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the rows and snps on the columns.")
+basicGroup.add_option("--emmaNumSNPs", dest="numSNPs", type="int", default=0,
+		     help="When providing the emmaSNP file you need to specify how many snps are in the file")
+
+basicGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
+basicGroup.add_option("-n", default=1000,dest="computeSize", type="int", help="The maximum number of SNPs to read into memory at once (default 1000).  This is important when there is a large number of SNPs, because memory could be an issue.")
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
+
+basicGroup.add_option("-v", "--verbose",
+                  action="store_true", dest="verbose", default=False,
+                  help="Print extra info")
+
+parser.add_option_group(basicGroup)
+#parser.add_option_group(advancedGroup)
+
+(options, args) = parser.parse_args()
+if len(args) != 1: 
+   parser.print_help()
+   sys.exit()
+
+outFile = args[0]
+
+import sys
+import os
+import numpy as np
+from scipy import linalg
+from pylmm.lmm import calculateKinship
+from pylmm import input
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if not options.tfile and not options.bfile and not options.emmaFile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
+   
+if options.verbose: sys.stderr.write("Reading PLINK input...\n")
+if options.bfile: IN = input.plink(options.bfile,type='b')
+elif options.tfile: IN = input.plink(options.tfile,type='t')
+#elif options.pfile: IN = input.plink(options.pfile,type='p')
+elif options.emmaFile: 
+   if not options.numSNPs: parser.error("You must provide the number of SNPs when specifying an emma formatted file.")
+   IN = input.plink(options.emmaFile,type='emma')
+else: parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+def compute_W(job):
+   """
+   Read 1000 SNPs at a time into matrix and return the result
+   """
+   n = len(IN.indivs)
+   m = options.computeSize
+   W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
+   for j in range(0,options.computeSize):
+      row = job*m + j
+      if row >= IN.numSNPs:
+         W = W[:,range(0,j)]
+         break
+      snp,id = IN.next()
+      if snp.var() == 0:
+         continue
+      W[:,j] = snp  # set row to list of SNPs
+   return W
+
+def compute_dgemm(job,W):
+   """
+   Compute Kinship(W)*j
+
+   For every set of SNPs dgemm is used to multiply matrices T(W)*W
+   """
+   res = None
+   try:
+      res = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False)
+   except AttributeError:
+      res = np.dot(W,W.T) 
+   compute_dgemm.q.put([job,res])
+   return job
+
+def f_init(q):
+    compute_dgemm.q = q
+
+n = len(IN.indivs)
+# m = options.computeSize
+# jobsize=m
+
+IN.getSNPIterator()
+# Annoying hack to get around the fact that it is expensive to determine the number of SNPs in an emma file
+if options.emmaFile: IN.numSNPs = options.numSNPs
+# i = 0
+
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+iterations = IN.numSNPs/options.computeSize+1
+# jobs = range(0,8) # range(0,iterations)
+
+results = []
+
+K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
+
+completed = 0
+
+# for job in range(8):
+for job in range(iterations):
+   if options.verbose:
+      sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
+   W = compute_W(job)
+   results.append(p.apply_async(compute_dgemm, (job,W)))   
+   # Do we have a result?
+   try: 
+     j,x = q.get_nowait()
+     if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+     K_j = x
+     # print j,K_j[:,0]
+     K = K + K_j
+     completed += 1
+   except Queue.Empty:
+     pass
+
+for job in range(len(results)-completed):
+   j,x = q.get()
+   if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+   K_j = x
+   # print j,K_j[:,0]
+   K = K + K_j
+        
+K = K / float(IN.numSNPs)
+if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
+np.savetxt(outFile,K)
+
+if options.saveEig:
+   if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
+   Kva,Kve = linalg.eigh(K)
+   if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
+   np.savetxt(outFile+".kva",Kva)
+   np.savetxt(outFile+".kve",Kve)
+      
+
+
+
diff --git a/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py
new file mode 100755
index 00000000..0c96d986
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/scripts/run_pylmm.py
@@ -0,0 +1,77 @@
+from __future__ import absolute_import, print_function, division
+
+from base import data_set
+from base.species import TheSpecies
+    
+    def run(dataset_name, vals, temp_uuid):
+        """Generates p-values for each marker"""
+
+        tempdata = temp_data.TempData(temp_uuid)
+        
+        dataset = data_set.create_dataset(dataset_name)
+        species = TheSpecies(dataset=dataset)
+
+        samples = [] # Want only ones with values
+        vals = vals
+
+        for sample in dataset.group.samplelist:
+            samples.append(str(sample))
+            
+        gen_data(dataset, vals, tempdata)
+
+
+    def gen_data(dataset, vals)
+        dataset.group.get_markers()
+
+        pheno_vector = np.array([val == "x" and np.nan or float(val) for val in vals])
+
+        if dataset.group.species == "human":
+            p_values, t_stats = gen_human_results(pheno_vector, tempdata)
+        else:
+            genotype_data = [marker['genotypes'] for marker in dataset.group.markers.markers]
+            
+            no_val_samples = self.identify_empty_samples()
+            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
+            
+            genotype_matrix = np.array(trimmed_genotype_data).T
+            
+            #print("pheno_vector: ", pf(pheno_vector))
+            #print("genotype_matrix: ", pf(genotype_matrix))
+            #print("genotype_matrix.shape: ", pf(genotype_matrix.shape))
+            
+            t_stats, p_values = lmm.run(
+                pheno_vector,
+                genotype_matrix,
+                restricted_max_likelihood=True,
+                refit=False,
+                temp_data=tempdata
+            )
+            #print("p_values:", p_values)
+        
+        self.dataset.group.markers.add_pvalues(p_values)
+        return self.dataset.group.markers.markers
+
+
+    def gen_human_results(self, pheno_vector, tempdata):
+        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
+
+        plink_input = input.plink(file_base, type='b')
+        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
+
+        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
+        covariate_matrix = np.ones((pheno_vector.shape[0],1))
+        kinship_matrix = np.fromfile(open(file_base + '.kin','r'),sep=" ")
+        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))
+
+        p_values, t_stats = lmm.run_human(
+                pheno_vector,
+                covariate_matrix,
+                input_file_name,
+                kinship_matrix,
+                loading_progress=tempdata
+            )
+
+        return p_values, t_stats
+    
+if __name__ == '__main__':
+    run(dataset_name, vals, temp_uuid)
\ No newline at end of file
-- 
cgit v1.2.3