9 files changed, 432 insertions, 215 deletions
diff --git a/wqflask/wqflask/do_search.py b/wqflask/wqflask/do_search.py
index fd7bd916..29f3de98 100755
--- a/wqflask/wqflask/do_search.py
+++ b/wqflask/wqflask/do_search.py
@@ -370,13 +370,24 @@ class WikiSearch(MrnaAssaySearch):
 
     DoSearch.search_types['WIKI'] =  "WikiSearch"
 
-    def run(self):
+    def get_where_clause(self):
         where_clause = """%s.symbol = GeneRIF.symbol
             and GeneRIF.versionId=0 and GeneRIF.display>0
             and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s')
                 """ % (self.dataset.type,
                        "[[:<:]]"+str(self.search_term[0])+"[[:>:]]",
-                       str(self.search_term[0]))
+                       str(self.search_term[0])) 
+        return where_clause
+
+    def run(self):
+        #where_clause = """%s.symbol = GeneRIF.symbol
+        #    and GeneRIF.versionId=0 and GeneRIF.display>0
+        #    and (GeneRIF.comment REGEXP '%s' or GeneRIF.initial = '%s')
+        #        """ % (self.dataset.type,
+        #               "[[:<:]]"+str(self.search_term[0])+"[[:>:]]",
+        #               str(self.search_term[0]))
+
+        where_clause = self.get_where_clause()
 
         from_clause = ", GeneRIF "
         query = self.compile_final_query(from_clause, where_clause)
diff --git a/wqflask/wqflask/my_pylmm/pylmmGWAS.py b/wqflask/wqflask/my_pylmm/pylmmGWAS.py
index 54a230de..74b58dd6 100755
--- a/wqflask/wqflask/my_pylmm/pylmmGWAS.py
+++ b/wqflask/wqflask/my_pylmm/pylmmGWAS.py
@@ -3,60 +3,80 @@
 # pylmm is a python-based linear mixed-model solver with applications to GWAS
 
 # Copyright (C) 2013  Nicholas A. Furlotte (nick.furlotte@gmail.com)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
 
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#The program is free for academic use. Please contact Nick Furlotte
+#<nick.furlotte@gmail.com> if you are interested in using the software for
+#commercial purposes.
+
+#The software must not be modified and distributed without prior
+#permission of the author.
+
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+#CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+#EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+#PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+#PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+#NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import pdb
 import time
+import sys
+
+def printOutHead(): out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
 
-def printOutHead():
-    out.write("\t".join(["SNP_ID","BETA","BETA_SD","F_STAT","P_VALUE"]) + "\n")
+def formatResult(id,beta,betaSD,ts,ps):
+   return "\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n"
+   
 def outputResult(id,beta,betaSD,ts,ps):
-    out.write("\t".join([str(x) for x in [id,beta,betaSD,ts,ps]]) + "\n")
+   out.write(formatResult(id,beta,betaSD,ts,ps))
 
 from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --kfile kinshipFile --[t | b | p]file plinkFileBase outfileBase
+usage = """usage: %prog [options] --kfile kinshipFile --[tfile | bfile] plinkFileBase outfileBase
 
-This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.
-The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.
+This program provides basic genome-wide association (GWAS) functionality.  You provide a phenotype and genotype file as well as a pre-computed (use pylmmKinship.py) kinship matrix and the program outputs a result file with information about each SNP, including the association p-value.  
+The input file are all standard plink formatted with the first two columns specifiying the individual and family ID.  For the phenotype file, we accept either NA or -9 to denote missing values.  
 
 Basic usage:
 
       python pylmmGWAS.py -v --bfile plinkFile --kfile preComputedKinship.kin --phenofile plinkFormattedPhenotypeFile resultFile
 
-NOTE: The current running version only supports binary PED files (PLINK).  It is simple to convert between ped or tped and bed using PLINK.  Sorry for the inconvinience.
-
-            """
+	    """
 parser = OptionParser(usage=usage)
 
 basicGroup = OptionGroup(parser, "Basic Options")
 advancedGroup = OptionGroup(parser, "Advanced Options")
+experimentalGroup = OptionGroup(parser, "Experimental Options")
 
-basicGroup.add_option("--pfile", dest="pfile",
-                  help="The base for a PLINK ped file")
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
 basicGroup.add_option("--tfile", dest="tfile",
                   help="The base for a PLINK tped file")
 basicGroup.add_option("--bfile", dest="bfile",
                   help="The base for a PLINK binary bed file")
 basicGroup.add_option("--phenofile", dest="phenoFile", default=None,
-                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  The order does not matter for this file. ")
+                  help="Without this argument the program will look for a file with .pheno that has the plinkFileBase root.  If you want to specify an alternative phenotype file, then use this argument.  This file should be in plink format. ")
+
+# EMMA Options
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the columns and snps on the rows.")
+basicGroup.add_option("--emmaPHENO", dest="emmaPheno", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each phenotype as one row.")
+basicGroup.add_option("--emmaCOV", dest="emmaCov", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with each covariate as one row.")
+
 basicGroup.add_option("--kfile", dest="kfile",
                   help="The location of a kinship file.  This is an nxn plain text file and can be computed with the pylmmKinship program.")
 basicGroup.add_option("--covfile", dest="covfile",
                   help="The location of a covariate file file.  This is a plink formatted covariate file.")
 basicGroup.add_option("-p", type="int", dest="pheno", help="The phenotype index to be used in association.", default=0)
 
+
+
 advancedGroup.add_option("--removeMissingGenotypes",
                   action="store_false", dest="normalizeGenotype", default=True,
                   help="By default the program replaces missing genotypes with the minor allele frequency.  This option overrides that behavior making the program remove missing individuals.  NOTE: This can increase running time due to the need to recompute the eigendecomposition for each SNP with missing values.")
@@ -69,16 +89,23 @@ advancedGroup.add_option("--REML",
                   help="Use restricted maximum-likelihood (REML) (default is maximum-likelihood).")
 #advancedGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
 advancedGroup.add_option("--eigen", dest="eigenfile",
-                  help="The location of the precomputed eigendecomposition for the kinship file")
+                  help="The location of the precomputed eigendecomposition for the kinship file.  These can be computed with pylmmKinship.py.")
 advancedGroup.add_option("--noMean", dest="noMean", default=False,action="store_true",
                   help="This option only applies when --cofile is used.  When covfile is provided, the program will automatically add a global mean covariate to the model unless this option is specified.")
 
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
+
 advancedGroup.add_option("-v", "--verbose",
                   action="store_true", dest="verbose", default=False,
                   help="Print extra info")
 
+# Experimental Group Options
+experimentalGroup.add_option("--kfile2", dest="kfile2", 
+                  help="The location of a second kinship file.  This file has the same format as the first kinship.  This might be used if you want to correct for another form of confounding.")
+
 parser.add_option_group(basicGroup)
 parser.add_option_group(advancedGroup)
+parser.add_option_group(experimentalGroup)
 
 (options, args) = parser.parse_args()
 
@@ -89,68 +116,108 @@ from scipy import linalg
 from pylmm.lmm import LMM
 from pylmm import input
 
-if len(args) != 1:
-    parser.error("Incorrect number of arguments")
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if len(args) != 1:  
+   parser.print_help()
+   sys.exit()
+
 outFile = args[0]
 
-if not options.pfile and not options.tfile and not options.bfile:
-    parser.error("You must provide at least one PLINK input file base")
+if not options.tfile and not options.bfile and not options.emmaFile: 
+#if not options.pfile and not options.tfile and not options.bfile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an EMMA formatted file (--emmaSNP).")
 if not options.kfile:
-    parser.error("Please provide a pre-computed kinship file")
+   parser.error("Please provide a pre-computed kinship file")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
 
 # READING PLINK input
-if options.verbose:
-    sys.stderr.write("Reading PLINK input...\n")
-if options.bfile:
-    IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.tfile:
-    IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-elif options.pfile:
-    IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
-else:
-    parser.error("You must provide at least one PLINK input file base")
-
-if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos'):
-    parser.error("No .pheno file exist for %s" % (options.phenoFile or IN.fbase + '.phenos'))
+if options.verbose: sys.stderr.write("Reading SNP input...\n")
+if options.bfile: IN = input.plink(options.bfile,type='b', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.tfile: IN = input.plink(options.tfile,type='t', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+#elif options.pfile: IN = input.plink(options.pfile,type='p', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+elif options.emmaFile: IN = input.plink(options.emmaFile,type='emma', phenoFile=options.phenoFile,normGenotype=options.normalizeGenotype)
+else: parser.error("You must provide at least one PLINK input file base")
+
+if not os.path.isfile(options.phenoFile or IN.fbase + '.phenos') and not os.path.isfile(options.emmaPheno):
+   parser.error("No .pheno file exist for %s.  Please provide a phenotype file using the --phenofile or --emmaPHENO argument." % (options.phenoFile or IN.fbase + '.phenos'))
+
+# Read the emma phenotype file if provided.
+# Format should be rows are phenotypes and columns are individuals.
+if options.emmaPheno:
+   f = open(options.emmaPheno,'r')
+   P = []
+   for line in f:
+      v = line.strip().split()
+      p = []
+      for x in v:
+	 try:
+	    p.append(float(x))
+	 except: p.append(np.nan)
+      P.append(p)
+   f.close()
+   IN.phenos = np.array(P).T
 
 # READING Covariate File
-if options.covfile:
-    if options.verbose:
-        sys.stderr.write("Reading covariate file...\n")
-    # Read the covariate file -- write this into input.plink
-    P = IN.getCovariates(options.covfile)
-
-    if options.noMean:
-        X0 = P
-    else:
-        X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
-
-    if np.isnan(X0).sum():
-        parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
-else:
-    X0 = np.ones((IN.phenos.shape[0],1))
+if options.covfile: 
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariates(options.covfile) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+elif options.emmaCov:
+   if options.verbose: 
+      sys.stderr.write("Reading covariate file...\n")
+   P = IN.getCovariatesEMMA(options.emmaCov) 
+   if options.noMean: 
+      X0 = P
+   else: 
+      X0 = np.hstack([np.ones((IN.phenos.shape[0],1)),P])
+else: 
+   X0 = np.ones((IN.phenos.shape[0],1))
+
+if np.isnan(X0).sum(): 
+      parser.error("The covariate file %s contains missing values. At this time we are not dealing with this case.  Either remove those individuals with missing values or replace them in some way.")
 
 # READING Kinship - major bottleneck for large datasets
-if options.verbose:
-    sys.stderr.write("Reading kinship...\n")
+if options.verbose: sys.stderr.write("Reading kinship...\n")
 begin = time.time()
 # This method seems to be the fastest and works if you already know the size of the matrix
 if options.kfile[-3:] == '.gz':
-    import gzip
-    f = gzip.open(options.kfile,'r')
-    F = f.read() # might exhaust mem if the file is huge
-    K = np.fromstring(F,sep=' ') # Assume that space separated
-    f.close()
-else:
-    K = np.fromfile(open(options.kfile,'r'),sep=" ")
+   import gzip
+   f = gzip.open(options.kfile,'r')
+   F = f.read() # might exhaust mem if the file is huge
+   K = np.fromstring(F,sep=' ') # Assume that space separated
+   f.close()
+else: 
+   K = np.fromfile(open(options.kfile,'r'),sep=" ")
 K.resize((len(IN.indivs),len(IN.indivs)))
 end = time.time()
 # Other slower ways
 #K = np.loadtxt(options.kfile)
 #K = np.genfromtxt(options.kfile)
-if options.verbose:
-    sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
-
+if options.verbose: sys.stderr.write("Read the %d x %d kinship matrix in %0.3fs \n" % (K.shape[0],K.shape[1],end-begin))
+
+if options.kfile2:
+   if options.verbose: sys.stderr.write("Reading second kinship...\n")
+   begin = time.time()
+   # This method seems to be the fastest and works if you already know the size of the matrix
+   if options.kfile2[-3:] == '.gz':
+      import gzip
+      f = gzip.open(options.kfile2,'r')
+      F = f.read() # might exhaust mem if the file is huge
+      K2 = np.fromstring(F,sep=' ') # Assume that space separated
+      f.close()
+   else: K2 = np.fromfile(open(options.kfile2,'r'),sep=" ")
+   K2.resize((len(IN.indivs),len(IN.indivs)))
+   end = time.time()
+   if options.verbose: sys.stderr.write("Read the %d x %d second kinship matrix in %0.3fs \n" % (K2.shape[0],K2.shape[1],end-begin))
 
 # PROCESS the phenotype data -- Remove missing phenotype values
 # Keep will now index into the "full" data to select what we keep (either everything or a subset of non missing data
@@ -158,81 +225,122 @@ Y = IN.phenos[:,options.pheno]
 v = np.isnan(Y)
 keep = True - v
 if v.sum():
-    if options.verbose:
-        sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
-    Y = Y[keep]
-    X0 = X0[keep,:]
-    K = K[keep,:][:,keep]
-    Kva = []
-    Kve = []
+   if options.verbose: sys.stderr.write("Cleaning the phenotype vector by removing %d individuals...\n" % (v.sum()))
+   Y = Y[keep]
+   X0 = X0[keep,:]
+   K = K[keep,:][:,keep]
+   if options.kfile2: K2 = K2[keep,:][:,keep]
+   Kva = []
+   Kve = []
 
 # Only load the decomposition if we did not remove individuals.
 # Otherwise it would not be correct and we would have to compute it again.
 if not v.sum() and options.eigenfile:
-    if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
-    Kva = np.load(options.eigenfile + ".Kva")
-    Kve = np.load(options.eigenfile + ".Kve")
-else:
-    Kva = []
-    Kve = []
+   if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
+   Kva = np.load(options.eigenfile + ".Kva")
+   Kve = np.load(options.eigenfile + ".Kve")
+else: 
+   Kva = []
+   Kve = []
 
 # CREATE LMM object for association
 n = K.shape[0]
-L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
-# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
-if not options.refit:
-    if options.verbose: sys.stderr.write("Computing fit for null model\n")
-    L.fit()
-    if options.verbose: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
+if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
+else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
 
+# Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
+if not options.refit: 
+   if options.verbose: sys.stderr.write("Computing fit for null model\n")
+   L.fit()
+   if options.verbose and not options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
+   if options.verbose and options.kfile2: sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
+
+def compute_snp(collect):
+   snp = collect[0]
+   id = collect[1]
+   # result = []
+   # Check SNPs for missing values
+   x = snp[keep].reshape((n,1))  # all the SNPs
+   v = np.isnan(x).reshape((-1,))
+   if v.sum():
+      keeps = True - v
+      xs = x[keeps,:]
+      if keeps.sum() <= 1 or xs.var() <= 1e-6: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan))
+         # continue
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+
+      # Its ok to center the genotype -  I used options.normalizeGenotype to 
+      # force the removal of missing genotypes as opposed to replacing them with MAF.
+      if not options.normalizeGenotype:
+         xs = (xs - xs.mean()) / np.sqrt(xs.var())
+      Ys = Y[keeps]
+      X0s = X0[keeps,:]
+      Ks = K[keeps,:][:,keeps]
+      if options.kfile2:
+         K2s = K2[keeps,:][:,keeps]
+         Ls = LMM_withK2(Ys,Ks,X0=X0s,verbose=options.verbose,K2=K2s)
+      else:
+         Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
+      if options.refit:
+        Ls.fit(X=xs,REML=options.REML)
+      else:
+         #try:
+         Ls.fit(REML=options.REML)
+         #except: pdb.set_trace()
+      ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
+   else:
+      if x.var() == 0: 
+         # PS.append(np.nan)
+         # TS.append(np.nan)
+         # result.append(formatResult(id,np.nan,np.nan,np.nan,np.nan)) # writes nan values
+         return formatResult(id,np.nan,np.nan,np.nan,np.nan)
+         # continue
+
+      if options.refit:
+         L.fit(X=x,REML=options.REML)
+      # This is where it happens
+      ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
+   
+   return formatResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
+   # PS.append(ps)
+   # TS.append(ts)
+   # return len(result)
+   # compute.q.put(result)
+   # return None
+
+def f_init(q):
+   compute_snp.q = q
+
+# Set up the pool
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+collect = []
+   
 # Buffers for pvalues and t-stats
-PS = []
-TS = []
+# PS = []
+# TS = []
 count = 0
 out = open(outFile,'w')
 printOutHead()
 
-for snp,id in IN:
-    count += 1
-    if options.verbose and count % 1000 == 0:
-        sys.stderr.write("At SNP %d\n" % count)
-
-    x = snp[keep].reshape((n,1))
-    #x[[1,50,100,200,3000],:] = np.nan
-    v = np.isnan(x).reshape((-1,))
-    # Check SNPs for missing values
-    if v.sum():
-        keeps = True - v
-        xs = x[keeps,:]
-        if keeps.sum() <= 1 or xs.var() <= 1e-6:
-            PS.append(np.nan)
-            TS.append(np.nan)
-            outputResult(id,np.nan,np.nan,np.nan,np.nan)
-            continue
-
-        # Its ok to center the genotype -  I used options.normalizeGenotype to
-        # force the removal of missing genotypes as opposed to replacing them with MAF.
-        if not options.normalizeGenotype: xs = (xs - xs.mean()) / np.sqrt(xs.var())
-        Ys = Y[keeps]
-        X0s = X0[keeps,:]
-        Ks = K[keeps,:][:,keeps]
-        Ls = LMM(Ys,Ks,X0=X0s,verbose=options.verbose)
-        if options.refit: Ls.fit(X=xs,REML=options.REML)
-        else:
-            #try:
-            Ls.fit(REML=options.REML)
-            #except: pdb.set_trace()
-        ts,ps,beta,betaVar = Ls.association(xs,REML=options.REML,returnBeta=True)
-    else:
-        if x.var() == 0:
-            PS.append(np.nan)
-            TS.append(np.nan)
-            outputResult(id,np.nan,np.nan,np.nan,np.nan)
-            continue
-
-        if options.refit: L.fit(X=x,REML=options.REML)
-        ts,ps,beta,betaVar = L.association(x,REML=options.REML,returnBeta=True)
-
-    outputResult(id,beta,np.sqrt(betaVar).sum(),ts,ps)
-    PS.append(ps)
-    TS.append(ts)
+for snp_id in IN:
+   count += 1
+   if count % 1000 == 0:
+      if options.verbose:
+         sys.stderr.write("At SNP %d\n" % count)
+      # if count>8000 :
+      #    break         # for testing only
+   if count % 100 == 0:
+      for line in p.imap(compute_snp,collect):
+         out.write(line)
+      collect = []
+      
+   collect.append(snp_id)
+for line in p.imap(compute_snp,collect):
+   out.write(line)
+
+
diff --git a/wqflask/wqflask/my_pylmm/pylmmKinship.py b/wqflask/wqflask/my_pylmm/pylmmKinship.py
index cfba2936..c9a73ece 100755
--- a/wqflask/wqflask/my_pylmm/pylmmKinship.py
+++ b/wqflask/wqflask/my_pylmm/pylmmKinship.py
@@ -23,10 +23,12 @@
 #NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 #SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import sys
+import pdb
+
 from optparse import OptionParser,OptionGroup
-usage = """usage: %prog [options] --[t | b | p]file plinkFileBase outfile
+usage = """usage: %prog [options] --[tfile | bfile] plinkFileBase outfile
 
-NOTE: The current running version only supports binary PED files (PLINK).  It is simple to convert between ped or tped and bed using PLINK.  Sorry for the inconvinience.
 """
 
 parser = OptionParser(usage=usage)
@@ -34,15 +36,20 @@ parser = OptionParser(usage=usage)
 basicGroup = OptionGroup(parser, "Basic Options")
 #advancedGroup = OptionGroup(parser, "Advanced Options")
 
-basicGroup.add_option("--pfile", dest="pfile",
-                  help="The base for a PLINK ped file")
+#basicGroup.add_option("--pfile", dest="pfile",
+#                  help="The base for a PLINK ped file")
 basicGroup.add_option("--tfile", dest="tfile",
                   help="The base for a PLINK tped file")
 basicGroup.add_option("--bfile", dest="bfile",
                   help="The base for a PLINK binary ped file")
+basicGroup.add_option("--emmaSNP", dest="emmaFile", default=None,
+                  help="For backwards compatibility with emma, we allow for \"EMMA\" file formats.  This is just a text file with individuals on the rows and snps on the columns.")
+basicGroup.add_option("--emmaNumSNPs", dest="numSNPs", type="int", default=0,
+		     help="When providing the emmaSNP file you need to specify how many snps are in the file")
 
 basicGroup.add_option("-e", "--efile", dest="saveEig", help="Save eigendecomposition to this file.")
 basicGroup.add_option("-n", default=1000,dest="computeSize", type="int", help="The maximum number of SNPs to read into memory at once (default 1000).  This is important when there is a large number of SNPs, because memory could be an issue.")
+basicGroup.add_option("-t", "--nthreads", dest="numThreads", help="maximum number of threads to use")
 
 basicGroup.add_option("-v", "--verbose",
                   action="store_true", dest="verbose", default=False,
@@ -52,68 +59,128 @@ parser.add_option_group(basicGroup)
 #parser.add_option_group(advancedGroup)
 
 (options, args) = parser.parse_args()
-if len(args) != 1: parser.error("Incorrect number of arguments")
+if len(args) != 1: 
+   parser.print_help()
+   sys.exit()
+
 outFile = args[0]
 
 import sys
 import os
 import numpy as np
 from scipy import linalg
-from pyLMM.lmm import calculate_kinship
-from pyLMM import input
-
-if not options.pfile and not options.tfile and not options.bfile: 
-   parser.error("You must provide at least one PLINK input file base")
-
+from pylmm.lmm import calculateKinship
+from pylmm import input
+import multiprocessing as mp # Multiprocessing is part of the Python stdlib
+import Queue 
+
+if not options.tfile and not options.bfile and not options.emmaFile: 
+   parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+numThreads = None
+if options.numThreads:
+   numThreads = int(options.numThreads)
+   
 if options.verbose: sys.stderr.write("Reading PLINK input...\n")
 if options.bfile: IN = input.plink(options.bfile,type='b')
 elif options.tfile: IN = input.plink(options.tfile,type='t')
-elif options.pfile: IN = input.plink(options.pfile,type='p')
-else: parser.error("You must provide at least one PLINK input file base")
+#elif options.pfile: IN = input.plink(options.pfile,type='p')
+elif options.emmaFile: 
+   if not options.numSNPs: parser.error("You must provide the number of SNPs when specifying an emma formatted file.")
+   IN = input.plink(options.emmaFile,type='emma')
+else: parser.error("You must provide at least one PLINK input file base (--tfile or --bfile) or an emma formatted file (--emmaSNP).")
+
+def compute_W(job):
+   """
+   Read 1000 SNPs at a time into matrix and return the result
+   """
+   n = len(IN.indivs)
+   m = options.computeSize
+   W = np.ones((n,m)) * np.nan # W matrix has dimensions individuals x SNPs (initially all NaNs)
+   for j in range(0,options.computeSize):
+      row = job*m + j
+      if row >= IN.numSNPs:
+         W = W[:,range(0,j)]
+         break
+      snp,id = IN.next()
+      if snp.var() == 0:
+         continue
+      W[:,j] = snp  # set row to list of SNPs
+   return W
+
+def compute_dgemm(job,W):
+   """
+   Compute Kinship(W)*j
+
+   For every set of SNPs dgemm is used to multiply matrices T(W)*W
+   """
+   res = None
+   try:
+      res = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False)
+   except AttributeError:
+      res = np.dot(W,W.T) 
+   compute_dgemm.q.put([job,res])
+   return job
+
+def f_init(q):
+    compute_dgemm.q = q
 
 n = len(IN.indivs)
-m = options.computeSize
-W = np.ones((n,m)) * np.nan
+# m = options.computeSize
+# jobsize=m
 
 IN.getSNPIterator()
-i = 0
-K = None
-while i < IN.numSNPs:
-    j = 0
-    while j < options.computeSize and i < IN.numSNPs:
-        snp,id = IN.next()
-        if snp.var() == 0:
-            i += 1
-            continue
-        W[:,j] = snp
-
-        i += 1
-        j += 1
-    if j < options.computeSize: W = W[:,range(0,j)] 
-
-    if options.verbose:
-        sys.stderr.write("Processing first %d SNPs\n" % i)
-    if K == None: 
-        try: 
-            K = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False) # calculateKinship(W) * j
-        except AttributeError:
-            K = np.dot(W,W.T) 
-    else:
-        try: 
-            K_j = linalg.fblas.dgemm(alpha=1.,a=W.T,b=W.T,trans_a=True,trans_b=False) # calculateKinship(W) * j
-        except AttributeError:
-            K_j = np.dot(W,W.T)
-        K = K + K_j
-
+# Annoying hack to get around the fact that it is expensive to determine the number of SNPs in an emma file
+if options.emmaFile: IN.numSNPs = options.numSNPs
+# i = 0
+
+# mp.set_start_method('spawn')
+q = mp.Queue()
+p = mp.Pool(numThreads, f_init, [q])
+iterations = IN.numSNPs/options.computeSize+1
+# jobs = range(0,8) # range(0,iterations)
+
+results = []
+
+K = np.zeros((n,n))  # The Kinship matrix has dimension individuals x individuals
+
+completed = 0
+
+# for job in range(8):
+for job in range(iterations):
+   if options.verbose:
+      sys.stderr.write("Processing job %d first %d SNPs\n" % (job, ((job+1)*options.computeSize)))
+   W = compute_W(job)
+   results.append(p.apply_async(compute_dgemm, (job,W)))   
+   # Do we have a result?
+   try: 
+     j,x = q.get_nowait()
+     if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+     K_j = x
+     # print j,K_j[:,0]
+     K = K + K_j
+     completed += 1
+   except Queue.Empty:
+     pass
+
+for job in range(len(results)-completed):
+   j,x = q.get()
+   if options.verbose: sys.stderr.write("Job "+str(j)+" finished\n")
+   K_j = x
+   # print j,K_j[:,0]
+   K = K + K_j
+        
 K = K / float(IN.numSNPs)
 if options.verbose: sys.stderr.write("Saving Kinship file to %s\n" % outFile)
 np.savetxt(outFile,K)
 
 if options.saveEig:
-    if options.verbose:
-        sys.stderr.write("Obtaining Eigendecomposition\n")
-    Kva,Kve = linalg.eigh(K)
-    if options.verbose:
-        sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
-    np.savetxt(outFile+".kva",Kva)
-    np.savetxt(outFile+".kve",Kve)
+   if options.verbose: sys.stderr.write("Obtaining Eigendecomposition\n")
+   Kva,Kve = linalg.eigh(K)
+   if options.verbose: sys.stderr.write("Saving eigendecomposition to %s.[kva | kve]\n" % outFile)
+   np.savetxt(outFile+".kva",Kva)
+   np.savetxt(outFile+".kve",Kve)
+      
+
+
+
diff --git a/wqflask/wqflask/parser.py b/wqflask/wqflask/parser.py
index be92014d..5c89bc01 100755
--- a/wqflask/wqflask/parser.py
+++ b/wqflask/wqflask/parser.py
@@ -26,7 +26,7 @@ from pprint import pformat as pf
 def parse(pstring):
     """
     
-    returned item serach_term is always a list, even if only one element
+    returned item search_term is always a list, even if only one element
     """
     pstring = re.split(r"""(?:(\w+\s*=\s*[\('"\[][^)'"]*[\)\]'"])  |  # LRS=(1 2 3), cisLRS=[4 5 6], etc
                        (\w+\s*[=:\>\<][\w\*]+)  |  # wiki=bar, GO:foobar, etc
@@ -42,6 +42,8 @@ def parse(pstring):
     
     print("separators:", separators)
 
+    
+ 
     for item in pstring:
         splat = re.split(separators, item)
         print("splat is:", splat)
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index 9d2d0b88..c6521106 100755
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -212,9 +212,9 @@ class SearchResultPage(object):
     #
     #    #search_gene
     #    #search_geno
-    #    #searhch_pheno
+    #    #search_pheno
     #    #search_mrn
-    #    #searhc_publish
+    #    #search_publish
 
 
     def search(self):
diff --git a/wqflask/wqflask/static/new/javascript/dataset_select_menu.coffee b/wqflask/wqflask/static/new/javascript/dataset_select_menu.coffee
index af53bd09..83264da3 100755
--- a/wqflask/wqflask/static/new/javascript/dataset_select_menu.coffee
+++ b/wqflask/wqflask/static/new/javascript/dataset_select_menu.coffee
@@ -118,5 +118,12 @@ $ ->
                 console.log("Calling:", populate_function)
                 window[populate_function]()
 
-
-    $("#make_default").click(make_default)
\ No newline at end of file
+    check_search_term = ->
+        search_term = $('#tfor').val()
+        console.log("search_term:", search_term)
+        if (search_term == "")
+            alert("Please enter one or more search terms or search equations.")
+            return false
+
+    $("#make_default").click(make_default)
+    $("#btsearch").click(check_search_term)
\ No newline at end of file
diff --git a/wqflask/wqflask/static/new/javascript/dataset_select_menu.js b/wqflask/wqflask/static/new/javascript/dataset_select_menu.js
index 6f63b715..5bd54359 100644
--- a/wqflask/wqflask/static/new/javascript/dataset_select_menu.js
+++ b/wqflask/wqflask/static/new/javascript/dataset_select_menu.js
@@ -1,6 +1,6 @@
 // Generated by CoffeeScript 1.8.0
 $(function() {
-  var apply_default, dataset_info, group_info, make_default, open_window, populate_dataset, populate_group, populate_species, populate_type, process_json, redo_dropdown;
+  var apply_default, check_search_term, dataset_info, group_info, make_default, open_window, populate_dataset, populate_group, populate_species, populate_type, process_json, redo_dropdown;
   process_json = function(data) {
     window.jdata = data;
     populate_species();
@@ -133,5 +133,15 @@ $(function() {
     }
     return _results;
   };
-  return $("#make_default").click(make_default);
+  check_search_term = function() {
+    var search_term;
+    search_term = $('#tfor').val();
+    console.log("search_term:", search_term);
+    if (search_term === "") {
+      alert("Please enter one or more search terms or search equations.");
+      return false;
+    }
+  };
+  $("#make_default").click(make_default);
+  return $("#btsearch").click(check_search_term);
 });
diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html
index eb4f883c..538a6799 100755
--- a/wqflask/wqflask/templates/index_page.html
+++ b/wqflask/wqflask/templates/index_page.html
@@ -18,7 +18,7 @@
         
         <div class="row" style="width: 1400px !important;">
 
-            <div class="col-xs-5 col-xs-5">
+            <div class="col-xs-5">
                 <section id="search">
                     <div class="page-header">
                         <h1>Select and search</h1>
@@ -29,31 +29,42 @@
 
                                 <div class="form-group">
                                     <label for="species" class="col-xs-1 control-label" style="width: 65px !important;">Species:</label>
-                                    <div class="col-xs-4 controls">
-                                        <select name="species" id="species" class="form-control selectpicker span3" style="width: 300px !important;"></select>
+                                    <div class="col-xs-10 controls">
+                                        <div class="col-xs-8">
+                                          <select name="species" id="species" class="form-control selectpicker span3" style="width: 280px !important;"></select>
+                                        </div>
+                                        <div class="col-xs-4">
+                                          <input id="make_default" class="btn btn-default form-control" value="Make Default">
+                                        </div>
                                     </div>
                                 </div>
 
                                 <div class="form-group">
                                     <label for="group" class="col-xs-1 control-label" style="width: 65px !important;">Group:</label>
                                     <div class="col-xs-4 controls input-append">
-                                        <select name="group" id="group" class="form-control selectpicker span3" style="width: 300px !important;"></select>
-                                        <i class="icon-question-sign"></i>
+                                        <div class="col-xs-8">
+                                          <select name="group" id="group" class="form-control selectpicker span3" style="width: 280px !important;"></select>
+                                          <i class="icon-question-sign"></i>
+                                        </div>
                                     </div>
                                 </div>
 
                                 <div class="form-group">
                                     <label for="tissue" class="col-xs-1 control-label" style="width: 65px !important;">Type:</label>
                                     <div class="col-xs-4 controls">
-                                        <select name="type" id="type" class="form-control selectpicker span3" style="width: 300px !important;"></select>
+                                        <div class="col-xs-8">
+                                          <select name="type" id="type" class="form-control selectpicker span3" style="width: 280px !important;"></select>
+                                        </div>
                                     </div>
                                 </div>
 
                                 <div class="form-group">
                                     <label for="dataset" class="col-xs-1 control-label" style="width: 65px !important;">Dataset:</label>
                                     <div class="col-xs-4 controls input-append">
-                                        <select name="dataset" id="dataset" class="form-control selectpicker span5" style="width: 450px !important;"></select>
-                                        <i class="icon-question-sign"></i>
+                                        <div class="col-xs-8">
+                                          <select name="dataset" id="dataset" class="form-control selectpicker span5" style="width: 450px !important;"></select>
+                                          <i class="icon-question-sign"></i>
+                                        </div>
                                     </div>
                                 </div>
 
@@ -65,9 +76,11 @@
                                 <!--  GET ANY  SEARCH -->
 
                                 <div class="form-group">
-                                    <label for="tfor" class="col-xs-1 control-label" style="width: 65px !important;">Search for:</label>
+                                    <label for="tfor" class="col-xs-1 control-label" style="padding-left: 0px; padding-right: 0px; width: 65px !important;">Search for:</label>
                                     <div class="col-xs-10 controls">
-                                        <textarea name="search_terms" rows="2" class="form-control search-query" style="width: 450px !important;" id="tfor"></textarea>
+                                        <div class="col-xs-8">
+                                          <textarea name="search_terms" rows="2" class="form-control search-query" style="max-width: 550px; width: 450px !important;" id="tfor"></textarea>
+                                        </div>
                                     </div>
                                 </div>
 
@@ -75,12 +88,14 @@
                                 <div class="form-group">
                                     <label for="tfor" class="col-xs-1 control-label" style="width: 65px !important;"></label>
                                     <div class="col-xs-10 controls">
-                                        <p>Enter terms, genes, ID numbers in the
-                                           <b>Search</b> field<br>
-                                           Use <b>*</b> or <b>?</b> wildcards (Cyp*a?,
-                                           synap*)<br>
-                                           Use <b>quotes</b> for terms such as <i>"tyrosine
-                                           kinase"</i></p>
+                                        <div class="col-xs-10">
+                                          <p>Enter terms, genes, ID numbers in the
+                                             <b>Search</b> field<br>
+                                             Use <b>*</b> or <b>?</b> wildcards (Cyp*a?,
+                                             synap*)<br>
+                                             Use <b>quotes</b> for terms such as <i>"tyrosine
+                                             kinase"</i></p>
+                                        </div>
                                     </div>
                                 </div>
 
@@ -92,9 +107,6 @@
                                     <div class="col-xs-3 controls" style="width: 100px !important;">
                                         <input id="btsearch" type="submit" class="btn btn-primary form-control" value="Search">
                                     </div>
-                                    <div class="col-xs-4 controls" style="width: 150px !important;">
-                                        <input id="make_default" class="btn btn-default form-control" value="Make Default">
-                                    </div>
                                 </div>
 
                                 <input type="hidden" name="FormID" value="searchResult" class="form-control">
diff --git a/wqflask/wqflask/templates/new_security/login_user.html b/wqflask/wqflask/templates/new_security/login_user.html
index 8ad0c79e..81e8b53a 100755
--- a/wqflask/wqflask/templates/new_security/login_user.html
+++ b/wqflask/wqflask/templates/new_security/login_user.html
@@ -7,7 +7,7 @@
         {{ flash_me() }}
 
         <div class="page-header">
-            <h1>Login</h1>
+            <h1>&nbsp;&nbsp;&nbsp;&nbsp;Login</h1>
         </div>