Diffstat (limited to 'wqflask')
-rwxr-xr-x   wqflask/wqflask/marker_regression/marker_regression.py  |   61
-rw-r--r--   wqflask/wqflask/my_pylmm/data/genofile_parser.py        |   85
-rw-r--r--   wqflask/wqflask/my_pylmm/pyLMM/lmm.py                   |  663
3 files changed, 462 insertions, 347 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 92270eb2..13ec4280 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -17,6 +17,8 @@ import urllib import numpy as np +import json + from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil @@ -453,21 +455,43 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - prep_data.PrepData(self.vals, self.dataset.group.name) + json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) + markers = json.load(json_data) + genotype_data = [marker['genotypes'] for marker in markers] + + no_val_samples = self.identify_empty_samples() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + + #print("trimmed genotype data is:", pf(trimmed_genotype_data)) + + #for marker_object in genotype_data: + # print("marker_object:", pf(marker_object)) + + + #prep_data.PrepData(self.vals, genotype_data) pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, - self.dataset.group.name + '.snps.new')) + genotypes = np.array(trimmed_genotype_data).T + print("genotypes is", pf(genotypes)) + #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, + # self.dataset.group.name + '.snps.new')).T - print("genotypes is:", pf(genotypes)) + print("pheno_vector is:", pf(pheno_vector.shape)) + print("genotypes is:", pf(genotypes.shape)) kinship_matrix = lmm.calculateKinship(genotypes) print("kinship_matrix is:", pf(kinship_matrix)) - print("pheno_vector is:", pf(pheno_vector)) lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) lmm_ob.fit() - + + t_stats, p_values = lmm.GWAS(pheno_vector, + genotypes, + kinship_matrix, + REML=True, + refit=False) + + print("p_values is:", pf(len(p_values))) #calculate QTL for each trait self.qtl_results = self.genotype.regression(strains = self.samples, @@ -633,6 +657,31 @@ class MarkerRegression(object): #return rv,tblobj,bottomInfo + def identify_empty_samples(self): + no_val_samples = [] + for sample_count, val in enumerate(self.vals): + if val == "x": + no_val_samples.append(sample_count) + return no_val_samples + #print("self.no_val_samples:", self.no_val_samples) + #nums = set(range(0, 176)) + #print("not included:", nums-self.empty_columns) + + def trim_genotypes(self, genotype_data, no_value_samples): + trimmed_genotype_data = [] + for marker in genotype_data: + new_genotypes = [] + for item_count, genotype in enumerate(marker): + if item_count in no_value_samples: + continue + try: + genotype = float(genotype) + except ValueError: + pass + new_genotypes.append(genotype) + trimmed_genotype_data.append(new_genotypes) + return trimmed_genotype_data + def plotIntMappingForPLINK(self, fd, canvas, offset= (80, 120, 20, 80), zoom = 1, startMb = None, endMb = None, showLocusForm = "",plinkResultDict={}): #calculating margins xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index 1dafecc8..ec8c521c 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -1,13 +1,31 @@ #!/usr/bin/python from __future__ import print_function, division, absolute_import -import csv +import sys 
+sys.path.append("..") import os import glob import traceback +import numpy as np +from pyLMM import lmm + +import simplejson as json + +from pprint import pformat as pf + class EmptyConfigurations(Exception): pass + + +class Marker(object): + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + class ConvertGenoFile(object): def __init__(self, input_file, output_file): @@ -15,6 +33,9 @@ class ConvertGenoFile(object): self.input_file = input_file self.output_file = output_file + self.mb_exists = False + self.markers = [] + self.latest_row_pos = None self.latest_col_pos = None @@ -23,7 +44,7 @@ class ConvertGenoFile(object): def convert(self): - self.prefer_config = { + self.haplotype_notation = { '@mat': "1", '@pat': "0", '@het': "0.5", @@ -31,36 +52,56 @@ class ConvertGenoFile(object): } self.configurations = {} - self.skipped_cols = 3 + #self.skipped_cols = 3 self.input_fh = open(self.input_file) - with open(self.output_file, "w") as self.output_fh: self.process_csv() - - - + + #def process_row(self, row): # counter = 0 # for char in row: # if char # counter += 1 - + def process_csv(self): for row_count, row in enumerate(self.process_rows()): #self.latest_row_pos = row_count - for item_count, item in enumerate(row.split()[self.skipped_cols:]): + row_items = row.split() + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + this_marker.cM = row_items[2] + if self.mb_exists: + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + else: + genotypes = row_items[3:] + for item_count, genotype in enumerate(genotypes): + this_marker.genotypes.append(self.configurations[genotype.upper()]) + + #print("this_marker is:", pf(this_marker.__dict__)) + + self.markers.append(this_marker.__dict__) + + with open(self.output_file, 'w') as fh: + json.dump(self.markers, fh, indent=" ", sort_keys=True) + # print('configurations:', str(configurations)) - self.latest_col_pos = item_count + self.skipped_cols - self.latest_col_value = item - if item_count != 0: - self.output_fh.write(" ") - self.output_fh.write(self.configurations[item.upper()]) + #self.latest_col_pos = item_count + self.skipped_cols + #self.latest_col_value = item + + #if item_count != 0: + # self.output_fh.write(" ") + #self.output_fh.write(self.configurations[item.upper()]) - self.output_fh.write("\n") - + #self.output_fh.write("\n") + + def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): self.latest_row_value = row @@ -69,14 +110,14 @@ class ConvertGenoFile(object): continue if row.startswith('Chr'): if 'Mb' in row.split(): - self.skipped_cols = 4 + self.mb_exists = True continue if row.startswith('@'): key, _separater, value = row.partition(':') key = key.strip() value = value.strip() - if key in self.prefer_config: - self.configurations[value] = self.prefer_config[key] + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] continue if not len(self.configurations): raise EmptyConfigurations @@ -87,17 +128,17 @@ class ConvertGenoFile(object): os.chdir(old_directory) for input_file in glob.glob("*.geno"): group_name = input_file.split('.')[0] - output_file = os.path.join(new_directory, group_name + ".snps") + output_file = os.path.join(new_directory, group_name + ".json") print("%s -> %s" % (input_file, output_file)) convertob = ConvertGenoFile(input_file, output_file) try: - convertob.convert() + convertob.convert() except EmptyConfigurations as why: print(" No 
config info? Continuing...") #excepted = True continue except Exception as why: - + print(" Exception:", why) print(traceback.print_exc()) print(" Found in row %i at tabular column %i" % (convertob.latest_row_pos, diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 1ae663d4..015c2e14 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -1,109 +1,128 @@ -# pyLMM software Copyright 2012, Nicholas A. Furlotte -# Version 0.1 - -#License Details -#--------------- - -# The program is free for academic use. Please contact Nick Furlotte -# <nick.furlotte@gmail.com> if you are interested in using the software for -# commercial purposes. - -# The software must not be modified and distributed without prior -# permission of the author. -# Any instance of this software must retain the above copyright notice. - -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# pylmm is a python-based linear mixed-model solver with applications to GWAS + +# Copyright (C) 2013 Nicholas A. Furlotte (nick.furlotte@gmail.com) +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import, print_function, division import sys import time import numpy as np -import numpy.linalg as linalg +from scipy import linalg from scipy import optimize from scipy import stats -#import matplotlib.pyplot as pl import pdb from pprint import pformat as pf +#np.seterr('raise') + +def matrixMult(A,B): + #return np.dot(A,B) + + print("A is:", pf(A.shape)) + print("B is:", pf(B.shape)) + + # If the matrices are in Fortran order then the computations will be faster + # when using dgemm. Otherwise, the function will copy the matrix and that takes time. + if not A.flags['F_CONTIGUOUS']: + AA = A.T + transA = True + else: + AA = A + transA = False + + if not B.flags['F_CONTIGUOUS']: + BB = B.T + transB = True + else: + BB = B + transB = False + + return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) + def calculateKinship(W): - """ - W is an n x m matrix encoding SNP minor alleles. 
- """ - n = W.shape[0] - m = W.shape[1] - keep = [] - for i in range(m): - mn = W[True - np.isnan(W[:,i]),i].mean() - W[np.isnan(W[:,i]),i] = mn - vr = W[:,i].var() - if vr == 0: continue - - keep.append(i) - W[:,i] = (W[:,i] - mn) / np.sqrt(vr) - - W = W[:,keep] - K = np.dot(W,W.T) * 1.0/float(m) - return K + """ + W is an n x m matrix encoding SNP minor alleles. + + This function takes a matrix oF SNPs, imputes missing values with the maf, + normalizes the resulting vectors and returns the RRM matrix. + """ + n = W.shape[0] + m = W.shape[1] + keep = [] + for i in range(m): + mn = W[True - np.isnan(W[:,i]),i].mean() + W[np.isnan(W[:,i]),i] = mn + vr = W[:,i].var() + if vr == 0: continue + + keep.append(i) + W[:,i] = (W[:,i] - mn) / np.sqrt(vr) + + W = W[:,keep] + K = matrixMult(W,W.T) * 1.0/float(m) + return K def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): - """ - Performs a basic GWAS scan using the LMM. This function - uses the LMM module to assess association at each SNP and - does some simple cleanup, such as removing missing individuals - per SNP and re-computing the eigen-decomp - - Y - n x 1 phenotype vector - X - n x m SNP matrix - K - n x n kinship matrix - Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K - X0 - n x q covariate matrix - REML - use restricted maximum likelihood - refit - refit the variance component for each SNP - """ - n = X.shape[0] - m = X.shape[1] - - if X0 == None: X0 = np.ones((n,1)) - - # Remove missing values in Y and adjust associated parameters - v = np.isnan(Y) - if v.sum(): - keep = True - v - Y = Y[keep] - X = X[keep,:] - X0 = X0[keep,:] - K = K[keep,:][:,keep] - Kva = [] - Kve = [] - - L = LMM(Y,K,Kva,Kve,X0) - if not refit: L.fit() - - PS = [] - TS = [] - - for i in range(m): - x = X[:,i].reshape((n,1)) - v = np.isnan(x).reshape((-1,)) - if v.sum(): + """ + Performs a basic GWAS scan using the LMM. 
This function + uses the LMM module to assess association at each SNP and + does some simple cleanup, such as removing missing individuals + per SNP and re-computing the eigen-decomp + + Y - n x 1 phenotype vector + X - n x m SNP matrix + K - n x n kinship matrix + Kva,Kve = linalg.eigh(K) - or the eigen vectors and values for K + X0 - n x q covariate matrix + REML - use restricted maximum likelihood + refit - refit the variance component for each SNP + """ + n = X.shape[0] + m = X.shape[1] + + if X0 == None: X0 = np.ones((n,1)) + + # Remove missing values in Y and adjust associated parameters + v = np.isnan(Y) + if v.sum(): + keep = True - v + Y = Y[keep] + X = X[keep,:] + X0 = X0[keep,:] + K = K[keep,:][:,keep] + Kva = [] + Kve = [] + + L = LMM(Y,K,Kva,Kve,X0) + if not refit: L.fit() + + PS = [] + TS = [] + + for i in range(m): + x = X[:,i].reshape((n,1)) + v = np.isnan(x).reshape((-1,)) + if v.sum(): keep = True - v xs = x[keep,:] - if xs.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + if xs.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue Ys = Y[keep] X0s = X0[keep,:] @@ -112,245 +131,251 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): if refit: Ls.fit(X=xs) else: Ls.fit() ts,ps = Ls.association(xs,REML=REML) - else: - if x.var() == 0: - PS.append(np.nan) - TS.append(np.nan) - continue + else: + if x.var() == 0: + PS.append(np.nan) + TS.append(np.nan) + continue if refit: L.fit(X=x) ts,ps = L.association(x,REML=REML) + + PS.append(ps) + TS.append(ts) - PS.append(ps) - TS.append(ts) - - return TS,PS + return TS,PS class LMM: - """ - This is a simple version of EMMA/fastLMM. - The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) - and to optimize this model by finding the maximum-likelihood estimates for the model parameters. - There are three model parameters: heritability (h), covariate coefficients (beta) and the total - phenotypic variance (sigma). - Heritability as defined here is the proportion of the total variance (sigma) that is attributed to - the kinship matrix. - - For simplicity, we assume that everything being input is a numpy array. - If this is not the case, the module may throw an error as conversion from list to numpy array - is not done consistently. - - """ - def __init__(self, Y, K, Kva=None, Kve=None, X0=None): - """ - The constructor takes a phenotype vector or array of size n. - It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. - If they are not provided, the constructor will calculate them. - X0 is an optional covariate matrix of size n x q, where there are q covariates. - When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. 
- - """ - - if Kva is None: - Kva = [] - if Kve is None: - Kve = [] - - - if X0 == None: - X0 = np.ones(len(Y)).reshape(len(Y),1) - - print("Y is:", pf(Y)) - - for key, value in locals().iteritems(): - print(" %s - %s" % (key, type(value))) - - x = Y != -9 - print("x is:", pf(x)) - if not x.sum() == len(Y): - print("x.sum is:", pf(x.sum())) - print("len(Y) is:", pf(len(Y))) - sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) - Y = Y[x] - K = K[x,:][:,x] - X0 = X0[x,:] - Kva = [] - Kve = [] - self.nonmissing = x - - if len(Kva) == 0 or len(Kve) == 0: - sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) - begin = time.time() - Kva,Kve = linalg.eigh(K) - end = time.time() - sys.stderr.write("Total time: %0.3f\n" % (end - begin)) - self.K = K - self.Kva = Kva - self.Kve = Kve - self.Y = Y - self.X0 = X0 - self.N = self.K.shape[0] - - self.transform() - - def transform(self): + """ + This is a simple version of EMMA/fastLMM. + The main purpose of this module is to take a phenotype vector (Y), a set of covariates (X) and a kinship matrix (K) + and to optimize this model by finding the maximum-likelihood estimates for the model parameters. + There are three model parameters: heritability (h), covariate coefficients (beta) and the total + phenotypic variance (sigma). + Heritability as defined here is the proportion of the total variance (sigma) that is attributed to + the kinship matrix. + + For simplicity, we assume that everything being input is a numpy array. + If this is not the case, the module may throw an error as conversion from list to numpy array + is not done consistently. + + """ + def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=False): + + """ + The constructor takes a phenotype vector or array of size n. + It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. + If they are not provided, the constructor will calculate them. + X0 is an optional covariate matrix of size n x q, where there are q covariates. + When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ + + if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + self.verbose = verbose + + #x = Y != -9 + x = True - np.isnan(Y) + if not x.sum() == len(Y): + if self.verbose: sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) + Y = Y[x] + K = K[x,:][:,x] + X0 = X0[x,:] + Kva = [] + Kve = [] + self.nonmissing = x + + if len(Kva) == 0 or len(Kve) == 0: + if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) ) + begin = time.time() + Kva,Kve = linalg.eigh(K) + end = time.time() + if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin)) + + self.K = K + self.Kva = Kva + self.Kve = Kve + self.Y = Y + self.X0 = X0 + self.N = self.K.shape[0] + + if sum(self.Kva < 1e-6): + if self.verbose: sys.stderr.write("Cleaning %d eigen values\n" % (sum(self.Kva < 0))) + self.Kva[self.Kva < 1e-6] = 1e-6 + + self.transform() + + def transform(self): """ Computes a transformation on the phenotype vector and the covariate matrix. - The transformation is obtained by left multiplying each parameter by the transpose of the + The transformation is obtained by left multiplying each parameter by the transpose of the eigenvector matrix of K (the kinship). 
""" - - print(len(self.Kve.T)) - print(len(self.Y)) - - self.Yt = np.dot(self.Kve.T, self.Y) - self.X0t = np.dot(self.Kve.T, self.X0) - - def getMLSoln(self,h,X): - - """ - Obtains the maximum-likelihood estimates for the covariate coefficients (beta), - the total variance of the trait (sigma) and also passes intermediates that can - be utilized in other functions. The input parameter h is a value between 0 and 1 and represents - the heritability or the proportion of the total variance attributed to genetics. The X is the - covariate matrix. - """ - - #print("h is", pf(h)) - #print("X is", pf(X)) - print("X.shape is", pf(X.shape)) - - S = 1.0/(h*self.Kva + (1.0 - h)) - Xt = X.T*S - XX = np.dot(Xt,X) - - - XX_i = linalg.inv(XX) - beta = np.dot(np.dot(XX_i,Xt),self.Yt) - Yt = self.Yt - np.dot(X,beta) - Q = np.dot(Yt.T*S,Yt) - sigma = Q * 1.0 / (float(len(self.Yt)) - float(X.shape[1])) - return beta,sigma,Q,XX_i,XX - - def LL_brent(self,h,X=None,REML=False): return -self.LL(h,X,stack=False,REML=REML)[0] - def LL(self,h,X=None,stack=True,REML=False): - - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X == None: X = self.X0t - elif stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL - - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(np.dot(X.T,X))) - np.log(linalg.det(XX)) - LL = LL + 0.5*LL_REML_part - - return LL,beta,sigma,XX_i - - def getMax(self,H, X=None,REML=False): - - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - - if len(HOpt) > 1: - sys.stderr.write("ERR: Found multiple maximum. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: return H[0] - else: return H[n-1] - - def fit(self,X=None,ngrids=100,REML=True): - - """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. 
- """ - - if X == None: X = self.X0t - else: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - H = np.array(range(ngrids)) / float(ngrids) - L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) - self.LLs = L - - hmax = self.getMax(H,X,REML) - L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) - - self.H = H - self.optH = hmax - self.optLL = L - self.optBeta = beta - self.optSigma = sigma - - return hmax,beta,sigma,L - - - def association(self,X, h = None, stack=True,REML=True): - - """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. - - """ - if stack: X = np.hstack([self.X0t,np.dot(self.Kve.T, X)]) - if h == None: h = self.optH - - L,beta,sigma,betaSTDERR = self.LL(h,X,stack=False,REML=REML) - q = len(beta) - ts,ps = self.tstat(beta[q-1],betaSTDERR[q-1,q-1],sigma,q) - return ts,ps - - def tstat(self,beta,stderr,sigma,q): - - """ - Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. - This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. - """ - - ts = beta / np.sqrt(stderr * sigma) - ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) - return ts,ps - - def plotFit(self,color='b-',title=''): - - """ - Simple function to visualize the likelihood space. It takes the LLs - calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. - The resulting "probabilities" are normalized to one and plotted against heritability. - This can be seen as an approximation to the posterior distribuiton of heritability. - - For diagnostic purposes this lets you see if there is one distinct maximum or multiple - and what the variance of the parameter looks like. - """ - mx = self.LLs.max() - p = np.exp(self.LLs - mx) - p = p/p.sum() - - pl.plot(self.H,p,color) - pl.xlabel("Heritability") - pl.ylabel("Probability of data") - pl.title(title) + + self.Yt = matrixMult(self.Kve.T, self.Y) + self.X0t = matrixMult(self.Kve.T, self.X0) + self.X0t_stack = np.hstack([self.X0t, np.ones((self.N,1))]) + self.q = self.X0t.shape[1] + + def getMLSoln(self,h,X): + + """ + Obtains the maximum-likelihood estimates for the covariate coefficients (beta), + the total variance of the trait (sigma) and also passes intermediates that can + be utilized in other functions. The input parameter h is a value between 0 and 1 and represents + the heritability or the proportion of the total variance attributed to genetics. The X is the + covariate matrix. + """ + + S = 1.0/(h*self.Kva + (1.0 - h)) + Xt = X.T*S + XX = matrixMult(Xt,X) + XX_i = linalg.inv(XX) + beta = matrixMult(matrixMult(XX_i,Xt),self.Yt) + Yt = self.Yt - matrixMult(X,beta) + Q = np.dot(Yt.T*S,Yt) + sigma = Q * 1.0 / (float(self.N) - float(X.shape[1])) + return beta,sigma,Q,XX_i,XX + + def LL_brent(self,h,X=None,REML=False): + #brent will not be bounded by the specified bracket. + # I return a large number if we encounter h < 0 to avoid errors in LL computation during the search. + if h < 0: return 1e6 + return -self.LL(h,X,stack=False,REML=REML)[0] + + def LL(self,h,X=None,stack=True,REML=False): + + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. 
+ REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ + + if X == None: X = self.X0t + elif stack: + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL + + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) + LL = LL + 0.5*LL_REML_part + + return LL,beta,sigma,XX_i + + def getMax(self,H, X=None,REML=False): + + """ + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + + """ + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: + HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + if np.isnan(HOpt[-1][0]): HOpt[-1][0] = [self.LLs[i-1]] + + if len(HOpt) > 1: + if self.verbose: sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: return H[0] + else: return H[n-1] + + def fit(self,X=None,ngrids=100,REML=True): + + """ + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. + """ + + if X == None: X = self.X0t + else: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + H = np.array(range(ngrids)) / float(ngrids) + L = np.array([self.LL(h,X,stack=False,REML=REML)[0] for h in H]) + self.LLs = L + + hmax = self.getMax(H,X,REML) + L,beta,sigma,betaSTDERR = self.LL(hmax,X,stack=False,REML=REML) + + self.H = H + self.optH = hmax + self.optLL = L + self.optBeta = beta + self.optSigma = sigma + + return hmax,beta,sigma,L + + def association(self,X, h = None, stack=True,REML=True, returnBeta=False): + + """ + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. + + """ + if stack: + #X = np.hstack([self.X0t,matrixMult(self.Kve.T, X)]) + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack + + if h == None: h = self.optH + + L,beta,sigma,betaVAR = self.LL(h,X,stack=False,REML=REML) + q = len(beta) + ts,ps = self.tstat(beta[q-1],betaVAR[q-1,q-1],sigma,q) + + if returnBeta: return ts,ps,beta[q-1].sum(),betaVAR[q-1,q-1].sum()*sigma + return ts,ps + + def tstat(self,beta,var,sigma,q): + + """ + Calculates a t-statistic and associated p-value given the estimate of beta and its standard error. + This is actually an F-test, but when only one hypothesis is being performed, it reduces to a t-test. 
+ """ + + ts = beta / np.sqrt(var * sigma) + ps = 2.0*(1.0 - stats.t.cdf(np.abs(ts), self.N-q)) + if not len(ts) == 1 or not len(ps) == 1: raise Exception("Something bad happened :(") + return ts.sum(),ps.sum() + + def plotFit(self,color='b-',title=''): + + """ + Simple function to visualize the likelihood space. It takes the LLs + calcualted over a grid and normalizes them by subtracting off the mean and exponentiating. + The resulting "probabilities" are normalized to one and plotted against heritability. + This can be seen as an approximation to the posterior distribuiton of heritability. + + For diagnostic purposes this lets you see if there is one distinct maximum or multiple + and what the variance of the parameter looks like. + """ + import matplotlib.pyplot as pl + + mx = self.LLs.max() + p = np.exp(self.LLs - mx) + p = p/p.sum() + + pl.plot(self.H,p,color) + pl.xlabel("Heritability") + pl.ylabel("Probability of data") + pl.title(title)
\ No newline at end of file
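
The converter in genofile_parser.py now writes one JSON file per group (group_name + ".json") in place of the old .snps text format: each entry is the __dict__ of a Marker, and gen_data in marker_regression.py reads the "genotypes" field back out. A small sketch of that round trip follows; the path and the marker values are made up purely for illustration, and the "Mb" key is only present when the source .geno header actually has an Mb column.

    import json

    # Illustrative contents of a converted file, e.g. BXD.json
    # (marker names and values invented for the example):
    # [
    #   {"name": "rs31443144", "chr": "1", "cM": "1.50", "Mb": "3.010",
    #    "genotypes": ["1", "0", "0.5", "1"]},
    #   ...
    # ]

    with open("/path/to/BXD.json") as json_data:   # webqtlConfig.NEWGENODIR + group name in gen_data
        markers = json.load(json_data)

    genotype_data = [marker['genotypes'] for marker in markers]
    print(markers[0]['name'], markers[0]['chr'], len(genotype_data), "markers")

Note that the parser writes the strings taken from haplotype_notation ("1", "0", "0.5"), which is why trim_genotypes in marker_regression.py converts the values to floats before the genotype matrix is built.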
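
Taken together, the new gen_data path is: load the JSON genotypes, drop samples whose phenotype value is "x", build a kinship matrix, fit the LMM, then scan every marker with GWAS. Below is a minimal sketch of that call sequence against the pyLMM API added here, using synthetic NumPy data instead of a real .geno group; the shapes, the random values and the import path are assumptions for illustration only.

    import numpy as np
    from wqflask.my_pylmm.pyLMM import lmm   # "from pyLMM import lmm" in the parser above

    n, m = 50, 200                            # individuals x markers
    rng = np.random.RandomState(0)
    genotypes = rng.choice([0.0, 0.5, 1.0], size=(n, m))   # same encoding as haplotype_notation
    pheno_vector = rng.randn(n)

    kinship_matrix = lmm.calculateKinship(genotypes)        # n x n realized-relationship matrix

    lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
    lmm_ob.fit()                                            # grid search plus Brent refinement for h

    t_stats, p_values = lmm.GWAS(pheno_vector, genotypes, kinship_matrix,
                                 REML=True, refit=False)
    print(len(p_values), "p-values, one per marker")

This mirrors gen_data above, where the rows read from the JSON file are trimmed of empty samples and transposed to samples x markers before the same three calls.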
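
matrixMult exists to route the repeated products in transform, getMLSoln and LL through BLAS dgemm while avoiding the copy dgemm would otherwise make of C-ordered operands: a C-contiguous array's transpose is already Fortran-contiguous, so the code passes the transpose together with a trans_a/trans_b flag. The scipy.linalg.fblas name used in the diff is the older SciPy spelling; the sketch below shows the same trick through scipy.linalg.blas, which is an assumption about the reader's SciPy version rather than part of this commit.

    import numpy as np
    from scipy.linalg import blas            # exposed as scipy.linalg.fblas in older SciPy

    def matrix_mult(A, B):
        """Compute A.dot(B) with dgemm, handing BLAS Fortran-ordered views."""
        # For a C-ordered operand, pass its (F-ordered) transpose and set the
        # corresponding trans flag so dgemm still multiplies A by B.
        a, trans_a = (A, False) if A.flags['F_CONTIGUOUS'] else (A.T, True)
        b, trans_b = (B, False) if B.flags['F_CONTIGUOUS'] else (B.T, True)
        return blas.dgemm(alpha=1.0, a=a, b=b, trans_a=trans_a, trans_b=trans_b)

    A = np.random.randn(400, 300)
    B = np.random.randn(300, 200)
    assert np.allclose(matrix_mult(A, B), np.dot(A, B))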
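
For reference, the quantity evaluated over the heritability grid in fit and refined by the Brent search is the profiled log-likelihood computed in LL above. Writing \lambda_i for the eigenvalues of K, S = \mathrm{diag}\big(1/(h\lambda_i + 1 - h)\big), Q = (Y_t - X\beta)^{\top} S (Y_t - X\beta), q for the number of columns of X and \hat\sigma^2 = Q/(n - q) (the code's sigma), the code computes

    \ell(h) = -\tfrac{1}{2}\Big[\, n\log 2\pi \;+\; \sum_{i=1}^{n}\log\big(h\lambda_i + 1 - h\big) \;+\; n \;+\; n\log\tfrac{Q}{n} \Big]

and, when REML=True, adds the restricted-likelihood correction

    \ell_{\mathrm{REML}}(h) = \ell(h) + \tfrac{1}{2}\Big[\, q\log\big(2\pi\,\hat\sigma^{2}\big) + \log\det\big(X^{\top}X\big) - \log\det\big(X^{\top}S X\big) \Big].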