about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py18
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py51
2 files changed, 35 insertions, 34 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 23cec6d0..40ebc546 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -465,7 +465,7 @@ class MarkerRegression(object):
     def gen_data(self):
         """Todo: Fill this in here"""
 
-        print("something")
+        print("Session UUID: ", self.start_vars[session_uuid])
 
         genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
 
@@ -473,13 +473,10 @@ class MarkerRegression(object):
         trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
 
         pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
-        genotypes = np.array(trimmed_genotype_data).T
+        genotype_matrix = np.array(trimmed_genotype_data).T
 
-        #times = collections.OrderedDict()
-        #times['start'] = time.time()
-        
         with Bench("Calculate Kinship"):
-            kinship_matrix = lmm.calculateKinship(genotypes)
+            kinship_matrix = lmm.calculate_kinship(genotype_matrix)
         
         with Bench("Create LMM object"):
             lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
@@ -490,19 +487,12 @@ class MarkerRegression(object):
         
         with Bench("Doing gwas"):
             t_stats, p_values = lmm.GWAS(pheno_vector,
-                                         genotypes,
+                                         genotype_matrix,
                                          kinship_matrix,
                                          REML=True,
                                          refit=False)
             
         Bench().report()
-        
-        #previous_time = None
-        #for operation, this_time in times.iteritems():
-        #    if previous_time:
-        #        print("{} run time: {}".format(operation, this_time-previous_time))
-        #        #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime))
-        #    previous_time = this_time
 
         self.dataset.group.markers.add_pvalues(p_values)
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 33f6573e..374452f0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -53,34 +53,45 @@ def matrixMult(A,B):
 
     return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
 
-def calculateKinship(W):
+
+def calculate_kinship(genotype_matrix):
     """
-       W is an n x m matrix encoding SNP minor alleles.
+    genotype_matrix is an n x m matrix encoding SNP minor alleles.
+    
+    This function takes a matrix oF SNPs, imputes missing values with the maf,
+    normalizes the resulting vectors and returns the RRM matrix.
     
-       This function takes a matrix oF SNPs, imputes missing values with the maf,
-       normalizes the resulting vectors and returns the RRM matrix.
     """
-    n = W.shape[0]
-    m = W.shape[1]
+    n = genotype_matrix.shape[0]
+    m = genotype_matrix.shape[1]
     print("n is:", n)
     print("m is:", m)
     keep = []
-    for i in range(m):
-        print("type of W[:,i]:", pf(W[:,i]))
-        foo = np.isnan(W[:,i])
-        print("type of foo:", type(foo))
-        mn = W[True - foo,i]
-        print("type of mn is:", type(mn))
-        mn = mn.mean()
-        W[np.isnan(W[:,i]),i] = mn
-        vr = W[:,i].var()
+    for counter in range(m):
+        print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
+        #Checks if any values in column are not numbers
+        not_number = np.isnan(genotype_matrix[:,counter])
+        print("type of not_number:", type(not_number))
+        
+        #Gets vector of values for column (no values in vector if not all values in col are numbers)
+        marker_values = genotype_matrix[True - not_number, counter]
+        print("type of marker_values is:", type(marker_values))
+        
+        #Gets mean of values in vector
+        values_mean = marker_values.mean()
+
+        genotype_matrix[not_number,counter] = values_mean
+        vr = genotype_matrix[:,counter].var()
         if vr == 0:
             continue
-        keep.append(i)
-        W[:,i] = (W[:,i] - mn) / np.sqrt(vr)
-    W = W[:,keep]
-    K = np.dot(W,W.T) * 1.0/float(m)
-    return K
+        keep.append(counter)
+        genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
+        
+        stage = round((counter/m)*45)
+        print("Percent complete: ", stage)
+    genotype_matrix = genotype_matrix[:,keep]
+    kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+    return kinship_matrix
 
 def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
       """