Made some changes related to getting marker regression working for some datasets
author: Zachary Sloan 2013-08-14 17:39:28 -0500
committer: Zachary Sloan 2013-08-14 17:39:28 -0500
commit: 11c426d50ac6718e16981be603527bd569f9d11a (patch)
tree: f7a54d937ba20e678226028ddc9e79138a6768c5 /wqflask
parent: 6379959af53b2ec595b85ccdc099c6f14adf0381 (diff)
download: genenetwork2-11c426d50ac6718e16981be603527bd569f9d11a.tar.gz
4 files changed, 29 insertions, 9 deletions
diff --git a/wqflask/maintenance/correlation_matrix_test.py b/wqflask/maintenance/correlation_matrix_test.py
index 2983a76b..5f8f10e1 100644
--- a/wqflask/maintenance/correlation_matrix_test.py
+++ b/wqflask/maintenance/correlation_matrix_test.py
@@ -48,7 +48,7 @@ Sleep a bunch because this can take a while
 
 Ensure that the correlation between Trait3 (HC_M2_0606_P::1457003_at) and Trait4 (HC_M2_0606_P::1422223_at) is 0.608
 >>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/blockquote/table/tbody/tr[5]/td[5]/a/font''')
-text: 0.608
+text: 0.608\n71
 
 """
 
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index b926592b..4a647959 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -1,14 +1,24 @@
 #!/usr/bin/python
 
+"""
+Convert .geno files to json
+
+This file goes through all of the genofiles in the genofile directory (.geno)
+and converts them to json files that are used when running the marker regression
+code
+
+"""
+
 from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
 import glob
 import traceback
+import gzip
 
-import numpy as np
-from pyLMM import lmm
+#import numpy as np
+#from pyLMM import lmm
 
 import simplejson as json
 
@@ -54,7 +64,11 @@ class ConvertGenoFile(object):
         self.configurations = {}
         #self.skipped_cols = 3
         
-        self.input_fh = open(self.input_file)
+        if self.input_file.endswith(".geno.gz"):
+            print("self.input_file: ", self.input_file)
+            self.input_fh = gzip.open(self.input_file)
+        else:
+            self.input_fh = open(self.input_file)
         
         with open(self.output_file, "w") as self.output_fh:
             #if self.file_type == "geno":
@@ -111,6 +125,8 @@ class ConvertGenoFile(object):
 
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
+            if self.input_file.endswith(".geno.gz"):
+                print("row: ", row)
             self.latest_row_value = row
             # Take care of headers
             if not row.strip():
@@ -135,7 +151,9 @@ class ConvertGenoFile(object):
     @classmethod
     def process_all(cls, old_directory, new_directory):
         os.chdir(old_directory)
-        for input_file in glob.glob("*.geno"):
+        for input_file in glob.glob("*"):
+            if not input_file.endswith(('geno', '.geno.gz')):
+                continue
             group_name = input_file.split('.')[0]
             output_file = os.path.join(new_directory, group_name + ".json")
             print("%s -> %s" % (
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9a795c66..3743e77c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    #print("n is:", n)
-    #print("m is:", m)
+    print("n is:", n)
+    print("m is:", m)
     keep = []
     for counter in range(m):
         #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
@@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data):
         
         #Gets vector of values for column (no values in vector if not all values in col are numbers)
         marker_values = genotype_matrix[True - not_number, counter]
-        #print("type of marker_values is:", type(marker_values))
+        #print("marker_values is:", pf(marker_values))
         
         #Gets mean of values in vector
         values_mean = marker_values.mean()
@@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data):
         temp_data.store("percent_complete", percent_complete)
         
     genotype_matrix = genotype_matrix[:,keep]
-    kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+    print("genotype_matrix: ", pf(genotype_matrix))
+    kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
     return kinship_matrix
 
 def GWAS(pheno_vector,
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index b0c96c56..7a3ae1e5 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -1204,6 +1204,7 @@ class ShowTrait(object):
                 other_sample_names.append(sample)
 
         if other_sample_names:
+            parent_f1_samples = None
             if self.dataset.group.parlist and self.dataset.group.f1list:
                 parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list
author	Zachary Sloan	2013-08-14 17:39:28 -0500
committer	Zachary Sloan	2013-08-14 17:39:28 -0500
commit	11c426d50ac6718e16981be603527bd569f9d11a (patch)
tree	f7a54d937ba20e678226028ddc9e79138a6768c5 /wqflask
parent	6379959af53b2ec595b85ccdc099c6f14adf0381 (diff)
download	genenetwork2-11c426d50ac6718e16981be603527bd569f9d11a.tar.gz