Merge https://github.com/zsloan/genenetwork

author: Zachary Sloan 2013-08-16 16:26:06 -0500
committer: Zachary Sloan 2013-08-16 16:26:06 -0500
commit: 99a0faffb1a42a379b72a4572088c1ad588fca3e (patch)
tree: 7dde335c6d6fa35eb63922a21224d94a1feb5eae /wqflask
parent: ac4ba08c41de06f7aa91696b3643df3ff66aea46 (diff)
parent: 63a6ab2f565611bbe1464d718acff4398de12a19 (diff)
download: genenetwork2-99a0faffb1a42a379b72a4572088c1ad588fca3e.tar.gz
4 files changed, 37 insertions, 15 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 091433a6..0b9b1ce0 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -168,11 +168,13 @@ class Markers(object):
         
         for marker, p_value in itertools.izip(self.markers, p_values):
             marker['p_value'] = p_value
-            if math.isnan(marker['p_value']):
-                print("p_value is:", marker['p_value'])
-            marker['lod_score'] = -math.log10(marker['p_value'])
-            #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
-            marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+            if marker['p_value'] == 0:
+                marker['lod_score'] = 0
+                marker['lrs_value'] = 0
+            else:
+                marker['lod_score'] = -math.log10(marker['p_value'])
+                #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+                marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
         
         
 
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index b926592b..4a647959 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -1,14 +1,24 @@
 #!/usr/bin/python
 
+"""
+Convert .geno files to JSON.
+
+This script goes through all of the genotype files (.geno or .geno.gz) in the
+genofile directory and converts them to JSON files that are used when running
+the marker regression code.
+
+"""
+
 from __future__ import print_function, division, absolute_import
 import sys
 sys.path.append("..")
 import os
 import glob
 import traceback
+import gzip
 
-import numpy as np
-from pyLMM import lmm
+#import numpy as np
+#from pyLMM import lmm
 
 import simplejson as json
 
@@ -54,7 +64,11 @@ class ConvertGenoFile(object):
         self.configurations = {}
         #self.skipped_cols = 3
         
-        self.input_fh = open(self.input_file)
+        if self.input_file.endswith(".geno.gz"):
+            print("self.input_file: ", self.input_file)
+            self.input_fh = gzip.open(self.input_file)
+        else:
+            self.input_fh = open(self.input_file)
         
         with open(self.output_file, "w") as self.output_fh:
             #if self.file_type == "geno":
@@ -111,6 +125,8 @@ class ConvertGenoFile(object):
 
     def process_rows(self):
         for self.latest_row_pos, row in enumerate(self.input_fh):
+            if self.input_file.endswith(".geno.gz"):
+                print("row: ", row)
             self.latest_row_value = row
             # Take care of headers
             if not row.strip():
@@ -135,7 +151,9 @@ class ConvertGenoFile(object):
     @classmethod
     def process_all(cls, old_directory, new_directory):
         os.chdir(old_directory)
-        for input_file in glob.glob("*.geno"):
+        for input_file in glob.glob("*"):
+            if not input_file.endswith(('geno', '.geno.gz')):
+                continue
             group_name = input_file.split('.')[0]
             output_file = os.path.join(new_directory, group_name + ".json")
             print("%s -> %s" % (
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9a795c66..6ef1669b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    #print("n is:", n)
-    #print("m is:", m)
+    print("n is:", n)
+    print("m is:", m)
     keep = []
     for counter in range(m):
         #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
@@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data):
         
         #Gets vector of values for column (no values in vector if not all values in col are numbers)
         marker_values = genotype_matrix[True - not_number, counter]
-        #print("type of marker_values is:", type(marker_values))
+        #print("marker_values is:", pf(marker_values))
         
         #Gets mean of values in vector
         values_mean = marker_values.mean()
@@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data):
         temp_data.store("percent_complete", percent_complete)
         
     genotype_matrix = genotype_matrix[:,keep]
-    kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+    print("genotype_matrix: ", pf(genotype_matrix))
+    kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
     return kinship_matrix
 
 def GWAS(pheno_vector,
@@ -395,7 +396,7 @@ def GWAS(pheno_vector,
             keep = True - v
             xs = x[keep,:]
             if xs.var() == 0:
-                p_values.append(np.nan)
+                p_values.append(0)
                 t_statistics.append(np.nan)
                 continue
 
@@ -412,7 +413,7 @@ def GWAS(pheno_vector,
             ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood)
         else:
             if x.var() == 0:
-                p_values.append(np.nan)
+                p_values.append(0)
                 t_statistics.append(np.nan)
                 continue
 
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index b0c96c56..7a3ae1e5 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -1204,6 +1204,7 @@ class ShowTrait(object):
                 other_sample_names.append(sample)
 
         if other_sample_names:
+            parent_f1_samples = None
             if self.dataset.group.parlist and self.dataset.group.f1list:
                 parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list
author	Zachary Sloan	2013-08-16 16:26:06 -0500
committer	Zachary Sloan	2013-08-16 16:26:06 -0500
commit	99a0faffb1a42a379b72a4572088c1ad588fca3e (patch)
tree	7dde335c6d6fa35eb63922a21224d94a1feb5eae /wqflask
parent	ac4ba08c41de06f7aa91696b3643df3ff66aea46 (diff)
parent	63a6ab2f565611bbe1464d718acff4398de12a19 (diff)
download	genenetwork2-99a0faffb1a42a379b72a4572088c1ad588fca3e.tar.gz