about | summary | refs | log | tree | commit | diff
path: root/wqflask
diff options
context:
space:
mode:
author Zachary Sloan 2013-08-16 16:26:06 -0500
committer Zachary Sloan 2013-08-16 16:26:06 -0500
commit 99a0faffb1a42a379b72a4572088c1ad588fca3e (patch)
tree 7dde335c6d6fa35eb63922a21224d94a1feb5eae /wqflask
parent ac4ba08c41de06f7aa91696b3643df3ff66aea46 (diff)
parent 63a6ab2f565611bbe1464d718acff4398de12a19 (diff)
download genenetwork2-99a0faffb1a42a379b72a4572088c1ad588fca3e.tar.gz
Merge https://github.com/zsloan/genenetwork
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 12
-rw-r--r-- wqflask/wqflask/my_pylmm/data/genofile_parser.py 26
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py 13
-rwxr-xr-x wqflask/wqflask/show_trait/show_trait.py 1
4 files changed, 37 insertions, 15 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 091433a6..0b9b1ce0 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -168,11 +168,13 @@ class Markers(object):
for marker, p_value in itertools.izip(self.markers, p_values):
marker['p_value'] = p_value
- if math.isnan(marker['p_value']):
- print("p_value is:", marker['p_value'])
- marker['lod_score'] = -math.log10(marker['p_value'])
- #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
- marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ if marker['p_value'] == 0:
+ marker['lod_score'] = 0
+ marker['lrs_value'] = 0
+ else:
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index b926592b..4a647959 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -1,14 +1,24 @@
#!/usr/bin/python
+"""
+Convert .geno files to json
+
+This file goes through all of the genofiles in the genofile directory (.geno)
+and converts them to json files that are used when running the marker regression
+code
+
+"""
+
from __future__ import print_function, division, absolute_import
import sys
sys.path.append("..")
import os
import glob
import traceback
+import gzip
-import numpy as np
-from pyLMM import lmm
+#import numpy as np
+#from pyLMM import lmm
import simplejson as json
@@ -54,7 +64,11 @@ class ConvertGenoFile(object):
self.configurations = {}
#self.skipped_cols = 3
- self.input_fh = open(self.input_file)
+ if self.input_file.endswith(".geno.gz"):
+ print("self.input_file: ", self.input_file)
+ self.input_fh = gzip.open(self.input_file)
+ else:
+ self.input_fh = open(self.input_file)
with open(self.output_file, "w") as self.output_fh:
#if self.file_type == "geno":
@@ -111,6 +125,8 @@ class ConvertGenoFile(object):
def process_rows(self):
for self.latest_row_pos, row in enumerate(self.input_fh):
+ if self.input_file.endswith(".geno.gz"):
+ print("row: ", row)
self.latest_row_value = row
# Take care of headers
if not row.strip():
@@ -135,7 +151,9 @@ class ConvertGenoFile(object):
@classmethod
def process_all(cls, old_directory, new_directory):
os.chdir(old_directory)
- for input_file in glob.glob("*.geno"):
+ for input_file in glob.glob("*"):
+ if not input_file.endswith(('geno', '.geno.gz')):
+ continue
group_name = input_file.split('.')[0]
output_file = os.path.join(new_directory, group_name + ".json")
print("%s -> %s" % (
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9a795c66..6ef1669b 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data):
"""
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- #print("n is:", n)
- #print("m is:", m)
+ print("n is:", n)
+ print("m is:", m)
keep = []
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
@@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data):
#Gets vector of values for column (no values in vector if not all values in col are numbers)
marker_values = genotype_matrix[True - not_number, counter]
- #print("type of marker_values is:", type(marker_values))
+ #print("marker_values is:", pf(marker_values))
#Gets mean of values in vector
values_mean = marker_values.mean()
@@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data):
temp_data.store("percent_complete", percent_complete)
genotype_matrix = genotype_matrix[:,keep]
- kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+ print("genotype_matrix: ", pf(genotype_matrix))
+ kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix
def GWAS(pheno_vector,
@@ -395,7 +396,7 @@ def GWAS(pheno_vector,
keep = True - v
xs = x[keep,:]
if xs.var() == 0:
- p_values.append(np.nan)
+ p_values.append(0)
t_statistics.append(np.nan)
continue
@@ -412,7 +413,7 @@ def GWAS(pheno_vector,
ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood)
else:
if x.var() == 0:
- p_values.append(np.nan)
+ p_values.append(0)
t_statistics.append(np.nan)
continue
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index b0c96c56..7a3ae1e5 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -1204,6 +1204,7 @@ class ShowTrait(object):
other_sample_names.append(sample)
if other_sample_names:
+ parent_f1_samples = None
if self.dataset.group.parlist and self.dataset.group.f1list:
parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list