about | summary | refs | log | tree | commit | diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rw-r--r-- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 26
-rw-r--r-- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9
-rwxr-xr-x wqflask/wqflask/show_trait/show_trait.py | 1
3 files changed, 28 insertions, 8 deletions
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index b926592b..4a647959 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -1,14 +1,24 @@
#!/usr/bin/python
+"""
+Convert .geno files to json
+
+This file goes through all of the genofiles in the genofile directory (.geno)
+and converts them to json files that are used when running the marker regression
+code
+
+"""
+
from __future__ import print_function, division, absolute_import
import sys
sys.path.append("..")
import os
import glob
import traceback
+import gzip
-import numpy as np
-from pyLMM import lmm
+#import numpy as np
+#from pyLMM import lmm
import simplejson as json
@@ -54,7 +64,11 @@ class ConvertGenoFile(object):
self.configurations = {}
#self.skipped_cols = 3
- self.input_fh = open(self.input_file)
+ if self.input_file.endswith(".geno.gz"):
+ print("self.input_file: ", self.input_file)
+ self.input_fh = gzip.open(self.input_file)
+ else:
+ self.input_fh = open(self.input_file)
with open(self.output_file, "w") as self.output_fh:
#if self.file_type == "geno":
@@ -111,6 +125,8 @@ class ConvertGenoFile(object):
def process_rows(self):
for self.latest_row_pos, row in enumerate(self.input_fh):
+ if self.input_file.endswith(".geno.gz"):
+ print("row: ", row)
self.latest_row_value = row
# Take care of headers
if not row.strip():
@@ -135,7 +151,9 @@ class ConvertGenoFile(object):
@classmethod
def process_all(cls, old_directory, new_directory):
os.chdir(old_directory)
- for input_file in glob.glob("*.geno"):
+ for input_file in glob.glob("*"):
+ if not input_file.endswith(('geno', '.geno.gz')):
+ continue
group_name = input_file.split('.')[0]
output_file = os.path.join(new_directory, group_name + ".json")
print("%s -> %s" % (
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 9a795c66..3743e77c 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data):
"""
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- #print("n is:", n)
- #print("m is:", m)
+ print("n is:", n)
+ print("m is:", m)
keep = []
for counter in range(m):
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
@@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data):
#Gets vector of values for column (no values in vector if not all values in col are numbers)
marker_values = genotype_matrix[True - not_number, counter]
- #print("type of marker_values is:", type(marker_values))
+ #print("marker_values is:", pf(marker_values))
#Gets mean of values in vector
values_mean = marker_values.mean()
@@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data):
temp_data.store("percent_complete", percent_complete)
genotype_matrix = genotype_matrix[:,keep]
- kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+ print("genotype_matrix: ", pf(genotype_matrix))
+ kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m)
return kinship_matrix
def GWAS(pheno_vector,
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index b0c96c56..7a3ae1e5 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -1204,6 +1204,7 @@ class ShowTrait(object):
other_sample_names.append(sample)
if other_sample_names:
+ parent_f1_samples = None
if self.dataset.group.parlist and self.dataset.group.f1list:
parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list