diff options
| field | value | date |
|---|---|---|
| author | Zachary Sloan | 2013-08-14 17:39:28 -0500 |
| committer | Zachary Sloan | 2013-08-14 17:39:28 -0500 |
| commit | 11c426d50ac6718e16981be603527bd569f9d11a (patch) | |
| tree | f7a54d937ba20e678226028ddc9e79138a6768c5 /wqflask | |
| parent | 6379959af53b2ec595b85ccdc099c6f14adf0381 (diff) | |
| download | genenetwork2-11c426d50ac6718e16981be603527bd569f9d11a.tar.gz | |
Made some changes related to getting marker regression working for some datasets.
Diffstat (limited to 'wqflask')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | wqflask/maintenance/correlation_matrix_test.py | 2 |
| -rw-r--r-- | wqflask/wqflask/my_pylmm/data/genofile_parser.py | 26 |
| -rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9 |
| -rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 1 |

4 files changed, 29 insertions, 9 deletions
diff --git a/wqflask/maintenance/correlation_matrix_test.py b/wqflask/maintenance/correlation_matrix_test.py index 2983a76b..5f8f10e1 100644 --- a/wqflask/maintenance/correlation_matrix_test.py +++ b/wqflask/maintenance/correlation_matrix_test.py @@ -48,7 +48,7 @@ Sleep a bunch because this can take a while Ensure that the correlation between Trait3 (HC_M2_0606_P::1457003_at) and Trait4 (HC_M2_0606_P::1422223_at) is 0.608 >>> test.get_text('''/html/body/table/tbody/tr[3]/td/table/tbody/tr/td/form/blockquote/table/tbody/tr[5]/td[5]/a/font''') -text: 0.608 +text: 0.608\n71 """ diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index b926592b..4a647959 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -1,14 +1,24 @@ #!/usr/bin/python +""" +Convert .geno files to json + +This file goes through all of the genofiles in the genofile directory (.geno) +and converts them to json files that are used when running the marker regression +code + +""" + from __future__ import print_function, division, absolute_import import sys sys.path.append("..") import os import glob import traceback +import gzip -import numpy as np -from pyLMM import lmm +#import numpy as np +#from pyLMM import lmm import simplejson as json @@ -54,7 +64,11 @@ class ConvertGenoFile(object): self.configurations = {} #self.skipped_cols = 3 - self.input_fh = open(self.input_file) + if self.input_file.endswith(".geno.gz"): + print("self.input_file: ", self.input_file) + self.input_fh = gzip.open(self.input_file) + else: + self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: #if self.file_type == "geno": @@ -111,6 +125,8 @@ class ConvertGenoFile(object): def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): + if self.input_file.endswith(".geno.gz"): + print("row: ", row) self.latest_row_value = row # Take care of headers 
if not row.strip(): @@ -135,7 +151,9 @@ class ConvertGenoFile(object): @classmethod def process_all(cls, old_directory, new_directory): os.chdir(old_directory) - for input_file in glob.glob("*.geno"): + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue group_name = input_file.split('.')[0] output_file = os.path.join(new_directory, group_name + ".json") print("%s -> %s" % ( diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 9a795c66..3743e77c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data): """ n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - #print("n is:", n) - #print("m is:", m) + print("n is:", n) + print("m is:", m) keep = [] for counter in range(m): #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) @@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data): #Gets vector of values for column (no values in vector if not all values in col are numbers) marker_values = genotype_matrix[True - not_number, counter] - #print("type of marker_values is:", type(marker_values)) + #print("marker_values is:", pf(marker_values)) #Gets mean of values in vector values_mean = marker_values.mean() @@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data): temp_data.store("percent_complete", percent_complete) genotype_matrix = genotype_matrix[:,keep] - kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m) + print("genotype_matrix: ", pf(genotype_matrix)) + kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix def GWAS(pheno_vector, diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index b0c96c56..7a3ae1e5 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py 
@@ -1204,6 +1204,7 @@ class ShowTrait(object): other_sample_names.append(sample) if other_sample_names: + parent_f1_samples = None if self.dataset.group.parlist and self.dataset.group.f1list: parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list |