diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 12 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/data/genofile_parser.py | 26 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 13 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 1 |
4 files changed, 37 insertions, 15 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 091433a6..0b9b1ce0 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -168,11 +168,13 @@ class Markers(object): for marker, p_value in itertools.izip(self.markers, p_values): marker['p_value'] = p_value - if math.isnan(marker['p_value']): - print("p_value is:", marker['p_value']) - marker['lod_score'] = -math.log10(marker['p_value']) - #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + if marker['p_value'] == 0: + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index b926592b..4a647959 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -1,14 +1,24 @@ #!/usr/bin/python +""" +Convert .geno files to json + +This file goes through all of the genofiles in the genofile directory (.geno) +and converts them to json files that are used when running the marker regression +code + +""" + from __future__ import print_function, division, absolute_import import sys sys.path.append("..") import os import glob import traceback +import gzip -import numpy as np -from pyLMM import lmm +#import numpy as np +#from pyLMM import lmm import simplejson as json @@ -54,7 +64,11 @@ class ConvertGenoFile(object): self.configurations = {} #self.skipped_cols = 3 - self.input_fh = open(self.input_file) + if self.input_file.endswith(".geno.gz"): + print("self.input_file: ", self.input_file) + self.input_fh = gzip.open(self.input_file) + else: + self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: #if self.file_type == "geno": @@ -111,6 +125,8 @@ class ConvertGenoFile(object): def process_rows(self): for self.latest_row_pos, row in enumerate(self.input_fh): + if self.input_file.endswith(".geno.gz"): + print("row: ", row) self.latest_row_value = row # Take care of headers if not row.strip(): @@ -135,7 +151,9 @@ class ConvertGenoFile(object): @classmethod def process_all(cls, old_directory, new_directory): os.chdir(old_directory) - for input_file in glob.glob("*.geno"): + for input_file in glob.glob("*"): + if not input_file.endswith(('geno', '.geno.gz')): + continue group_name = input_file.split('.')[0] output_file = os.path.join(new_directory, group_name + ".json") print("%s -> %s" % ( diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 9a795c66..6ef1669b 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -299,8 +299,8 @@ def calculate_kinship(genotype_matrix, temp_data): """ n = genotype_matrix.shape[0] m = genotype_matrix.shape[1] - #print("n is:", n) - #print("m is:", m) + print("n is:", n) + print("m is:", m) keep = [] for counter in range(m): #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) @@ -309,7 +309,7 @@ def calculate_kinship(genotype_matrix, temp_data): #Gets vector of values for column (no values in vector if not all values in col are numbers) marker_values = genotype_matrix[True - not_number, counter] - #print("type of marker_values is:", type(marker_values)) + #print("marker_values is:", pf(marker_values)) #Gets mean of values in vector values_mean = marker_values.mean() @@ -325,7 +325,8 @@ def calculate_kinship(genotype_matrix, temp_data): temp_data.store("percent_complete", percent_complete) genotype_matrix = genotype_matrix[:,keep] - kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m) + print("genotype_matrix: ", pf(genotype_matrix)) + kinship_matrix = np.dot(genotype_matrix, genotype_matrix.T) * 1.0/float(m) return kinship_matrix def GWAS(pheno_vector, @@ -395,7 +396,7 @@ def GWAS(pheno_vector, keep = True - v xs = x[keep,:] if xs.var() == 0: - p_values.append(np.nan) + p_values.append(0) t_statistics.append(np.nan) continue @@ -412,7 +413,7 @@ def GWAS(pheno_vector, ts, ps, beta, betaVar = lmm_ob_2.association(xs, REML=restricted_max_likelihood) else: if x.var() == 0: - p_values.append(np.nan) + p_values.append(0) t_statistics.append(np.nan) continue diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index b0c96c56..7a3ae1e5 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -1204,6 +1204,7 @@ class ShowTrait(object): other_sample_names.append(sample) if other_sample_names: + parent_f1_samples = None if self.dataset.group.parlist and self.dataset.group.f1list: parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list |