From b3853925653cf6145d7fb56b71edfc824a2d051a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 12 Feb 2013 16:20:56 -0600 Subject: Edited marker_regression.py and data_set.py to store the p-values and their corresponding markers to be used in the table of qtl results and other figures --- wqflask/base/data_set.py | 29 +++++++- .../wqflask/marker_regression/marker_regression.py | 80 +++++++++++++--------- wqflask/wqflask/my_pylmm/data/genofile_parser.py | 15 +++- 3 files changed, 89 insertions(+), 35 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 8ced1528..182e15e6 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,8 @@ from __future__ import absolute_import, print_function, division import os +import json + from flask import Flask, g from htmlgen import HTMLgen2 as HT @@ -64,6 +66,21 @@ def create_dataset(dataset_name): return dataset_class(dataset_name) +class Markers(object): + """Todo: Build in cacheing so it saves us reading the same file more than once""" + def __init__(self, name): + json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) + self.markers = json.load(json_data) + + def add_pvalues(p_values): + #for count, marker in enumerate(self.markers): + # marker['p_value'] = p_values[count] + + for marker, p_value in itertools.izip(self.markers, p_values): + marker['p_value'] = p_value + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. @@ -84,6 +101,7 @@ class DatasetGroup(object): self.f1list = None self.parlist = None self.allsamples = None + self.markers = Markers(self.name) #def read_genotype(self): @@ -91,9 +109,16 @@ class DatasetGroup(object): # # if not self.genotype: # Didn'd succeed, so we try method 2 # self.read_genotype_data() - + + #def read_genotype_json(self): + # '''Read genotype from json file''' + # + # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json')) + # markers = json.load(json_data) + # + def read_genotype_file(self): - '''read genotype from .geno file instead of database''' + '''Read genotype from .geno file instead of database''' #if self.group == 'BXD300': # self.group = 'BXD' # diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 13ec4280..1d005df4 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -454,10 +454,11 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - - json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) - markers = json.load(json_data) - genotype_data = [marker['genotypes'] for marker in markers] + + #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) + #markers = json.load(json_data) + + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers] no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) @@ -466,7 +467,6 @@ class MarkerRegression(object): #for marker_object in genotype_data: # print("marker_object:", pf(marker_object)) - #prep_data.PrepData(self.vals, genotype_data) @@ -492,40 +492,60 @@ class MarkerRegression(object): refit=False) print("p_values is:", pf(len(p_values))) + + self.dataset.group.markers.add_pvalues(p_values) #calculate QTL for each trait - self.qtl_results = self.genotype.regression(strains = self.samples, - trait = self.vals) - self.lrs_array = self.genotype.permutation(strains = self.samples, - trait = self.vals, - nperm=self.num_perm) + #self.qtl_results = self.genotype.regression(strains = self.samples, + # trait = self.vals) + #self.lrs_array = self.genotype.permutation(strains = self.samples, + # trait = self.vals, + # nperm=self.num_perm) + + self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers] self.lrs_thresholds = Bunch( - suggestive = self.lrs_array[int(self.num_perm*0.37-1)], - significant = self.lrs_array[int(self.num_perm*0.95-1)], - highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + suggestive = self.lrs_values[int(self.num_perm*0.37-1)], + significant = self.lrs_values[int(self.num_perm*0.95-1)], + highly_significant = self.lrs_values[int(self.num_perm*0.99-1)] ) + #self.lrs_thresholds = Bunch( + # suggestive = self.lrs_array[int(self.num_perm*0.37-1)], + # significant = self.lrs_array[int(self.num_perm*0.95-1)], + # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] + # ) + if self.display_all_lrs: - filtered_results = self.qtl_results + self.filtered_results = self.dataset.group.markers.markers else: - suggestive_results = [] + self.filtered_results = [] self.pure_qtl_results = [] - for result in self.qtl_results: - self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, - mb=result.locus.Mb, - chromosome=result.locus.chr), - lrs=result.lrs, - additive=result.additive)) - if result.lrs > self.lrs_thresholds.suggestive: - suggestive_results.append(result) - filtered_results = suggestive_results + for marker in self.dataset.group.markers.markers: + self.pure_qtl_results.append(marker) + if marker['lrs_value'] > self.lrs_thresholds.suggestive: + self.filtered_results.append(marker) + + #if self.display_all_lrs: + # filtered_results = self.qtl_results + #else: + # suggestive_results = [] + # self.pure_qtl_results = [] + # for result in self.qtl_results: + # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, + # mb=result.locus.Mb, + # chromosome=result.locus.chr), + # lrs=result.lrs, + # additive=result.additive)) + # if result.lrs > self.lrs_thresholds.suggestive: + # suggestive_results.append(result) + # filtered_results = suggestive_results # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used - if not filtered_results: + if not self.filtered_results: # We use the 10 results with the highest LRS values - filtered_results = sorted(self.qtl_results)[-10:] + self.filtered_results = sorted(self.qtl_results)[-10:] self.top_10 = True else: self.top_10 = False @@ -567,11 +587,9 @@ class MarkerRegression(object): #permutation = HT.TableLite() #permutation.append(HT.TR(HT.TD(img))) - for marker in filtered_results: - if marker.lrs > webqtlConfig.MAXLRS: - marker.lrs = webqtlConfig.MAXLRS - - self.filtered_results = filtered_results + for marker in self.filtered_results: + if marker['lrs_value'] > webqtlConfig.MAXLRS: + marker['lrs_value'] = webqtlConfig.MAXLRS #if fd.genotype.type == 'intercross': # ncol =len(headerList) diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py index ec8c521c..8c74fe74 100644 --- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py +++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py @@ -28,10 +28,11 @@ class Marker(object): class ConvertGenoFile(object): - def __init__(self, input_file, output_file): + def __init__(self, input_file, output_file, file_type): self.input_file = input_file self.output_file = output_file + self.file_type = file_type self.mb_exists = False self.markers = [] @@ -57,7 +58,10 @@ class ConvertGenoFile(object): self.input_fh = open(self.input_file) with open(self.output_file, "w") as self.output_fh: - self.process_csv() + if self.file_type == "geno": + self.process_csv() + elif self.file_type == "snps": + self.process_snps_file() #def process_row(self, row): @@ -66,6 +70,7 @@ class ConvertGenoFile(object): # if char # counter += 1 + def process_csv(self): for row_count, row in enumerate(self.process_rows()): #self.latest_row_pos = row_count @@ -146,6 +151,12 @@ class ConvertGenoFile(object): print(" Column is:", convertob.latest_col_value) print(" Row is:", convertob.latest_row_value) break + + def process_snps_file(cls, snps_file, new_directory): + output_file = os.path.join(new_directory, "mouse_families.json") + print("%s -> %s" % (snps_file, output_file)) + convertob = ConvertGenoFile(input_file, output_file) + if __name__=="__main__": -- cgit v1.2.3