aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xwqflask/base/data_set.py29
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py80
-rw-r--r--wqflask/wqflask/my_pylmm/data/genofile_parser.py15
3 files changed, 89 insertions, 35 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 8ced1528..182e15e6 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -23,6 +23,8 @@
from __future__ import absolute_import, print_function, division
import os
+import json
+
from flask import Flask, g
from htmlgen import HTMLgen2 as HT
@@ -64,6 +66,21 @@ def create_dataset(dataset_name):
return dataset_class(dataset_name)
+class Markers(object):
+ """Todo: Build in cacheing so it saves us reading the same file more than once"""
+ def __init__(self, name):
+ json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
+ self.markers = json.load(json_data)
+
+ def add_pvalues(p_values):
+ #for count, marker in enumerate(self.markers):
+ # marker['p_value'] = p_values[count]
+
+ for marker, p_value in itertools.izip(self.markers, p_values):
+ marker['p_value'] = p_value
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
class DatasetGroup(object):
"""
Each group has multiple datasets; each species has multiple groups.
@@ -84,6 +101,7 @@ class DatasetGroup(object):
self.f1list = None
self.parlist = None
self.allsamples = None
+ self.markers = Markers(self.name)
#def read_genotype(self):
@@ -91,9 +109,16 @@ class DatasetGroup(object):
#
# if not self.genotype: # Didn'd succeed, so we try method 2
# self.read_genotype_data()
-
+
+ #def read_genotype_json(self):
+ # '''Read genotype from json file'''
+ #
+ # json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json'))
+ # markers = json.load(json_data)
+ #
+
def read_genotype_file(self):
- '''read genotype from .geno file instead of database'''
+ '''Read genotype from .geno file instead of database'''
#if self.group == 'BXD300':
# self.group = 'BXD'
#
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 13ec4280..1d005df4 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -454,10 +454,11 @@ class MarkerRegression(object):
def gen_data(self):
"""Todo: Fill this in here"""
-
- json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
- markers = json.load(json_data)
- genotype_data = [marker['genotypes'] for marker in markers]
+
+ #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
+ #markers = json.load(json_data)
+
+ genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers]
no_val_samples = self.identify_empty_samples()
trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
@@ -466,7 +467,6 @@ class MarkerRegression(object):
#for marker_object in genotype_data:
# print("marker_object:", pf(marker_object))
-
#prep_data.PrepData(self.vals, genotype_data)
@@ -492,40 +492,60 @@ class MarkerRegression(object):
refit=False)
print("p_values is:", pf(len(p_values)))
+
+ self.dataset.group.markers.add_pvalues(p_values)
#calculate QTL for each trait
- self.qtl_results = self.genotype.regression(strains = self.samples,
- trait = self.vals)
- self.lrs_array = self.genotype.permutation(strains = self.samples,
- trait = self.vals,
- nperm=self.num_perm)
+ #self.qtl_results = self.genotype.regression(strains = self.samples,
+ # trait = self.vals)
+ #self.lrs_array = self.genotype.permutation(strains = self.samples,
+ # trait = self.vals,
+ # nperm=self.num_perm)
+
+ self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers]
self.lrs_thresholds = Bunch(
- suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
- significant = self.lrs_array[int(self.num_perm*0.95-1)],
- highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
+ suggestive = self.lrs_values[int(self.num_perm*0.37-1)],
+ significant = self.lrs_values[int(self.num_perm*0.95-1)],
+ highly_significant = self.lrs_values[int(self.num_perm*0.99-1)]
)
+ #self.lrs_thresholds = Bunch(
+ # suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
+ # significant = self.lrs_array[int(self.num_perm*0.95-1)],
+ # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
+ # )
+
if self.display_all_lrs:
- filtered_results = self.qtl_results
+ self.filtered_results = self.dataset.group.markers.markers
else:
- suggestive_results = []
+ self.filtered_results = []
self.pure_qtl_results = []
- for result in self.qtl_results:
- self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
- mb=result.locus.Mb,
- chromosome=result.locus.chr),
- lrs=result.lrs,
- additive=result.additive))
- if result.lrs > self.lrs_thresholds.suggestive:
- suggestive_results.append(result)
- filtered_results = suggestive_results
+ for marker in self.dataset.group.markers.markers:
+ self.pure_qtl_results.append(marker)
+ if marker['lrs_value'] > self.lrs_thresholds.suggestive:
+ self.filtered_results.append(marker)
+
+ #if self.display_all_lrs:
+ # filtered_results = self.qtl_results
+ #else:
+ # suggestive_results = []
+ # self.pure_qtl_results = []
+ # for result in self.qtl_results:
+ # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
+ # mb=result.locus.Mb,
+ # chromosome=result.locus.chr),
+ # lrs=result.lrs,
+ # additive=result.additive))
+ # if result.lrs > self.lrs_thresholds.suggestive:
+ # suggestive_results.append(result)
+ # filtered_results = suggestive_results
# Todo (2013): Use top_10 variable to generate page message about whether top 10 was used
- if not filtered_results:
+ if not self.filtered_results:
# We use the 10 results with the highest LRS values
- filtered_results = sorted(self.qtl_results)[-10:]
+ self.filtered_results = sorted(self.qtl_results)[-10:]
self.top_10 = True
else:
self.top_10 = False
@@ -567,11 +587,9 @@ class MarkerRegression(object):
#permutation = HT.TableLite()
#permutation.append(HT.TR(HT.TD(img)))
- for marker in filtered_results:
- if marker.lrs > webqtlConfig.MAXLRS:
- marker.lrs = webqtlConfig.MAXLRS
-
- self.filtered_results = filtered_results
+ for marker in self.filtered_results:
+ if marker['lrs_value'] > webqtlConfig.MAXLRS:
+ marker['lrs_value'] = webqtlConfig.MAXLRS
#if fd.genotype.type == 'intercross':
# ncol =len(headerList)
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index ec8c521c..8c74fe74 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -28,10 +28,11 @@ class Marker(object):
class ConvertGenoFile(object):
- def __init__(self, input_file, output_file):
+ def __init__(self, input_file, output_file, file_type):
self.input_file = input_file
self.output_file = output_file
+ self.file_type = file_type
self.mb_exists = False
self.markers = []
@@ -57,7 +58,10 @@ class ConvertGenoFile(object):
self.input_fh = open(self.input_file)
with open(self.output_file, "w") as self.output_fh:
- self.process_csv()
+ if self.file_type == "geno":
+ self.process_csv()
+ elif self.file_type == "snps":
+ self.process_snps_file()
#def process_row(self, row):
@@ -66,6 +70,7 @@ class ConvertGenoFile(object):
# if char
# counter += 1
+
def process_csv(self):
for row_count, row in enumerate(self.process_rows()):
#self.latest_row_pos = row_count
@@ -146,6 +151,12 @@ class ConvertGenoFile(object):
print(" Column is:", convertob.latest_col_value)
print(" Row is:", convertob.latest_row_value)
break
+
+ def process_snps_file(cls, snps_file, new_directory):
+ output_file = os.path.join(new_directory, "mouse_families.json")
+ print("%s -> %s" % (snps_file, output_file))
+ convertob = ConvertGenoFile(input_file, output_file)
+
if __name__=="__main__":