aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2018-04-27 16:07:17 +0000
committerzsloan2018-04-27 16:07:17 +0000
commit1995c6018f5d5d93b97f85b908cb67112d3f01b6 (patch)
treef4ca3491a1524bae782e9db73d7b3366a94d2047
parent36ceda09d76c898c5f818236122f5d431261a5b1 (diff)
downloadgenenetwork2-1995c6018f5d5d93b97f85b908cb67112d3f01b6.tar.gz
Added marker filtering for data set groups with larger genotype files
Fixed issue where GEMMA + LOCO wasn't working with AIL (and probably some other groups)
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py21
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression.py55
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression_gn1.py4
3 files changed, 62 insertions, 18 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index 0e31e73e..5242ec05 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -171,9 +171,7 @@ def parse_gemma_output(genofile_name):
included_markers = []
p_values = []
marker_obs = []
- previous_chr = 0
- #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file:
with open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)) as output_file:
for line in output_file:
if line.startswith("chr"):
@@ -185,14 +183,8 @@ def parse_gemma_output(genofile_name):
marker['chr'] = int(line.split("\t")[0])
else:
marker['chr'] = line.split("\t")[0]
- # try:
- # marker['chr'] = int(line.split("\t")[0])
- # except:
- # marker['chr'] = previous_chr + 1
- # if marker['chr'] != previous_chr:
- # previous_chr = marker['chr']
marker['Mb'] = float(line.split("\t")[2]) / 1000000
- marker['p_value'] = float(line.split("\t")[11])
+ marker['p_value'] = float(line.split("\t")[9])
if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
marker['lod_score'] = 0
#marker['lrs_value'] = 0
@@ -202,7 +194,7 @@ def parse_gemma_output(genofile_name):
marker_obs.append(marker)
included_markers.append(line.split("\t")[1])
- p_values.append(float(line.split("\t")[11]))
+ p_values.append(float(line.split("\t")[9]))
return marker_obs
@@ -222,7 +214,6 @@ def parse_loco_output(this_dataset, gwa_output_filename):
previous_chr = 0
for this_file in output_filelist:
- #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file:
with open(this_file) as output_file:
for line in output_file:
if line.startswith("chr"):
@@ -232,10 +223,14 @@ def parse_loco_output(this_dataset, gwa_output_filename):
marker['name'] = line.split("\t")[1]
if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y":
marker['chr'] = int(line.split("\t")[0])
+ if marker['chr'] > previous_chr:
+ previous_chr = marker['chr']
+ elif marker['chr'] < previous_chr:
+ break
else:
marker['chr'] = line.split("\t")[0]
marker['Mb'] = float(line.split("\t")[2]) / 1000000
- marker['p_value'] = float(line.split("\t")[11])
+ marker['p_value'] = float(line.split("\t")[9])
if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
marker['lod_score'] = 0
#marker['lrs_value'] = 0
@@ -245,6 +240,6 @@ def parse_loco_output(this_dataset, gwa_output_filename):
marker_obs.append(marker)
included_markers.append(line.split("\t")[1])
- p_values.append(float(line.split("\t")[11]))
+ p_values.append(float(line.split("\t")[9]))
return marker_obs \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 50e673c4..a3631462 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -285,9 +285,15 @@ class MarkerRegression(object):
if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
self.qtl_results.append(marker)
- export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type)
+ with Bench("Exporting Results"):
+ export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type)
- self.trimmed_markers = trim_markers_for_table(results)
+ with Bench("Trimming Markers for Figure"):
+ if len(self.qtl_results) > 20000:
+ self.qtl_results = trim_markers_for_figure(self.qtl_results)
+
+ with Bench("Trimming Markers for Table"):
+ self.trimmed_markers = trim_markers_for_table(results)
if self.mapping_method != "gemma":
self.json_data['chr'] = []
@@ -321,8 +327,6 @@ class MarkerRegression(object):
self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
- # logger.debug("json_data:", self.json_data)
-
self.js_data = dict(
result_score_type = self.score_type,
json_data = self.json_data,
@@ -599,6 +603,49 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale,
if i < (len(markers) - 1):
output_file.write("\n")
+def trim_markers_for_figure(markers):
+ if 'lod_score' in markers[0].keys():
+ score_type = 'lod_score'
+ else:
+ score_type = 'lrs_value'
+
+ filtered_markers = []
+ low_counter = 0
+ med_counter = 0
+ high_counter = 0
+ for marker in markers:
+ if score_type == 'lod_score':
+ if marker[score_type] < 1:
+ if low_counter % 20 == 0:
+ filtered_markers.append(marker)
+ low_counter += 1
+ elif 1 <= marker[score_type] < 2:
+ if med_counter % 10 == 0:
+ filtered_markers.append(marker)
+ med_counter += 1
+ elif 2 <= marker[score_type] <= 3:
+ if high_counter % 2 == 0:
+ filtered_markers.append(marker)
+ high_counter += 1
+ else:
+ filtered_markers.append(marker)
+ else:
+ if marker[score_type] < 4.16:
+ if low_counter % 20 == 0:
+ filtered_markers.append(marker)
+ low_counter += 1
+ elif 4.16 <= marker[score_type] < (2*4.16):
+ if med_counter % 10 == 0:
+ filtered_markers.append(marker)
+ med_counter += 1
+ elif (2*4.16) <= marker[score_type] <= (3*4.16):
+ if high_counter % 2 == 0:
+ filtered_markers.append(marker)
+ high_counter += 1
+ else:
+ filtered_markers.append(marker)
+ return filtered_markers
+
def trim_markers_for_table(markers):
if 'lod_score' in markers[0].keys():
sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True)
diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
index 8e9c5b4c..40402597 100644
--- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py
+++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
@@ -41,6 +41,7 @@ from base.GeneralObject import GeneralObject
from utility import webqtlUtil
from utility import helper_functions
from utility import Plot
+from utility.benchmark import Bench
from wqflask.interval_analyst import GeneUtil
from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR, GENERATED_IMAGE_DIR
@@ -427,7 +428,8 @@ class MarkerRegression(object):
################################################################
showLocusForm = ""
intCanvas = pid.PILCanvas(size=(self.graphWidth, self.graphHeight))
- gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm)
+ with Bench("Drawing Plot"):
+ gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm)
self.gifmap = gifmap.__str__()