diff options
author | zsloan | 2018-04-27 16:07:17 +0000 |
---|---|---|
committer | zsloan | 2018-04-27 16:07:17 +0000 |
commit | 1995c6018f5d5d93b97f85b908cb67112d3f01b6 (patch) | |
tree | f4ca3491a1524bae782e9db73d7b3366a94d2047 /wqflask | |
parent | 36ceda09d76c898c5f818236122f5d431261a5b1 (diff) | |
download | genenetwork2-1995c6018f5d5d93b97f85b908cb67112d3f01b6.tar.gz |
Added marker filtering for data set groups with larger genotype files
Fixed issue where GEMMA + LOCO wasn't working with AIL (and probably some other groups)
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/marker_regression/gemma_mapping.py | 21 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/marker_regression.py | 55 | ||||
-rw-r--r-- | wqflask/wqflask/marker_regression/marker_regression_gn1.py | 4 |
3 files changed, 62 insertions, 18 deletions
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index 0e31e73e..5242ec05 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -171,9 +171,7 @@ def parse_gemma_output(genofile_name): included_markers = [] p_values = [] marker_obs = [] - previous_chr = 0 - #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file: with open("{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)) as output_file: for line in output_file: if line.startswith("chr"): @@ -185,14 +183,8 @@ def parse_gemma_output(genofile_name): marker['chr'] = int(line.split("\t")[0]) else: marker['chr'] = line.split("\t")[0] - # try: - # marker['chr'] = int(line.split("\t")[0]) - # except: - # marker['chr'] = previous_chr + 1 - # if marker['chr'] != previous_chr: - # previous_chr = marker['chr'] marker['Mb'] = float(line.split("\t")[2]) / 1000000 - marker['p_value'] = float(line.split("\t")[11]) + marker['p_value'] = float(line.split("\t")[9]) if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): marker['lod_score'] = 0 #marker['lrs_value'] = 0 @@ -202,7 +194,7 @@ def parse_gemma_output(genofile_name): marker_obs.append(marker) included_markers.append(line.split("\t")[1]) - p_values.append(float(line.split("\t")[11])) + p_values.append(float(line.split("\t")[9])) return marker_obs @@ -222,7 +214,6 @@ def parse_loco_output(this_dataset, gwa_output_filename): previous_chr = 0 for this_file in output_filelist: - #with open("/home/zas1024/gene/wqflask/output/{}_output.assoc.txt".format(this_dataset.group.name)) as output_file: with open(this_file) as output_file: for line in output_file: if line.startswith("chr"): @@ -232,10 +223,14 @@ def parse_loco_output(this_dataset, gwa_output_filename): marker['name'] = line.split("\t")[1] if line.split("\t")[0] != "X" and line.split("\t")[0] != "X/Y": marker['chr'] = int(line.split("\t")[0]) + if marker['chr'] > previous_chr: + previous_chr = marker['chr'] + elif marker['chr'] < previous_chr: + break else: marker['chr'] = line.split("\t")[0] marker['Mb'] = float(line.split("\t")[2]) / 1000000 - marker['p_value'] = float(line.split("\t")[11]) + marker['p_value'] = float(line.split("\t")[9]) if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): marker['lod_score'] = 0 #marker['lrs_value'] = 0 @@ -245,6 +240,6 @@ def parse_loco_output(this_dataset, gwa_output_filename): marker_obs.append(marker) included_markers.append(line.split("\t")[1]) - p_values.append(float(line.split("\t")[11])) + p_values.append(float(line.split("\t")[9])) return marker_obs
\ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 50e673c4..a3631462 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -285,9 +285,15 @@ class MarkerRegression(object): if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()): self.qtl_results.append(marker) - export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type) + with Bench("Exporting Results"): + export_mapping_results(self.dataset, self.this_trait, self.qtl_results, self.mapping_results_path, self.mapping_scale, self.score_type) - self.trimmed_markers = trim_markers_for_table(results) + with Bench("Trimming Markers for Figure"): + if len(self.qtl_results) > 20000: + self.qtl_results = trim_markers_for_figure(self.qtl_results) + + with Bench("Trimming Markers for Table"): + self.trimmed_markers = trim_markers_for_table(results) if self.mapping_method != "gemma": self.json_data['chr'] = [] @@ -321,8 +327,6 @@ class MarkerRegression(object): self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length]) chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length - # logger.debug("json_data:", self.json_data) - self.js_data = dict( result_score_type = self.score_type, json_data = self.json_data, @@ -599,6 +603,49 @@ def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, if i < (len(markers) - 1): output_file.write("\n") +def trim_markers_for_figure(markers): + if 'lod_score' in markers[0].keys(): + score_type = 'lod_score' + else: + score_type = 'lrs_value' + + filtered_markers = [] + low_counter = 0 + med_counter = 0 + high_counter = 0 + for marker in markers: + if score_type == 'lod_score': + if marker[score_type] < 1: + if low_counter % 20 == 0: + filtered_markers.append(marker) + low_counter += 1 + elif 1 <= marker[score_type] < 2: + if med_counter % 10 == 0: + filtered_markers.append(marker) + med_counter += 1 + elif 2 <= marker[score_type] <= 3: + if high_counter % 2 == 0: + filtered_markers.append(marker) + high_counter += 1 + else: + filtered_markers.append(marker) + else: + if marker[score_type] < 4.16: + if low_counter % 20 == 0: + filtered_markers.append(marker) + low_counter += 1 + elif 4.16 <= marker[score_type] < (2*4.16): + if med_counter % 10 == 0: + filtered_markers.append(marker) + med_counter += 1 + elif (2*4.16) <= marker[score_type] <= (3*4.16): + if high_counter % 2 == 0: + filtered_markers.append(marker) + high_counter += 1 + else: + filtered_markers.append(marker) + return filtered_markers + def trim_markers_for_table(markers): if 'lod_score' in markers[0].keys(): sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True) diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py index 8e9c5b4c..40402597 100644 --- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py +++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py @@ -41,6 +41,7 @@ from base.GeneralObject import GeneralObject from utility import webqtlUtil from utility import helper_functions from utility import Plot +from utility.benchmark import Bench from wqflask.interval_analyst import GeneUtil from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR, GENERATED_IMAGE_DIR @@ -427,7 +428,8 @@ class MarkerRegression(object): ################################################################ showLocusForm = "" intCanvas = pid.PILCanvas(size=(self.graphWidth, self.graphHeight)) - gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) + with Bench("Drawing Plot"): + gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) self.gifmap = gifmap.__str__() |