From 857d6d36a7e97b398c007083b7c5c05a44430bcb Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 21 Mar 2016 17:18:57 +0000 Subject: Changed code parsing marker csv file to increase GEMMA speed Currently commenting out the vector map/table for GEMMA since they cause the page to hang, I'll try to figure out a way to prevent this --- .../wqflask/marker_regression/marker_regression.py | 61 ++++++++++++++++++++-- .../wqflask/templates/marker_regression_gn1.html | 20 +++---- wqflask/wqflask/views.py | 14 ++--- 3 files changed, 69 insertions(+), 26 deletions(-) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 37f9351d..b75a30f6 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -18,6 +18,7 @@ import numpy as np from scipy import linalg import cPickle as pickle +import itertools import simplejson as json @@ -90,10 +91,14 @@ class MarkerRegression(object): self.dataset.group.get_markers() if self.mapping_method == "gemma": self.score_type = "LOD" - included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals) - self.dataset.group.get_specified_markers(markers = included_markers) - self.dataset.group.markers.add_pvalues(p_values) - results = self.dataset.group.markers.markers + with Bench("Running GEMMA"): + included_markers, p_values = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals) + with Bench("Getting markers from csv"): + marker_obs = get_markers_from_csv(included_markers, p_values, self.dataset.group.name) + results = marker_obs + #self.dataset.group.get_specified_markers(markers = included_markers) + #self.dataset.group.markers.add_pvalues(p_values) + #results = self.dataset.group.markers.markers elif self.mapping_method == "rqtl_plink": results = self.run_rqtl_plink() elif self.mapping_method == "rqtl_geno": @@ -183,7 +188,7 @@ class MarkerRegression(object): #if index<40: # print("lod score is:", qtl['lod_score']) if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y": - print("changing to X") + #print("changing to X") self.json_data['chr'].append("X") else: self.json_data['chr'].append(str(qtl['chr'])) @@ -1006,6 +1011,52 @@ def create_snp_iterator_file(group): with gzip.open(snp_file_base, "wb") as fh: pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL) +def get_markers_from_csv(included_markers, p_values, group_name): + marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + group_name + '_markers.csv')) + markers = [] + for marker_name, p_value in itertools.izip(included_markers, p_values): + if not p_value or len(included_markers) < 1: + continue + for line in marker_data_fh: + splat = line.strip().split() + if splat[0] == marker_name: + marker = {} + marker['name'] = splat[0] + marker['chr'] = int(splat[1]) + marker['Mb'] = float(splat[2]) + marker['p_value'] = p_value + if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + markers.append(marker) + break + +# for line, p_value in itertools.izip(marker_data_fh, p_values): +# if not p_value or len(included_markers) < 1: +# continue +# splat = line.strip().split() +# if splat[0] in included_markers: +# marker = {} +# marker['name'] = splat[0] +# marker['chr'] = int(splat[1]) +# marker['Mb'] = float(splat[2]) +# marker['p_value'] = p_value +# if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): +# marker['lod_score'] = 0 +# marker['lrs_value'] = 0 +# else: +# marker['lod_score'] = -math.log10(marker['p_value']) +# marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 +# markers.append(marker) +# else: +# continue + + return markers + + #if __name__ == '__main__': # import cPickle as pickle # import gzip diff --git a/wqflask/wqflask/templates/marker_regression_gn1.html b/wqflask/wqflask/templates/marker_regression_gn1.html index 4eb5862a..58478758 100644 --- a/wqflask/wqflask/templates/marker_regression_gn1.html +++ b/wqflask/wqflask/templates/marker_regression_gn1.html @@ -15,7 +15,6 @@ - {% if mapping_method != "gemma" %} {% for sample in dataset.group.samplelist %} {% endfor %} @@ -29,7 +28,6 @@ - {% endif %}
@@ -42,7 +40,6 @@ Location: Chr {{ this_trait.chr }} @ {{ this_trait.mb }} Mb {% endif %}
- {% if mapping_method != "gemma" %}
@@ -69,40 +66,37 @@ - {% endif %} -
- {% if mapping_method != "gemma" %}
{{ gifmap|safe }}
- {% endif %} + {% if mapping_method != "gemma" %}
+ {% endif %}
- - {% if mapping_method != "gemma" %} + {% if mapping_method != "gemma" %}

Results @@ -182,11 +176,13 @@ js_data = {{ js_data | safe }} + {% if mapping_method != "gemma" %} - + {% endif %} +