From 8aeff9b91d078a40a50d13f6393a1f1dabf62aa4 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Fri, 18 Jan 2013 16:58:28 -0600
Subject: Renamed CorrelationPage.py to show_corr_results.py
Worked with correlation code; got to the code that
begins to do the actual correlations
Created a function "get_dataset_and_trait" in
the new file "helper_functions.py" because the
code initializing the dataset and trait objects
was repeated in multiple places
---
wqflask/utility/helper_functions.py | 15 +++++++++++++++
1 file changed, 15 insertions(+)
create mode 100644 wqflask/utility/helper_functions.py
(limited to 'wqflask/utility')
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
new file mode 100644
index 00000000..920d9ac6
--- /dev/null
+++ b/wqflask/utility/helper_functions.py
@@ -0,0 +1,15 @@
+from __future__ import absolute_import, print_function, division
+
+from base.trait import GeneralTrait
+from base import data_set
+
+def get_dataset_and_trait(self, start_vars):
+ #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
+ self.dataset = data_set.create_dataset(start_vars['dataset'])
+ self.this_trait = GeneralTrait(dataset=self.dataset.name,
+ name=start_vars['trait_id'],
+ cellid=None)
+
+ #if read_genotype:
+ self.dataset.group.read_genotype_file()
+ self.genotype = self.dataset.group.genotype
\ No newline at end of file
--
cgit v1.2.3
From 01283a27bf9cc78653059236fa55d6063558ab21 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Tue, 22 Jan 2013 17:24:26 -0600
Subject: Pushed through a few errors in getting the correlation page running
---
wqflask/utility/helper_functions.py | 2 +-
wqflask/wqflask/correlation/show_corr_results.py | 25 ++++++++++++------------
2 files changed, 13 insertions(+), 14 deletions(-)
(limited to 'wqflask/utility')
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 920d9ac6..9ecad993 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -5,7 +5,7 @@ from base import data_set
def get_dataset_and_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
- self.dataset = data_set.create_dataset(start_vars['dataset'])
+ self.dataset = data_set.create_dataset(start_vars['dataset_name'])
self.this_trait = GeneralTrait(dataset=self.dataset.name,
name=start_vars['trait_id'],
cellid=None)
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 23dd1534..b82f1c59 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -374,7 +374,7 @@ class CorrelationResults(object):
#XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)'
self.tissue_probeset_freeze_id = 1
- traitList = self.correlate(self.vals)
+ traitList = self.correlate()
_log.info("Done doing correlation calculation")
@@ -823,18 +823,17 @@ Resorting this table
"""Returns the name of the reference database file with which correlations are calculated.
Takes argument cursor which is a cursor object of any instance of a subclass of templatePage
Used by correlationPage"""
-ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name
- self.cursor.execute(query)
- result = self.cursor.fetchone()
- Id = result[0]
- FullName = result[1]
- FullName = FullName.replace(' ','_')
- FullName = FullName.replace('/','_')
- FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt'
+ trait_id, full_name = g.db.execute("""SELECT Id, FullName
+ FROM ProbeSetFreeze
+ WHERE Name = '%s'""" % target_db_name).fetchone()
+ for char in [' ', '/']:
+ full_name = full_name.replace(char, '_')
+
+ file_name = 'ProbeSetFreezeId_' + str(trait_id) + '_FullName_' + full_name + '.txt'
+
+ return file_name
- return FileName
- query = 'SELECT Id, FullName F
#XZ, 01/29/2009: I modified this function.
@@ -1262,7 +1261,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name
#_log.info("Using the slow method for correlation")
#
#_log.info("Fetching from database")
- traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
+ traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id)
#_log.info("Done fetching from database")
totalTraits = len(traits) #XZ, 09/18/2008: total trait number
@@ -1339,7 +1338,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name
#cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR)
# If the cache file exists, do a cached correlation for probeset data
- if self.db.type == "ProbeSet":
+ if self.dataset.type == "ProbeSet":
# if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available:
# traits = do_parallel_correlation()
#
--
cgit v1.2.3
From b82eb4a59edb4e1d8bbf3588edeb13e38ead052e Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 31 Jan 2013 15:08:01 -0600
Subject: Changed reference to 'dataset_name' in keywords to 'dataset' to get
page to work, but will change all references to the dataset name to
'dataset_name' in future to avoid confusion between the dataset name and the
actual dataset object
---
wqflask/utility/helper_functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'wqflask/utility')
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 9ecad993..920d9ac6 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -5,7 +5,7 @@ from base import data_set
def get_dataset_and_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
- self.dataset = data_set.create_dataset(start_vars['dataset_name'])
+ self.dataset = data_set.create_dataset(start_vars['dataset'])
self.this_trait = GeneralTrait(dataset=self.dataset.name,
name=start_vars['trait_id'],
cellid=None)
--
cgit v1.2.3
From b4d6873f8d6327ae7cd64c80f31348728322f719 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Tue, 19 Feb 2013 15:09:38 -0600
Subject: Made lines for manhattan plot proportional to chromosome lengths
---
wqflask/utility/helper_functions.py | 6 ++-
.../wqflask/marker_regression/marker_regression.py | 35 +++++++------
wqflask/wqflask/show_trait/show_trait.py | 2 +-
.../static/new/javascript/marker_regression.coffee | 57 +++++++++++++++-------
.../static/new/javascript/marker_regression.js | 43 +++++++++++-----
wqflask/wqflask/views.py | 2 +-
6 files changed, 97 insertions(+), 48 deletions(-)
(limited to 'wqflask/utility')
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 920d9ac6..d2567b63 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -2,14 +2,16 @@ from __future__ import absolute_import, print_function, division
from base.trait import GeneralTrait
from base import data_set
+from base.species import TheSpecies
-def get_dataset_and_trait(self, start_vars):
+def get_species_dataset_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
self.dataset = data_set.create_dataset(start_vars['dataset'])
+ self.species = TheSpecies(dataset=self.dataset)
self.this_trait = GeneralTrait(dataset=self.dataset.name,
name=start_vars['trait_id'],
cellid=None)
#if read_genotype:
self.dataset.group.read_genotype_file()
- self.genotype = self.dataset.group.genotype
\ No newline at end of file
+ self.genotype = self.dataset.group.genotype
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 81bf3825..50739614 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -24,6 +24,7 @@ from utility import Plot, Bunch
from wqflask.interval_analyst import GeneUtil
from base.trait import GeneralTrait
from base import data_set
+from base import species
from base.templatePage import templatePage
from utility import webqtlUtil, helper_functions
from base import webqtlConfig
@@ -60,7 +61,7 @@ class MarkerRegression(object):
#print("start_vars are: ", pf(start_vars))
- helper_functions.get_dataset_and_trait(self, start_vars)
+ helper_functions.get_species_dataset_trait(self, start_vars)
self.num_perm = int(start_vars['num_perm'])
@@ -306,7 +307,14 @@ class MarkerRegression(object):
# end: common part with human data
+ chromosome_mb_lengths = {}
+ for key in self.species.chromosomes.chromosomes.keys():
+ chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length
+
+ print("chromosomes is:", pf(chromosome_mb_lengths))
+
self.js_data = dict(
+ chromosomes = chromosome_mb_lengths,
qtl_results = self.pure_qtl_results,
lrs_values = self.lrs_values,
)
@@ -476,25 +484,24 @@ class MarkerRegression(object):
#prep_data.PrepData(self.vals, genotype_data)
pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
- print("genotypes was:", pf(trimmed_genotype_data))
- for item in trimmed_genotype_data:
- if type(item) != type(list()):
- print(" --->", type(item))
- for counter, part in enumerate(item):
- if type(part) != type(float()):
- print(" ------>", type(part), " : ", part)
- if counter % 100 == 0:
- print(" ------>", type(part))
+ #for item in trimmed_genotype_data:
+ # if type(item) != type(list()):
+ # print(" --->", type(item))
+ # for counter, part in enumerate(item):
+ # if type(part) != type(float()):
+ # print(" ------>", type(part), " : ", part)
+ # if counter % 100 == 0:
+ # print(" ------>", type(part))
genotypes = np.array(trimmed_genotype_data).T
- print("genotypes is:", pf(genotypes))
+ #print("genotypes is:", pf(genotypes))
#genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
# self.dataset.group.name + '.snps.new')).T
- print("pheno_vector is:", pf(pheno_vector.shape))
- print("genotypes is:", pf(genotypes.shape))
+ #print("pheno_vector is:", pf(pheno_vector.shape))
+ #print("genotypes is:", pf(genotypes.shape))
kinship_matrix = lmm.calculateKinship(genotypes)
- print("kinship_matrix is:", pf(kinship_matrix))
+ #print("kinship_matrix is:", pf(kinship_matrix))
lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
lmm_ob.fit()
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 33ea6e86..720515c1 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -38,7 +38,7 @@ class ShowTrait(object):
print("in ShowTrait, kw are:", kw)
self.trait_id = kw['trait_id']
- helper_functions.get_dataset_and_trait(self, kw)
+ helper_functions.get_species_dataset_trait(self, kw)
#self.dataset = create_dataset(kw['dataset'])
#
diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee
index 41632723..6bdb3ac6 100644
--- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee
+++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee
@@ -4,7 +4,9 @@ $ ->
constructor: ->
@qtl_results = js_data.qtl_results
+ @chromosomes = js_data.chromosomes
@max_chr = @get_max_chr()
+ @cumulative_chr_lengths = @get_cumulative_chr_lengths()
@plot_height = 500
@plot_width = 1000
@@ -31,9 +33,22 @@ $ ->
max_chr = chr
return max_chr
+ get_cumulative_chr_lengths: () ->
+ cumulative_chr_lengths = []
+ total_length = 0
+ for key of @chromosomes
+ this_length = @chromosomes[key]
+ cumulative_chr_lengths.push(total_length + this_length)
+ total_length += this_length
+ console.log("lengths:", cumulative_chr_lengths)
+ return cumulative_chr_lengths
+
get_coordinates: () ->
+ total_length = 0
+ chr_lengths = []
for result in js_data.qtl_results
+ chr_length = @chromosomes[result.chr]
chr = parseInt(result.chr)
if _.isNaN(chr)
if result.chr == "X"
@@ -41,9 +56,17 @@ $ ->
else if result.chr == "Y"
chr = @max_chr + 2
- @x_coords.push(((chr-1) * 200) + parseFloat(result.Mb))
+ @x_coords.push(total_length + parseFloat(result.Mb))
@y_coords.push(result.lrs_value)
@marker_names.push(result.name)
+
+ if chr_length in chr_lengths
+ continue
+ else
+ chr_lengths.push(chr_length)
+ total_length += chr_length
+
+ console.log("chr_lengths are:", @chr_lengths)
display_info: (d) ->
@@ -56,20 +79,20 @@ $ ->
.attr("width", @plot_width)
.attr("height", @plot_height)
- svg.selectAll("text")
- .data(@plot_coordinates)
- .enter()
- .append("text")
- .attr("x", (d) =>
- return (@plot_width * d[0]/@x_max)
- )
- .attr("y", (d) =>
- return @plot_height - ((0.8*@plot_height) * d[1]/@y_max)
- )
- .text((d) => d[2])
- .attr("font-family", "sans-serif")
- .attr("font-size", "12px")
- .attr("fill", "black");
+ #svg.selectAll("text")
+ # .data(@plot_coordinates)
+ # .enter()
+ # .append("text")
+ # .attr("x", (d) =>
+ # return (@plot_width * d[0]/@x_max)
+ # )
+ # .attr("y", (d) =>
+ # return @plot_height - ((0.8*@plot_height) * d[1]/@y_max)
+ # )
+ # .text((d) => d[2])
+ # .attr("font-family", "sans-serif")
+ # .attr("font-size", "12px")
+ # .attr("fill", "black");
svg.selectAll("circle")
.data(@plot_coordinates)
@@ -116,7 +139,7 @@ $ ->
)
.on("mouseout", () =>
d3.select(d3.event.target).classed("d3_highlight", false)
- .attr("r", 2)
+ .attr("r", 2)
.attr("fill", "black")
)
.attr("title", "foobar")
@@ -132,7 +155,7 @@ $ ->
.range([0, @plot_height])
svg.selectAll("line")
- .data(x.ticks(@max_chr))
+ .data(@cumulative_chr_lengths)
.enter()
.append("line")
.attr("x1", x)
diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js
index 68e04034..19fec21e 100644
--- a/wqflask/wqflask/static/new/javascript/marker_regression.js
+++ b/wqflask/wqflask/static/new/javascript/marker_regression.js
@@ -1,5 +1,6 @@
// Generated by CoffeeScript 1.3.3
(function() {
+ var __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
$(function() {
var Manhattan_Plot;
@@ -7,7 +8,9 @@
function Manhattan_Plot() {
this.qtl_results = js_data.qtl_results;
+ this.chromosomes = js_data.chromosomes;
this.max_chr = this.get_max_chr();
+ this.cumulative_chr_lengths = this.get_cumulative_chr_lengths();
this.plot_height = 500;
this.plot_width = 1000;
this.x_coords = [];
@@ -37,12 +40,27 @@
return max_chr;
};
+ Manhattan_Plot.prototype.get_cumulative_chr_lengths = function() {
+ var cumulative_chr_lengths, key, this_length, total_length;
+ cumulative_chr_lengths = [];
+ total_length = 0;
+ for (key in this.chromosomes) {
+ this_length = this.chromosomes[key];
+ cumulative_chr_lengths.push(total_length + this_length);
+ total_length += this_length;
+ }
+ console.log("lengths:", cumulative_chr_lengths);
+ return cumulative_chr_lengths;
+ };
+
Manhattan_Plot.prototype.get_coordinates = function() {
- var chr, result, _i, _len, _ref, _results;
+ var chr, chr_length, chr_lengths, result, total_length, _i, _len, _ref;
+ total_length = 0;
+ chr_lengths = [];
_ref = js_data.qtl_results;
- _results = [];
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
result = _ref[_i];
+ chr_length = this.chromosomes[result.chr];
chr = parseInt(result.chr);
if (_.isNaN(chr)) {
if (result.chr === "X") {
@@ -51,11 +69,17 @@
chr = this.max_chr + 2;
}
}
- this.x_coords.push(((chr - 1) * 200) + parseFloat(result.Mb));
+ this.x_coords.push(total_length + parseFloat(result.Mb));
this.y_coords.push(result.lrs_value);
- _results.push(this.marker_names.push(result.name));
+ this.marker_names.push(result.name);
+ if (__indexOf.call(chr_lengths, chr_length) >= 0) {
+ continue;
+ } else {
+ chr_lengths.push(chr_length);
+ total_length += chr_length;
+ }
}
- return _results;
+ return console.log("chr_lengths are:", this.chr_lengths);
};
Manhattan_Plot.prototype.display_info = function(d) {
@@ -66,13 +90,6 @@
var svg, x, y,
_this = this;
svg = d3.select("#manhattan_plots").append("svg").style('border', '2px solid black').attr("width", this.plot_width).attr("height", this.plot_height);
- svg.selectAll("text").data(this.plot_coordinates).enter().append("text").attr("x", function(d) {
- return _this.plot_width * d[0] / _this.x_max;
- }).attr("y", function(d) {
- return _this.plot_height - ((0.8 * _this.plot_height) * d[1] / _this.y_max);
- }).text(function(d) {
- return d[2];
- }).attr("font-family", "sans-serif").attr("font-size", "12px").attr("fill", "black");
svg.selectAll("circle").data(this.plot_coordinates).enter().append("circle").attr("cx", function(d) {
return _this.plot_width * d[0] / _this.x_max;
}).attr("cy", function(d) {
@@ -84,7 +101,7 @@
}).attr("title", "foobar");
x = d3.scale.linear().domain([0, this.x_max]).range([0, this.plot_width]);
y = d3.scale.linear().domain([0, this.y_max]).range([0, this.plot_height]);
- return svg.selectAll("line").data(x.ticks(this.max_chr)).enter().append("line").attr("x1", x).attr("x2", x).attr("y1", 0).attr("y2", this.plot_height).style("stroke", "#ccc");
+ return svg.selectAll("line").data(this.cumulative_chr_lengths).enter().append("line").attr("x1", x).attr("x2", x).attr("y1", 0).attr("y2", this.plot_height).style("stroke", "#ccc");
};
return Manhattan_Plot;
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 81777742..5f2df8a4 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -154,7 +154,7 @@ def marker_regression_page():
template_vars.js_data = json.dumps(template_vars.js_data,
default=json_default_handler,
indent=" ")
- print("[dub] js_data after dump:", template_vars.js_data)
+ #print("[dub] js_data after dump:", template_vars.js_data)
#print("marker_regression template_vars:", pf(template_vars.__dict__))
return render_template("marker_regression.html", **template_vars.__dict__)
--
cgit v1.2.3
From fa4c4f116c37285d24edc9532bb223e18dfb1584 Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Thu, 28 Feb 2013 23:37:59 +0000
Subject: Added benchmark.py that compares the time of various operations and
gives a report with the percent of total computation time each takes
---
wqflask/utility/benchmark.py | 42 +++++
.../wqflask/marker_regression/marker_regression.py | 169 +++++----------------
wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +-
wqflask/wqflask/show_trait/show_trait.py | 6 +-
wqflask/wqflask/templates/show_trait.html | 4 +-
5 files changed, 86 insertions(+), 137 deletions(-)
create mode 100644 wqflask/utility/benchmark.py
(limited to 'wqflask/utility')
diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py
new file mode 100644
index 00000000..0a6e422c
--- /dev/null
+++ b/wqflask/utility/benchmark.py
@@ -0,0 +1,42 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+
+class Bench(object):
+ entries = collections.OrderedDict()
+ def __init__(self, name=None):
+ self.name = name
+
+ def __enter__(self):
+ if self.name:
+ print("Starting benchmark: %s" % (self.name))
+ else:
+ print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+ self.start_time = time.time()
+
+ def __exit__(self, type, value, traceback):
+ if self.name:
+ name = self.name
+ else:
+ name = "That"
+
+ time_took = time.time() - self.start_time
+ print(" %s took: %f seconds" % (name, (time_took)))
+
+ if self.name:
+ Bench.entries[self.name] = time_took
+
+ @classmethod
+ def report(cls):
+ total_time = sum((time_took for time_took in cls.entries.itervalues()))
+ print("\nTiming report\n")
+ for name, time_took in cls.entries.iteritems():
+ percent = int(round((time_took/total_time) * 100))
+ print("[{}%] {}: {}".format(percent, name, time_took))
+ print()
+
+ # Reset the entries after reporting
+ cls.entries = collections.OrderedDict()
\ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 50739614..23cec6d0 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -14,6 +14,7 @@ import sys
import os
import httplib
import urllib
+import collections
import numpy as np
@@ -21,6 +22,7 @@ import json
from htmlgen import HTMLgen2 as HT
from utility import Plot, Bunch
+from utility.benchmark import Bench
from wqflask.interval_analyst import GeneUtil
from base.trait import GeneralTrait
from base import data_set
@@ -462,91 +464,62 @@ class MarkerRegression(object):
def gen_data(self):
"""Todo: Fill this in here"""
-
- #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
- #markers = json.load(json_data)
-
+
+ print("something")
+
genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
+
no_val_samples = self.identify_empty_samples()
trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
- #for i, marker in enumerate(trimmed_genotype_data):
- # if i > 10:
- # break
- # print("genotype is:", pf(marker))
-
- #print("trimmed genotype data is:", pf(trimmed_genotype_data))
-
- #for marker_object in genotype_data:
- # print("marker_object:", pf(marker_object))
-
- #prep_data.PrepData(self.vals, genotype_data)
-
pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
- #for item in trimmed_genotype_data:
- # if type(item) != type(list()):
- # print(" --->", type(item))
- # for counter, part in enumerate(item):
- # if type(part) != type(float()):
- # print(" ------>", type(part), " : ", part)
- # if counter % 100 == 0:
- # print(" ------>", type(part))
genotypes = np.array(trimmed_genotype_data).T
- #print("genotypes is:", pf(genotypes))
- #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
- # self.dataset.group.name + '.snps.new')).T
+
+ #times = collections.OrderedDict()
+ #times['start'] = time.time()
- #print("pheno_vector is:", pf(pheno_vector.shape))
- #print("genotypes is:", pf(genotypes.shape))
+ with Bench("Calculate Kinship"):
+ kinship_matrix = lmm.calculateKinship(genotypes)
- kinship_matrix = lmm.calculateKinship(genotypes)
- #print("kinship_matrix is:", pf(kinship_matrix))
+ with Bench("Create LMM object"):
+ lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
- lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
- lmm_ob.fit()
-
- t_stats, p_values = lmm.GWAS(pheno_vector,
- genotypes,
- kinship_matrix,
- REML=True,
- refit=False)
-
- #print("p_values is:", pf(len(p_values)))
+ with Bench("LMM_ob fitting"):
+ lmm_ob.fit()
+
- self.dataset.group.markers.add_pvalues(p_values)
+ with Bench("Doing gwas"):
+ t_stats, p_values = lmm.GWAS(pheno_vector,
+ genotypes,
+ kinship_matrix,
+ REML=True,
+ refit=False)
+
+ Bench().report()
+
+ #previous_time = None
+ #for operation, this_time in times.iteritems():
+ # if previous_time:
+ # print("{} run time: {}".format(operation, this_time-previous_time))
+ # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime))
+ # previous_time = this_time
- #calculate QTL for each trait
- #self.qtl_results = self.genotype.regression(strains = self.samples,
- # trait = self.vals)
- #self.lrs_array = self.genotype.permutation(strains = self.samples,
- # trait = self.vals,
- # nperm=self.num_perm)
+ self.dataset.group.markers.add_pvalues(p_values)
self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers]
- #print("self.lrs_values is:", pf(self.lrs_values))
lrs_values_sorted = sorted(self.lrs_values)
-
- #print("lrs_values_sorted is:", pf(lrs_values_sorted))
- #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1)))
-
+
lrs_values_length = len(lrs_values_sorted)
-
+
def lrs_threshold(place):
return lrs_values_sorted[int((lrs_values_length * place) -1)]
-
+
self.lrs_thresholds = Bunch(
suggestive = lrs_threshold(.37),
significant = lrs_threshold(.95),
highly_significant = lrs_threshold(.99),
)
- #self.lrs_thresholds = Bunch(
- # suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
- # significant = self.lrs_array[int(self.num_perm*0.95-1)],
- # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
- # )
-
if self.display_all_lrs:
self.filtered_results = self.dataset.group.markers.markers
else:
@@ -557,67 +530,6 @@ class MarkerRegression(object):
if marker['lrs_value'] > self.lrs_thresholds.suggestive:
self.filtered_results.append(marker)
- #if self.display_all_lrs:
- # filtered_results = self.qtl_results
- #else:
- # suggestive_results = []
- # self.pure_qtl_results = []
- # for result in self.qtl_results:
- # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
- # mb=result.locus.Mb,
- # chromosome=result.locus.chr),
- # lrs=result.lrs,
- # additive=result.additive))
- # if result.lrs > self.lrs_thresholds.suggestive:
- # suggestive_results.append(result)
- # filtered_results = suggestive_results
-
-
- # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used
- if not self.filtered_results:
- # We use the 10 results with the highest LRS values
- self.filtered_results = sorted(self.qtl_results)[-10:]
- self.top_10 = True
- else:
- self.top_10 = False
-
-
-
- #qtlresults2 = []
- #if self.disp_all_lrs:
- # filtered = self.qtl_results[:]
- #else:
- # filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults)
- #if len(filtered) == 0:
- # qtlresults2 = qtlresults[:]
- # qtlresults2.sort()
- # filtered = qtlresults2[-10:]
-
- #########################################
- # Permutation Graph
- #########################################
- #myCanvas = pid.PILCanvas(size=(400,300))
- ##plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification)
- #Plot.plotBar(myCanvas, LRSArray, XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test')
- #filename= webqtlUtil.genRandStr("Reg_")
- #myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif')
- #img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test')
-
- #if fd.suggestive == None:
- # fd.suggestive = LRSArray[int(fd.nperm*0.37-1)]
- #else:
- # fd.suggestive = float(fd.suggestive)
- #if fd.significance == None:
- # fd.significance = LRSArray[int(fd.nperm*0.95-1)]
- #else:
- # fd.significance = float(fd.significance)
-
- #permutationHeading = HT.Paragraph('Histogram of Permutation Test')
- #permutationHeading.__setattr__("class","title")
- #
- #permutation = HT.TableLite()
- #permutation.append(HT.TR(HT.TD(img)))
-
for marker in self.filtered_results:
if marker['lrs_value'] > webqtlConfig.MAXLRS:
marker['lrs_value'] = webqtlConfig.MAXLRS
@@ -695,17 +607,6 @@ class MarkerRegression(object):
# index+=1
# tblobj_body.append(reportBodyRow)
- #tblobj_header.append(reportHeaderRow)
- #tblobj['header']=tblobj_header
- #tblobj['body']=tblobj_body
-
- #rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee')
- #if fd.genotype.type == 'intercross':
- # bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g., BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.')
- #return rv,tblobj,bottomInfo
-
- #return rv,tblobj,bottomInfo
-
def identify_empty_samples(self):
no_val_samples = []
for sample_count, val in enumerate(self.vals):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d0f379dd..33f6573e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -137,7 +137,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
if refit: Ls.fit(X=xs)
else: Ls.fit()
ts,ps = Ls.association(xs,REML=REML)
- else:
+ else:
if x.var() == 0:
PS.append(np.nan)
TS.append(np.nan)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 720515c1..7b2d022c 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
import string
import os
import cPickle
+import uuid
#import pyXLWriter as xl
from collections import OrderedDict
@@ -120,6 +121,8 @@ class ShowTrait(object):
# We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
self.hddn = hddn
+
+ self.session_uuid = uuid.uuid4()
self.sample_group_types = OrderedDict()
self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
@@ -129,7 +132,8 @@ class ShowTrait(object):
print("sample_lists is:", pf(sample_lists))
js_data = dict(sample_group_types = self.sample_group_types,
sample_lists = sample_lists,
- attribute_names = self.sample_groups[0].attributes)
+ attribute_names = self.sample_groups[0].attributes,
+ session_uuid = self.session_uuid)
#print("js_data:", pf(js_data))
self.js_data = js_data
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 87199e9f..bb87b4bb 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -19,8 +19,10 @@