From 8aeff9b91d078a40a50d13f6393a1f1dabf62aa4 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 18 Jan 2013 16:58:28 -0600 Subject: Renamed CorrelationPage.py to show_corr_results.py Worked with correlation code; got to the code that begins to do the actual correlations Created a function "get_dataset_and_trait" in the new file "helper_functions.py" because the code initializing the dataset and trait objects was repeated in multiple places --- wqflask/utility/helper_functions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 wqflask/utility/helper_functions.py (limited to 'wqflask/utility') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py new file mode 100644 index 00000000..920d9ac6 --- /dev/null +++ b/wqflask/utility/helper_functions.py @@ -0,0 +1,15 @@ +from __future__ import absolute_import, print_function, division + +from base.trait import GeneralTrait +from base import data_set + +def get_dataset_and_trait(self, start_vars): + #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" + self.dataset = data_set.create_dataset(start_vars['dataset']) + self.this_trait = GeneralTrait(dataset=self.dataset.name, + name=start_vars['trait_id'], + cellid=None) + + #if read_genotype: + self.dataset.group.read_genotype_file() + self.genotype = self.dataset.group.genotype \ No newline at end of file -- cgit v1.2.3 From 01283a27bf9cc78653059236fa55d6063558ab21 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 22 Jan 2013 17:24:26 -0600 Subject: Pushed through a few errors in getting the correlation page running --- wqflask/utility/helper_functions.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 25 ++++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 920d9ac6..9ecad993 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -5,7 +5,7 @@ from base import data_set def get_dataset_and_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" - self.dataset = data_set.create_dataset(start_vars['dataset']) + self.dataset = data_set.create_dataset(start_vars['dataset_name']) self.this_trait = GeneralTrait(dataset=self.dataset.name, name=start_vars['trait_id'], cellid=None) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 23dd1534..b82f1c59 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -374,7 +374,7 @@ class CorrelationResults(object): #XZ: As of Nov/13/2010, this dataset is 'UTHSC Illumina V6.2 RankInv B6 D2 average CNS GI average (May 08)' self.tissue_probeset_freeze_id = 1 - traitList = self.correlate(self.vals) + traitList = self.correlate() _log.info("Done doing correlation calculation") @@ -823,18 +823,17 @@ Resorting this table
"""Returns the name of the reference database file with which correlations are calculated. Takes argument cursor which is a cursor object of any instance of a subclass of templatePage Used by correlationPage""" -ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name - self.cursor.execute(query) - result = self.cursor.fetchone() - Id = result[0] - FullName = result[1] - FullName = FullName.replace(' ','_') - FullName = FullName.replace('/','_') - FileName = 'ProbeSetFreezeId_' + str(Id) + '_FullName_' + FullName + '.txt' + trait_id, full_name = g.db.execute("""SELECT Id, FullName + FROM ProbeSetFreeze + WHERE Name = '%s'""" % target_db_name).fetchone() + for char in [' ', '/']: + full_name = full_name.replace(char, '_') + + file_name = 'ProbeSetFreezeId_' + str(trait_id) + '_FullName_' + full_name + '.txt' + + return file_name - return FileName - query = 'SELECT Id, FullName F #XZ, 01/29/2009: I modified this function. @@ -1262,7 +1261,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name #_log.info("Using the slow method for correlation") # #_log.info("Fetching from database") - traits = self.fetchAllDatabaseData(species=self.species, GeneId=self.gene_id, GeneSymbol=self.trait_symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) + traits = self.fetchAllDatabaseData(species=self.dataset.species, GeneId=self.gene_id, GeneSymbol=self.trait.symbol, strains=self.sample_names, db=self.db, method=self.method, returnNumber=self.returnNumber, tissueProbeSetFreezeId= self.tissue_probeset_freeze_id) #_log.info("Done fetching from database") totalTraits = len(traits) #XZ, 09/18/2008: total trait number @@ -1339,7 +1338,7 @@ ROM ProbeSetFreeze WHERE Name = "%s"' % target_db_name #cache_available = db_filename in os.listdir(webqtlConfig.TEXTDIR) # If the cache file exists, do a cached correlation for probeset data - if self.db.type == "ProbeSet": + if self.dataset.type == "ProbeSet": # if self.method in [METHOD_SAMPLE_PEARSON, METHOD_SAMPLE_RANK] and cache_available: # traits = do_parallel_correlation() # -- cgit v1.2.3 From b82eb4a59edb4e1d8bbf3588edeb13e38ead052e Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 31 Jan 2013 15:08:01 -0600 Subject: Changed reference to 'dataset_name' in keywords to 'dataset' to get page to work, but will change all references to the dataset name to 'dataset_name' in future to avoid confusion between the dataset name and the actual dataset object --- wqflask/utility/helper_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'wqflask/utility') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 9ecad993..920d9ac6 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -5,7 +5,7 @@ from base import data_set def get_dataset_and_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" - self.dataset = data_set.create_dataset(start_vars['dataset_name']) + self.dataset = data_set.create_dataset(start_vars['dataset']) self.this_trait = GeneralTrait(dataset=self.dataset.name, name=start_vars['trait_id'], cellid=None) -- cgit v1.2.3 From b4d6873f8d6327ae7cd64c80f31348728322f719 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 19 Feb 2013 15:09:38 -0600 Subject: Made lines for manhattan plot proportional to chromosome lengths --- wqflask/utility/helper_functions.py | 6 ++- 
.../wqflask/marker_regression/marker_regression.py | 35 +++++++------ wqflask/wqflask/show_trait/show_trait.py | 2 +- .../static/new/javascript/marker_regression.coffee | 57 +++++++++++++++------- .../static/new/javascript/marker_regression.js | 43 +++++++++++----- wqflask/wqflask/views.py | 2 +- 6 files changed, 97 insertions(+), 48 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 920d9ac6..d2567b63 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -2,14 +2,16 @@ from __future__ import absolute_import, print_function, division from base.trait import GeneralTrait from base import data_set +from base.species import TheSpecies -def get_dataset_and_trait(self, start_vars): +def get_species_dataset_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" self.dataset = data_set.create_dataset(start_vars['dataset']) + self.species = TheSpecies(dataset=self.dataset) self.this_trait = GeneralTrait(dataset=self.dataset.name, name=start_vars['trait_id'], cellid=None) #if read_genotype: self.dataset.group.read_genotype_file() - self.genotype = self.dataset.group.genotype \ No newline at end of file + self.genotype = self.dataset.group.genotype diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 81bf3825..50739614 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -24,6 +24,7 @@ from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil from base.trait import GeneralTrait from base import data_set +from base import species from base.templatePage import templatePage from utility import webqtlUtil, helper_functions from base import webqtlConfig @@ -60,7 +61,7 @@ class MarkerRegression(object): #print("start_vars are: ", pf(start_vars)) - helper_functions.get_dataset_and_trait(self, start_vars) + helper_functions.get_species_dataset_trait(self, start_vars) self.num_perm = int(start_vars['num_perm']) @@ -306,7 +307,14 @@ class MarkerRegression(object): # end: common part with human data + chromosome_mb_lengths = {} + for key in self.species.chromosomes.chromosomes.keys(): + chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length + + print("chromosomes is:", pf(chromosome_mb_lengths)) + self.js_data = dict( + chromosomes = chromosome_mb_lengths, qtl_results = self.pure_qtl_results, lrs_values = self.lrs_values, ) @@ -476,25 +484,24 @@ class MarkerRegression(object): #prep_data.PrepData(self.vals, genotype_data) pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - print("genotypes was:", pf(trimmed_genotype_data)) - for item in trimmed_genotype_data: - if type(item) != type(list()): - print(" --->", type(item)) - for counter, part in enumerate(item): - if type(part) != type(float()): - print(" ------>", type(part), " : ", part) - if counter % 100 == 0: - print(" ------>", type(part)) + #for item in trimmed_genotype_data: + # if type(item) != type(list()): + # print(" --->", type(item)) + # for counter, part in enumerate(item): + # if type(part) != type(float()): + # print(" ------>", type(part), " : ", part) + # if counter % 100 == 0: + # print(" ------>", type(part)) genotypes = np.array(trimmed_genotype_data).T - print("genotypes is:", pf(genotypes)) + #print("genotypes is:", pf(genotypes)) 
#genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, # self.dataset.group.name + '.snps.new')).T - print("pheno_vector is:", pf(pheno_vector.shape)) - print("genotypes is:", pf(genotypes.shape)) + #print("pheno_vector is:", pf(pheno_vector.shape)) + #print("genotypes is:", pf(genotypes.shape)) kinship_matrix = lmm.calculateKinship(genotypes) - print("kinship_matrix is:", pf(kinship_matrix)) + #print("kinship_matrix is:", pf(kinship_matrix)) lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) lmm_ob.fit() diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 33ea6e86..720515c1 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -38,7 +38,7 @@ class ShowTrait(object): print("in ShowTrait, kw are:", kw) self.trait_id = kw['trait_id'] - helper_functions.get_dataset_and_trait(self, kw) + helper_functions.get_species_dataset_trait(self, kw) #self.dataset = create_dataset(kw['dataset']) # diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.coffee b/wqflask/wqflask/static/new/javascript/marker_regression.coffee index 41632723..6bdb3ac6 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.coffee +++ b/wqflask/wqflask/static/new/javascript/marker_regression.coffee @@ -4,7 +4,9 @@ $ -> constructor: -> @qtl_results = js_data.qtl_results + @chromosomes = js_data.chromosomes @max_chr = @get_max_chr() + @cumulative_chr_lengths = @get_cumulative_chr_lengths() @plot_height = 500 @plot_width = 1000 @@ -31,9 +33,22 @@ $ -> max_chr = chr return max_chr + get_cumulative_chr_lengths: () -> + cumulative_chr_lengths = [] + total_length = 0 + for key of @chromosomes + this_length = @chromosomes[key] + cumulative_chr_lengths.push(total_length + this_length) + total_length += this_length + console.log("lengths:", cumulative_chr_lengths) + return cumulative_chr_lengths + get_coordinates: () -> + total_length = 0 + chr_lengths = [] for result in js_data.qtl_results + chr_length = @chromosomes[result.chr] chr = parseInt(result.chr) if _.isNaN(chr) if result.chr == "X" @@ -41,9 +56,17 @@ $ -> else if result.chr == "Y" chr = @max_chr + 2 - @x_coords.push(((chr-1) * 200) + parseFloat(result.Mb)) + @x_coords.push(total_length + parseFloat(result.Mb)) @y_coords.push(result.lrs_value) @marker_names.push(result.name) + + if chr_length in chr_lengths + continue + else + chr_lengths.push(chr_length) + total_length += chr_length + + console.log("chr_lengths are:", @chr_lengths) display_info: (d) -> @@ -56,20 +79,20 @@ $ -> .attr("width", @plot_width) .attr("height", @plot_height) - svg.selectAll("text") - .data(@plot_coordinates) - .enter() - .append("text") - .attr("x", (d) => - return (@plot_width * d[0]/@x_max) - ) - .attr("y", (d) => - return @plot_height - ((0.8*@plot_height) * d[1]/@y_max) - ) - .text((d) => d[2]) - .attr("font-family", "sans-serif") - .attr("font-size", "12px") - .attr("fill", "black"); + #svg.selectAll("text") + # .data(@plot_coordinates) + # .enter() + # .append("text") + # .attr("x", (d) => + # return (@plot_width * d[0]/@x_max) + # ) + # .attr("y", (d) => + # return @plot_height - ((0.8*@plot_height) * d[1]/@y_max) + # ) + # .text((d) => d[2]) + # .attr("font-family", "sans-serif") + # .attr("font-size", "12px") + # .attr("fill", "black"); svg.selectAll("circle") .data(@plot_coordinates) @@ -116,7 +139,7 @@ $ -> ) .on("mouseout", () => d3.select(d3.event.target).classed("d3_highlight", false) - .attr("r", 2) + .attr("r", 2) .attr("fill", "black") ) .attr("title", 
"foobar") @@ -132,7 +155,7 @@ $ -> .range([0, @plot_height]) svg.selectAll("line") - .data(x.ticks(@max_chr)) + .data(@cumulative_chr_lengths) .enter() .append("line") .attr("x1", x) diff --git a/wqflask/wqflask/static/new/javascript/marker_regression.js b/wqflask/wqflask/static/new/javascript/marker_regression.js index 68e04034..19fec21e 100644 --- a/wqflask/wqflask/static/new/javascript/marker_regression.js +++ b/wqflask/wqflask/static/new/javascript/marker_regression.js @@ -1,5 +1,6 @@ // Generated by CoffeeScript 1.3.3 (function() { + var __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; $(function() { var Manhattan_Plot; @@ -7,7 +8,9 @@ function Manhattan_Plot() { this.qtl_results = js_data.qtl_results; + this.chromosomes = js_data.chromosomes; this.max_chr = this.get_max_chr(); + this.cumulative_chr_lengths = this.get_cumulative_chr_lengths(); this.plot_height = 500; this.plot_width = 1000; this.x_coords = []; @@ -37,12 +40,27 @@ return max_chr; }; + Manhattan_Plot.prototype.get_cumulative_chr_lengths = function() { + var cumulative_chr_lengths, key, this_length, total_length; + cumulative_chr_lengths = []; + total_length = 0; + for (key in this.chromosomes) { + this_length = this.chromosomes[key]; + cumulative_chr_lengths.push(total_length + this_length); + total_length += this_length; + } + console.log("lengths:", cumulative_chr_lengths); + return cumulative_chr_lengths; + }; + Manhattan_Plot.prototype.get_coordinates = function() { - var chr, result, _i, _len, _ref, _results; + var chr, chr_length, chr_lengths, result, total_length, _i, _len, _ref; + total_length = 0; + chr_lengths = []; _ref = js_data.qtl_results; - _results = []; for (_i = 0, _len = _ref.length; _i < _len; _i++) { result = _ref[_i]; + chr_length = this.chromosomes[result.chr]; chr = parseInt(result.chr); if (_.isNaN(chr)) { if (result.chr === "X") { @@ -51,11 +69,17 @@ chr = this.max_chr + 2; } } - this.x_coords.push(((chr - 1) * 200) + parseFloat(result.Mb)); + this.x_coords.push(total_length + parseFloat(result.Mb)); this.y_coords.push(result.lrs_value); - _results.push(this.marker_names.push(result.name)); + this.marker_names.push(result.name); + if (__indexOf.call(chr_lengths, chr_length) >= 0) { + continue; + } else { + chr_lengths.push(chr_length); + total_length += chr_length; + } } - return _results; + return console.log("chr_lengths are:", this.chr_lengths); }; Manhattan_Plot.prototype.display_info = function(d) { @@ -66,13 +90,6 @@ var svg, x, y, _this = this; svg = d3.select("#manhattan_plots").append("svg").style('border', '2px solid black').attr("width", this.plot_width).attr("height", this.plot_height); - svg.selectAll("text").data(this.plot_coordinates).enter().append("text").attr("x", function(d) { - return _this.plot_width * d[0] / _this.x_max; - }).attr("y", function(d) { - return _this.plot_height - ((0.8 * _this.plot_height) * d[1] / _this.y_max); - }).text(function(d) { - return d[2]; - }).attr("font-family", "sans-serif").attr("font-size", "12px").attr("fill", "black"); svg.selectAll("circle").data(this.plot_coordinates).enter().append("circle").attr("cx", function(d) { return _this.plot_width * d[0] / _this.x_max; }).attr("cy", function(d) { @@ -84,7 +101,7 @@ }).attr("title", "foobar"); x = d3.scale.linear().domain([0, this.x_max]).range([0, this.plot_width]); y = d3.scale.linear().domain([0, this.y_max]).range([0, this.plot_height]); - return 
svg.selectAll("line").data(x.ticks(this.max_chr)).enter().append("line").attr("x1", x).attr("x2", x).attr("y1", 0).attr("y2", this.plot_height).style("stroke", "#ccc"); + return svg.selectAll("line").data(this.cumulative_chr_lengths).enter().append("line").attr("x1", x).attr("x2", x).attr("y1", 0).attr("y2", this.plot_height).style("stroke", "#ccc"); }; return Manhattan_Plot; diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 81777742..5f2df8a4 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -154,7 +154,7 @@ def marker_regression_page(): template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") - print("[dub] js_data after dump:", template_vars.js_data) + #print("[dub] js_data after dump:", template_vars.js_data) #print("marker_regression template_vars:", pf(template_vars.__dict__)) return render_template("marker_regression.html", **template_vars.__dict__) -- cgit v1.2.3 From fa4c4f116c37285d24edc9532bb223e18dfb1584 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 28 Feb 2013 23:37:59 +0000 Subject: Added benchmark.py that compares the time of various operations and gives a report with the percent of total computation time each takes --- wqflask/utility/benchmark.py | 42 +++++ .../wqflask/marker_regression/marker_regression.py | 169 +++++---------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/show_trait/show_trait.py | 6 +- wqflask/wqflask/templates/show_trait.html | 4 +- 5 files changed, 86 insertions(+), 137 deletions(-) create mode 100644 wqflask/utility/benchmark.py (limited to 'wqflask/utility') diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py new file mode 100644 index 00000000..0a6e422c --- /dev/null +++ b/wqflask/utility/benchmark.py @@ -0,0 +1,42 @@ +from __future__ import print_function, division, absolute_import + +import collections +import inspect +import time + + +class Bench(object): + entries = collections.OrderedDict() + def __init__(self, name=None): + self.name = name + + def __enter__(self): + if self.name: + print("Starting benchmark: %s" % (self.name)) + else: + print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + self.start_time = time.time() + + def __exit__(self, type, value, traceback): + if self.name: + name = self.name + else: + name = "That" + + time_took = time.time() - self.start_time + print(" %s took: %f seconds" % (name, (time_took))) + + if self.name: + Bench.entries[self.name] = time_took + + @classmethod + def report(cls): + total_time = sum((time_took for time_took in cls.entries.itervalues())) + print("\nTiming report\n") + for name, time_took in cls.entries.iteritems(): + percent = int(round((time_took/total_time) * 100)) + print("[{}%] {}: {}".format(percent, name, time_took)) + print() + + # Reset the entries after reporting + cls.entries = collections.OrderedDict() \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 50739614..23cec6d0 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -14,6 +14,7 @@ import sys import os import httplib import urllib +import collections import numpy as np @@ -21,6 +22,7 @@ import json from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch +from utility.benchmark import Bench from wqflask.interval_analyst import GeneUtil from base.trait import 
GeneralTrait from base import data_set @@ -462,91 +464,62 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - - #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) - #markers = json.load(json_data) - + + print("something") + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - + no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - #for i, marker in enumerate(trimmed_genotype_data): - # if i > 10: - # break - # print("genotype is:", pf(marker)) - - #print("trimmed genotype data is:", pf(trimmed_genotype_data)) - - #for marker_object in genotype_data: - # print("marker_object:", pf(marker_object)) - - #prep_data.PrepData(self.vals, genotype_data) - pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - #for item in trimmed_genotype_data: - # if type(item) != type(list()): - # print(" --->", type(item)) - # for counter, part in enumerate(item): - # if type(part) != type(float()): - # print(" ------>", type(part), " : ", part) - # if counter % 100 == 0: - # print(" ------>", type(part)) genotypes = np.array(trimmed_genotype_data).T - #print("genotypes is:", pf(genotypes)) - #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, - # self.dataset.group.name + '.snps.new')).T + + #times = collections.OrderedDict() + #times['start'] = time.time() - #print("pheno_vector is:", pf(pheno_vector.shape)) - #print("genotypes is:", pf(genotypes.shape)) + with Bench("Calculate Kinship"): + kinship_matrix = lmm.calculateKinship(genotypes) - kinship_matrix = lmm.calculateKinship(genotypes) - #print("kinship_matrix is:", pf(kinship_matrix)) + with Bench("Create LMM object"): + lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) - lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) - lmm_ob.fit() - - t_stats, p_values = lmm.GWAS(pheno_vector, - genotypes, - kinship_matrix, - REML=True, - refit=False) - - #print("p_values is:", pf(len(p_values))) + with Bench("LMM_ob fitting"): + lmm_ob.fit() + - self.dataset.group.markers.add_pvalues(p_values) + with Bench("Doing gwas"): + t_stats, p_values = lmm.GWAS(pheno_vector, + genotypes, + kinship_matrix, + REML=True, + refit=False) + + Bench().report() + + #previous_time = None + #for operation, this_time in times.iteritems(): + # if previous_time: + # print("{} run time: {}".format(operation, this_time-previous_time)) + # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime)) + # previous_time = this_time - #calculate QTL for each trait - #self.qtl_results = self.genotype.regression(strains = self.samples, - # trait = self.vals) - #self.lrs_array = self.genotype.permutation(strains = self.samples, - # trait = self.vals, - # nperm=self.num_perm) + self.dataset.group.markers.add_pvalues(p_values) self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] - #print("self.lrs_values is:", pf(self.lrs_values)) lrs_values_sorted = sorted(self.lrs_values) - - #print("lrs_values_sorted is:", pf(lrs_values_sorted)) - #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) - + lrs_values_length = len(lrs_values_sorted) - + def lrs_threshold(place): return lrs_values_sorted[int((lrs_values_length * place) -1)] - + self.lrs_thresholds = Bunch( suggestive = lrs_threshold(.37), significant = lrs_threshold(.95), highly_significant = lrs_threshold(.99), ) - #self.lrs_thresholds = Bunch( - # suggestive = 
self.lrs_array[int(self.num_perm*0.37-1)], - # significant = self.lrs_array[int(self.num_perm*0.95-1)], - # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] - # ) - if self.display_all_lrs: self.filtered_results = self.dataset.group.markers.markers else: @@ -557,67 +530,6 @@ class MarkerRegression(object): if marker['lrs_value'] > self.lrs_thresholds.suggestive: self.filtered_results.append(marker) - #if self.display_all_lrs: - # filtered_results = self.qtl_results - #else: - # suggestive_results = [] - # self.pure_qtl_results = [] - # for result in self.qtl_results: - # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, - # mb=result.locus.Mb, - # chromosome=result.locus.chr), - # lrs=result.lrs, - # additive=result.additive)) - # if result.lrs > self.lrs_thresholds.suggestive: - # suggestive_results.append(result) - # filtered_results = suggestive_results - - - # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used - if not self.filtered_results: - # We use the 10 results with the highest LRS values - self.filtered_results = sorted(self.qtl_results)[-10:] - self.top_10 = True - else: - self.top_10 = False - - - - #qtlresults2 = [] - #if self.disp_all_lrs: - # filtered = self.qtl_results[:] - #else: - # filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults) - #if len(filtered) == 0: - # qtlresults2 = qtlresults[:] - # qtlresults2.sort() - # filtered = qtlresults2[-10:] - - ######################################### - # Permutation Graph - ######################################### - #myCanvas = pid.PILCanvas(size=(400,300)) - ##plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification) - #Plot.plotBar(myCanvas, LRSArray, XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test') - #filename= webqtlUtil.genRandStr("Reg_") - #myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif') - #img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') - - #if fd.suggestive == None: - # fd.suggestive = LRSArray[int(fd.nperm*0.37-1)] - #else: - # fd.suggestive = float(fd.suggestive) - #if fd.significance == None: - # fd.significance = LRSArray[int(fd.nperm*0.95-1)] - #else: - # fd.significance = float(fd.significance) - - #permutationHeading = HT.Paragraph('Histogram of Permutation Test') - #permutationHeading.__setattr__("class","title") - # - #permutation = HT.TableLite() - #permutation.append(HT.TR(HT.TD(img))) - for marker in self.filtered_results: if marker['lrs_value'] > webqtlConfig.MAXLRS: marker['lrs_value'] = webqtlConfig.MAXLRS @@ -695,17 +607,6 @@ class MarkerRegression(object): # index+=1 # tblobj_body.append(reportBodyRow) - #tblobj_header.append(reportHeaderRow) - #tblobj['header']=tblobj_header - #tblobj['body']=tblobj_body - - #rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee') - #if fd.genotype.type == 'intercross': - # bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g., BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. 
No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.') - #return rv,tblobj,bottomInfo - - #return rv,tblobj,bottomInfo - def identify_empty_samples(self): no_val_samples = [] for sample_count, val in enumerate(self.vals): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index d0f379dd..33f6573e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -137,7 +137,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): if refit: Ls.fit(X=xs) else: Ls.fit() ts,ps = Ls.association(xs,REML=REML) - else: + else: if x.var() == 0: PS.append(np.nan) TS.append(np.nan) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 720515c1..7b2d022c 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division import string import os import cPickle +import uuid #import pyXLWriter as xl from collections import OrderedDict @@ -120,6 +121,8 @@ class ShowTrait(object): # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self self.hddn = hddn + + self.session_uuid = uuid.uuid4() self.sample_group_types = OrderedDict() self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only" @@ -129,7 +132,8 @@ class ShowTrait(object): print("sample_lists is:", pf(sample_lists)) js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, - attribute_names = self.sample_groups[0].attributes) + attribute_names = self.sample_groups[0].attributes, + session_uuid = self.session_uuid) #print("js_data:", pf(js_data)) self.js_data = js_data diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 87199e9f..bb87b4bb 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -19,8 +19,10 @@
{% for key in hddn %} - + {% endfor %} + +
\ No newline at end of file -- cgit v1.2.3 From ea53a2f20d13130f3555967d57282b3c9562da5a Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Wed, 17 Apr 2013 23:49:47 +0000 Subject: Created file with pickled SNPIterator (from input.py) data for HLC datasets Still need to read in file --- wqflask/base/webqtlConfig.py | 1 + wqflask/utility/temp_data.py | 6 ++- .../wqflask/marker_regression/marker_regression.py | 48 ++++++++++++---------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 27 ++++++++---- 4 files changed, 50 insertions(+), 32 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index 1845c749..49afb631 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -53,6 +53,7 @@ SECUREDIR = GNROOT + 'secure/' COMMON_LIB = GNROOT + 'support/admin' HTMLPATH = GNROOT + 'web/' PYLMM_PATH = HTMLPATH + 'plink/' +SNP_PATH = '/mnt/xvdf1/snps/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index 004d45c6..ddf2653c 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -5,14 +5,16 @@ import simplejson as json class TempData(object): - def __init__(self, temp_uuid): + def __init__(self, temp_uuid, part=None): self.temp_uuid = temp_uuid self.redis = Redis() self.key = "tempdata:{}".format(self.temp_uuid) + if part: + self.key += ":{}".format(part) def store(self, field, value): self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*15) # Expire in 15 minutes + self.redis.expire(self.key, 60*60) # Expire in 60 minutes def get_all(self): return self.redis.hgetall(self.key) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 86d9fe06..2ede5660 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -9,11 +9,12 @@ import string import sys import os import collections -import pdb import numpy as np from scipy import linalg +import simplejson as json + #from redis import Redis @@ -41,8 +42,6 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] - print("start_vars: ", pf(start_vars)) - self.suggestive = float(start_vars['suggestive']) for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] @@ -52,13 +51,12 @@ class MarkerRegression(object): self.gen_data(tempdata) #Get chromosome lengths for drawing the manhattan plot - chromosomes = {} + chromosome_mb_lengths = {} for key in self.species.chromosomes.chromosomes.keys(): - this_chr = self.species.chromosomes.chromosomes[key] - chromosomes[key] = [this_chr.name, this_chr.mb_length] + chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length self.js_data = dict( - chromosomes = chromosomes, + chromosomes = chromosome_mb_lengths, qtl_results = self.qtl_results, ) @@ -74,10 +72,10 @@ class MarkerRegression(object): p_values, t_stats = self.gen_human_results(pheno_vector, tempdata) else: genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - + no_val_samples = self.identify_empty_samples() - trimmed_genotype_data = self.trim_genotypes(genotype_data, no_value_samples=[]) - pdb.set_trace() + trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) + genotype_matrix = np.array(trimmed_genotype_data).T print("pheno_vector is: 
", pf(pheno_vector)) @@ -90,23 +88,16 @@ class MarkerRegression(object): refit=False, temp_data=tempdata ) - + self.dataset.group.markers.add_pvalues(p_values) - self.qtl_results = [] - for marker in self.dataset.group.markers.markers: - if marker['lod_score'] >= self.suggestive: - self.qtl_results.append(marker) - - #self.qtl_results = self.dataset.group.markers.markers + self.qtl_results = self.dataset.group.markers.markers def gen_human_results(self, pheno_vector, tempdata): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) - - tempdata.store("percent_complete", 0) + plink_input = input.plink(file_base, type='b') - tempdata.store("percent_complete", 0.1) pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) covariate_matrix = np.ones((pheno_vector.shape[0],1)) @@ -118,11 +109,11 @@ class MarkerRegression(object): covariate_matrix, plink_input, kinship_matrix, - temp_data=tempdata + loading_progress=tempdata ) return p_values, t_stats - + def identify_empty_samples(self): no_val_samples = [] @@ -147,4 +138,17 @@ class MarkerRegression(object): trimmed_genotype_data.append(new_genotypes) return trimmed_genotype_data +def create_snp_iterator_file(group): + plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group) + plink_input = input.plink(plink_file_base, type='b') + inputs = list(plink_input) + snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps") + + with open(snp_file_base, "w") as fh: + pickle.dump(inputs, fh) + + +if __name__ == '__main__': + import cPickle as pickle + create_snp_iterator_file("HLC") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 24565af8..918f8200 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -19,15 +19,21 @@ from __future__ import absolute_import, print_function, division import sys import time +import uuid + import numpy as np from scipy import linalg from scipy import optimize from scipy import stats import pdb +#import cPickle as pickle +import simplejson as json + from pprint import pformat as pf from utility.benchmark import Bench +from utility import temp_data from wqflask.my_pylmm.pyLMM import chunks @@ -38,7 +44,7 @@ def run_human(pheno_vector, plink_input, kinship_matrix, refit=False, - temp_data=None): + loading_progress=None): v = np.isnan(pheno_vector) keep = True - v @@ -65,27 +71,32 @@ def run_human(pheno_vector, plink_input.getSNPIterator() total_snps = plink_input.numSNPs - number_chunks = 63 - with Bench("snp iterator loop"): count = 0 - - + with Bench("Create list of inputs"): inputs = list(plink_input) with Bench("Divide into chunks"): - results = chunks.divide_into_chunks(inputs, 63) + results = chunks.divide_into_chunks(inputs, 64) + result_store = [] + identifier = uuid.uuid4() + for part, result in enumerate(results): + data_store = temp_data.TempData(identifier, part) + + data_store.store(data=json.dumps(result.tolist())) + result_store.append(data_store) + for snp, this_id in plink_input: with Bench("part before association"): - if count > 500: + if count > 2000: break count += 1 percent_complete = (float(count) / total_snps) * 100 #print("percent_complete: ", percent_complete) - temp_data.store("percent_complete", percent_complete) + loading_progress.store("percent_complete", percent_complete) with Bench("actual association"): ps, ts = human_association(snp, -- cgit v1.2.3 From 5ffd0debd5ab7ee0e98def74374a8e996629f5c9 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 18 Apr 2013 
22:36:39 +0000 Subject: The plink_input is split into chunks that are stored in temp_data, but we might decide to store it differently --- wqflask/utility/temp_data.py | 14 +++++++++----- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 8 ++++---- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index ddf2653c..60f01167 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -1,14 +1,15 @@ from __future__ import print_function, division, absolute_import from redis import Redis +import redis import simplejson as json class TempData(object): - - def __init__(self, temp_uuid, part=None): + + def __init__(self, temp_uuid, preface="tempdata", part=None): self.temp_uuid = temp_uuid self.redis = Redis() - self.key = "tempdata:{}".format(self.temp_uuid) + self.key = "{}:{}".format(preface, self.temp_uuid) if part: self.key += ":{}".format(part) @@ -19,9 +20,12 @@ class TempData(object): def get_all(self): return self.redis.hgetall(self.key) - if __name__ == "__main__": redis = Redis() for key in redis.keys(): + print("key is:", key) + if "plink" not in key: + print(" Skipping...\n") + continue for field in redis.hkeys(key): - print("{}.{}={}".format(key, field, redis.hget(key, field))) + print(" {}.{}={}\n".format(key, field, len(redis.hget(key, field)))) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 8c0e0282..a6134fdd 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -89,13 +89,13 @@ def run_human(pheno_vector, with Bench("Divide into chunks"): results = chunks.divide_into_chunks(inputs, 64) - + result_store = [] identifier = uuid.uuid4() for part, result in enumerate(results): - data_store = temp_data.TempData(identifier, part) + data_store = temp_data.TempData(identifier, "plink", part) - data_store.store(data=pickle.dumps(result)) + data_store.store("data", pickle.dumps(result, pickle.HIGHEST_PROTOCOL)) result_store.append(data_store) for snp, this_id in plink_input: @@ -103,7 +103,7 @@ def run_human(pheno_vector, if count > 2000: break count += 1 - + percent_complete = (float(count) / total_snps) * 100 #print("percent_complete: ", percent_complete) loading_progress.store("percent_complete", percent_complete) -- cgit v1.2.3 From 8810c7735ed8a1bfa225449f7b388438e2ace890 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Tue, 23 Apr 2013 21:37:55 +0000 Subject: Created file correlation_plot.py for the correlation scatterplot Reverted temp_data.py to previous version that doesn't include the "part" input (for chunks) Made change to lmm related to splitting main iterator into chunks Deleted a bunch of unnecessary commented out code from show_trait.py --- misc/notes.txt | 12 + wqflask/maintenance/quick_search_table.py | 32 +- wqflask/utility/temp_data.py | 18 +- wqflask/wqflask/correlation/correlation_plot.py | 48 +++ wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 109 +++---- wqflask/wqflask/show_trait/show_trait.py | 377 +----------------------- 6 files changed, 139 insertions(+), 457 deletions(-) create mode 100644 wqflask/wqflask/correlation/correlation_plot.py (limited to 'wqflask/utility') diff --git a/misc/notes.txt b/misc/notes.txt index a48ee5bf..6bdcccf3 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -107,6 +107,11 @@ sudo /etc/init.d/redis_6379 start =========================================== Start screen session: + +If "no option -R": +byobu-select-backend +2. 
screen + byobu -RD (to start) control-a then :multiuser on control-a then :acladd sam @@ -172,6 +177,13 @@ cp -a (archive; copies recursively and doesn't follow symbol links) mv (same as above, but with no -a) +=========================================== + +Add user: +sudo adduser +Edit /etc/sudoers to give user root privileges + + =========================================== tidyp - Improves/beautifies html code diff --git a/wqflask/maintenance/quick_search_table.py b/wqflask/maintenance/quick_search_table.py index d175e600..046a05c4 100644 --- a/wqflask/maintenance/quick_search_table.py +++ b/wqflask/maintenance/quick_search_table.py @@ -42,7 +42,7 @@ Metadata.bind = Engine class PublishXRef(Base): __tablename__ = 'PublishXRef' - + Id = sa.Column(sa.Integer, primary_key=True) InbredSetId = sa.Column(sa.Integer, primary_key=True) PhenotypeId = sa.Column(sa.Integer) @@ -53,7 +53,7 @@ class PublishXRef(Base): additive = sa.Column(sa.Float) Sequence = sa.Column(sa.Integer) comments = sa.Column(sa.Text) - + @classmethod def run(cls): conn = Engine.connect() @@ -69,7 +69,7 @@ class PublishXRef(Base): conn.execute(ins) counter += 1 print("Done:", counter) - + @staticmethod def get_unique_terms(publishxref_id, inbredset_id): results = Session.query( @@ -114,7 +114,7 @@ class PublishXRef(Base): continue unique.add(token) - print("\nUnique terms are: {}\n".format(unique)) + #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @staticmethod @@ -155,12 +155,12 @@ class PublishXRef(Base): #"Geno.SpeciesId = Species.Id and " #"Geno.Name = PublishXRef.Locus ").params(publishxref_id=publishxref_id, # inbredset_id=inbredset_id).all() - for result in results: - print("****", result) + #for result in results: + # print("****", result) assert len(set(result for result in results)) == 1, "Different results or no results" - print("results are:", results) + #print("results are:", results) result = results[0] result = row2dict(result) try: @@ -214,7 +214,7 @@ class GenoXRef(Base): "FROM Geno " "WHERE Geno.Id = :geno_id ").params(geno_id=geno_id).all() - print("results: ", pf(results)) + #print("results: ", pf(results)) unique = set() if len(results): @@ -234,7 +234,7 @@ class GenoXRef(Base): continue unique.add(token) - print("\nUnique terms are: {}\n".format(unique)) + #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @@ -271,11 +271,11 @@ class GenoXRef(Base): "InbredSet.Id = GenoFreeze.InbredSetId and " "InbredSet.SpeciesId = Species.Id ").params(geno_id=geno_id, dataset_id=dataset_id).all() - for result in results: - print(result) + #for result in results: + # print(result) assert len(set(result for result in results)) == 1, "Different results" - print("results are:", results) + #print("results are:", results) result = results[0] result = row2dict(result) try: @@ -366,7 +366,7 @@ class ProbeSetXRef(Base): continue unique.add(token) - print("\nUnique terms are: {}\n".format(unique)) + #print("\nUnique terms are: {}\n".format(unique)) return " ".join(unique) @@ -420,14 +420,14 @@ class ProbeSetXRef(Base): "ProbeFreeze.InbredSetId = InbredSet.Id and " "InbredSet.SpeciesId = Species.Id ").params(probeset_id=probeset_id, dataset_id=dataset_id).all() - for result in results: - print("-", result) + #for result in results: + # print("-", result) if len(set(result for result in results)) != 1: return None #assert len(set(result for result in results)) == 1, "Different results" - print("results are:", results) + #print("results are:", results) result = results[0] result = 
row2dict(result) try: diff --git a/wqflask/utility/temp_data.py b/wqflask/utility/temp_data.py index 60f01167..004d45c6 100644 --- a/wqflask/utility/temp_data.py +++ b/wqflask/utility/temp_data.py @@ -1,31 +1,25 @@ from __future__ import print_function, division, absolute_import from redis import Redis -import redis import simplejson as json class TempData(object): - - def __init__(self, temp_uuid, preface="tempdata", part=None): + + def __init__(self, temp_uuid): self.temp_uuid = temp_uuid self.redis = Redis() - self.key = "{}:{}".format(preface, self.temp_uuid) - if part: - self.key += ":{}".format(part) + self.key = "tempdata:{}".format(self.temp_uuid) def store(self, field, value): self.redis.hset(self.key, field, value) - self.redis.expire(self.key, 60*60) # Expire in 60 minutes + self.redis.expire(self.key, 60*15) # Expire in 15 minutes def get_all(self): return self.redis.hgetall(self.key) + if __name__ == "__main__": redis = Redis() for key in redis.keys(): - print("key is:", key) - if "plink" not in key: - print(" Skipping...\n") - continue for field in redis.hkeys(key): - print(" {}.{}={}\n".format(key, field, len(redis.hget(key, field)))) + print("{}.{}={}".format(key, field, redis.hget(key, field))) diff --git a/wqflask/wqflask/correlation/correlation_plot.py b/wqflask/wqflask/correlation/correlation_plot.py new file mode 100644 index 00000000..4b043fc3 --- /dev/null +++ b/wqflask/wqflask/correlation/correlation_plot.py @@ -0,0 +1,48 @@ +#!/usr/bin/python + +from __future__ import print_function, division + +from base.trait import GeneralTrait +from base import data_set +from wqflask.show_trait.SampleList import SampleList + +class CorrelationPlot(object): + """Page that displays a correlation scatterplot with a line fitted to it""" + + def __init__(self, start_vars): + self.dataset1 = data_set.create_dataset(start_vars['dataset1']) + self.trait1 = GeneralTrait(dataset=self.dataset1.name, + name=start_vars['trait1']) + + self.dataset2 = data_set.create_dataset(start_vars['dataset2']) + self.trait2 = GeneralTrait(dataset=self.dataset2.name, + name=start_vars['trait2']) + + sample_names_1 = self.get_sample_names(self.dataset1) + sample_names_2 = self.get_sample_names(self.dataset2) + + self.samples_1 = self.get_samples(self.dataset1, sample_names_1, self.trait1) + self.samples_2 = self.get_samples(self.dataset2, sample_names_2, self.trait2) + + + def get_sample_names(self, dataset): + if dataset.group.parlist: + sample_names = (dataset.group.parlist + + dataset.group.f1list + + dataset.group.samplelist) + elif dataset.group.f1list: + sample_names = dataset.group.f1list + dataset.group.samplelist + else: + sample_names = dataset.group.samplelist + + return sample_names + + + def get_samples(self, dataset, sample_names, trait): + samples = SampleList(dataset = dataset, + sample_names=sample_names, + this_trait=trait, + sample_group_type='primary', + header="%s Only" % (dataset.group.name)) + + return samples \ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 2e8f020d..a3ba8fdb 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -468,7 +468,7 @@ class LMM: the heritability or the proportion of the total variance attributed to genetics. The X is the covariate matrix. 
""" - + S = 1.0/(h*self.Kva + (1.0 - h)) Xt = X.T*S XX = matrixMult(Xt,X) @@ -487,67 +487,68 @@ class LMM: def LL(self,h,X=None,stack=True,REML=False): - """ - Computes the log-likelihood for a given heritability (h). If X==None, then the - default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with - the input X. If stack is false, then X is used in place of X0t in the LL calculation. - REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. - """ - - if X == None: X = self.X0t - elif stack: - self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] - X = self.X0t_stack + """ + Computes the log-likelihood for a given heritability (h). If X==None, then the + default X0t will be used. If X is set and stack=True, then X0t will be matrix concatenated with + the input X. If stack is false, then X is used in place of X0t in the LL calculation. + REML is computed by adding additional terms to the standard LL and can be computed by setting REML=True. + """ - n = float(self.N) - q = float(X.shape[1]) - beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) - LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) - LL = -0.5 * LL + if X == None: + X = self.X0t + elif stack: + self.X0t_stack[:,(self.q)] = matrixMult(self.Kve.T,X)[:,0] + X = self.X0t_stack - if REML: - LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) - LL = LL + 0.5*LL_REML_part + n = float(self.N) + q = float(X.shape[1]) + beta,sigma,Q,XX_i,XX = self.getMLSoln(h,X) + LL = n*np.log(2*np.pi) + np.log(h*self.Kva + (1.0-h)).sum() + n + n*np.log(1.0/n * Q) + LL = -0.5 * LL - return LL,beta,sigma,XX_i + if REML: + LL_REML_part = q*np.log(2.0*np.pi*sigma) + np.log(linalg.det(matrixMult(X.T,X))) - np.log(linalg.det(XX)) + LL = LL + 0.5*LL_REML_part + + return LL,beta,sigma,XX_i def getMax(self,H, X=None,REML=False): - """ - Helper functions for .fit(...). - This function takes a set of LLs computed over a grid and finds possible regions - containing a maximum. Within these regions, a Brent search is performed to find the - optimum. - - """ - n = len(self.LLs) - HOpt = [] - for i in range(1,n-2): - if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: - HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) - if np.isnan(HOpt[-1][0]): - HOpt[-1][0] = [self.LLs[i-1]] - - if len(HOpt) > 1: - if self.verbose: - sys.stderr.write("NOTE: Found multiple optima. Returning first...\n") - return HOpt[0] - elif len(HOpt) == 1: - return HOpt[0] - elif self.LLs[0] > self.LLs[n-1]: - return H[0] - else: - return H[n-1] + """ + Helper functions for .fit(...). + This function takes a set of LLs computed over a grid and finds possible regions + containing a maximum. Within these regions, a Brent search is performed to find the + optimum. + + """ + n = len(self.LLs) + HOpt = [] + for i in range(1,n-2): + if self.LLs[i-1] < self.LLs[i] and self.LLs[i] > self.LLs[i+1]: + HOpt.append(optimize.brent(self.LL_brent,args=(X,REML),brack=(H[i-1],H[i+1]))) + if np.isnan(HOpt[-1][0]): + HOpt[-1][0] = [self.LLs[i-1]] + + if len(HOpt) > 1: + if self.verbose: + sys.stderr.write("NOTE: Found multiple optima. 
Returning first...\n") + return HOpt[0] + elif len(HOpt) == 1: + return HOpt[0] + elif self.LLs[0] > self.LLs[n-1]: + return H[0] + else: + return H[n-1] def fit(self,X=None,ngrids=100,REML=True): """ - Finds the maximum-likelihood solution for the heritability (h) given the current parameters. - X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as - the covariate matrix. - - This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. - Given this optimum, the function computes the LL and associated ML solutions. + Finds the maximum-likelihood solution for the heritability (h) given the current parameters. + X can be passed and will transformed and concatenated to X0t. Otherwise, X0t is used as + the covariate matrix. + + This function calculates the LLs over a grid and then uses .getMax(...) to find the optimum. + Given this optimum, the function computes the LL and associated ML solutions. """ if X == None: @@ -575,8 +576,8 @@ class LMM: def association(self,X, h = None, stack=True,REML=True, returnBeta=True): """ - Calculates association statitics for the SNPs encoded in the vector X of size n. - If h == None, the optimal h stored in optH is used. + Calculates association statitics for the SNPs encoded in the vector X of size n. + If h == None, the optimal h stored in optH is used. """ if stack: diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 85e33595..60e42afb 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -41,26 +41,11 @@ class ShowTrait(object): helper_functions.get_species_dataset_trait(self, kw) - #self.dataset = create_dataset(kw['dataset']) - # - ##self.cell_id = None - # - # - #this_trait = GeneralTrait(dataset=self.dataset.name, - # name=self.trait_id, - # cellid=None) - # - # self.dataset.group.read_genotype_file() - #if not self.dataset.group.genotype: - # self.read_data(include_f1=True) - - # Todo: Add back in the ones we actually need from below, as we discover we need them hddn = OrderedDict() - ## Some fields, like method, are defaulted to None; otherwise in IE the field can't be changed using jquery #hddn = OrderedDict( # FormID = fmID, @@ -101,14 +86,8 @@ class ShowTrait(object): # this_trait.mysqlid) # heritability = self.cursor.fetchone() - #hddn['mappingMethodId'] = webqtlDatabaseFunction.getMappingMethod (cursor=self.cursor, - # groupName=fd.group) - self.dispTraitInformation(kw, "", hddn, self.this_trait) #Display trait information + function buttons - #if this_trait == None: - # this_trait = webqtlTrait(data=kw['allTraitData'], dataset=None) - self.build_correlation_tools(self.this_trait) self.make_sample_lists(self.this_trait) @@ -134,29 +113,9 @@ class ShowTrait(object): sample_lists = sample_lists, attribute_names = self.sample_groups[0].attributes, temp_uuid = self.temp_uuid) - #print("js_data:", pf(js_data)) self.js_data = js_data - #def get_this_trait(self): - # this_trait = GeneralTrait(dataset=self.dataset.name, - # name=self.trait_id, - # cellid=self.cell_id) - # - # ###identification, etc. 
- # #self.identification = '%s : %s' % (self.dataset.shortname, self.trait_id) - # #this_trait.returnURL = webqtlConfig.CGIDIR + webqtlConfig.SCRIPTFILE + '?FormID=showDatabase&database=%s\ - # # &ProbeSetID=%s&group=%s&parentsf1=on' %(self.dataset, self.trait_id, self.dataset.group.name) - # # - # #if self.cell_id: - # # self.identification = '%s/%s'%(self.identification, self.cell_id) - # # this_trait.returnURL = '%s&CellID=%s' % (this_trait.returnURL, self.cell_id) - # - # this_trait.retrieve_info() - # this_trait.retrieve_sample_data() - # return this_trait - - def read_data(self, include_f1=False): '''read user input data or from trait data and analysis form''' @@ -327,13 +286,11 @@ class ShowTrait(object): if snpurl: snpBrowserButton = HT.Href(url="#redirect", onClick="openNewWin('%s')" % snpurl) snpBrowserButton_img = HT.Image("/images/snp_icon.jpg", name="snpbrowser", alt=" View SNPs and Indels ", title=" View SNPs and Indels ", style="border:none;") - #snpBrowserButton.append(snpBrowserButton_img) snpBrowserText = "SNPs" #XZ: Show GeneWiki for all species geneWikiButton = HT.Href(url="#redirect", onClick="openNewWin('%s')" % (os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE) + "?FormID=geneWiki&symbol=%s" % this_trait.symbol)) geneWikiButton_img = HT.Image("/images/genewiki_icon.jpg", name="genewiki", alt=" Write or review comments about this gene ", title=" Write or review comments about this gene ", style="border:none;") - #geneWikiButton.append(geneWikiButton_img) geneWikiText = 'GeneWiki' #XZ: display similar traits in other selected datasets @@ -342,15 +299,9 @@ class ShowTrait(object): similarUrl = "%s?cmd=sch&gene=%s&alias=1&species=%s" % (os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), this_trait.symbol, _Species) similarButton = HT.Href(url="#redirect", onClick="openNewWin('%s')" % similarUrl) similarButton_img = HT.Image("/images/find_icon.jpg", name="similar", alt=" Find similar expression data ", title=" Find similar expression data ", style="border:none;") - #similarButton.append(similarButton_img) similarText = "Find" else: pass - #tbl.append(HT.TR( - #HT.TD('Gene Symbol: ', Class="fwb fs13", valign="top", nowrap="on", width=90), - #HT.TD(width=10, valign="top"), - #HT.TD(HT.Span('%s' % this_trait.symbol, valign="top", Class="fs13 fsI"), valign="top", width=740) - #)) else: tbl.append(HT.TR( HT.TD('Gene Symbol: ', Class="fwb fs13", valign="top", nowrap="on"), @@ -396,11 +347,6 @@ class ShowTrait(object): for seqt in seqs: if int(seqt[1][-1]) %2 == 1: blatsequence += '%3EProbe_'+string.strip(seqt[1])+'%0A'+string.strip(seqt[0])+'%0A' - #-------- - #XZ, 07/16/2009: targetsequence is not used, so I comment out this block - #targetsequence = this_trait.targetseq - #if targetsequence==None: - # targetsequence = "" #XZ: Pay attention to the parameter of version (rn, mm, hg). They need to be changed if necessary. if _Species == "rat": @@ -449,55 +395,6 @@ class ShowTrait(object): #probeButton.append(probeButton_img) probeText = "Probes" - #tSpan = HT.Span(Class="fs13") - - #XZ: deal with blat score and blat specificity. 
- #if this_trait.probe_set_specificity or this_trait.probe_set_blat_score: - # if this_trait.probe_set_specificity: - # pass - # #tSpan.append(HT.Href(url="/blatInfo.html", target="_blank", title="Values higher than 2 for the specificity are good", text="BLAT specificity", Class="non_bold"),": %.1f" % float(this_trait.probe_set_specificity), " "*3) - # if this_trait.probe_set_blat_score: - # pass - # #tSpan.append("Score: %s" % int(this_trait.probe_set_blat_score), " "*2) - - #onClick="openNewWin('/blatInfo.html')" - - #tbl.append(HT.TR( - # HT.TD('Target Score: ', Class="fwb fs13", valign="top", nowrap="on"), - # HT.TD(width=10, valign="top"), - # HT.TD(tSpan, valign="top") - # )) - - #tSpan = HT.Span(Class="fs13") - #tSpan.append(str(_Species).capitalize(), ", ", fd.group) - # - #tbl.append(HT.TR( - # HT.TD('Species and Group: ', Class="fwb fs13", valign="top", nowrap="on"), - # HT.TD(width=10, valign="top"), - # HT.TD(tSpan, valign="top") - # )) - - #if this_trait.cellid: - # self.cursor.execute(""" - # select ProbeFreeze.Name from ProbeFreeze, ProbeSetFreeze - # where - # ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND - # ProbeSetFreeze.Id = %d""" % this_trait.dataset.id) - # probeDBName = self.cursor.fetchone()[0] - # tbl.append(HT.TR( - # HT.TD('Database: ', Class="fs13 fwb", valign="top", nowrap="on"), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Span('%s' % probeDBName, Class="non_bold"), valign="top") - # )) - #else: - #tbl.append(HT.TR( - # HT.TD('Database: ', Class="fs13 fwb", valign="top", nowrap="on"), - # HT.TD(width=10, valign="top"), - # HT.TD(HT.Href(text=this_trait.dataset.fullname, url = webqtlConfig.INFOPAGEHREF % this_trait.dataset.name, - # target='_blank', Class="fs13 fwn non_bold"), valign="top") - # )) - #pass - this_trait.species = _Species # We need this in the template, so we tuck it into this_trait this_trait.database = this_trait.get_database() @@ -532,14 +429,6 @@ class ShowTrait(object): url=webqtlConfig.HOMOLOGENE_ID % this_trait.homologeneid, Class="fs14 fwn", title="Find similar genes in other species") #tSpan.append(HT.Span(hurl, style=idStyle), " "*2) - #tbl.append( - # HT.TR(HT.TD(colspan=3,height=6)), - # HT.TR( - # HT.TD('Resource Links: ', Class="fwb fs13", valign="top", nowrap="on"), - # HT.TD(width=10, valign="top"), - # HT.TD(tSpan, valign="top") - # )) - #XZ: Resource Links: if this_trait.symbol: linkStyle = "background:#dddddd;padding:2" @@ -584,9 +473,7 @@ class ShowTrait(object): # txen), # Class="fs14 fwn"), style=linkStyle) # , " "*2) - #except: - # pass - + #XZ, 7/16/2009: The url for SymAtlas (renamed as BioGPS) has changed. 
We don't need this any more #tSpan.append(HT.Span(HT.Href(text= 'SymAtlas',target="mainFrame",\ # url="http://symatlas.gnf.org/SymAtlas/bioentry?querytext=%s&query=14&species=%s&type=Expression" \ @@ -655,28 +542,12 @@ class ShowTrait(object): # title="Allen Brain Atlas"), style=linkStyle), " "*2) pass - #tbl.append( - # HT.TR(HT.TD(colspan=3,height=6)), - # HT.TR( - # HT.TD(' '), - # HT.TD(width=10, valign="top"), - # HT.TD(tSpan, valign="top"))) - - #menuTable = HT.TableLite(cellpadding=2, Class="collap", width="620", id="target1") - #menuTable.append(HT.TR(HT.TD(addSelectionButton, align="center"),HT.TD(similarButton, align="center"),HT.TD(verifyButton, align="center"),HT.TD(geneWikiButton, align="center"),HT.TD(snpBrowserButton, align="center"),HT.TD(rnaseqButton, align="center"),HT.TD(probeButton, align="center"),HT.TD(updateButton, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - #menuTable.append(HT.TR(HT.TD(addSelectionText, align="center"),HT.TD(similarText, align="center"),HT.TD(verifyText, align="center"),HT.TD(geneWikiText, align="center"),HT.TD(snpBrowserText, align="center"),HT.TD(rnaseqText, align="center"),HT.TD(probeText, align="center"),HT.TD(updateText, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - - #for zhou mi's cliques, need to be removed #if self.database[:6] == 'BXDMic' and self.ProbeSetID in cliqueID: # Info2Disp.append(HT.Strong('Clique Search: '),HT.Href(text='Search',\ # url ="http://compbio1.utmem.edu/clique_go/results.php?pid=%s&pval_1=0&pval_2=0.001" \ # % self.ProbeSetID,target='_blank',Class="normalsize"),HT.BR()) - #linkTable.append(HT.TR(linkTD)) - #Info2Disp.append(linkTable) - #title1Body.append(tbl, HT.BR(), menuTable) - elif this_trait and this_trait.dataset and this_trait.dataset.type =='Publish': #Check if trait is phenotype #if this_trait.confidential: @@ -759,12 +630,6 @@ class ShowTrait(object): # )) pass - #menuTable = HT.TableLite(cellpadding=2, Class="collap", width="150", id="target1") - #menuTable.append(HT.TR(HT.TD(addSelectionButton, align="center"),HT.TD(updateButton, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - #menuTable.append(HT.TR(HT.TD(addSelectionText, align="center"),HT.TD(updateText, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - - #title1Body.append(tbl, HT.BR(), menuTable) - elif this_trait and this_trait.dataset and this_trait.dataset.type == 'Geno': #Check if trait is genotype if this_trait.chr and this_trait.mb: @@ -810,41 +675,10 @@ class ShowTrait(object): # valign="top", width=740) # )) - #menuTable = HT.TableLite(cellpadding=2, Class="collap", width="275", id="target1") - #menuTable.append(HT.TR(HT.TD(addSelectionButton, align="center"),HT.TD(verifyButton, align="center"),HT.TD(rnaseqButton, align="center"), HT.TD(updateButton, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - #menuTable.append(HT.TR(HT.TD(addSelectionText, align="center"),HT.TD(verifyText, align="center"),HT.TD(rnaseqText, align="center"), HT.TD(updateText, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - - #title1Body.append(tbl, HT.BR(), menuTable) - - elif (this_trait == None or this_trait.dataset.type == 'Temp'): #if temporary trait (user-submitted trait or PCA trait) - - #TempInfo = HT.Paragraph() - if this_trait != None: - if this_trait.description: - pass - #tbl.append(HT.TR(HT.TD(HT.Strong('Description: '),' %s ' % this_trait.description,HT.BR()), colspan=3, height=15)) - else: - 
tbl.append(HT.TR(HT.TD(HT.Strong('Description: '),'not available',HT.BR(),HT.BR()), colspan=3, height=15)) - - if (updateText == "Edit"): - menuTable = HT.TableLite(cellpadding=2, Class="collap", width="150", id="target1") - else: - menuTable = HT.TableLite(cellpadding=2, Class="collap", width="80", id="target1") - - #menuTable.append(HT.TR(HT.TD(addSelectionButton, align="right"),HT.TD(updateButton, align="right"), colspan=3, height=50, style="vertical-align:bottom;") ) - #menuTable.append(HT.TR(HT.TD(addSelectionText, align="center"),HT.TD(updateText, align="center"), colspan=3, height=50, style="vertical-align:bottom;")) - # - #title1Body.append(tbl, HT.BR(), menuTable) - - else: - pass - def dispBasicStatistics(self, fd, this_trait): #XZ, June 22, 2011: The definition and usage of primary_samples, other_samples, specialStrains, all_samples are not clear and hard to understand. But since they are only used in this function for draw graph purpose, they will not hurt the business logic outside. As of June 21, 2011, this function seems work fine, so no hurry to clean up. These parameters and code in this function should be cleaned along with fd.f1list, fd.parlist, fd.samplelist later. - #stats_row = HT.TR() - #stats_cell = HT.TD() # This should still be riset here - Sam - Nov. 2012 if fd.genotype.type == "riset": @@ -912,7 +746,6 @@ class ShowTrait(object): for sampleNameOrig in all_samples: sampleName = sampleNameOrig.replace("_2nd_", "") - #try: print("* type of this_trait:", type(this_trait)) print(" name:", this_trait.__class__.__name__) print(" this_trait:", this_trait) @@ -925,28 +758,16 @@ class ShowTrait(object): print(" thisvar:", thisvar) thisValFull = [sampleName, thisval, thisvar] print(" thisValFull:", thisValFull) - #except: - # continue vals1.append(thisValFull) - - #vals1 = [[sampleNameOrig.replace("_2nd_", ""), - # this_trait.data[sampleName].val, - # this_trait.data[sampleName].var] - # for sampleNameOrig in all_samples]] - # - #Using just the group sample for sampleNameOrig in primary_samples: sampleName = sampleNameOrig.replace("_2nd_", "") - #try: thisval = this_trait.data[sampleName].value thisvar = this_trait.data[sampleName].variance thisValFull = [sampleName,thisval,thisvar] - #except: - # continue vals2.append(thisValFull) @@ -954,12 +775,9 @@ class ShowTrait(object): for sampleNameOrig in other_samples: sampleName = sampleNameOrig.replace("_2nd_", "") - #try: thisval = this_trait.data[sampleName].value thisvar = this_trait.data[sampleName].variance thisValFull = [sampleName,thisval,thisvar] - #except: - # continue vals3.append(thisValFull) @@ -972,12 +790,9 @@ class ShowTrait(object): for sampleNameOrig in all_samples: sampleName = sampleNameOrig.replace("_2nd_", "") - #try: thisval = this_trait.data[sampleName].value thisvar = this_trait.data[sampleName].variance thisValFull = [sampleName,thisval,thisvar] - #except: - # continue vals.append(thisValFull) @@ -988,8 +803,6 @@ class ShowTrait(object): if i == 0 and len(vals) < 4: stats_container = HT.Div(id="stats_tabs", style="padding:10px;", Class="ui-tabs") #Needed for tabs; notice the "stats_script_text" below referring to this element stats_container.append(HT.Div(HT.Italic("Fewer than 4 case data were entered. 
No statistical analysis has been attempted."))) - #stats_script_text = """$(function() { $("#stats_tabs").tabs();});""" - #stats_cell.append(stats_container) break elif (i == 1 and len(primary_samples) < 4): stats_container = HT.Div(id="stats_tabs%s" % i, Class="ui-tabs") @@ -997,20 +810,12 @@ class ShowTrait(object): elif (i == 2 and len(other_samples) < 4): stats_container = HT.Div(id="stats_tabs%s" % i, Class="ui-tabs") stats_container.append(HT.Div(HT.Italic("Fewer than 4 non-" + fd.group + " case data were entered. No statistical analysis has been attempted."))) - #stats_script_text = """$(function() { $("#stats_tabs0").tabs(); $("#stats_tabs1").tabs(); $("#stats_tabs2").tabs();});""" else: continue if len(vals) > 4: stats_tab_list = [HT.Href(text="Basic Table", url="#statstabs-1", Class="stats_tab"),HT.Href(text="Probability Plot", url="#statstabs-5", Class="stats_tab"), HT.Href(text="Bar Graph (by name)", url="#statstabs-3", Class="stats_tab"), HT.Href(text="Bar Graph (by rank)", url="#statstabs-4", Class="stats_tab"), HT.Href(text="Box Plot", url="#statstabs-2", Class="stats_tab")] - #stats_tabs = HT.List(stats_tab_list) - #stats_container.append(stats_tabs) - # - #table_div = HT.Div(id="statstabs-1") - #table_container = HT.Paragraph() - # - #statsTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") if this_trait.dataset: if this_trait.cellid: @@ -1020,12 +825,6 @@ class ShowTrait(object): else: self.stats_data.append(BasicStatisticsFunctions.basicStatsTable(vals=vals)) - #statsTable.append(HT.TR(HT.TD(statsTableCell))) - - #table_container.append(statsTable) - #table_div.append(table_container) - #stats_container.append(table_div) - # #normalplot_div = HT.Div(id="statstabs-5") #normalplot_container = HT.Paragraph() #normalplot = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") @@ -1043,49 +842,22 @@ class ShowTrait(object): #normally distributed. 
Different symbols represent different groups.",HT.BR(),HT.BR(), #"More about ", HT.Href(url="http://en.wikipedia.org/wiki/Normal_probability_plot", # target="_blank", text="Normal Probability Plots"), " and more about interpreting these plots from the ", HT.Href(url="/glossary.html#normal_probability", target="_blank", text="glossary")))) - #normalplot_container.append(normalplot) - #normalplot_div.append(normalplot_container) - #stats_container.append(normalplot_div) #boxplot_div = HT.Div(id="statstabs-2") #boxplot_container = HT.Paragraph() #boxplot = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") #boxplot_img, boxplot_link = BasicStatisticsFunctions.plotBoxPlot(vals) #boxplot.append(HT.TR(HT.TD(boxplot_img, HT.P(), boxplot_link, align="left"))) - #boxplot_container.append(boxplot) - #boxplot_div.append(boxplot_container) - #stats_container.append(boxplot_div) - #barName_div = HT.Div(id="statstabs-3") #barName_container = HT.Paragraph() #barName = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") #barName_img = BasicStatisticsFunctions.plotBarGraph(identification=fd.identification, group=fd.group, vals=vals, type="name") - #barName.append(HT.TR(HT.TD(barName_img))) - #barName_container.append(barName) - #barName_div.append(barName_container) - #stats_container.append(barName_div) - # + #barRank_div = HT.Div(id="statstabs-4") #barRank_container = HT.Paragraph() #barRank = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") #barRank_img = BasicStatisticsFunctions.plotBarGraph(identification=fd.identification, group=fd.group, vals=vals, type="rank") - #barRank.append(HT.TR(HT.TD(barRank_img))) - #barRank_container.append(barRank) - #barRank_div.append(barRank_container) - #stats_container.append(barRank_div) - - # stats_cell.append(stats_container) - # - #stats_script.append(stats_script_text) - # - #submitTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%", Class="target2") - #stats_row.append(stats_cell) - - #submitTable.append(stats_row) - #submitTable.append(stats_script) - - #title2Body.append(submitTable) def build_correlation_tools(self, this_trait): @@ -1100,15 +872,6 @@ class ShowTrait(object): this_group = 'BXD' if this_group: - #sample_correlation = HT.Input(type='button',name='sample_corr', value=' Compute ', Class="button sample_corr") - #lit_correlation = HT.Input(type='button',name='lit_corr', value=' Compute ', Class="button lit_corr") - #tissue_correlation = HT.Input(type='button',name='tiss_corr', value=' Compute ', Class="button tiss_corr") - #methodText = HT.Span("Calculate:", Class="ffl fwb fs12") - # - #databaseText = HT.Span("Database:", Class="ffl fwb fs12") - #databaseMenu1 = HT.Select(name='database1') - #databaseMenu2 = HT.Select(name='database2') - #databaseMenu3 = HT.Select(name='database3') dataset_menu = [] print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH) @@ -1133,8 +896,6 @@ class ShowTrait(object): tissues = g.db.execute("SELECT Id, Name FROM Tissue order by Name") for item in tissues.fetchall(): tissue_id, tissue_name = item - #databaseMenuSub = HT.Optgroup(label = '%s ------' % tissue_name) - #dataset_sub_menu = [] data_sets = g.db.execute('''SELECT ProbeSetFreeze.FullName,ProbeSetFreeze.Name FROM ProbeSetFreeze, ProbeFreeze, InbredSet WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id and ProbeFreeze.TissueId = %s and ProbeSetFreeze.public > %s and ProbeFreeze.InbredSetId = InbredSet.Id and InbredSet.Name like %s @@ -1144,149 +905,15 @@ class ShowTrait(object): if dataset_sub_menu: 
dataset_menu.append(dict(tissue=tissue_name, datasets=dataset_sub_menu)) - # ("**heading**", tissue_name)) - #dataset_menu.append(dataset_sub_menu) dataset_menu_selected = None if len(dataset_menu): if this_trait and this_trait.dataset: dataset_menu_selected = this_trait.dataset.name - #criteriaText = HT.Span("Return:", Class="ffl fwb fs12") - - #criteriaMenu1 = HT.Select(name='criteria1', selected='500', onMouseOver="if (NS4 || IE4) activateEl('criterias', event);") - return_results_menu = (100, 200, 500, 1000, 2000, 5000, 10000, 15000, 20000) return_results_menu_selected = 500 - #criteriaMenu1.append(('top 100','100')) - #criteriaMenu1.append(('top 200','200')) - #criteriaMenu1.append(('top 500','500')) - #criteriaMenu1.append(('top 1000','1000')) - #criteriaMenu1.append(('top 2000','2000')) - #criteriaMenu1.append(('top 5000','5000')) - #criteriaMenu1.append(('top 10000','10000')) - #criteriaMenu1.append(('top 15000','15000')) - #criteriaMenu1.append(('top 20000','20000')) - - #self.MDPRow1 = HT.TR(Class='mdp1') - #self.MDPRow2 = HT.TR(Class='mdp2') - #self.MDPRow3 = HT.TR(Class='mdp3') - - # correlationMenus1 = HT.TableLite( - # HT.TR(HT.TD(databaseText), HT.TD(databaseMenu1, colspan="3")), - # HT.TR(HT.TD(criteriaText), HT.TD(criteriaMenu1)), - # self.MDPRow1, cellspacing=0, width="619px", cellpadding=2) - # correlationMenus1.append(HT.Input(name='orderBy', value='2', type='hidden')) # to replace the orderBy menu - # correlationMenus2 = HT.TableLite( - # HT.TR(HT.TD(databaseText), HT.TD(databaseMenu2, colspan="3")), - # HT.TR(HT.TD(criteriaText), HT.TD(criteriaMenu2)), - # self.MDPRow2, cellspacing=0, width="619px", cellpadding=2) - # correlationMenus2.append(HT.Input(name='orderBy', value='2', type='hidden')) - # correlationMenus3 = HT.TableLite( - # HT.TR(HT.TD(databaseText), HT.TD(databaseMenu3, colspan="3")), - # HT.TR(HT.TD(criteriaText), HT.TD(criteriaMenu3)), - # self.MDPRow3, cellspacing=0, width="619px", cellpadding=2) - # correlationMenus3.append(HT.Input(name='orderBy', value='2', type='hidden')) - # - #else: - # correlationMenus = "" - - - #corr_row = HT.TR() - #corr_container = HT.Div(id="corr_tabs", Class="ui-tabs") - # - #if (this_trait.dataset != None and this_trait.dataset.type =='ProbeSet'): - # corr_tab_list = [HT.Href(text='Sample r', url="#corrtabs-1"), - # HT.Href(text='Literature r', url="#corrtabs-2"), - # HT.Href(text='Tissue r', url="#corrtabs-3")] - #else: - # corr_tab_list = [HT.Href(text='Sample r', url="#corrtabs-1")] - # - #corr_tabs = HT.List(corr_tab_list) - #corr_container.append(corr_tabs) - - #if correlationMenus1 or correlationMenus2 or correlationMenus3: - #sample_div = HT.Div(id="corrtabs-1") - #sample_container = HT.Span() - # - #sample_type = HT.Input(type="radio", name="sample_method", value="1", checked="checked") - #sample_type2 = HT.Input(type="radio", name="sample_method", value="2") - # - #sampleTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") - #sampleTD = HT.TD(correlationMenus1, HT.BR(), - # "Pearson", sample_type, " "*3, "Spearman Rank", sample_type2, HT.BR(), HT.BR(), - # sample_correlation, HT.BR(), HT.BR()) - # - #sampleTD.append(HT.Span("The ", - # HT.Href(url="/correlationAnnotation.html#sample_r", target="_blank", - # text="Sample Correlation")," is computed between trait data and", - # " any ",HT.BR()," other traits in the sample database selected above. 
Use ", - # HT.Href(url="/glossary.html#Correlations", target="_blank", text="Spearman Rank"), - # HT.BR(),"when the sample size is small (<20) or when there are influential \ - # outliers.", HT.BR(),Class="fs12")) - - #sampleTable.append(sampleTD) - - #sample_container.append(sampleTable) - #sample_div.append(sample_container) - #corr_container.append(sample_div) - # - #literature_div = HT.Div(id="corrtabs-2") - #literature_container = HT.Span() - - #literatureTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") - #literatureTD = HT.TD(correlationMenus2,HT.BR(),lit_correlation, HT.BR(), HT.BR()) - #literatureTD.append(HT.Span("The ", HT.Href(url="/correlationAnnotation.html", target="_blank",text="Literature Correlation"), " (Lit r) between this gene and all other genes is computed",HT.BR(), - # "using the ", HT.Href(url="https://grits.eecs.utk.edu/sgo/sgo.html", target="_blank", text="Semantic Gene Organizer"), - # " and human, rat, and mouse data from PubMed. ", HT.BR(),"Values are ranked by Lit r, \ - # but Sample r and Tissue r are also displayed.", HT.BR(), HT.BR(), - # HT.Href(url="/glossary.html#Literature", target="_blank", text="More on using Lit r"), Class="fs12")) - #literatureTable.append(literatureTD) - # - #literature_container.append(literatureTable) - #literature_div.append(literature_container) - # - #if this_trait.dataset != None: - # if (this_trait.dataset.type =='ProbeSet'): - # corr_container.append(literature_div) - # - #tissue_div = HT.Div(id="corrtabs-3") - #tissue_container = HT.Span() - # - #tissue_type = HT.Input(type="radio", name="tissue_method", value="4", checked="checked") - #tissue_type2 = HT.Input(type="radio", name="tissue_method", value="5") - # - #tissueTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%") - #tissueTD = HT.TD(correlationMenus3,HT.BR(), - # "Pearson", tissue_type, " "*3, "Spearman Rank", tissue_type2, HT.BR(), HT.BR(), - # tissue_correlation, HT.BR(), HT.BR()) - #tissueTD.append(HT.Span("The ", HT.Href(url="/webqtl/main.py?FormID=tissueCorrelation", target="_blank", text="Tissue Correlation"), - #" (Tissue r) estimates the similarity of expression of two genes",HT.BR()," or \ - #transcripts across different cells, tissues, or organs (",HT.Href(url="/correlationAnnotation.html#tissue_r", target="_blank", text="glossary"),"). 
\ - #Tissue correlations",HT.BR()," are generated by analyzing expression in multiple samples usually taken from \ - #single cases.",HT.BR(),HT.Bold("Pearson")," and ",HT.Bold("Spearman Rank")," correlations have been computed for all pairs \ - #of genes",HT.BR()," using data from mouse samples.", - #HT.BR(), Class="fs12")) - #tissueTable.append(tissueTD) - # - #tissue_container.append(tissueTable) - #tissue_div.append(tissue_container) - #if this_trait.dataset != None: - # if (this_trait.dataset.type =='ProbeSet'): - # corr_container.append(tissue_div) - # - #corr_row.append(HT.TD(corr_container)) - # - #corr_script = HT.Script(language="Javascript") - #corr_script_text = """$(function() { $("#corr_tabs").tabs(); });""" - #corr_script.append(corr_script_text) - # - #submitTable = HT.TableLite(cellspacing=0, cellpadding=0, width="100%", Class="target4") - #submitTable.append(corr_row) - #submitTable.append(corr_script) - # - #title3Body.append(submitTable) self.corr_tools = dict(dataset_menu = dataset_menu, dataset_menu_selected = dataset_menu_selected, return_results_menu = return_results_menu, -- cgit v1.2.3 From 7eb9f5d12389df0ef7440a82df0be9bd1119847e Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 23 May 2013 00:22:54 +0000 Subject: Lots of changes around users --- wqflask/cfg/default_settings.py | 4 +++- wqflask/utility/formatting.py | 13 ++++++++++ wqflask/wqflask/model.py | 28 +++++++++++++++++++++- wqflask/wqflask/templates/admin/user_manager.html | 6 ++++- .../security/email/confirmation_instructions.html | 4 +++- .../security/email/confirmation_instructions.txt | 4 +++- .../wqflask/templates/security/email/welcome.html | 4 +++- .../wqflask/templates/security/email/welcome.txt | 4 +++- wqflask/wqflask/user_manager.py | 14 ++++++++++- wqflask/wqflask/views.py | 9 +++++-- 10 files changed, 80 insertions(+), 10 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/cfg/default_settings.py b/wqflask/cfg/default_settings.py index d0713e4d..96f7f1a5 100644 --- a/wqflask/cfg/default_settings.py +++ b/wqflask/cfg/default_settings.py @@ -3,7 +3,7 @@ LOGFILE = """/tmp/flask_gn_log""" SERVER_PORT = 5000 #This is needed because Flask turns key errors into a -#400 bad request response with no exception/log +#400 bad request response with no exception/log TRAP_BAD_REQUEST_ERRORS = True # http://pythonhosted.org/Flask-Security/configuration.html @@ -11,3 +11,5 @@ SECURITY_CONFIRMABLE = True SECURITY_TRACKABLE = True SECURITY_REGISTERABLE = True SECURITY_RECOVERABLE = True + +SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org" diff --git a/wqflask/utility/formatting.py b/wqflask/utility/formatting.py index 52173417..e53dda22 100644 --- a/wqflask/utility/formatting.py +++ b/wqflask/utility/formatting.py @@ -10,6 +10,14 @@ def numify(number, singular=None, plural=None): >>> numify(9, 'book', 'books') 'nine books' + You can add capitalize to change the capitalization + >>> numify(9, 'book', 'books').capitalize() + 'Nine books' + + Or capitalize every word using title + >>> numify(9, 'book', 'books').title() + 'Nine Books' + >>> numify(15) '15' @@ -107,3 +115,8 @@ def commify(n): if cents: out += '.' + cents return out + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/wqflask/wqflask/model.py b/wqflask/wqflask/model.py index 9f9267d9..296f1f0d 100644 --- a/wqflask/wqflask/model.py +++ b/wqflask/wqflask/model.py @@ -8,6 +8,30 @@ from wqflask import app # Create database connection object db = SQLAlchemy(app) +# Is this right? 
-Sam +#from sqlalchemy.ext.declarative import declarative_base +#Base = declarative_base() + +#@classmethod +#def get(cls, key): +# """Convenience get method using the primary key +# +# If record doesn't exist, returns None +# +# Allows the following: User.get('121') +# +# """ +# print("in get cls is:", cls) +# print(" key is {} : {}".format(type(key), key)) +# query = db.Model.query(cls) +# print("query is: ", query) +# record = query.get(key) +# return record +# +# +#print("db.Model is:", vars(db.Model)) +#db.Model.get = get + # Define models roles_users = db.Table('roles_users', db.Column('user_id', db.Integer(), db.ForeignKey('user.id')), @@ -38,5 +62,7 @@ class User(db.Model, UserMixin): user_datastore = SQLAlchemyUserDatastore(db, User, Role) security = Security(app, user_datastore) - db.metadata.create_all(db.engine) + + + diff --git a/wqflask/wqflask/templates/admin/user_manager.html b/wqflask/wqflask/templates/admin/user_manager.html index c49f0d94..14cd12e0 100644 --- a/wqflask/wqflask/templates/admin/user_manager.html +++ b/wqflask/wqflask/templates/admin/user_manager.html @@ -16,6 +16,7 @@ + @@ -23,7 +24,10 @@ {% for user in users %} - + + diff --git a/wqflask/wqflask/templates/security/email/confirmation_instructions.html b/wqflask/wqflask/templates/security/email/confirmation_instructions.html index 5082a9a8..239f670f 100644 --- a/wqflask/wqflask/templates/security/email/confirmation_instructions.html +++ b/wqflask/wqflask/templates/security/email/confirmation_instructions.html @@ -1,3 +1,5 @@ +

Welcome to GeneNetwork!

+

Please confirm your email through the link below:

-

Confirm my account

\ No newline at end of file +

Confirm my account

diff --git a/wqflask/wqflask/templates/security/email/confirmation_instructions.txt b/wqflask/wqflask/templates/security/email/confirmation_instructions.txt index fb435b55..babedd8b 100644 --- a/wqflask/wqflask/templates/security/email/confirmation_instructions.txt +++ b/wqflask/wqflask/templates/security/email/confirmation_instructions.txt @@ -1,3 +1,5 @@ +Welcome to GeneNetwork! + Please confirm your email through the link below: -{{ confirmation_link }} \ No newline at end of file +{{ confirmation_link }} diff --git a/wqflask/wqflask/templates/security/email/welcome.html b/wqflask/wqflask/templates/security/email/welcome.html index 55eaed61..3cb01ce0 100644 --- a/wqflask/wqflask/templates/security/email/welcome.html +++ b/wqflask/wqflask/templates/security/email/welcome.html @@ -1,7 +1,9 @@

Welcome {{ user.email }}!

+

We hope you find GeneNetwork an amazing resource!

+ {% if security.confirmable %}

You can confirm your email through the link below:

Confirm my account

-{% endif %} \ No newline at end of file +{% endif %} diff --git a/wqflask/wqflask/templates/security/email/welcome.txt b/wqflask/wqflask/templates/security/email/welcome.txt index fb6ee5b5..9a400686 100644 --- a/wqflask/wqflask/templates/security/email/welcome.txt +++ b/wqflask/wqflask/templates/security/email/welcome.txt @@ -1,7 +1,9 @@ Welcome {{ user.email }}! +We hope you find GeneNetwork an amazing resource! + {% if security.confirmable %} You can confirm your email through the link below: {{ confirmation_link }} -{% endif %} \ No newline at end of file +{% endif %} diff --git a/wqflask/wqflask/user_manager.py b/wqflask/wqflask/user_manager.py index 4d608dc7..97ba18a3 100644 --- a/wqflask/wqflask/user_manager.py +++ b/wqflask/wqflask/user_manager.py @@ -11,12 +11,24 @@ from wqflask import model from flask import Flask, g +from pprint import pformat as pf + #from app import db print("globals are:", globals()) -class UserManager(object): +class UsersManager(object): def __init__(self): self.users = model.User.query.all() print("Users are:", self.users) + + +class UserManager(object): + def __init__(self, kw): + self.user_id = int(kw['user_id']) + print("In UserManager locals are:", pf(locals())) + #self.user = model.User.get(user_id) + #print("user is:", user) + self.user = model.User.query.get(self.user_id) + print("user is:", self.user) diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 63781c73..b59c3b34 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -239,11 +239,16 @@ def get_temp_data(): temp_uuid = request.args['key'] return flask.jsonify(temp_data.TempData(temp_uuid).get_all()) -@app.route("/users") +@app.route("/manage/users") def manage_users(): - template_vars = user_manager.UserManager() + template_vars = user_manager.UsersManager() return render_template("admin/user_manager.html", **template_vars.__dict__) +@app.route("/manage/user") +def manage_user(): + template_vars = user_manager.UserManager(request.args) + return render_template("admin/ind_user_manager.html", **template_vars.__dict__) + def json_default_handler(obj): '''Based on http://stackoverflow.com/a/2680060/1175849''' -- cgit v1.2.3 From 25bd2fa7ac229eb7862fe778fe03eb75ff34368c Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Thu, 13 Jun 2013 21:13:51 +0000 Subject: Fixed issue where too much memory was used as a result of creating a dataset object for each trait in the correlation results Added new fields/columns for each trait in the correlation result table (max LRS, max LRS location, mean expression) Fixed error if trait doesn't have these fields --- wqflask/base/data_set.py | 30 +++---- wqflask/base/trait.py | 27 +++++-- wqflask/utility/helper_functions.py | 2 +- wqflask/wqflask/correlation/show_corr_results.py | 99 +++++++++++------------- wqflask/wqflask/search_results.py | 2 +- 5 files changed, 83 insertions(+), 77 deletions(-) (limited to 'wqflask/utility') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 0c7676c4..0903bf16 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -90,8 +90,8 @@ class Markers(object): self.markers = json.load(json_data_fh) def add_pvalues(self, p_values): - print("length of self.markers:", len(self.markers)) - print("length of p_values:", len(p_values)) + #print("length of self.markers:", len(self.markers)) + #print("length of p_values:", len(p_values)) # THIS IS only needed for the case when we are limiting the number of p-values calculated if len(self.markers) < len(p_values): @@ -161,7 +161,7 
@@ class DatasetGroup(object): self.f1list = None self.parlist = None self.get_f1_parent_strains() - print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) + #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) self.species = webqtlDatabaseFunction.retrieve_species(self.name) @@ -170,7 +170,7 @@ class DatasetGroup(object): def get_markers(self): - print("self.species is:", self.species) + #print("self.species is:", self.species) if self.species == "human": marker_class = HumanMarkers else: @@ -293,14 +293,14 @@ class DataSet(object): self.name, self.name, self.name)) - print("query_args are:", query_args) + #print("query_args are:", query_args) - print(""" - SELECT Id, Name, FullName, ShortName - FROM %s - WHERE public > %s AND - (Name = '%s' OR FullName = '%s' OR ShortName = '%s') - """ % (query_args)) + #print(""" + # SELECT Id, Name, FullName, ShortName + # FROM %s + # WHERE public > %s AND + # (Name = '%s' OR FullName = '%s' OR ShortName = '%s') + # """ % (query_args)) self.id, self.name, self.fullname, self.shortname = g.db.execute(""" SELECT Id, Name, FullName, ShortName @@ -624,12 +624,12 @@ class MrnaAssayDataSet(DataSet): and ProbeSetFreezeId = {} """.format(escape(str(self.id))) results = g.db.execute(query).fetchall() - print("After get_trait_list query") + #print("After get_trait_list query") trait_data = {} for trait in results: print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - print("After retrieve_sample_data") + #print("After retrieve_sample_data") return trait_data def get_trait_data(self): @@ -763,7 +763,7 @@ class MrnaAssayDataSet(DataSet): """ % (escape(str(this_trait.dataset.id)), escape(this_trait.name))) - print("query is:", pf(query)) + #print("query is:", pf(query)) result = g.db.execute(query).fetchone() @@ -926,7 +926,7 @@ class TempDataSet(DataSet): def geno_mrna_confidentiality(ob): dataset_table = ob.type + "Freeze" - print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) + #print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 53f41779..f333d5a7 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,6 +1,8 @@ from __future__ import absolute_import, division, print_function import string +import resource + from htmlgen import HTMLgen2 as HT @@ -15,6 +17,10 @@ from pprint import pformat as pf from flask import Flask, g +def print_mem(stage=""): + mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print("{}: {}".format(stage, mem/1024)) + class GeneralTrait(object): """ Trait class defines a trait in webqtl, can be either Microarray, @@ -23,8 +29,12 @@ class GeneralTrait(object): """ def __init__(self, **kw): - #print("in GeneralTrait") - self.dataset = kw.get('dataset') # database name + # xor assertion + assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. xor name"; + if kw.get('dataset_name'): + self.dataset = create_dataset(kw.get('dataset_name')) + else: + self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. 
self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') @@ -39,8 +49,6 @@ class GeneralTrait(object): # self.cellid is set to None above elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - - self.dataset = create_dataset(self.dataset) # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary @@ -355,8 +363,17 @@ class GeneralTrait(object): #traitQTL = self.cursor.fetchone() if traitQTL: self.locus, self.lrs, self.pvalue, self.mean = traitQTL + if self.locus: + result = g.db.execute(""" + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = '%s' and + Geno.Name = '%s' and + Geno.SpeciesId = Species.Id + """, (species, self.locus)).fetchone() + self.locus_chr = result[0] + self.locus_mb = result[1] else: - self.locus = self.lrs = self.pvalue = self.mean = "" + self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = "" if self.dataset.type == 'Publish': traitQTL = g.db.execute(""" SELECT diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py index 28242c27..d76a32ce 100644 --- a/wqflask/utility/helper_functions.py +++ b/wqflask/utility/helper_functions.py @@ -9,7 +9,7 @@ def get_species_dataset_trait(self, start_vars): #assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype" self.dataset = data_set.create_dataset(start_vars['dataset']) self.species = TheSpecies(dataset=self.dataset) - self.this_trait = GeneralTrait(dataset=self.dataset.name, + self.this_trait = GeneralTrait(dataset=self.dataset, name=start_vars['trait_id'], cellid=None) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 96c0155b..3b8b7ba2 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -92,11 +92,6 @@ class CorrelationResults(object): # #RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1} - - #def error(self, message, *args, **kw): - # heading = heading or self.PAGE_HEADING - # return templatePage.error(heading = heading, detail = [message], error=error) - def __init__(self, start_vars): # get trait list from db (database name) # calculate correlation with Base vector and targets @@ -104,10 +99,8 @@ class CorrelationResults(object): #self.this_trait = GeneralTrait(dataset=self.dataset.name, # name=start_vars['trait_id'], # cellid=None) - #print("start_vars: ", pf(start_vars)) with Bench("Doing correlations"): - print_mem("At beginning") helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() @@ -138,7 +131,6 @@ class CorrelationResults(object): self.correlation_data = {} - print_mem("Before calculating correlations") for trait, values in self.target_dataset.trait_data.iteritems(): this_trait_values = [] target_values = [] @@ -150,63 +142,60 @@ class CorrelationResults(object): target_values.append(target_sample_value) this_trait_values, target_values = normalize_values(this_trait_values, target_values) - + if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) - + self.correlation_data[trait] = [sample_r, sample_p] - - print_mem("After calculating correlations") - + self.correlation_data = 
collections.OrderedDict(sorted(self.correlation_data.items(), key=lambda t: -abs(t[1][0]))) - + self.correlation_data_slice = collections.OrderedDict() - - old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - + for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]): - print_mem("In trait info loop") - print("\nTrait #:", trait_counter) - print_mem("Before trait_object") - trait_object = GeneralTrait(dataset=self.dataset.name, name=trait) - print_mem("After trait object") - trait_info = dict( - correlation = float(self.correlation_data[trait][0]), - p_value = float(self.correlation_data[trait][1]), - symbol = trait_object.symbol, - alias = trait_object.alias, - description = trait_object.description, - chromosome = trait_object.chr, - mb = trait_object.mb - ) - print_mem("Before deleting trait object") - del trait_object - print_mem("After deleting trait object") - gc.collect() - print_mem("After colleting garabage") - print("** trait_info:", pf(trait_info)) - print("\n** Start trait_info") - counter = 1 - for key, value in trait_info.iteritems(): - print(" <{}> [{}] {}: [{}] {}\n".format( - counter, type(key), key, type(value), value)) - counter += 1 - print("** Done trait_info") + trait_object = GeneralTrait(dataset=self.dataset, name=trait) + if self.dataset.type == 'ProbeSet': + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) + if hasattr(trait_object, 'mean'): + trait_info[mean] = trait_object.mean + if hasattr(trait_object, 'lrs'): + trait_info[lrs] = trait_object.lrs + if hasattr(trait_object, 'locus_chr'): + trait_info[locus_chr] = trait_object.locus_chr + if hasattr(trait_object, 'locus_mb'): + trait_info[locus_mb] = trait_object.locus_mb + elif self.dataset.type == 'Geno': + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) + else: # 'Publish' + trait_info = collections.OrderedDict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) self.correlation_data_slice[trait] = trait_info - #self.correlation_data_slice[trait].append(trait_object) - - new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - print("Memory difference:", new_memory_usage-old_memory_usage) - old_memory_usage = new_memory_usage - print_mem("End of purple loop") - print("*************************** End purple ******** ") - - print_mem("After getting trait info") - print("Garbage colleting...") - gc.collect() #XZ, 09/18/2008: get all information about the user selected database. 
#target_db_name = fd.corr_dataset diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index dc872a8b..e171f1ab 100644 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -106,7 +106,7 @@ class SearchResultPage(object): print("foo locals are:", locals()) trait_id = result[0] - this_trait = GeneralTrait(dataset=self.dataset.name, name=trait_id) + this_trait = GeneralTrait(dataset=self.dataset, name=trait_id) this_trait.retrieve_info(QTL=True) self.trait_list.append(this_trait) -- cgit v1.2.3 From 0c5e75b42fb02faa2f2735aa4e188d32b9691101 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 20 Jun 2013 21:47:59 +0000 Subject: Added file corr_result_helpers.py that includes the function that normalizes the lists of sample values --- wqflask/utility/corr_result_helpers.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 wqflask/utility/corr_result_helpers.py (limited to 'wqflask/utility') diff --git a/wqflask/utility/corr_result_helpers.py b/wqflask/utility/corr_result_helpers.py new file mode 100644 index 00000000..edf32449 --- /dev/null +++ b/wqflask/utility/corr_result_helpers.py @@ -0,0 +1,30 @@ +def normalize_values(a_values, b_values): + """ + Trim two lists of values to contain only the values they both share + + Given two lists of sample values, trim each list so that it contains + only the samples that contain a value in both lists. Also returns + the number of such samples. + + >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1]) + ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3) + + """ + + min_length = min(len(a_values), len(b_values)) + a_new = [] + b_new = [] + for counter in range(min_length): + if a_values[counter] and b_values[counter]: + a_new.append(a_values[counter]) + b_new.append(b_values[counter]) + + num_overlap = len(a_new) + assert num_overlap == len(b_new), "Lengths should be the same" + + return a_new, b_new, num_overlap + + +if __name__ == '__main__': + import doctest + doctest.testmod() \ No newline at end of file -- cgit v1.2.3
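A quick illustration of how the new normalize_values() helper is intended to slot in next to the scipy calls in show_corr_results.py. This is only a sketch, not code from the patches: the function name sample_correlation, the corr_method flag, and the example value lists are placeholders, and it assumes the wqflask directory is on the Python path so that utility.corr_result_helpers is importable.

import scipy.stats

from utility.corr_result_helpers import normalize_values

def sample_correlation(this_trait_values, target_values, corr_method="pearson"):
    """Correlate two lists of sample values after dropping samples missing from either list."""
    # Keep only the positions where both traits actually have a value.
    this_vals, target_vals, num_overlap = normalize_values(this_trait_values, target_values)

    # Pearson by default, Spearman rank otherwise, mirroring the branch in show_corr_results.py.
    if corr_method == "pearson":
        sample_r, sample_p = scipy.stats.pearsonr(this_vals, target_vals)
    else:
        sample_r, sample_p = scipy.stats.spearmanr(this_vals, target_vals)

    return sample_r, sample_p, num_overlap

# Example with made-up values; only the sample positions present in both lists are correlated:
# sample_correlation([2.3, None, 4.1, 5.0, 6.1], [3.4, 7.2, None, 6.2, 5.5])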
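Relatedly, the memory fix in the correlation commit above comes down to building one dataset object and sharing it across every GeneralTrait, instead of handing each trait a dataset name and letting GeneralTrait.__init__ call create_dataset() again for every trait. A rough sketch of that pattern follows; the function name and the example arguments are hypothetical, and the import paths are assumed from the wqflask tree layout.

from base import data_set
from base.trait import GeneralTrait

def load_correlated_traits(dataset_name, trait_names):
    """Build GeneralTrait objects that all share a single dataset instance."""
    # One dataset object for the whole result set...
    dataset_ob = data_set.create_dataset(dataset_name)

    # ...passed by object to each trait. Passing dataset_name instead would
    # trigger create_dataset() once per trait inside GeneralTrait.__init__.
    return [GeneralTrait(dataset=dataset_ob, name=trait_name)
            for trait_name in trait_names]

# e.g. load_correlated_traits("SomeProbeSetFreezeName", ["trait_1", "trait_2"])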