From 3b047d589fa89ddd9aff852d4f2a00c580eb7243 Mon Sep 17 00:00:00 2001 From: Lei Yan Date: Tue, 11 Jun 2013 23:24:38 +0000 Subject: Put trait info for correlation results page into a dictionary instead of storing list of GeneralTrait objects Added print statements to track memory usage --- misc/notes.txt | 3 +- wqflask/base/data_set.py | 2 +- wqflask/base/trait.py | 6 +- wqflask/wqflask/correlation/show_corr_results.py | 71 ++++++++++++++++++++++-- wqflask/wqflask/templates/correlation_page.html | 15 ++++- wqflask/wqflask/views.py | 9 ++- 6 files changed, 91 insertions(+), 15 deletions(-) diff --git a/misc/notes.txt b/misc/notes.txt index 4e478345..678e71b8 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -190,7 +190,8 @@ tidyp -m -i -w 100 index_page.html =========================================== -ps -ax - View processes +ps ax - View processes +ps ux (the "u" is for user) kill (process #) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 4c5c46a5..0c7676c4 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -809,7 +809,7 @@ class MrnaAssayDataSet(DataSet): this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb)) - def convert_location_to_value(chromosome, mb): + def convert_location_to_value(self, chromosome, mb): try: location_value = int(chromosome)*1000 + float(mb) except ValueError: diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 5fde114f..53f41779 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -23,7 +23,7 @@ class GeneralTrait(object): """ def __init__(self, **kw): - print("in GeneralTrait") + #print("in GeneralTrait") self.dataset = kw.get('dataset') # database name self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. self.cellid = kw.get('cellid') @@ -269,7 +269,7 @@ class GeneralTrait(object): escape(self.dataset.name), escape(self.name)) traitInfo = g.db.execute(query).fetchone() - print("traitInfo is: ", pf(traitInfo)) + #print("traitInfo is: ", pf(traitInfo)) #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif self.dataset.type == 'Geno': @@ -287,7 +287,7 @@ class GeneralTrait(object): escape(self.dataset.name), escape(self.name)) traitInfo = g.db.execute(query).fetchone() - print("traitInfo is: ", pf(traitInfo)) + #print("traitInfo is: ", pf(traitInfo)) else: #Temp type query = """SELECT %s FROM %s WHERE Name = %s """ % (string.join(self.dataset.display_fields,','), diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 5d40c835..96c0155b 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -29,6 +29,7 @@ from __future__ import absolute_import, print_function, division +import gc import string import cPickle import os @@ -37,6 +38,7 @@ import time import pp import math import collections +import resource import scipy @@ -69,7 +71,13 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 -class AuthException(Exception): pass +def print_mem(stage=""): + mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print("{}: {}".format(stage, mem/1024)) + + +class AuthException(Exception): + pass class CorrelationResults(object): @@ -99,6 +107,7 @@ class CorrelationResults(object): #print("start_vars: ", pf(start_vars)) with Bench("Doing correlations"): + print_mem("At beginning") helper_functions.get_species_dataset_trait(self, start_vars) self.dataset.group.read_genotype_file() @@ -126,8 +135,10 @@ class CorrelationResults(object): self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) self.target_dataset.get_trait_data() - + + self.correlation_data = {} + print_mem("Before calculating correlations") for trait, values in self.target_dataset.trait_data.iteritems(): this_trait_values = [] target_values = [] @@ -139,15 +150,63 @@ class CorrelationResults(object): target_values.append(target_sample_value) this_trait_values, target_values = normalize_values(this_trait_values, target_values) + if self.corr_method == 'pearson': sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values) else: sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values) + self.correlation_data[trait] = [sample_r, sample_p] - self.correlation_data = collections.OrderedDict( - sorted(self.correlation_data.items(), - key=lambda t: -abs(t[1][0]))) - + + print_mem("After calculating correlations") + + self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(), + key=lambda t: -abs(t[1][0]))) + + self.correlation_data_slice = collections.OrderedDict() + + old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + + for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]): + print_mem("In trait info loop") + print("\nTrait #:", trait_counter) + print_mem("Before trait_object") + trait_object = GeneralTrait(dataset=self.dataset.name, name=trait) + print_mem("After trait object") + trait_info = dict( + correlation = float(self.correlation_data[trait][0]), + p_value = float(self.correlation_data[trait][1]), + symbol = trait_object.symbol, + alias = trait_object.alias, + description = trait_object.description, + chromosome = trait_object.chr, + mb = trait_object.mb + ) + print_mem("Before deleting trait object") + del trait_object + print_mem("After deleting trait object") + gc.collect() + print_mem("After colleting garabage") + print("** trait_info:", pf(trait_info)) + print("\n** Start trait_info") + counter = 1 + for key, value in trait_info.iteritems(): + print(" <{}> [{}] {}: [{}] {}\n".format( + counter, type(key), key, type(value), value)) + counter += 1 + print("** Done trait_info") + self.correlation_data_slice[trait] = trait_info + #self.correlation_data_slice[trait].append(trait_object) + + new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + print("Memory difference:", new_memory_usage-old_memory_usage) + old_memory_usage = new_memory_usage + print_mem("End of purple loop") + print("*************************** End purple ******** ") + + print_mem("After getting trait info") + print("Garbage colleting...") + gc.collect() #XZ, 09/18/2008: get all information about the user selected database. #target_db_name = fd.corr_dataset diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index 68fe81ed..b06f7096 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -24,14 +24,23 @@ Sample rho Sample p(rho) {% endif %} + Symbol + Alias + Description + Location - {% for trait in correlation_data %} + {% for trait in correlation_data_slice %} {{ trait }} - {{ correlation_data[trait][0] }} - {{ correlation_data[trait][1] }} + {{ correlation_data_slice[trait].correlation }} + {{ correlation_data_slice[trait].p_value }} + {{ correlation_data_slice[trait].symbol }} + {{ correlation_data_slice[trait].alias }} + {{ correlation_data_slice[trait].description }} + Chr{{ correlation_data_slice[trait].chromosome }}: {{ correlation_data_slice[trait].mb }} + {# {{ correlation_data_slice[trait][2].__dict__ }} #} {% endfor %} diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 8531561a..102863f0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -3,6 +3,8 @@ from __future__ import absolute_import, division, print_function import csv import StringIO # Todo: Use cStringIO? +import gc + import cPickle as pickle import simplejson as json @@ -251,4 +253,9 @@ def json_default_handler(obj): # return None else: raise TypeError, 'Object of type %s with value of %s is not JSON serializable' % ( - type(obj), repr(obj)) \ No newline at end of file + type(obj), repr(obj)) + + +#@app.after_request +#def after_request(response): +# gc.collect() \ No newline at end of file -- cgit v1.2.3