about summary refs log tree commit diff
diff options
context:
space:
mode:
author Lei Yan 2013-06-11 23:24:38 +0000
committer Lei Yan 2013-06-11 23:24:38 +0000
commit 3b047d589fa89ddd9aff852d4f2a00c580eb7243 (patch)
tree 09d236010018ac536a9b6c7ab7f36806d0eeb2de
parent 466be48f92d4943995c7a3e7bcb9fd1efd775bf6 (diff)
download genenetwork2-3b047d589fa89ddd9aff852d4f2a00c580eb7243.tar.gz
Put trait info for correlation results page into a dictionary instead
of storing list of GeneralTrait objects.
Added print statements to track memory usage.
-rw-r--r-- misc/notes.txt 3
-rwxr-xr-x wqflask/base/data_set.py 2
-rwxr-xr-x wqflask/base/trait.py 6
-rw-r--r-- wqflask/wqflask/correlation/show_corr_results.py 71
-rw-r--r-- wqflask/wqflask/templates/correlation_page.html 15
-rw-r--r-- wqflask/wqflask/views.py 9
6 files changed, 91 insertions, 15 deletions
diff --git a/misc/notes.txt b/misc/notes.txt
index 4e478345..678e71b8 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -190,7 +190,8 @@ tidyp -m -i -w 100 index_page.html
===========================================
-ps -ax - View processes
+ps ax - View processes
+ps ux (the "u" is for user)
kill (process #)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 4c5c46a5..0c7676c4 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -809,7 +809,7 @@ class MrnaAssayDataSet(DataSet):
this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
- def convert_location_to_value(chromosome, mb):
+ def convert_location_to_value(self, chromosome, mb):
try:
location_value = int(chromosome)*1000 + float(mb)
except ValueError:
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 5fde114f..53f41779 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -23,7 +23,7 @@ class GeneralTrait(object):
"""
def __init__(self, **kw):
- print("in GeneralTrait")
+ #print("in GeneralTrait")
self.dataset = kw.get('dataset') # database name
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
self.cellid = kw.get('cellid')
@@ -269,7 +269,7 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
traitInfo = g.db.execute(query).fetchone()
- print("traitInfo is: ", pf(traitInfo))
+ #print("traitInfo is: ", pf(traitInfo))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -287,7 +287,7 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
traitInfo = g.db.execute(query).fetchone()
- print("traitInfo is: ", pf(traitInfo))
+ #print("traitInfo is: ", pf(traitInfo))
else: #Temp type
query = """SELECT %s FROM %s WHERE Name = %s
""" % (string.join(self.dataset.display_fields,','),
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 5d40c835..96c0155b 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -29,6 +29,7 @@
from __future__ import absolute_import, print_function, division
+import gc
import string
import cPickle
import os
@@ -37,6 +38,7 @@ import time
import pp
import math
import collections
+import resource
import scipy
@@ -69,7 +71,13 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
TISSUE_MOUSE_DB = 1
-class AuthException(Exception): pass
+def print_mem(stage=""):
+ mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+ print("{}: {}".format(stage, mem/1024))
+
+
+class AuthException(Exception):
+ pass
class CorrelationResults(object):
@@ -99,6 +107,7 @@ class CorrelationResults(object):
#print("start_vars: ", pf(start_vars))
with Bench("Doing correlations"):
+ print_mem("At beginning")
helper_functions.get_species_dataset_trait(self, start_vars)
self.dataset.group.read_genotype_file()
@@ -126,8 +135,10 @@ class CorrelationResults(object):
self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
self.target_dataset.get_trait_data()
-
+
+
self.correlation_data = {}
+ print_mem("Before calculating correlations")
for trait, values in self.target_dataset.trait_data.iteritems():
this_trait_values = []
target_values = []
@@ -139,15 +150,63 @@ class CorrelationResults(object):
target_values.append(target_sample_value)
this_trait_values, target_values = normalize_values(this_trait_values, target_values)
+
if self.corr_method == 'pearson':
sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values)
else:
sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values)
+
self.correlation_data[trait] = [sample_r, sample_p]
- self.correlation_data = collections.OrderedDict(
- sorted(self.correlation_data.items(),
- key=lambda t: -abs(t[1][0])))
-
+
+ print_mem("After calculating correlations")
+
+ self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
+ key=lambda t: -abs(t[1][0])))
+
+ self.correlation_data_slice = collections.OrderedDict()
+
+ old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+
+ for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+ print_mem("In trait info loop")
+ print("\nTrait #:", trait_counter)
+ print_mem("Before trait_object")
+ trait_object = GeneralTrait(dataset=self.dataset.name, name=trait)
+ print_mem("After trait object")
+ trait_info = dict(
+ correlation = float(self.correlation_data[trait][0]),
+ p_value = float(self.correlation_data[trait][1]),
+ symbol = trait_object.symbol,
+ alias = trait_object.alias,
+ description = trait_object.description,
+ chromosome = trait_object.chr,
+ mb = trait_object.mb
+ )
+ print_mem("Before deleting trait object")
+ del trait_object
+ print_mem("After deleting trait object")
+ gc.collect()
+ print_mem("After colleting garabage")
+ print("** trait_info:", pf(trait_info))
+ print("\n** Start trait_info")
+ counter = 1
+ for key, value in trait_info.iteritems():
+ print(" <{}> [{}] {}: [{}] {}\n".format(
+ counter, type(key), key, type(value), value))
+ counter += 1
+ print("** Done trait_info")
+ self.correlation_data_slice[trait] = trait_info
+ #self.correlation_data_slice[trait].append(trait_object)
+
+ new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+ print("Memory difference:", new_memory_usage-old_memory_usage)
+ old_memory_usage = new_memory_usage
+ print_mem("End of purple loop")
+ print("*************************** End purple ******** ")
+
+ print_mem("After getting trait info")
+ print("Garbage colleting...")
+ gc.collect()
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index 68fe81ed..b06f7096 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -24,14 +24,23 @@
<th>Sample rho</th>
<th>Sample p(rho)</th>
{% endif %}
+ <th>Symbol</th>
+ <th>Alias</th>
+ <th>Description</th>
+ <th>Location</th>
</tr>
</thead>
<tbody>
- {% for trait in correlation_data %}
+ {% for trait in correlation_data_slice %}
<tr>
<td>{{ trait }}</td>
- <td>{{ correlation_data[trait][0] }}</td>
- <td>{{ correlation_data[trait][1] }}</td>
+ <td>{{ correlation_data_slice[trait].correlation }}</td>
+ <td>{{ correlation_data_slice[trait].p_value }}</td>
+ <td>{{ correlation_data_slice[trait].symbol }}</td>
+ <td>{{ correlation_data_slice[trait].alias }}</td>
+ <td>{{ correlation_data_slice[trait].description }}</td>
+ <td>Chr{{ correlation_data_slice[trait].chromosome }}: {{ correlation_data_slice[trait].mb }}</td>
+ {# <td>{{ correlation_data_slice[trait][2].__dict__ }}</td> #}
</tr>
{% endfor %}
</tbody>
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 8531561a..102863f0 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -3,6 +3,8 @@ from __future__ import absolute_import, division, print_function
import csv
import StringIO # Todo: Use cStringIO?
+import gc
+
import cPickle as pickle
import simplejson as json
@@ -251,4 +253,9 @@ def json_default_handler(obj):
# return None
else:
raise TypeError, 'Object of type %s with value of %s is not JSON serializable' % (
- type(obj), repr(obj))
\ No newline at end of file
+ type(obj), repr(obj))
+
+
+#@app.after_request
+#def after_request(response):
+# gc.collect()
\ No newline at end of file