Put trait info for correlation results page into a dictionary instead of storing a list of GeneralTrait objects

Added print statements to track memory usage
author: Lei Yan 2013-06-11 23:24:38 +0000
committer: Lei Yan 2013-06-11 23:24:38 +0000
commit: 3b047d589fa89ddd9aff852d4f2a00c580eb7243 (patch)
tree: 09d236010018ac536a9b6c7ab7f36806d0eeb2de
parent: 466be48f92d4943995c7a3e7bcb9fd1efd775bf6 (diff)
download: genenetwork2-3b047d589fa89ddd9aff852d4f2a00c580eb7243.tar.gz
6 files changed, 91 insertions, 15 deletions
diff --git a/misc/notes.txt b/misc/notes.txt
index 4e478345..678e71b8 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -190,7 +190,8 @@ tidyp -m -i -w 100 index_page.html
 
 ===========================================
 
-ps -ax - View processes
+ps ax - View processes
+ps ux (the "u" is for user)
 
 kill (process #)
 
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 4c5c46a5..0c7676c4 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -809,7 +809,7 @@ class MrnaAssayDataSet(DataSet):
                     this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
       
 
-    def convert_location_to_value(chromosome, mb):
+    def convert_location_to_value(self, chromosome, mb):
         try:
             location_value = int(chromosome)*1000 + float(mb)
         except ValueError:
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 5fde114f..53f41779 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -23,7 +23,7 @@ class GeneralTrait(object):
     """
 
     def __init__(self, **kw):
-        print("in GeneralTrait")
+        #print("in GeneralTrait")
         self.dataset = kw.get('dataset')           # database name
         self.name = kw.get('name')                 # Trait ID, ProbeSet ID, Published ID, etc.
         self.cellid = kw.get('cellid')
@@ -269,7 +269,7 @@ class GeneralTrait(object):
                            escape(self.dataset.name),
                            escape(self.name))
             traitInfo = g.db.execute(query).fetchone()
-            print("traitInfo is: ", pf(traitInfo))
+            #print("traitInfo is: ", pf(traitInfo))
         #XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
         # to avoid the problem of same marker name from different species.
         elif self.dataset.type == 'Geno':
@@ -287,7 +287,7 @@ class GeneralTrait(object):
                            escape(self.dataset.name),
                            escape(self.name))
             traitInfo = g.db.execute(query).fetchone()
-            print("traitInfo is: ", pf(traitInfo))
+            #print("traitInfo is: ", pf(traitInfo))
         else: #Temp type
             query = """SELECT %s FROM %s WHERE Name = %s
                                      """ % (string.join(self.dataset.display_fields,','),
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 5d40c835..96c0155b 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -29,6 +29,7 @@
 
 from __future__ import absolute_import, print_function, division
 
+import gc
 import string
 import cPickle
 import os
@@ -37,6 +38,7 @@ import time
 import pp
 import math
 import collections
+import resource
 
 import scipy
 
@@ -69,7 +71,13 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK]
 
 TISSUE_MOUSE_DB = 1
 
-class AuthException(Exception): pass
+def print_mem(stage=""):
+    mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    print("{}: {}".format(stage, mem/1024))
+    
+
+class AuthException(Exception):
+    pass
 
 class CorrelationResults(object):
 
@@ -99,6 +107,7 @@ class CorrelationResults(object):
         
         #print("start_vars: ", pf(start_vars))
         with Bench("Doing correlations"):
+            print_mem("At beginning")
             helper_functions.get_species_dataset_trait(self, start_vars)
             self.dataset.group.read_genotype_file()
     
@@ -126,8 +135,10 @@ class CorrelationResults(object):
                 
             self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
             self.target_dataset.get_trait_data()
-            
+
+
             self.correlation_data = {}
+            print_mem("Before calculating correlations")
             for trait, values in self.target_dataset.trait_data.iteritems():
                 this_trait_values = []
                 target_values = []
@@ -139,15 +150,63 @@ class CorrelationResults(object):
                         target_values.append(target_sample_value)
 
                 this_trait_values, target_values = normalize_values(this_trait_values, target_values)
+                
                 if self.corr_method == 'pearson':
                     sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values)
                 else:
                     sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values)
+                    
                 self.correlation_data[trait] = [sample_r, sample_p]
-            self.correlation_data = collections.OrderedDict(
-                sorted(self.correlation_data.items(),
-                        key=lambda t: -abs(t[1][0])))
-
+                
+            print_mem("After calculating correlations")
+            
+            self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
+                                                                   key=lambda t: -abs(t[1][0])))
+            
+            self.correlation_data_slice = collections.OrderedDict()
+            
+            old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+            
+            for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
+                print_mem("In trait info loop")
+                print("\nTrait #:", trait_counter)
+                print_mem("Before trait_object")
+                trait_object = GeneralTrait(dataset=self.dataset.name, name=trait)
+                print_mem("After trait object")
+                trait_info = dict(
+                    correlation = float(self.correlation_data[trait][0]),
+                    p_value = float(self.correlation_data[trait][1]),
+                    symbol = trait_object.symbol,
+                    alias = trait_object.alias,
+                    description = trait_object.description,
+                    chromosome = trait_object.chr,
+                    mb = trait_object.mb
+                )
+                print_mem("Before deleting trait object")
+                del trait_object
+                print_mem("After deleting trait object")
+                gc.collect()
+                print_mem("After colleting garabage")
+                print("** trait_info:", pf(trait_info))
+                print("\n** Start trait_info")
+                counter = 1
+                for key, value in trait_info.iteritems():
+                    print("   <{}> [{}] {}: [{}] {}\n".format(
+                        counter, type(key), key, type(value), value))
+                    counter += 1
+                print("** Done trait_info")
+                self.correlation_data_slice[trait] = trait_info
+                #self.correlation_data_slice[trait].append(trait_object)
+                
+                new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+                print("Memory difference:", new_memory_usage-old_memory_usage)
+                old_memory_usage = new_memory_usage
+                print_mem("End of purple loop")
+                print("*************************** End purple ******** ")
+               
+            print_mem("After getting trait info")
+            print("Garbage colleting...")
+            gc.collect()
 
         #XZ, 09/18/2008: get all information about the user selected database.
         #target_db_name = fd.corr_dataset
diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html
index 68fe81ed..b06f7096 100644
--- a/wqflask/wqflask/templates/correlation_page.html
+++ b/wqflask/wqflask/templates/correlation_page.html
@@ -24,14 +24,23 @@
                 <th>Sample rho</th>
                 <th>Sample p(rho)</th>
                 {% endif %}
+                <th>Symbol</th>
+                <th>Alias</th>
+                <th>Description</th>
+                <th>Location</th>
             </tr>
         </thead>
         <tbody>
-        {% for trait in correlation_data %}
+        {% for trait in correlation_data_slice %}
             <tr>
                 <td>{{ trait }}</td>
-                <td>{{ correlation_data[trait][0] }}</td>
-                <td>{{ correlation_data[trait][1] }}</td>
+                <td>{{ correlation_data_slice[trait].correlation }}</td>
+                <td>{{ correlation_data_slice[trait].p_value }}</td>
+                <td>{{ correlation_data_slice[trait].symbol }}</td>
+                <td>{{ correlation_data_slice[trait].alias }}</td>
+                <td>{{ correlation_data_slice[trait].description }}</td>
+                <td>Chr{{ correlation_data_slice[trait].chromosome }}: {{ correlation_data_slice[trait].mb }}</td>
+                {# <td>{{ correlation_data_slice[trait][2].__dict__ }}</td> #}
             </tr>
         {% endfor %}
         </tbody>
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 8531561a..102863f0 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -3,6 +3,8 @@ from __future__ import absolute_import, division, print_function
 import csv
 import StringIO  # Todo: Use cStringIO?
 
+import gc
+
 import cPickle as pickle
 
 import simplejson as json
@@ -251,4 +253,9 @@ def json_default_handler(obj):
     #    return None
     else:
         raise TypeError, 'Object of type %s with value of %s is not JSON serializable' % (
-            type(obj), repr(obj))
\ No newline at end of file
+            type(obj), repr(obj))
+    
+    
+#@app.after_request
+#def after_request(response):
+#    gc.collect()
\ No newline at end of file
author	Lei Yan	2013-06-11 23:24:38 +0000
committer	Lei Yan	2013-06-11 23:24:38 +0000
commit	3b047d589fa89ddd9aff852d4f2a00c580eb7243 (patch)
tree	09d236010018ac536a9b6c7ab7f36806d0eeb2de
parent	466be48f92d4943995c7a3e7bcb9fd1efd775bf6 (diff)
download	genenetwork2-3b047d589fa89ddd9aff852d4f2a00c580eb7243.tar.gz