Added benchmark.py that compares the time of various operations

and gives a report with the percent of total computation time each takes
author: Zachary Sloan 2013-02-28 23:37:59 +0000
committer: Zachary Sloan 2013-02-28 23:37:59 +0000
commit: fa4c4f116c37285d24edc9532bb223e18dfb1584 (patch)
tree: 99f076a0e5cd2a797957b6092344793caade6036 /wqflask
parent: e82a11458cded705dd2056bad3cee76aa659288c (diff)
download: genenetwork2-fa4c4f116c37285d24edc9532bb223e18dfb1584.tar.gz
5 files changed, 86 insertions, 137 deletions
diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py
new file mode 100644
index 00000000..0a6e422c
--- /dev/null
+++ b/wqflask/utility/benchmark.py
@@ -0,0 +1,42 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+
+class Bench(object):
+    entries = collections.OrderedDict()
+    def __init__(self, name=None):
+        self.name = name
+
+    def __enter__(self):
+        if self.name:
+            print("Starting benchmark: %s" % (self.name))
+        else:
+            print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+        self.start_time = time.time()
+
+    def __exit__(self, type, value, traceback):
+        if self.name:
+            name = self.name
+        else:
+            name = "That"
+
+        time_took = time.time() - self.start_time
+        print("  %s took: %f seconds" % (name, (time_took)))
+        
+        if self.name:
+            Bench.entries[self.name] = time_took
+
+    @classmethod
+    def report(cls):
+        total_time = sum((time_took for time_took in cls.entries.itervalues()))
+        print("\nTiming report\n")
+        for name, time_took in cls.entries.iteritems():
+            percent = int(round((time_took/total_time) * 100))
+            print("[{}%] {}: {}".format(percent, name, time_took))
+        print()
+        
+        # Reset the entries after reporting
+        cls.entries = collections.OrderedDict()
+\ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 50739614..23cec6d0 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -14,6 +14,7 @@ import sys
 import os
 import httplib
 import urllib
+import collections
 
 import numpy as np
 
@@ -21,6 +22,7 @@ import json
 
 from htmlgen import HTMLgen2 as HT
 from utility import Plot, Bunch
+from utility.benchmark import Bench
 from wqflask.interval_analyst import GeneUtil
 from base.trait import GeneralTrait
 from base import data_set
@@ -462,91 +464,62 @@ class MarkerRegression(object):
 
     def gen_data(self):
         """Todo: Fill this in here"""
-        
-        #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
-        #markers = json.load(json_data)
-        
+
+        print("something")
+
         genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-        
+
         no_val_samples = self.identify_empty_samples()
         trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
 
-        #for i, marker in enumerate(trimmed_genotype_data):
-        #    if i > 10:
-        #        break
-        #    print("genotype is:", pf(marker))
-        
-        #print("trimmed genotype data is:", pf(trimmed_genotype_data))
-        
-        #for marker_object in genotype_data:
-        #    print("marker_object:", pf(marker_object))
-
-        #prep_data.PrepData(self.vals, genotype_data)
-        
         pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
-        #for item in trimmed_genotype_data:
-        #    if type(item) != type(list()):
-        #        print(" --->", type(item))
-        #    for counter, part in enumerate(item):
-        #        if type(part) != type(float()):
-        #            print(" ------>", type(part), " : ", part)
-        #        if counter % 100 == 0:
-        #            print(" ------>", type(part))
         genotypes = np.array(trimmed_genotype_data).T
-        #print("genotypes is:", pf(genotypes))
-        #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
-        #                                       self.dataset.group.name + '.snps.new')).T
+
+        #times = collections.OrderedDict()
+        #times['start'] = time.time()
         
-        #print("pheno_vector is:", pf(pheno_vector.shape))
-        #print("genotypes is:", pf(genotypes.shape))
+        with Bench("Calculate Kinship"):
+            kinship_matrix = lmm.calculateKinship(genotypes)
         
-        kinship_matrix = lmm.calculateKinship(genotypes)
-        #print("kinship_matrix is:", pf(kinship_matrix))
+        with Bench("Create LMM object"):
+            lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
         
-        lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
-        lmm_ob.fit()
-
-        t_stats, p_values = lmm.GWAS(pheno_vector,
-                                     genotypes,
-                                     kinship_matrix,
-                                     REML=True,
-                                     refit=False)
-
-        #print("p_values is:", pf(len(p_values)))
+        with Bench("LMM_ob fitting"):
+            lmm_ob.fit()
+            
         
-        self.dataset.group.markers.add_pvalues(p_values)
+        with Bench("Doing gwas"):
+            t_stats, p_values = lmm.GWAS(pheno_vector,
+                                         genotypes,
+                                         kinship_matrix,
+                                         REML=True,
+                                         refit=False)
+            
+        Bench().report()
+        
+        #previous_time = None
+        #for operation, this_time in times.iteritems():
+        #    if previous_time:
+        #        print("{} run time: {}".format(operation, this_time-previous_time))
+        #        #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime))
+        #    previous_time = this_time
 
-        #calculate QTL for each trait
-        #self.qtl_results = self.genotype.regression(strains = self.samples,
-        #                                        trait = self.vals)
-        #self.lrs_array = self.genotype.permutation(strains = self.samples,
-        #                                       trait = self.vals,
-        #                                       nperm=self.num_perm)
+        self.dataset.group.markers.add_pvalues(p_values)
 
         self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers]
-        #print("self.lrs_values is:", pf(self.lrs_values))
         lrs_values_sorted = sorted(self.lrs_values)
-        
-        #print("lrs_values_sorted is:", pf(lrs_values_sorted))
-        #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1)))
-        
+
         lrs_values_length = len(lrs_values_sorted)
-        
+
         def lrs_threshold(place):
             return lrs_values_sorted[int((lrs_values_length * place) -1)]
-        
+
         self.lrs_thresholds = Bunch(
                             suggestive = lrs_threshold(.37),
                             significant = lrs_threshold(.95),
                             highly_significant = lrs_threshold(.99),
                                 )
 
-        #self.lrs_thresholds = Bunch(
-        #                        suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
-        #                        significant = self.lrs_array[int(self.num_perm*0.95-1)],
-        #                        highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
-        #                        )
-
         if self.display_all_lrs:
             self.filtered_results = self.dataset.group.markers.markers
         else:
@@ -557,67 +530,6 @@ class MarkerRegression(object):
                 if marker['lrs_value'] > self.lrs_thresholds.suggestive:
                     self.filtered_results.append(marker)
 
-        #if self.display_all_lrs:
-        #    filtered_results = self.qtl_results
-        #else:
-        #    suggestive_results = []
-        #    self.pure_qtl_results = []
-        #    for result in self.qtl_results:
-        #        self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
-        #                                                     mb=result.locus.Mb,
-        #                                                     chromosome=result.locus.chr),
-        #                                          lrs=result.lrs,
-        #                                          additive=result.additive))
-        #        if result.lrs > self.lrs_thresholds.suggestive:
-        #            suggestive_results.append(result)
-        #    filtered_results = suggestive_results 
-
-
-        # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used
-        if not self.filtered_results:
-            # We use the 10 results with the highest LRS values
-            self.filtered_results = sorted(self.qtl_results)[-10:]
-            self.top_10 = True
-        else:
-            self.top_10 = False
-
-
-
-        #qtlresults2 = []
-        #if self.disp_all_lrs:
-        #    filtered = self.qtl_results[:]
-        #else:
-        #    filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults)
-        #if len(filtered) == 0:
-        #    qtlresults2 = qtlresults[:]
-        #    qtlresults2.sort()
-        #    filtered = qtlresults2[-10:]
-
-        #########################################
-        #      Permutation Graph
-        #########################################
-        #myCanvas = pid.PILCanvas(size=(400,300))
-        ##plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification)
-        #Plot.plotBar(myCanvas, LRSArray, XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test')
-        #filename= webqtlUtil.genRandStr("Reg_")
-        #myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif')
-        #img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test')
-            
-        #if fd.suggestive == None:
-        #    fd.suggestive = LRSArray[int(fd.nperm*0.37-1)]
-        #else:
-        #    fd.suggestive = float(fd.suggestive)
-        #if fd.significance == None:
-        #    fd.significance = LRSArray[int(fd.nperm*0.95-1)]
-        #else:
-        #    fd.significance = float(fd.significance)
-
-        #permutationHeading = HT.Paragraph('Histogram of Permutation Test')
-        #permutationHeading.__setattr__("class","title")
-        #
-        #permutation = HT.TableLite()
-        #permutation.append(HT.TR(HT.TD(img)))
-
         for marker in self.filtered_results:
             if marker['lrs_value'] > webqtlConfig.MAXLRS:
                 marker['lrs_value'] = webqtlConfig.MAXLRS
@@ -695,17 +607,6 @@ class MarkerRegression(object):
         #        index+=1
         #        tblobj_body.append(reportBodyRow)
 
-        #tblobj_header.append(reportHeaderRow)
-        #tblobj['header']=tblobj_header
-        #tblobj['body']=tblobj_body
-
-        #rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee')
-        #if fd.genotype.type == 'intercross':
-        #    bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g.,  BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.')
-            #return rv,tblobj,bottomInfo
-
-        #return rv,tblobj,bottomInfo
-
     def identify_empty_samples(self):
         no_val_samples = []
         for sample_count, val in enumerate(self.vals):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d0f379dd..33f6573e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -137,7 +137,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
             if refit: Ls.fit(X=xs)
             else: Ls.fit()
             ts,ps = Ls.association(xs,REML=REML)
-         else: 
+         else:
             if x.var() == 0: 
                PS.append(np.nan) 
                TS.append(np.nan) 
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 720515c1..7b2d022c 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
 import string
 import os
 import cPickle
+import uuid
 #import pyXLWriter as xl
 
 from collections import OrderedDict
@@ -120,6 +121,8 @@ class ShowTrait(object):
 
         # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
         self.hddn = hddn
+        
+        self.session_uuid = uuid.uuid4()
 
         self.sample_group_types = OrderedDict()
         self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
@@ -129,7 +132,8 @@ class ShowTrait(object):
         print("sample_lists is:", pf(sample_lists))
         js_data = dict(sample_group_types = self.sample_group_types,
                         sample_lists = sample_lists,
-                        attribute_names = self.sample_groups[0].attributes)
+                        attribute_names = self.sample_groups[0].attributes,
+                        session_uuid = self.session_uuid)
         #print("js_data:", pf(js_data))
         self.js_data = js_data
 
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 87199e9f..bb87b4bb 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -19,8 +19,10 @@
     <form method="post" action="/corr_compute" name="trait_page" id="trait_data_form"
     class="form-horizontal">
             {% for key in hddn %}
-                <input type="hidden" name="{{ key }}" value="{{ hddn[key] }}">
+            <input type="hidden" name="{{ key }}" value="{{ hddn[key] }}">
             {% endfor %}
+            
+            <input type="hidden" name="session_uuid" value="{{ session_uuid }}">
 
         <div class="container">
             <div class="page-header">
author	Zachary Sloan	2013-02-28 23:37:59 +0000
committer	Zachary Sloan	2013-02-28 23:37:59 +0000
commit	fa4c4f116c37285d24edc9532bb223e18dfb1584 (patch)
tree	99f076a0e5cd2a797957b6092344793caade6036 /wqflask
parent	e82a11458cded705dd2056bad3cee76aa659288c (diff)
download	genenetwork2-fa4c4f116c37285d24edc9532bb223e18dfb1584.tar.gz