aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorZachary Sloan2013-02-28 23:37:59 +0000
committerZachary Sloan2013-02-28 23:37:59 +0000
commitfa4c4f116c37285d24edc9532bb223e18dfb1584 (patch)
tree99f076a0e5cd2a797957b6092344793caade6036 /wqflask
parente82a11458cded705dd2056bad3cee76aa659288c (diff)
downloadgenenetwork2-fa4c4f116c37285d24edc9532bb223e18dfb1584.tar.gz
Added benchmark.py that compares the time of various operations
and gives a report with the percent of total computation time each takes
Diffstat (limited to 'wqflask')
-rw-r--r--wqflask/utility/benchmark.py42
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py169
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py2
-rwxr-xr-xwqflask/wqflask/show_trait/show_trait.py6
-rw-r--r--wqflask/wqflask/templates/show_trait.html4
5 files changed, 86 insertions, 137 deletions
diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py
new file mode 100644
index 00000000..0a6e422c
--- /dev/null
+++ b/wqflask/utility/benchmark.py
@@ -0,0 +1,42 @@
+from __future__ import print_function, division, absolute_import
+
+import collections
+import inspect
+import time
+
+
+class Bench(object):
+ entries = collections.OrderedDict()
+ def __init__(self, name=None):
+ self.name = name
+
+ def __enter__(self):
+ if self.name:
+ print("Starting benchmark: %s" % (self.name))
+ else:
+ print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2]))
+ self.start_time = time.time()
+
+ def __exit__(self, type, value, traceback):
+ if self.name:
+ name = self.name
+ else:
+ name = "That"
+
+ time_took = time.time() - self.start_time
+ print(" %s took: %f seconds" % (name, (time_took)))
+
+ if self.name:
+ Bench.entries[self.name] = time_took
+
+ @classmethod
+ def report(cls):
+ total_time = sum((time_took for time_took in cls.entries.itervalues()))
+ print("\nTiming report\n")
+ for name, time_took in cls.entries.iteritems():
+ percent = int(round((time_took/total_time) * 100))
+ print("[{}%] {}: {}".format(percent, name, time_took))
+ print()
+
+ # Reset the entries after reporting
+ cls.entries = collections.OrderedDict() \ No newline at end of file
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 50739614..23cec6d0 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -14,6 +14,7 @@ import sys
import os
import httplib
import urllib
+import collections
import numpy as np
@@ -21,6 +22,7 @@ import json
from htmlgen import HTMLgen2 as HT
from utility import Plot, Bunch
+from utility.benchmark import Bench
from wqflask.interval_analyst import GeneUtil
from base.trait import GeneralTrait
from base import data_set
@@ -462,91 +464,62 @@ class MarkerRegression(object):
def gen_data(self):
"""Todo: Fill this in here"""
-
- #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
- #markers = json.load(json_data)
-
+
+ print("something")
+
genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
-
+
no_val_samples = self.identify_empty_samples()
trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
- #for i, marker in enumerate(trimmed_genotype_data):
- # if i > 10:
- # break
- # print("genotype is:", pf(marker))
-
- #print("trimmed genotype data is:", pf(trimmed_genotype_data))
-
- #for marker_object in genotype_data:
- # print("marker_object:", pf(marker_object))
-
- #prep_data.PrepData(self.vals, genotype_data)
-
pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
- #for item in trimmed_genotype_data:
- # if type(item) != type(list()):
- # print(" --->", type(item))
- # for counter, part in enumerate(item):
- # if type(part) != type(float()):
- # print(" ------>", type(part), " : ", part)
- # if counter % 100 == 0:
- # print(" ------>", type(part))
genotypes = np.array(trimmed_genotype_data).T
- #print("genotypes is:", pf(genotypes))
- #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
- # self.dataset.group.name + '.snps.new')).T
+
+ #times = collections.OrderedDict()
+ #times['start'] = time.time()
- #print("pheno_vector is:", pf(pheno_vector.shape))
- #print("genotypes is:", pf(genotypes.shape))
+ with Bench("Calculate Kinship"):
+ kinship_matrix = lmm.calculateKinship(genotypes)
- kinship_matrix = lmm.calculateKinship(genotypes)
- #print("kinship_matrix is:", pf(kinship_matrix))
+ with Bench("Create LMM object"):
+ lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
- lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
- lmm_ob.fit()
-
- t_stats, p_values = lmm.GWAS(pheno_vector,
- genotypes,
- kinship_matrix,
- REML=True,
- refit=False)
-
- #print("p_values is:", pf(len(p_values)))
+ with Bench("LMM_ob fitting"):
+ lmm_ob.fit()
+
- self.dataset.group.markers.add_pvalues(p_values)
+ with Bench("Doing gwas"):
+ t_stats, p_values = lmm.GWAS(pheno_vector,
+ genotypes,
+ kinship_matrix,
+ REML=True,
+ refit=False)
+
+ Bench().report()
+
+ #previous_time = None
+ #for operation, this_time in times.iteritems():
+ # if previous_time:
+ # print("{} run time: {}".format(operation, this_time-previous_time))
+ # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime))
+ # previous_time = this_time
- #calculate QTL for each trait
- #self.qtl_results = self.genotype.regression(strains = self.samples,
- # trait = self.vals)
- #self.lrs_array = self.genotype.permutation(strains = self.samples,
- # trait = self.vals,
- # nperm=self.num_perm)
+ self.dataset.group.markers.add_pvalues(p_values)
self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers]
- #print("self.lrs_values is:", pf(self.lrs_values))
lrs_values_sorted = sorted(self.lrs_values)
-
- #print("lrs_values_sorted is:", pf(lrs_values_sorted))
- #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1)))
-
+
lrs_values_length = len(lrs_values_sorted)
-
+
def lrs_threshold(place):
return lrs_values_sorted[int((lrs_values_length * place) -1)]
-
+
self.lrs_thresholds = Bunch(
suggestive = lrs_threshold(.37),
significant = lrs_threshold(.95),
highly_significant = lrs_threshold(.99),
)
- #self.lrs_thresholds = Bunch(
- # suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
- # significant = self.lrs_array[int(self.num_perm*0.95-1)],
- # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
- # )
-
if self.display_all_lrs:
self.filtered_results = self.dataset.group.markers.markers
else:
@@ -557,67 +530,6 @@ class MarkerRegression(object):
if marker['lrs_value'] > self.lrs_thresholds.suggestive:
self.filtered_results.append(marker)
- #if self.display_all_lrs:
- # filtered_results = self.qtl_results
- #else:
- # suggestive_results = []
- # self.pure_qtl_results = []
- # for result in self.qtl_results:
- # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
- # mb=result.locus.Mb,
- # chromosome=result.locus.chr),
- # lrs=result.lrs,
- # additive=result.additive))
- # if result.lrs > self.lrs_thresholds.suggestive:
- # suggestive_results.append(result)
- # filtered_results = suggestive_results
-
-
- # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used
- if not self.filtered_results:
- # We use the 10 results with the highest LRS values
- self.filtered_results = sorted(self.qtl_results)[-10:]
- self.top_10 = True
- else:
- self.top_10 = False
-
-
-
- #qtlresults2 = []
- #if self.disp_all_lrs:
- # filtered = self.qtl_results[:]
- #else:
- # filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults)
- #if len(filtered) == 0:
- # qtlresults2 = qtlresults[:]
- # qtlresults2.sort()
- # filtered = qtlresults2[-10:]
-
- #########################################
- # Permutation Graph
- #########################################
- #myCanvas = pid.PILCanvas(size=(400,300))
- ##plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification)
- #Plot.plotBar(myCanvas, LRSArray, XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test')
- #filename= webqtlUtil.genRandStr("Reg_")
- #myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif')
- #img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test')
-
- #if fd.suggestive == None:
- # fd.suggestive = LRSArray[int(fd.nperm*0.37-1)]
- #else:
- # fd.suggestive = float(fd.suggestive)
- #if fd.significance == None:
- # fd.significance = LRSArray[int(fd.nperm*0.95-1)]
- #else:
- # fd.significance = float(fd.significance)
-
- #permutationHeading = HT.Paragraph('Histogram of Permutation Test')
- #permutationHeading.__setattr__("class","title")
- #
- #permutation = HT.TableLite()
- #permutation.append(HT.TR(HT.TD(img)))
-
for marker in self.filtered_results:
if marker['lrs_value'] > webqtlConfig.MAXLRS:
marker['lrs_value'] = webqtlConfig.MAXLRS
@@ -695,17 +607,6 @@ class MarkerRegression(object):
# index+=1
# tblobj_body.append(reportBodyRow)
- #tblobj_header.append(reportHeaderRow)
- #tblobj['header']=tblobj_header
- #tblobj['body']=tblobj_body
-
- #rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee')
- #if fd.genotype.type == 'intercross':
- # bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g., BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.')
- #return rv,tblobj,bottomInfo
-
- #return rv,tblobj,bottomInfo
-
def identify_empty_samples(self):
no_val_samples = []
for sample_count, val in enumerate(self.vals):
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d0f379dd..33f6573e 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -137,7 +137,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
if refit: Ls.fit(X=xs)
else: Ls.fit()
ts,ps = Ls.association(xs,REML=REML)
- else:
+ else:
if x.var() == 0:
PS.append(np.nan)
TS.append(np.nan)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 720515c1..7b2d022c 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division
import string
import os
import cPickle
+import uuid
#import pyXLWriter as xl
from collections import OrderedDict
@@ -120,6 +121,8 @@ class ShowTrait(object):
# We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
self.hddn = hddn
+
+ self.session_uuid = uuid.uuid4()
self.sample_group_types = OrderedDict()
self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
@@ -129,7 +132,8 @@ class ShowTrait(object):
print("sample_lists is:", pf(sample_lists))
js_data = dict(sample_group_types = self.sample_group_types,
sample_lists = sample_lists,
- attribute_names = self.sample_groups[0].attributes)
+ attribute_names = self.sample_groups[0].attributes,
+ session_uuid = self.session_uuid)
#print("js_data:", pf(js_data))
self.js_data = js_data
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 87199e9f..bb87b4bb 100644
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -19,8 +19,10 @@
<form method="post" action="/corr_compute" name="trait_page" id="trait_data_form"
class="form-horizontal">
{% for key in hddn %}
- <input type="hidden" name="{{ key }}" value="{{ hddn[key] }}">
+ <input type="hidden" name="{{ key }}" value="{{ hddn[key] }}">
{% endfor %}
+
+ <input type="hidden" name="session_uuid" value="{{ session_uuid }}">
<div class="container">
<div class="page-header">