From fa4c4f116c37285d24edc9532bb223e18dfb1584 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Thu, 28 Feb 2013 23:37:59 +0000 Subject: Added benchmark.py that compares the time of various operations and gives a report with the percent of total computation time each takes --- wqflask/utility/benchmark.py | 42 +++++ .../wqflask/marker_regression/marker_regression.py | 169 +++++---------------- wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 2 +- wqflask/wqflask/show_trait/show_trait.py | 6 +- wqflask/wqflask/templates/show_trait.html | 4 +- 5 files changed, 86 insertions(+), 137 deletions(-) create mode 100644 wqflask/utility/benchmark.py diff --git a/wqflask/utility/benchmark.py b/wqflask/utility/benchmark.py new file mode 100644 index 00000000..0a6e422c --- /dev/null +++ b/wqflask/utility/benchmark.py @@ -0,0 +1,42 @@ +from __future__ import print_function, division, absolute_import + +import collections +import inspect +import time + + +class Bench(object): + entries = collections.OrderedDict() + def __init__(self, name=None): + self.name = name + + def __enter__(self): + if self.name: + print("Starting benchmark: %s" % (self.name)) + else: + print("Starting benchmark at: %s [%i]" % (inspect.stack()[1][3], inspect.stack()[1][2])) + self.start_time = time.time() + + def __exit__(self, type, value, traceback): + if self.name: + name = self.name + else: + name = "That" + + time_took = time.time() - self.start_time + print(" %s took: %f seconds" % (name, (time_took))) + + if self.name: + Bench.entries[self.name] = time_took + + @classmethod + def report(cls): + total_time = sum((time_took for time_took in cls.entries.itervalues())) + print("\nTiming report\n") + for name, time_took in cls.entries.iteritems(): + percent = int(round((time_took/total_time) * 100)) + print("[{}%] {}: {}".format(percent, name, time_took)) + print() + + # Reset the entries after reporting + cls.entries = collections.OrderedDict() \ No newline at end of file diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 50739614..23cec6d0 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -14,6 +14,7 @@ import sys import os import httplib import urllib +import collections import numpy as np @@ -21,6 +22,7 @@ import json from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch +from utility.benchmark import Bench from wqflask.interval_analyst import GeneUtil from base.trait import GeneralTrait from base import data_set @@ -462,91 +464,62 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - - #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json')) - #markers = json.load(json_data) - + + print("something") + genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] - + no_val_samples = self.identify_empty_samples() trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) - #for i, marker in enumerate(trimmed_genotype_data): - # if i > 10: - # break - # print("genotype is:", pf(marker)) - - #print("trimmed genotype data is:", pf(trimmed_genotype_data)) - - #for marker_object in genotype_data: - # print("marker_object:", pf(marker_object)) - - #prep_data.PrepData(self.vals, genotype_data) - pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - #for item in trimmed_genotype_data: - # if type(item) != type(list()): - # print(" --->", type(item)) - # for counter, part in enumerate(item): - # if type(part) != type(float()): - # print(" ------>", type(part), " : ", part) - # if counter % 100 == 0: - # print(" ------>", type(part)) genotypes = np.array(trimmed_genotype_data).T - #print("genotypes is:", pf(genotypes)) - #genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, - # self.dataset.group.name + '.snps.new')).T + + #times = collections.OrderedDict() + #times['start'] = time.time() - #print("pheno_vector is:", pf(pheno_vector.shape)) - #print("genotypes is:", pf(genotypes.shape)) + with Bench("Calculate Kinship"): + kinship_matrix = lmm.calculateKinship(genotypes) - kinship_matrix = lmm.calculateKinship(genotypes) - #print("kinship_matrix is:", pf(kinship_matrix)) + with Bench("Create LMM object"): + lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) - lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) - lmm_ob.fit() - - t_stats, p_values = lmm.GWAS(pheno_vector, - genotypes, - kinship_matrix, - REML=True, - refit=False) - - #print("p_values is:", pf(len(p_values))) + with Bench("LMM_ob fitting"): + lmm_ob.fit() + - self.dataset.group.markers.add_pvalues(p_values) + with Bench("Doing gwas"): + t_stats, p_values = lmm.GWAS(pheno_vector, + genotypes, + kinship_matrix, + REML=True, + refit=False) + + Bench().report() + + #previous_time = None + #for operation, this_time in times.iteritems(): + # if previous_time: + # print("{} run time: {}".format(operation, this_time-previous_time)) + # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime)) + # previous_time = this_time - #calculate QTL for each trait - #self.qtl_results = self.genotype.regression(strains = self.samples, - # trait = self.vals) - #self.lrs_array = self.genotype.permutation(strains = self.samples, - # trait = self.vals, - # nperm=self.num_perm) + self.dataset.group.markers.add_pvalues(p_values) self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers.markers] - #print("self.lrs_values is:", pf(self.lrs_values)) lrs_values_sorted = sorted(self.lrs_values) - - #print("lrs_values_sorted is:", pf(lrs_values_sorted)) - #print("int(self.num_perm*0.37-1)", pf(int(self.num_perm*0.37-1))) - + lrs_values_length = len(lrs_values_sorted) - + def lrs_threshold(place): return lrs_values_sorted[int((lrs_values_length * place) -1)] - + self.lrs_thresholds = Bunch( suggestive = lrs_threshold(.37), significant = lrs_threshold(.95), highly_significant = lrs_threshold(.99), ) - #self.lrs_thresholds = Bunch( - # suggestive = self.lrs_array[int(self.num_perm*0.37-1)], - # significant = self.lrs_array[int(self.num_perm*0.95-1)], - # highly_significant = self.lrs_array[int(self.num_perm*0.99-1)] - # ) - if self.display_all_lrs: self.filtered_results = self.dataset.group.markers.markers else: @@ -557,67 +530,6 @@ class MarkerRegression(object): if marker['lrs_value'] > self.lrs_thresholds.suggestive: self.filtered_results.append(marker) - #if self.display_all_lrs: - # filtered_results = self.qtl_results - #else: - # suggestive_results = [] - # self.pure_qtl_results = [] - # for result in self.qtl_results: - # self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name, - # mb=result.locus.Mb, - # chromosome=result.locus.chr), - # lrs=result.lrs, - # additive=result.additive)) - # if result.lrs > self.lrs_thresholds.suggestive: - # suggestive_results.append(result) - # filtered_results = suggestive_results - - - # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used - if not self.filtered_results: - # We use the 10 results with the highest LRS values - self.filtered_results = sorted(self.qtl_results)[-10:] - self.top_10 = True - else: - self.top_10 = False - - - - #qtlresults2 = [] - #if self.disp_all_lrs: - # filtered = self.qtl_results[:] - #else: - # filtered = filter(lambda x, y=fd.suggestive: x.lrs > y, qtlresults) - #if len(filtered) == 0: - # qtlresults2 = qtlresults[:] - # qtlresults2.sort() - # filtered = qtlresults2[-10:] - - ######################################### - # Permutation Graph - ######################################### - #myCanvas = pid.PILCanvas(size=(400,300)) - ##plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification) - #Plot.plotBar(myCanvas, LRSArray, XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test') - #filename= webqtlUtil.genRandStr("Reg_") - #myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif') - #img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') - - #if fd.suggestive == None: - # fd.suggestive = LRSArray[int(fd.nperm*0.37-1)] - #else: - # fd.suggestive = float(fd.suggestive) - #if fd.significance == None: - # fd.significance = LRSArray[int(fd.nperm*0.95-1)] - #else: - # fd.significance = float(fd.significance) - - #permutationHeading = HT.Paragraph('Histogram of Permutation Test') - #permutationHeading.__setattr__("class","title") - # - #permutation = HT.TableLite() - #permutation.append(HT.TR(HT.TD(img))) - for marker in self.filtered_results: if marker['lrs_value'] > webqtlConfig.MAXLRS: marker['lrs_value'] = webqtlConfig.MAXLRS @@ -695,17 +607,6 @@ class MarkerRegression(object): # index+=1 # tblobj_body.append(reportBodyRow) - #tblobj_header.append(reportHeaderRow) - #tblobj['header']=tblobj_header - #tblobj['body']=tblobj_body - - #rv=HT.TD(regressionHeading,LRSInfo,report, locusForm, HT.P(),width='55%',valign='top', align='left', bgColor='#eeeeee') - #if fd.genotype.type == 'intercross': - # bottomInfo.append(HT.BR(), HT.BR(), HT.Strong('Dominance Effect'),' is the difference between the mean trait value of cases heterozygous at a marker and the average mean for the two groups homozygous at this marker: e.g., BD - (BB+DD)/2]. A positive dominance effect indicates that the average phenotype of BD heterozygotes exceeds the mean of BB and DD homozygotes. No dominance deviation can be computed for a set of recombinant inbred strains or for a backcross.') - #return rv,tblobj,bottomInfo - - #return rv,tblobj,bottomInfo - def identify_empty_samples(self): no_val_samples = [] for sample_count, val in enumerate(self.vals): diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index d0f379dd..33f6573e 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -137,7 +137,7 @@ def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): if refit: Ls.fit(X=xs) else: Ls.fit() ts,ps = Ls.association(xs,REML=REML) - else: + else: if x.var() == 0: PS.append(np.nan) TS.append(np.nan) diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 720515c1..7b2d022c 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -3,6 +3,7 @@ from __future__ import absolute_import, print_function, division import string import os import cPickle +import uuid #import pyXLWriter as xl from collections import OrderedDict @@ -120,6 +121,8 @@ class ShowTrait(object): # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self self.hddn = hddn + + self.session_uuid = uuid.uuid4() self.sample_group_types = OrderedDict() self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only" @@ -129,7 +132,8 @@ class ShowTrait(object): print("sample_lists is:", pf(sample_lists)) js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, - attribute_names = self.sample_groups[0].attributes) + attribute_names = self.sample_groups[0].attributes, + session_uuid = self.session_uuid) #print("js_data:", pf(js_data)) self.js_data = js_data diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 87199e9f..bb87b4bb 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -19,8 +19,10 @@