diff options
Diffstat (limited to 'wqflask/basicStatistics')
-rw-r--r-- | wqflask/basicStatistics/BasicStatisticsFunctions.py | 207 | ||||
-rw-r--r-- | wqflask/basicStatistics/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/basicStatistics/corestats.py | 103 |
3 files changed, 0 insertions, 310 deletions
# Reconstructed from the diff of the deleted file
# wqflask/basicStatistics/BasicStatisticsFunctions.py (index 1e5646a1..00000000).

from __future__ import print_function

#import string
from math import *
#import piddle as pid
#import os
import traceback

from functools import cmp_to_key
from pprint import pformat as pf

from corestats import Stats

import reaper
from htmlgen import HTMLgen2 as HT

#from utility import Plot
from utility import webqtlUtil
from base import webqtlConfig
from db import webqtlDatabaseFunction

# NOTE(review): the three plotting functions below reference ``Plot`` and
# ``pid``, but both imports are commented out above, so calling them raises
# NameError as the module stands. Confirm whether the imports can be
# restored before relying on the plotting functions.


def _sort_by_cmp_order(records):
    """Return a new list of *records* ordered by ``webqtlUtil.cmpOrder``.

    ``cmp_to_key`` adapts the comparison function so that the call works on
    both Python 2.7 and Python 3 (positional comparators are Python-2-only).
    """
    return sorted(records, key=cmp_to_key(webqtlUtil.cmpOrder))


def _save_canvas_as_gif(canvas, prefix):
    """Save *canvas* as a GIF under a random name and return its image tag.

    The file name comes from ``webqtlUtil.genRandStr(prefix)`` and the file
    is written below ``webqtlConfig.GENERATED_IMAGE_DIR``.
    """
    filename = webqtlUtil.genRandStr(prefix)
    canvas.save(webqtlConfig.GENERATED_IMAGE_DIR + filename, format='gif')
    return HT.Image('/image/' + filename + '.gif', border=0)


def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None):
    """Collect summary statistics for a trait into a dict for the template.

    Args:
        vals: sequence of records; the element at index 1 of each record is
            used as the trait value.
        trait_type: when equal to ``'ProbeSet'``, the ``range_log2``,
            ``range_fold`` and ``interquartile`` entries are added.
        cellid: when truthy, heritability is never reported.
        heritability: stored under ``'heritability'`` when truthy and
            *cellid* is falsy.

    Returns:
        dict with the keys ``traitmean``, ``traitmedian``, ``traitvar``,
        ``traitstdev``, ``traitsem`` and ``N`` (the six values returned by
        ``reaper.anova``, in that order), ``min`` and ``max``, plus the
        optional entries described above.
    """
    st = {}  # This is the dictionary where we'll put everything for the template

    values_only = [record[1] for record in vals]
    # TODO(ZS): Should convert this from reaper to R in the future
    (st['traitmean'],
     st['traitmedian'],
     st['traitvar'],
     st['traitstdev'],
     st['traitsem'],
     st['N']) = reaper.anova(values_only)

    ordered = _sort_by_cmp_order(vals)
    st['min'] = ordered[0][1]
    st['max'] = ordered[-1][1]

    if trait_type == 'ProbeSet':
        log2_range = st['max'] - st['min']
        st['range_log2'] = log2_range
        st['range_fold'] = pow(2.0, log2_range)

        # Quartile positions are taken by truncating fractions of (N - 1).
        last_index = st['N'] - 1
        upper_quartile = ordered[int(last_index * 3.0 / 4.0)][1]
        lower_quartile = ordered[int(last_index / 4.0)][1]
        st['interquartile'] = pow(2.0, upper_quartile - lower_quartile)

    # XZ, 04/01/2009: don't try to get H2 value for probe.
    if not cellid and heritability:
        # This field needs to still be put into the Jinja2 template
        st['heritability'] = heritability

    return st


def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=None, size=(750,500)):
    """Draw a normal probability plot of the trait values and return its image tag.

    Args:
        vals: sequence of records; index 0 is used as the strain name and
            index 1 as the trait value.
        RISet: passed to ``webqtlUtil.genShortStrainName`` when building labels.
        title: plot title, forwarded to ``Plot.plotXY``.
        showstrains: label flag forwarded as ``showLabel``; forced to 0 when
            there are more than 50 records.
        specialStrains: forwarded as ``specialCases``; defaults to ``[None]``.
        size: canvas size as ``(width, height)``.

    Returns:
        ``HT.Image`` pointing at the generated GIF.
    """
    if specialStrains is None:
        # A fresh list per call instead of a shared mutable default.
        specialStrains = [None]

    ordered = _sort_by_cmp_order(vals)
    trait_values = [record[1] for record in ordered]
    labels = [
        webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=record[0])
        for record in ordered
    ]
    show_label = 0 if len(ordered) > 50 else showstrains

    uniform_points = Plot.U(len(trait_values))
    expected_z = [Plot.inverseCumul(point) for point in uniform_points]

    # Honour the caller's ``size``; it used to be ignored in favour of a
    # hard-coded (750, 500), which is still the default.
    canvas = pid.PILCanvas(size=size)
    Plot.plotXY(canvas, expected_z, trait_values, dataLabel=labels,
                XLabel='Expected Z score', connectdot=0, YLabel='Trait value',
                title=title, specialCases=specialStrains, showLabel=show_label)

    return _save_canvas_as_gif(canvas, "nP_")


def plotBoxPlot(vals):
    """Draw a box plot of the trait values.

    Args:
        vals: sequence of records; the element at index 1 of each record is
            used as the trait value.

    Returns:
        tuple ``(img, plotLink)``: the ``HT.Image`` for the generated GIF and
        an ``HT.Span`` linking to an explanation of box plots.
    """
    values_only = [record[1] for record in vals]

    plot_height = 320
    plot_width = 220
    x_left_offset = 60
    x_right_offset = 40
    y_top_offset = 40
    y_bottom_offset = 60

    canvas_width = plot_width + x_left_offset + x_right_offset
    canvas_height = plot_height + y_top_offset + y_bottom_offset
    canvas = pid.PILCanvas(size=(canvas_width, canvas_height))

    # A single, unnamed series.
    series = [('', values_only)]
    Plot.plotBoxPlot(canvas, series,
                     offset=(x_left_offset, x_right_offset, y_top_offset, y_bottom_offset),
                     XLabel= "Trait")
    img = _save_canvas_as_gif(canvas, "Box_")

    plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13"))

    return img, plotLink


def plotBarGraph(identification='', RISet='', vals=None, type="name"):
    """Draw a bar graph of the trait values and return its image tag.

    Args:
        identification: used as the plot title; ``"unnamed trait"`` is used
            when it is empty.
        RISet: passed to ``webqtlUtil.genShortStrainName`` when building labels.
        vals: sequence of records; index 0 is used as the strain name,
            index 1 as the value and index 2 as the variance.
        type: ``"rank"`` orders the bars with ``webqtlUtil.cmpOrder``; any
            other value keeps the order of *vals*.

    Returns:
        ``HT.Image`` pointing at the generated GIF.
    """
    title = '%s' % (identification or "unnamed trait")

    if type == "rank":
        records = _sort_by_cmp_order(vals)
    else:
        records = vals[:]

    bar_values = [record[1] for record in records]
    bar_names = [
        webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=record[0])
        for record in records
    ]
    bar_variances = [record[2] for record in records]
    strain_count = len(bar_names)

    # Space between bars shrinks as the number of strains grows.
    if strain_count < 20:
        space_width = 4
    elif strain_count < 40:
        space_width = 3
    else:
        space_width = 2

    # NOTE(review): the original comment called 700 "the default plot width
    # minus Xoffsets for 40 strains", but the value used is 650 - confirm.
    default_width = 650
    if strain_count > 40:
        default_width += (strain_count - 40) * 10
    default_offset = 100

    # Rounded bar width, never narrower than 10.
    bar_width = int(0.5 + (default_width - (strain_count - 1.0) * space_width) / strain_count)
    if bar_width < 10:
        bar_width = 10

    plot_width = (strain_count - 1) * space_width + strain_count * bar_width + default_offset
    plot_height = 500
    canvas = pid.PILCanvas(size=(plot_width, plot_height))
    Plot.plotBarText(canvas, bar_values, bar_names, variance=bar_variances,
                     YLabel='Value', title=title, sLabel=1, barSpace=space_width)

    return _save_canvas_as_gif(canvas, "Bar_")
# Reconstructed from the diff of the deleted files
# wqflask/basicStatistics/__init__.py (empty, index e69de29b..00000000) and
# wqflask/basicStatistics/corestats.py (index eba84c52..00000000).

#!/usr/bin/env python

# corestats.py (COREy STATS)
# Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org)
#
# statistical calculation class
# for processing numeric sequences
#
# license: GNU LGPL
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.



import sys


class Stats:
    """Descriptive statistics over a sequence of numbers.

    Every item is converted to ``float`` on construction and kept, in the
    order given, in ``self.sequence``. No method reorders that list.
    Methods return ``None`` when there is not enough data to compute a result.
    """

    def __init__(self, sequence):
        """Store *sequence* as a list of floats.

        Raises whatever ``float()`` raises for an item that is not numeric.
        """
        self.sequence = [float(item) for item in sequence]

    def sum(self):
        """Return the sum of the values, or None for an empty sequence."""
        if not self.sequence:
            return None
        return sum(self.sequence)

    def count(self):
        """Return the number of values."""
        return len(self.sequence)

    def min(self):
        """Return the smallest value, or None for an empty sequence."""
        if not self.sequence:
            return None
        return min(self.sequence)

    def max(self):
        """Return the largest value, or None for an empty sequence."""
        if not self.sequence:
            return None
        return max(self.sequence)

    def avg(self):
        """Return the arithmetic mean, or None for an empty sequence."""
        if not self.sequence:
            return None
        return sum(self.sequence) / len(self.sequence)

    def median(self):
        """Return the median, or None for an empty sequence.

        For an even number of values the median is the mean of the two
        middle values.
        """
        if not self.sequence:
            return None
        ordered = sorted(self.sequence)
        middle = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2.0

    def stdev(self):
        """Return the sample standard deviation (n - 1 denominator).

        Returns None when there are fewer than two values, because the
        n - 1 denominator would otherwise be zero.
        """
        if len(self.sequence) < 2:
            return None
        avg = self.avg()
        squared_deviations = sum((value - avg) ** 2 for value in self.sequence)
        return (squared_deviations / (len(self.sequence) - 1)) ** .5

    def percentile(self, percentile):
        """Return the value at the given percentile of the sorted values.

        The result is the element at index ``int(n * percentile / 100)`` of
        the sorted values; no interpolation is performed.

        Args:
            percentile: number in the range ``0 <= percentile < 100``.

        Returns:
            The selected value, or None when the sequence is empty or
            *percentile* is out of range (an error line is then written to
            ``sys.stderr``).
        """
        if not self.sequence:
            return None
        if percentile >= 100:
            sys.stderr.write('ERROR: percentile must be < 100. you supplied: %s\n'% percentile)
            return None
        if percentile < 0:
            # A negative percentile would produce a negative index and
            # silently select from the wrong end of the list.
            sys.stderr.write('ERROR: percentile must be >= 0. you supplied: %s\n' % percentile)
            return None
        element_idx = int(len(self.sequence) * (percentile / 100.0))
        return sorted(self.sequence)[element_idx]




# Sample script using this class:
# -------------------------------------------
# #!/usr/bin/env python
# import corestats
#
# sequence = [1, 2.5, 7, 13.4, 8.0]
# stats = corestats.Stats(sequence)
# print(stats.avg())
# print(stats.percentile(90))
# -------------------------------------------