diff options
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/basicStatistics/BasicStatisticsFunctions.py | 207 | ||||
-rw-r--r-- | wqflask/basicStatistics/__init__.py | 0 | ||||
-rw-r--r-- | wqflask/utility/Plot.py | 134 | ||||
-rw-r--r-- | wqflask/utility/corestats.py (renamed from wqflask/basicStatistics/corestats.py) | 8 | ||||
-rw-r--r-- | wqflask/wqflask/show_trait/show_trait.py | 1 |
5 files changed, 3 insertions, 347 deletions
diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py deleted file mode 100644 index 1e5646a1..00000000 --- a/wqflask/basicStatistics/BasicStatisticsFunctions.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import print_function - -#import string -from math import * -#import piddle as pid -#import os -import traceback - -from pprint import pformat as pf - -from corestats import Stats - -import reaper -from htmlgen import HTMLgen2 as HT - -#from utility import Plot -from utility import webqtlUtil -from base import webqtlConfig -from db import webqtlDatabaseFunction - -def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None): - print("basicStatsTable called - len of vals", len(vals)) - st = {} # This is the dictionary where we'll put everything for the template - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - (st['traitmean'], - st['traitmedian'], - st['traitvar'], - st['traitstdev'], - st['traitsem'], - st['N']) = reaper.anova(valsOnly) #ZS: Should convert this from reaper to R in the future - - #tbl = HT.TableLite(cellpadding=20, cellspacing=0) - #dataXZ = vals[:] - dataXZ = sorted(vals, webqtlUtil.cmpOrder) - - print("data for stats is:", pf(dataXZ)) - for num, item in enumerate(dataXZ): - print(" %i - %s" % (num, item)) - print(" length:", len(dataXZ)) - - st['min'] = dataXZ[0][1] - st['max'] = dataXZ[-1][1] - - numbers = [x[1] for x in dataXZ] - stats = Stats(numbers) - - at75 = stats.percentile(75) - at25 = stats.percentile(25) - print("should get a stack") - traceback.print_stack() - print("Interquartile:", at75 - at25) - - #tbl.append(HT.TR(HT.TD("Statistic",align="left", Class="fs14 fwb ffl b1 cw cbrb", width = 180), - # HT.TD("Value", align="right", Class="fs14 fwb ffl b1 cw cbrb", width = 60))) - #tbl.append(HT.TR(HT.TD("N of Samples",align="left", Class="fs13 b1 cbw c222"), - # HT.TD(N,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Mean",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitmean,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Median",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitmedian,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - ##tbl.append(HT.TR(HT.TD("Variance",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - ## HT.TD("%2.3f" % traitvar,nowrap="yes",align="left", Class="fs13 b1 cbw c222"))) - #tbl.append(HT.TR(HT.TD("Standard Error (SE)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitsem,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Standard Deviation (SD)", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % traitstdev,nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Minimum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%s" % dataXZ[0][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD("Maximum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%s" % dataXZ[-1][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - - - - if (trait_type != None and trait_type == 'ProbeSet'): - #tbl.append(HT.TR(HT.TD("Range (log2)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.3f" % (dataXZ[-1][1]-dataXZ[0][1]),nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD(HT.Span("Range (fold)"),align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.2f" % pow(2.0,(dataXZ[-1][1]-dataXZ[0][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - #tbl.append(HT.TR(HT.TD(HT.Span(HT.Href(url="/glossary.html#Interquartile", target="_blank", text="Interquartile Range", Class="non_bold")), align="left", Class="fs13 b1 cbw c222",nowrap="yes"), - # HT.TD("%2.2f" % pow(2.0,(dataXZ[int((N-1)*3.0/4.0)][1]-dataXZ[int((N-1)/4.0)][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right")) - st['range_log2'] = dataXZ[-1][1]-dataXZ[0][1] - st['range_fold'] = pow(2.0, (dataXZ[-1][1]-dataXZ[0][1])) - st['interquartile'] = pow(2.0, (dataXZ[int((st['N']-1)*3.0/4.0)][1]-dataXZ[int((st['N']-1)/4.0)][1])) - - #XZ, 04/01/2009: don't try to get H2 value for probe. - if not cellid: - if heritability: - # This field needs to still be put into the Jinja2 template - st['heritability'] = heritability - #tbl.append(HT.TR(HT.TD(HT.Span("Heritability"),align="center", Class="fs13 b1 cbw c222",nowrap="yes"),HT.TD("%s" % heritability, nowrap="yes",align="center", Class="fs13 b1 cbw c222"))) - - # Lei Yan - # 2008/12/19 - - return st - -def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=[None], size=(750,500)): - - dataXZ = vals[:] - dataXZ.sort(webqtlUtil.cmpOrder) - dataLabel = [] - dataX = map(lambda X: X[1], dataXZ) - - showLabel = showstrains - if len(dataXZ) > 50: - showLabel = 0 - for item in dataXZ: - strainName = webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=item[0]) - dataLabel.append(strainName) - - dataY=Plot.U(len(dataX)) - dataZ=map(Plot.inverseCumul,dataY) - c = pid.PILCanvas(size=(750,500)) - Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel) - - filename= webqtlUtil.genRandStr("nP_") - c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - - img=HT.Image('/image/'+filename+'.gif',border=0) - - return img - -def plotBoxPlot(vals): - - valsOnly = [] - dataXZ = vals[:] - for i in range(len(dataXZ)): - valsOnly.append(dataXZ[i][1]) - - plotHeight = 320 - plotWidth = 220 - xLeftOffset = 60 - xRightOffset = 40 - yTopOffset = 40 - yBottomOffset = 60 - - canvasHeight = plotHeight + yTopOffset + yBottomOffset - canvasWidth = plotWidth + xLeftOffset + xRightOffset - canvas = pid.PILCanvas(size=(canvasWidth,canvasHeight)) - XXX = [('', valsOnly[:])] - - Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait") - filename= webqtlUtil.genRandStr("Box_") - canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - img=HT.Image('/image/'+filename+'.gif',border=0) - - plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13")) - - return img, plotLink - -def plotBarGraph(identification='', RISet='', vals=None, type="name"): - - this_identification = "unnamed trait" - if identification: - this_identification = identification - - if type=="rank": - dataXZ = vals[:] - dataXZ.sort(webqtlUtil.cmpOrder) - title='%s' % this_identification - else: - dataXZ = vals[:] - title='%s' % this_identification - - tvals = [] - tnames = [] - tvars = [] - for i in range(len(dataXZ)): - tvals.append(dataXZ[i][1]) - tnames.append(webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=dataXZ[i][0])) - tvars.append(dataXZ[i][2]) - nnStrain = len(tnames) - - sLabel = 1 - - ###determine bar width and space width - if nnStrain < 20: - sw = 4 - elif nnStrain < 40: - sw = 3 - else: - sw = 2 - - ### 700 is the default plot width minus Xoffsets for 40 strains - defaultWidth = 650 - if nnStrain > 40: - defaultWidth += (nnStrain-40)*10 - defaultOffset = 100 - bw = int(0.5+(defaultWidth - (nnStrain-1.0)*sw)/nnStrain) - if bw < 10: - bw = 10 - - plotWidth = (nnStrain-1)*sw + nnStrain*bw + defaultOffset - plotHeight = 500 - #print [plotWidth, plotHeight, bw, sw, nnStrain] - c = pid.PILCanvas(size=(plotWidth,plotHeight)) - Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw) - - filename= webqtlUtil.genRandStr("Bar_") - c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') - img=HT.Image('/image/'+filename+'.gif',border=0) - - return img diff --git a/wqflask/basicStatistics/__init__.py b/wqflask/basicStatistics/__init__.py deleted file mode 100644 index e69de29b..00000000 --- a/wqflask/basicStatistics/__init__.py +++ /dev/null diff --git a/wqflask/utility/Plot.py b/wqflask/utility/Plot.py index d60e2bb2..3a8b8dd5 100644 --- a/wqflask/utility/Plot.py +++ b/wqflask/utility/Plot.py @@ -36,11 +36,10 @@ from numarray import linear_algebra as la from numarray import ones, array, dot, swapaxes import reaper -# sys.path.append("..") Never in a running webserver -from basicStatistics import corestats import svg import webqtlUtil +import corestats from base import webqtlConfig import utility.logger @@ -315,137 +314,6 @@ def find_outliers(vals): logger.debug(pf(locals())) return upper_bound, lower_bound - -def plotBoxPlot(canvas, data, offset= (40, 40, 40, 40), XLabel="Category", YLabel="Value"): - xLeftOffset, xRightOffset, yTopOffset, yBottomOffset = offset - plotWidth = canvas.size[0] - xLeftOffset - xRightOffset - plotHeight = canvas.size[1] - yTopOffset - yBottomOffset - iValues = [] - for item in data: - for item2 in item[1]: - try: - iValues.append(item2[1]) - except: - iValues.append(item2) - - #draw frame - max_Y = max(iValues) - min_Y = min(iValues) - scaleY = detScale(min_Y, max_Y) - Yll = scaleY[0] - Yur = scaleY[1] - nStep = scaleY[2] - stepY = (Yur - Yll)/nStep - stepYPixel = plotHeight/(nStep) - canvas.drawRect(plotWidth+xLeftOffset, plotHeight + yTopOffset, xLeftOffset, yTopOffset) - - ##draw Y Scale - YYY = Yll - YCoord = plotHeight + yTopOffset - scaleFont=pid.Font(ttf="cour",size=11,bold=1) - for i in range(nStep+1): - strY = cformat(d=YYY, rank=0) - YCoord = max(YCoord, yTopOffset) - canvas.drawLine(xLeftOffset,YCoord,xLeftOffset-5,YCoord) - canvas.drawString(strY, xLeftOffset -30,YCoord +5,font=scaleFont) - YYY += stepY - YCoord -= stepYPixel - - ##draw X Scale - stepX = plotWidth/len(data) - XCoord = xLeftOffset + 0.5*stepX - YCoord = plotHeight + yTopOffset - scaleFont = pid.Font(ttf="tahoma",size=12,bold=0) - labelFont = pid.Font(ttf="tahoma",size=13,bold=0) - for item in data: - itemname, itemvalue = item - canvas.drawLine(XCoord, YCoord,XCoord, YCoord+5, color=pid.black) - canvas.drawString(itemname, XCoord - canvas.stringWidth(itemname,font=labelFont)/2.0,\ - YCoord +20,font=labelFont) - - nValue = len(itemvalue) - catValue = [] - for item2 in itemvalue: - try: - tstrain, tvalue = item2 - except: - tvalue = item2 - if nValue <= 4: - canvas.drawCross(XCoord, plotHeight + yTopOffset - (tvalue-Yll)*plotHeight/(Yur - Yll), color=pid.red,size=5) - else: - catValue.append(tvalue) - if catValue != []: - catMean = gmean(catValue) - catMedian = gmedian(catValue) - lowHinge = gpercentile(catValue, 25) - upHinge = gpercentile(catValue, 75) - Hstep = 1.5*(upHinge - lowHinge) - - outlier = [] - extrem = [] - - upperAdj = None - for item in catValue: - if item >= upHinge + 2*Hstep: - extrem.append(item) - elif item >= upHinge + Hstep: - outlier.append(item) - elif item > upHinge and item < upHinge + Hstep: - if upperAdj == None or item > upperAdj: - upperAdj = item - else: - pass - lowerAdj = None - for item in catValue: - if item <= lowHinge - 2*Hstep: - extrem.append(item) - elif item <= lowHinge - Hstep: - outlier.append(item) - if item < lowHinge and item > lowHinge - Hstep: - if lowerAdj == None or item < lowerAdj: - lowerAdj = item - else: - pass - canvas.drawRect(XCoord-20, plotHeight + yTopOffset - (lowHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (upHinge-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (catMedian-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (catMedian-Yll)*plotHeight/(Yur - Yll)) - if upperAdj != None: - canvas.drawLine(XCoord, plotHeight + yTopOffset - (upHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (upperAdj-Yll)*plotHeight/(Yur - Yll)) - if lowerAdj != None: - canvas.drawLine(XCoord, plotHeight + yTopOffset - (lowHinge-Yll)*plotHeight/(Yur - Yll), \ - XCoord, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll)) - canvas.drawLine(XCoord-20, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll), \ - XCoord+20, plotHeight + yTopOffset - (lowerAdj-Yll)*plotHeight/(Yur - Yll)) - - outlierFont = pid.Font(ttf="cour",size=12,bold=0) - if outlier != []: - for item in outlier: - yc = plotHeight + yTopOffset - (item-Yll)*plotHeight/(Yur - Yll) - #canvas.drawEllipse(XCoord-3, yc-3, XCoord+3, yc+3) - canvas.drawString('o', XCoord-3, yc+5, font=outlierFont, color=pid.orange) - if extrem != []: - for item in extrem: - yc = plotHeight + yTopOffset - (item-Yll)*plotHeight/(Yur - Yll) - #canvas.drawEllipse(XCoord-3, yc-3, XCoord+3, yc+3) - canvas.drawString('*', XCoord-3, yc+6, font=outlierFont, color=pid.red) - - canvas.drawCross(XCoord, plotHeight + yTopOffset - (catMean-Yll)*plotHeight/(Yur - Yll), \ - color=pid.blue,size=3) - #print(catMean, catMedian, cat25per, cat75per) - pass - - XCoord += stepX - - labelFont=pid.Font(ttf="verdana",size=18,bold=0) - canvas.drawString(XLabel, xLeftOffset + (plotWidth -canvas.stringWidth(XLabel,font=labelFont))/2.0, \ - YCoord +40, font=labelFont) - canvas.drawString(YLabel,xLeftOffset-40, YCoord-(plotHeight -canvas.stringWidth(YLabel,font=labelFont))/2.0,\ - font=labelFont, angle =90) - def plotSecurity(canvas, text="12345"): if not text: return diff --git a/wqflask/basicStatistics/corestats.py b/wqflask/utility/corestats.py index eba84c52..c48183ed 100644 --- a/wqflask/basicStatistics/corestats.py +++ b/wqflask/utility/corestats.py @@ -13,11 +13,9 @@ # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. - - import sys - +#ZS: Should switch to using some third party library for this; maybe scipy has an equivalent class Stats: def __init__(self, sequence): @@ -89,8 +87,6 @@ class Stats: return value - - # Sample script using this class: # ------------------------------------------- # #!/usr/bin/env python @@ -100,4 +96,4 @@ class Stats: # stats = corestats.Stats(sequence) # print stats.avg() # print stats.percentile(90) -# ------------------------------------------- +# -------------------------------------------
\ No newline at end of file diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 6de5cd5f..1f000564 100644 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -24,7 +24,6 @@ from utility import webqtlUtil, Plot, Bunch, helper_functions from base.trait import GeneralTrait from base import data_set from db import webqtlDatabaseFunction -from basicStatistics import BasicStatisticsFunctions from pprint import pformat as pf |