about summary refs log tree commit diff
path: root/wqflask/basicStatistics/BasicStatisticsFunctions.py
diff options
context:
space:
mode:
authorZachary Sloan2013-01-11 23:59:41 +0000
committerZachary Sloan2013-01-11 23:59:41 +0000
commit2b2970d167c5b555e0e0ad0b34b72f817c1fac91 (patch)
tree36fa8c708138fff03593e0f50cc933bcb62b5592 /wqflask/basicStatistics/BasicStatisticsFunctions.py
parent1db9237a05fd27c80dc963db9916072594156198 (diff)
parentd39b691994a395c45fa242de6d64d12a5470af10 (diff)
downloadgenenetwork2-2b2970d167c5b555e0e0ad0b34b72f817c1fac91.tar.gz
Merge branch 'flask' of http://github.com/zsloan/genenetwork
Diffstat (limited to 'wqflask/basicStatistics/BasicStatisticsFunctions.py')
-rwxr-xr-xwqflask/basicStatistics/BasicStatisticsFunctions.py207
1 files changed, 207 insertions, 0 deletions
diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py
new file mode 100755
index 00000000..74784853
--- /dev/null
+++ b/wqflask/basicStatistics/BasicStatisticsFunctions.py
@@ -0,0 +1,207 @@
+from __future__ import print_function
+
+#import string
+from math import *
+#import piddle as pid
+#import os
+import traceback
+
+from pprint import pformat as pf
+
+from corestats import Stats
+
+import reaper
+from htmlgen import HTMLgen2 as HT
+
+#from utility import Plot
+from utility import webqtlUtil
+from base import webqtlConfig
+from dbFunction import webqtlDatabaseFunction
+
+def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None):
+    print("basicStatsTable called - len of vals", len(vals))
+    st = {}  # This is the dictionary where we'll put everything for the template
+    valsOnly = []
+    dataXZ = vals[:]
+    for i in range(len(dataXZ)):
+        valsOnly.append(dataXZ[i][1])
+
+    (st['traitmean'],
+     st['traitmedian'],
+     st['traitvar'],
+     st['traitstdev'],
+     st['traitsem'],
+     st['N']) = reaper.anova(valsOnly) #ZS: Should convert this from reaper to R in the future
+
+    #tbl = HT.TableLite(cellpadding=20, cellspacing=0)
+    #dataXZ = vals[:]
+    dataXZ = sorted(vals, webqtlUtil.cmpOrder)
+
+    print("data for stats is:", pf(dataXZ))
+    for num, item in enumerate(dataXZ):
+        print(" %i - %s" % (num, item))
+    print("  length:", len(dataXZ))
+
+    st['min'] = dataXZ[0][1]
+    st['max'] = dataXZ[-1][1]
+
+    numbers = [x[1] for x in dataXZ]
+    stats = Stats(numbers)
+
+    at75 = stats.percentile(75)
+    at25 = stats.percentile(25)
+    print("should get a stack")
+    traceback.print_stack()
+    print("Interquartile:", at75 - at25)
+
+    #tbl.append(HT.TR(HT.TD("Statistic",align="left", Class="fs14 fwb ffl b1 cw cbrb", width = 180),
+    #                HT.TD("Value", align="right", Class="fs14 fwb ffl b1 cw cbrb", width = 60)))
+    #tbl.append(HT.TR(HT.TD("N of Samples",align="left", Class="fs13 b1 cbw c222"),
+    #                HT.TD(N,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    #tbl.append(HT.TR(HT.TD("Mean",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%2.3f" % traitmean,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    #tbl.append(HT.TR(HT.TD("Median",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%2.3f" % traitmedian,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    ##tbl.append(HT.TR(HT.TD("Variance",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    ##               HT.TD("%2.3f" % traitvar,nowrap="yes",align="left", Class="fs13 b1 cbw c222")))
+    #tbl.append(HT.TR(HT.TD("Standard Error (SE)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%2.3f" % traitsem,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    #tbl.append(HT.TR(HT.TD("Standard Deviation (SD)", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%2.3f" % traitstdev,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    #tbl.append(HT.TR(HT.TD("Minimum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%s" % dataXZ[0][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+    #tbl.append(HT.TR(HT.TD("Maximum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+    #                HT.TD("%s" % dataXZ[-1][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+
+
+
+    if (trait_type != None and trait_type == 'ProbeSet'):
+        #tbl.append(HT.TR(HT.TD("Range (log2)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+        #        HT.TD("%2.3f" % (dataXZ[-1][1]-dataXZ[0][1]),nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+        #tbl.append(HT.TR(HT.TD(HT.Span("Range (fold)"),align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+        #        HT.TD("%2.2f" % pow(2.0,(dataXZ[-1][1]-dataXZ[0][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+        #tbl.append(HT.TR(HT.TD(HT.Span(HT.Href(url="/glossary.html#Interquartile", target="_blank", text="Interquartile Range", Class="non_bold")), align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
+        #        HT.TD("%2.2f" % pow(2.0,(dataXZ[int((N-1)*3.0/4.0)][1]-dataXZ[int((N-1)/4.0)][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
+        st['range_log2'] = dataXZ[-1][1]-dataXZ[0][1]
+        st['range_fold'] = pow(2.0, (dataXZ[-1][1]-dataXZ[0][1]))
+        st['interquartile'] = pow(2.0, (dataXZ[int((st['N']-1)*3.0/4.0)][1]-dataXZ[int((st['N']-1)/4.0)][1]))
+
+        #XZ, 04/01/2009: don't try to get H2 value for probe.
+        if not cellid:
+            if heritability:
+                # This field needs to still be put into the Jinja2 template
+                st['heritability'] = heritability
+                #tbl.append(HT.TR(HT.TD(HT.Span("Heritability"),align="center", Class="fs13 b1 cbw c222",nowrap="yes"),HT.TD("%s" % heritability, nowrap="yes",align="center", Class="fs13 b1 cbw c222")))
+
+        # Lei Yan
+        # 2008/12/19
+
+    return st
+
+def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=[None], size=(750,500)):
+
+    dataXZ = vals[:]
+    dataXZ.sort(webqtlUtil.cmpOrder)
+    dataLabel = []
+    dataX = map(lambda X: X[1], dataXZ)
+
+    showLabel = showstrains
+    if len(dataXZ) > 50:
+        showLabel = 0
+    for item in dataXZ:
+        strainName = webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=item[0])
+        dataLabel.append(strainName)
+
+    dataY=Plot.U(len(dataX))
+    dataZ=map(Plot.inverseCumul,dataY)
+    c = pid.PILCanvas(size=(750,500))
+    Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel)
+
+    filename= webqtlUtil.genRandStr("nP_")
+    c.save(webqtlConfig.IMGDIR+filename, format='gif')
+
+    img=HT.Image('/image/'+filename+'.gif',border=0)
+
+    return img
+
+def plotBoxPlot(vals):
+
+    valsOnly = []
+    dataXZ = vals[:]
+    for i in range(len(dataXZ)):
+        valsOnly.append(dataXZ[i][1])
+
+    plotHeight = 320
+    plotWidth = 220
+    xLeftOffset = 60
+    xRightOffset = 40
+    yTopOffset = 40
+    yBottomOffset = 60
+
+    canvasHeight = plotHeight + yTopOffset + yBottomOffset
+    canvasWidth = plotWidth + xLeftOffset + xRightOffset
+    canvas = pid.PILCanvas(size=(canvasWidth,canvasHeight))
+    XXX = [('', valsOnly[:])]
+
+    Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait")
+    filename= webqtlUtil.genRandStr("Box_")
+    canvas.save(webqtlConfig.IMGDIR+filename, format='gif')
+    img=HT.Image('/image/'+filename+'.gif',border=0)
+
+    plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13"))
+
+    return img, plotLink
+
+def plotBarGraph(identification='', RISet='', vals=None, type="name"):
+
+    this_identification = "unnamed trait"
+    if identification:
+        this_identification = identification
+
+    if type=="rank":
+        dataXZ = vals[:]
+        dataXZ.sort(webqtlUtil.cmpOrder)
+        title='%s' % this_identification
+    else:
+        dataXZ = vals[:]
+        title='%s' % this_identification
+
+    tvals = []
+    tnames = []
+    tvars = []
+    for i in range(len(dataXZ)):
+        tvals.append(dataXZ[i][1])
+        tnames.append(webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=dataXZ[i][0]))
+        tvars.append(dataXZ[i][2])
+    nnStrain = len(tnames)
+
+    sLabel = 1
+
+    ###determine bar width and space width
+    if nnStrain < 20:
+        sw = 4
+    elif nnStrain < 40:
+        sw = 3
+    else:
+        sw = 2
+
+    ### 700 is the default plot width minus Xoffsets for 40 strains
+    defaultWidth = 650
+    if nnStrain > 40:
+        defaultWidth += (nnStrain-40)*10
+    defaultOffset = 100
+    bw = int(0.5+(defaultWidth - (nnStrain-1.0)*sw)/nnStrain)
+    if bw < 10:
+        bw = 10
+
+    plotWidth = (nnStrain-1)*sw + nnStrain*bw + defaultOffset
+    plotHeight = 500
+    #print [plotWidth, plotHeight, bw, sw, nnStrain]
+    c = pid.PILCanvas(size=(plotWidth,plotHeight))
+    Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw)
+
+    filename= webqtlUtil.genRandStr("Bar_")
+    c.save(webqtlConfig.IMGDIR+filename, format='gif')
+    img=HT.Image('/image/'+filename+'.gif',border=0)
+
+    return img