about summary refs log tree commit diff
path: root/wqflask/basicStatistics
diff options
context:
space:
mode:
author: zsloan, 2018-04-09 16:51:54 +0000
committer: zsloan, 2018-04-09 16:51:54 +0000
commit: c9b0ab18457929bd7ca458f7207e50fe14099d6a (patch)
tree: 5cbc39ef322f6246c90914eb0b8f74fa05ba630e /wqflask/basicStatistics
parent: fb62420ddbbf0189c9b0fb6d227121836fc377d8 (diff)
download: genenetwork2-c9b0ab18457929bd7ca458f7207e50fe14099d6a.tar.gz
Removed the basicStatistics directory/contents because nothing there was being used except corestats, which I moved to utility
Removed box plot code from Plot.py since we no longer use it either
Diffstat (limited to 'wqflask/basicStatistics')
-rw-r--r-- wqflask/basicStatistics/BasicStatisticsFunctions.py 207
-rw-r--r-- wqflask/basicStatistics/__init__.py 0
-rw-r--r-- wqflask/basicStatistics/corestats.py 103
3 files changed, 0 insertions, 310 deletions
diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py
deleted file mode 100644
index 1e5646a1..00000000
--- a/wqflask/basicStatistics/BasicStatisticsFunctions.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import print_function
-
-#import string
-from math import *
-#import piddle as pid
-#import os
-import traceback
-
-from pprint import pformat as pf
-
-from corestats import Stats
-
-import reaper
-from htmlgen import HTMLgen2 as HT
-
-#from utility import Plot
-from utility import webqtlUtil
-from base import webqtlConfig
-from db import webqtlDatabaseFunction
-
-def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None):
-    print("basicStatsTable called - len of vals", len(vals))
-    st = {}  # This is the dictionary where we'll put everything for the template
-    valsOnly = []
-    dataXZ = vals[:]
-    for i in range(len(dataXZ)):
-        valsOnly.append(dataXZ[i][1])
-
-    (st['traitmean'],
-     st['traitmedian'],
-     st['traitvar'],
-     st['traitstdev'],
-     st['traitsem'],
-     st['N']) = reaper.anova(valsOnly) #ZS: Should convert this from reaper to R in the future
-
-    #tbl = HT.TableLite(cellpadding=20, cellspacing=0)
-    #dataXZ = vals[:]
-    dataXZ = sorted(vals, webqtlUtil.cmpOrder)
-
-    print("data for stats is:", pf(dataXZ))
-    for num, item in enumerate(dataXZ):
-        print(" %i - %s" % (num, item))
-    print("  length:", len(dataXZ))
-
-    st['min'] = dataXZ[0][1]
-    st['max'] = dataXZ[-1][1]
-
-    numbers = [x[1] for x in dataXZ]
-    stats = Stats(numbers)
-
-    at75 = stats.percentile(75)
-    at25 = stats.percentile(25)
-    print("should get a stack")
-    traceback.print_stack()
-    print("Interquartile:", at75 - at25)
-
-    #tbl.append(HT.TR(HT.TD("Statistic",align="left", Class="fs14 fwb ffl b1 cw cbrb", width = 180),
-    #                HT.TD("Value", align="right", Class="fs14 fwb ffl b1 cw cbrb", width = 60)))
-    #tbl.append(HT.TR(HT.TD("N of Samples",align="left", Class="fs13 b1 cbw c222"),
-    #                HT.TD(N,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    #tbl.append(HT.TR(HT.TD("Mean",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%2.3f" % traitmean,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    #tbl.append(HT.TR(HT.TD("Median",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%2.3f" % traitmedian,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    ##tbl.append(HT.TR(HT.TD("Variance",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    ##               HT.TD("%2.3f" % traitvar,nowrap="yes",align="left", Class="fs13 b1 cbw c222")))
-    #tbl.append(HT.TR(HT.TD("Standard Error (SE)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%2.3f" % traitsem,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    #tbl.append(HT.TR(HT.TD("Standard Deviation (SD)", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%2.3f" % traitstdev,nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    #tbl.append(HT.TR(HT.TD("Minimum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%s" % dataXZ[0][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-    #tbl.append(HT.TR(HT.TD("Maximum", align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-    #                HT.TD("%s" % dataXZ[-1][1],nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-
-
-
-    if (trait_type != None and trait_type == 'ProbeSet'):
-        #tbl.append(HT.TR(HT.TD("Range (log2)",align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-        #        HT.TD("%2.3f" % (dataXZ[-1][1]-dataXZ[0][1]),nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-        #tbl.append(HT.TR(HT.TD(HT.Span("Range (fold)"),align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-        #        HT.TD("%2.2f" % pow(2.0,(dataXZ[-1][1]-dataXZ[0][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-        #tbl.append(HT.TR(HT.TD(HT.Span(HT.Href(url="/glossary.html#Interquartile", target="_blank", text="Interquartile Range", Class="non_bold")), align="left", Class="fs13 b1 cbw c222",nowrap="yes"),
-        #        HT.TD("%2.2f" % pow(2.0,(dataXZ[int((N-1)*3.0/4.0)][1]-dataXZ[int((N-1)/4.0)][1])), nowrap="yes", Class="fs13 b1 cbw c222"), align="right"))
-        st['range_log2'] = dataXZ[-1][1]-dataXZ[0][1]
-        st['range_fold'] = pow(2.0, (dataXZ[-1][1]-dataXZ[0][1]))
-        st['interquartile'] = pow(2.0, (dataXZ[int((st['N']-1)*3.0/4.0)][1]-dataXZ[int((st['N']-1)/4.0)][1]))
-
-        #XZ, 04/01/2009: don't try to get H2 value for probe.
-        if not cellid:
-            if heritability:
-                # This field needs to still be put into the Jinja2 template
-                st['heritability'] = heritability
-                #tbl.append(HT.TR(HT.TD(HT.Span("Heritability"),align="center", Class="fs13 b1 cbw c222",nowrap="yes"),HT.TD("%s" % heritability, nowrap="yes",align="center", Class="fs13 b1 cbw c222")))
-
-        # Lei Yan
-        # 2008/12/19
-
-    return st
-
-def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=[None], size=(750,500)):
-
-    dataXZ = vals[:]
-    dataXZ.sort(webqtlUtil.cmpOrder)
-    dataLabel = []
-    dataX = map(lambda X: X[1], dataXZ)
-
-    showLabel = showstrains
-    if len(dataXZ) > 50:
-        showLabel = 0
-    for item in dataXZ:
-        strainName = webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=item[0])
-        dataLabel.append(strainName)
-
-    dataY=Plot.U(len(dataX))
-    dataZ=map(Plot.inverseCumul,dataY)
-    c = pid.PILCanvas(size=(750,500))
-    Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel)
-
-    filename= webqtlUtil.genRandStr("nP_")
-    c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
-
-    img=HT.Image('/image/'+filename+'.gif',border=0)
-
-    return img
-
-def plotBoxPlot(vals):
-
-    valsOnly = []
-    dataXZ = vals[:]
-    for i in range(len(dataXZ)):
-        valsOnly.append(dataXZ[i][1])
-
-    plotHeight = 320
-    plotWidth = 220
-    xLeftOffset = 60
-    xRightOffset = 40
-    yTopOffset = 40
-    yBottomOffset = 60
-
-    canvasHeight = plotHeight + yTopOffset + yBottomOffset
-    canvasWidth = plotWidth + xLeftOffset + xRightOffset
-    canvas = pid.PILCanvas(size=(canvasWidth,canvasHeight))
-    XXX = [('', valsOnly[:])]
-
-    Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait")
-    filename= webqtlUtil.genRandStr("Box_")
-    canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
-    img=HT.Image('/image/'+filename+'.gif',border=0)
-
-    plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13"))
-
-    return img, plotLink
-
-def plotBarGraph(identification='', RISet='', vals=None, type="name"):
-
-    this_identification = "unnamed trait"
-    if identification:
-        this_identification = identification
-
-    if type=="rank":
-        dataXZ = vals[:]
-        dataXZ.sort(webqtlUtil.cmpOrder)
-        title='%s' % this_identification
-    else:
-        dataXZ = vals[:]
-        title='%s' % this_identification
-
-    tvals = []
-    tnames = []
-    tvars = []
-    for i in range(len(dataXZ)):
-        tvals.append(dataXZ[i][1])
-        tnames.append(webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=dataXZ[i][0]))
-        tvars.append(dataXZ[i][2])
-    nnStrain = len(tnames)
-
-    sLabel = 1
-
-    ###determine bar width and space width
-    if nnStrain < 20:
-        sw = 4
-    elif nnStrain < 40:
-        sw = 3
-    else:
-        sw = 2
-
-    ### 700 is the default plot width minus Xoffsets for 40 strains
-    defaultWidth = 650
-    if nnStrain > 40:
-        defaultWidth += (nnStrain-40)*10
-    defaultOffset = 100
-    bw = int(0.5+(defaultWidth - (nnStrain-1.0)*sw)/nnStrain)
-    if bw < 10:
-        bw = 10
-
-    plotWidth = (nnStrain-1)*sw + nnStrain*bw + defaultOffset
-    plotHeight = 500
-    #print [plotWidth, plotHeight, bw, sw, nnStrain]
-    c = pid.PILCanvas(size=(plotWidth,plotHeight))
-    Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw)
-
-    filename= webqtlUtil.genRandStr("Bar_")
-    c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
-    img=HT.Image('/image/'+filename+'.gif',border=0)
-
-    return img
diff --git a/wqflask/basicStatistics/__init__.py b/wqflask/basicStatistics/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/wqflask/basicStatistics/__init__.py
+++ /dev/null
diff --git a/wqflask/basicStatistics/corestats.py b/wqflask/basicStatistics/corestats.py
deleted file mode 100644
index eba84c52..00000000
--- a/wqflask/basicStatistics/corestats.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-
-#  corestats.py (COREy STATS)
-#  Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org)
-#
-#    statistical calculation class
-#    for processing numeric sequences
-#
-#  license: GNU LGPL
-#
-#  This library is free software; you can redistribute it and/or
-#  modify it under the terms of the GNU Lesser General Public
-#  License as published by the Free Software Foundation; either
-#  version 2.1 of the License, or (at your option) any later version.
-
-
-
-import sys
-
-
-class Stats:
-
-    def __init__(self, sequence):
-        # sequence of numbers we will process
-        # convert all items to floats for numerical processing
-        self.sequence = [float(item) for item in sequence]
-
-
-    def sum(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            return sum(self.sequence)
-
-
-    def count(self):
-        return len(self.sequence)
-
-
-    def min(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            return min(self.sequence)
-
-
-    def max(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            return max(self.sequence)
-
-
-    def avg(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            return sum(self.sequence) / len(self.sequence)
-
-
-    def median(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            self.sequence.sort()
-            return self.sequence[len(self.sequence) // 2]
-
-
-    def stdev(self):
-        if len(self.sequence) < 1:
-            return None
-        else:
-            avg = self.avg()
-            sdsq = sum([(i - avg) ** 2 for i in self.sequence])
-            stdev = (sdsq / (len(self.sequence) - 1)) ** .5
-            return stdev
-
-
-    def percentile(self, percentile):
-        if len(self.sequence) < 1:
-            value = None
-        elif (percentile >= 100):
-            sys.stderr.write('ERROR: percentile must be < 100.  you supplied: %s\n'% percentile)
-            value = None
-        else:
-            element_idx = int(len(self.sequence) * (percentile / 100.0))
-            self.sequence.sort()
-            value = self.sequence[element_idx]
-        return value
-
-
-
-
-# Sample script using this class:
-# -------------------------------------------
-#    #!/usr/bin/env python
-#    import corestats
-#
-#    sequence = [1, 2.5, 7, 13.4, 8.0]
-#    stats = corestats.Stats(sequence)
-#    print stats.avg()
-#    print stats.percentile(90)
-# -------------------------------------------