aboutsummaryrefslogtreecommitdiff
path: root/wqflask/basicStatistics
diff options
context:
space:
mode:
author zsloan 2018-08-14 20:16:32 +0000
committer zsloan 2018-08-14 20:16:32 +0000
commit 838362c116b02c090dadeb76cda27e9902a6626a (patch)
tree a6be104cc73e3bc9e271f9b5ca854dd32f3b810d /wqflask/basicStatistics
parent 0bead53661ea701ffd9f9d565e4d2ecbbed81a8e (diff)
parent 85defabb17ecdef1c7b8e92fa2e06b44d1e9ca49 (diff)
download genenetwork2-838362c116b02c090dadeb76cda27e9902a6626a.tar.gz
Merge branch 'testing' of https://github.com/genenetwork/genenetwork2 into production
Diffstat (limited to 'wqflask/basicStatistics')
-rw-r--r-- wqflask/basicStatistics/BasicStatisticsFunctions.py 207
-rw-r--r-- wqflask/basicStatistics/__init__.py 0
-rw-r--r-- wqflask/basicStatistics/corestats.py 103
3 files changed, 0 insertions, 310 deletions
diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py
deleted file mode 100644
index 1e5646a1..00000000
--- a/wqflask/basicStatistics/BasicStatisticsFunctions.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import print_function
-
-#import string
-from math import *
-#import piddle as pid
-#import os
-import traceback
-
-from pprint import pformat as pf
-
-from corestats import Stats
-
-import reaper
-from htmlgen import HTMLgen2 as HT
-
-#from utility import Plot
-from utility import webqtlUtil
-from base import webqtlConfig
-from db import webqtlDatabaseFunction
-
def basicStatsTable(vals, trait_type=None, cellid=None, heritability=None):
    """Collect summary statistics for a trait into a dict for the template.

    Args:
        vals: Sequence of records whose element ``[1]`` is the numeric trait
            value (presumably ``(sample_name, value, ...)`` tuples --
            TODO confirm against callers).
        trait_type: When equal to ``'ProbeSet'`` the range and interquartile
            entries are added to the result.
        cellid: When truthy, no heritability entry is added.
        heritability: Stored under ``'heritability'`` when truthy and
            ``cellid`` is falsy.

    Returns:
        dict with the keys ``traitmean``, ``traitmedian``, ``traitvar``,
        ``traitstdev``, ``traitsem`` and ``N`` (unpacked, in that order, from
        the result of ``reaper.anova``), plus ``min`` and ``max``.  For
        ProbeSet traits it also holds ``range_log2``, ``range_fold`` and
        ``interquartile``; ``heritability`` is present only under the
        conditions described above.

    Raises:
        IndexError: If ``vals`` is empty, when the min/max lookup is reached.
    """
    # cmp_to_key adapts the project's two-argument comparison function so the
    # sort works on both Python 2.7 and Python 3 (where sorted() no longer
    # accepts a positional cmp function).
    from functools import cmp_to_key

    st = {}  # This is the dictionary where we'll put everything for the template
    valsOnly = [record[1] for record in vals]

    (st['traitmean'],
     st['traitmedian'],
     st['traitvar'],
     st['traitstdev'],
     st['traitsem'],
     st['N']) = reaper.anova(valsOnly)  # ZS: Should convert this from reaper to R in the future

    # NOTE(review): cmpOrder presumably orders records by their value field,
    # which is what makes the first/last records the min/max -- confirm in
    # utility.webqtlUtil.
    dataXZ = sorted(vals, key=cmp_to_key(webqtlUtil.cmpOrder))

    st['min'] = dataXZ[0][1]
    st['max'] = dataXZ[-1][1]

    if trait_type == 'ProbeSet':
        value_range = dataXZ[-1][1] - dataXZ[0][1]
        st['range_log2'] = value_range
        st['range_fold'] = pow(2.0, value_range)
        # Quartile positions are taken by truncating (N-1)*3/4 and (N-1)/4 to
        # integer indexes into the sorted records.
        last_index = st['N'] - 1
        upper_quartile = dataXZ[int(last_index * 3.0 / 4.0)][1]
        lower_quartile = dataXZ[int(last_index / 4.0)][1]
        st['interquartile'] = pow(2.0, upper_quartile - lower_quartile)

    # XZ, 04/01/2009: don't try to get H2 value for probe.
    if not cellid and heritability:
        # This field needs to still be put into the Jinja2 template
        st['heritability'] = heritability

    return st
-
def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specialStrains=[None], size=(750,500)):
    """Draw a normal probability plot of the trait values and return it.

    The records are sorted, their values are plotted against the expected
    Z scores produced by ``Plot.U`` / ``Plot.inverseCumul``, the canvas is
    saved as a GIF under ``webqtlConfig.GENERATED_IMAGE_DIR`` and an
    ``HT.Image`` pointing at ``/image/<name>.gif`` is returned.

    Args:
        vals: List of records; element ``[0]`` is used as the strain name and
            element ``[1]`` as the plotted value.  Must not be ``None``
            despite the default.
        RISet: Passed through to ``webqtlUtil.genShortStrainName``.
        title: Plot title.
        showstrains: Label flag handed to the plot; forced to 0 when there
            are more than 50 records.
        specialStrains: Passed to ``Plot.plotXY`` as ``specialCases``.
            NOTE(review): this is a mutable default shared across calls; it
            is left untouched here and must not be mutated downstream.
        size: Canvas size as ``(width, height)``.

    Returns:
        ``HT.Image`` for the generated GIF.

    NOTE(review): ``Plot`` and ``pid`` are not imported in the visible import
    block of this module (their imports are commented out), so they must be
    made available before this function can run.
    """
    # cmp_to_key keeps the project's comparison function usable on Python 3,
    # where list.sort()/sorted() no longer take a positional cmp argument.
    from functools import cmp_to_key

    dataXZ = sorted(vals, key=cmp_to_key(webqtlUtil.cmpOrder))
    # Real lists (not map objects) so len() and repeated iteration work on
    # Python 3 as well.
    dataX = [record[1] for record in dataXZ]

    # Too many points make the labels unreadable, so suppress them.
    showLabel = 0 if len(dataXZ) > 50 else showstrains

    dataLabel = [
        webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=record[0])
        for record in dataXZ
    ]

    dataY = Plot.U(len(dataX))
    dataZ = [Plot.inverseCumul(y) for y in dataY]
    # Honour the caller-supplied size; the default matches the previously
    # hard-coded (750,500).
    c = pid.PILCanvas(size=size)
    Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel)

    filename = webqtlUtil.genRandStr("nP_")
    c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')

    return HT.Image('/image/'+filename+'.gif',border=0)
-
def plotBoxPlot(vals):
    """Draw a box plot of the trait values and return it with a help link.

    Args:
        vals: Sliceable sequence of records; element ``[1]`` of each record
            is the value that goes into the plot.

    Returns:
        Tuple ``(img, plotLink)``: an ``HT.Image`` pointing at the saved GIF
        and an ``HT.Span`` holding a "More about Box Plots" link.

    NOTE(review): ``pid`` and ``Plot`` are not imported in the visible import
    block of this module (their imports are commented out), so they must be
    made available before this function can run.
    """
    records = vals[:]
    trait_values = [record[1] for record in records]

    # Plot area and the margins around it, in pixels.
    plotHeight, plotWidth = 320, 220
    xLeftOffset, xRightOffset = 60, 40
    yTopOffset, yBottomOffset = 40, 60

    canvasWidth = plotWidth + xLeftOffset + xRightOffset
    canvasHeight = plotHeight + yTopOffset + yBottomOffset
    canvas = pid.PILCanvas(size=(canvasWidth,canvasHeight))

    # A single, unnamed series holding a copy of the values.
    series = [('', trait_values[:])]
    margins = (xLeftOffset, xRightOffset, yTopOffset, yBottomOffset)
    Plot.plotBoxPlot(canvas, series, offset=margins, XLabel= "Trait")

    filename = webqtlUtil.genRandStr("Box_")
    canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
    img = HT.Image('/image/'+filename+'.gif',border=0)

    help_href = HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13")
    plotLink = HT.Span("More about ", help_href)

    return img, plotLink
-
def plotBarGraph(identification='', RISet='', vals=None, type="name"):
    """Draw a bar graph of the trait values and return it as an image element.

    Args:
        identification: Used as the plot title; ``"unnamed trait"`` is used
            when it is falsy.
        RISet: Passed through to ``webqtlUtil.genShortStrainName``.
        vals: List of records; element ``[0]`` is the strain name, ``[1]``
            the bar value and ``[2]`` the value passed on as ``variance``.
            Must not be ``None`` despite the default.
        type: ``"rank"`` sorts the records with ``webqtlUtil.cmpOrder``
            before plotting; any other value keeps the given order.

    Returns:
        ``HT.Image`` pointing at the GIF saved under
        ``webqtlConfig.GENERATED_IMAGE_DIR``.

    Raises:
        ZeroDivisionError: If ``vals`` is empty (the bar width is computed by
            dividing by the number of records).

    NOTE(review): ``pid`` and ``Plot`` are not imported in the visible import
    block of this module (their imports are commented out), so they must be
    made available before this function can run.
    """
    # cmp_to_key keeps the project's comparison function usable on Python 3,
    # where list.sort() no longer takes a positional cmp argument.
    from functools import cmp_to_key

    this_identification = identification if identification else "unnamed trait"
    title = '%s' % this_identification

    dataXZ = vals[:]
    if type == "rank":
        dataXZ.sort(key=cmp_to_key(webqtlUtil.cmpOrder))

    tvals = []
    tnames = []
    tvars = []
    for record in dataXZ:
        tvals.append(record[1])
        tnames.append(webqtlUtil.genShortStrainName(RISet=RISet, input_strainName=record[0]))
        tvars.append(record[2])
    nnStrain = len(tnames)

    sLabel = 1

    ### determine bar width and space width
    if nnStrain < 20:
        sw = 4
    elif nnStrain < 40:
        sw = 3
    else:
        sw = 2

    ### 650 is the base plot width (before the offset) for up to 40 strains;
    ### each strain beyond 40 adds 10 more.
    defaultWidth = 650
    if nnStrain > 40:
        defaultWidth += (nnStrain-40)*10
    defaultOffset = 100
    # Rounded bar width that fills the available width, never below 10.
    bw = max(int(0.5+(defaultWidth - (nnStrain-1.0)*sw)/nnStrain), 10)

    plotWidth = (nnStrain-1)*sw + nnStrain*bw + defaultOffset
    plotHeight = 500
    c = pid.PILCanvas(size=(plotWidth,plotHeight))
    Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw)

    filename = webqtlUtil.genRandStr("Bar_")
    c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')

    return HT.Image('/image/'+filename+'.gif',border=0)
diff --git a/wqflask/basicStatistics/__init__.py b/wqflask/basicStatistics/__init__.py
deleted file mode 100644
index e69de29b..00000000
--- a/wqflask/basicStatistics/__init__.py
+++ /dev/null
diff --git a/wqflask/basicStatistics/corestats.py b/wqflask/basicStatistics/corestats.py
deleted file mode 100644
index eba84c52..00000000
--- a/wqflask/basicStatistics/corestats.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-
-# corestats.py (COREy STATS)
-# Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org)
-#
-# statistical calculation class
-# for processing numeric sequences
-#
-# license: GNU LGPL
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-
-
-import sys
-
-
class Stats(object):
    """Descriptive statistics over a sequence of numbers.

    Every item is converted to ``float`` on construction.  Methods that need
    data return ``None`` when there is not enough of it, and none of them
    reorders ``self.sequence``.
    """

    def __init__(self, sequence):
        """Store *sequence* as a list of floats.

        Raises whatever ``float()`` raises for an item it cannot convert.
        """
        # convert all items to floats for numerical processing
        self.sequence = [float(item) for item in sequence]

    def sum(self):
        """Return the sum of the values, or ``None`` if there are none."""
        if not self.sequence:
            return None
        return sum(self.sequence)

    def count(self):
        """Return the number of values."""
        return len(self.sequence)

    def min(self):
        """Return the smallest value, or ``None`` if there are none."""
        if not self.sequence:
            return None
        return min(self.sequence)

    def max(self):
        """Return the largest value, or ``None`` if there are none."""
        if not self.sequence:
            return None
        return max(self.sequence)

    def avg(self):
        """Return the arithmetic mean, or ``None`` if there are no values."""
        if not self.sequence:
            return None
        return sum(self.sequence) / len(self.sequence)

    def median(self):
        """Return the median, or ``None`` if there are no values.

        For an even number of values this is the mean of the two middle
        values.
        """
        if not self.sequence:
            return None
        # Work on a sorted copy so the stored order is left untouched.
        ordered = sorted(self.sequence)
        middle = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[middle]
        return (ordered[middle - 1] + ordered[middle]) / 2.0

    def stdev(self):
        """Return the sample standard deviation (n - 1 denominator).

        Returns ``None`` for fewer than two values, where the sample
        standard deviation is undefined.
        """
        n = len(self.sequence)
        if n < 2:
            return None
        avg = self.avg()
        sdsq = sum((value - avg) ** 2 for value in self.sequence)
        return (sdsq / (n - 1)) ** .5

    def percentile(self, percentile):
        """Return the value at the given percentile of the sorted values.

        The element index is ``int(count * percentile / 100)``.  Returns
        ``None`` if there are no values, or -- after writing a message to
        stderr -- if *percentile* is outside ``0 <= percentile < 100``.
        """
        if not self.sequence:
            return None
        if percentile >= 100:
            sys.stderr.write('ERROR: percentile must be < 100. you supplied: %s\n'% percentile)
            return None
        if percentile < 0:
            # A negative percentile would otherwise turn into index 0 or a
            # negative index and silently return an unrelated element.
            sys.stderr.write('ERROR: percentile must be >= 0. you supplied: %s\n' % percentile)
            return None
        element_idx = int(len(self.sequence) * (percentile / 100.0))
        # Sorted copy: leave the stored order untouched.
        return sorted(self.sequence)[element_idx]
-
-
-
-
-# Sample script using this class:
-# -------------------------------------------
-# #!/usr/bin/env python
-# import corestats
-#
-# sequence = [1, 2.5, 7, 13.4, 8.0]
-# stats = corestats.Stats(sequence)
-# print stats.avg()
-# print stats.percentile(90)
-# -------------------------------------------