# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20

import string
import time
import re
import math
from math import *

from htmlgen import HTMLgen2 as HT

from base import webqtlConfig

# NL, 07/27/2010. moved from webqtlForm.py
#Dict of Parents and F1 information, In the order of [F1, Mat, Pat]
ParInfo ={
'BXH':['BHF1', 'HBF1',  'C57BL/6J', 'C3H/HeJ'],
'AKXD':['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
'BXD':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
'C57BL-6JxC57BL-6NJF2':['', '', 'C57BL/6J', 'C57BL/6NJ'],
'BXD300':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
'B6BTBRF2':['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
'BHHBF2':['B6HF2','HB6F2','C57BL/6J','C3H/HeJ'],
'BHF2':['B6HF2','HB6F2','C57BL/6J','C3H/HeJ'],
'B6D2F2':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
'BDF2-1999':['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
'BDF2-2005':['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
'CTB6F2':['CTB6F2','B6CTF2','C57BL/6J','Castaneous'],
'CXB':['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
'AXBXA':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
'AXB':['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
'BXA':['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
'LXS':['LSF1', 'SLF1', 'ISS', 'ILS'],
'HXBBXH':['HSRBNF1', 'BNHSRF1', 'BN', 'HSR'],
'BayXSha':['BayXShaF1', 'ShaXBayF1', 'Bay-0','Shahdara'],
'ColXBur':['ColXBurF1', 'BurXColF1', 'Col-0','Bur-0'],
'ColXCvi':['ColXCviF1', 'CviXColF1', 'Col-0','Cvi'],
'SXM':['SMF1', 'MSF1', 'Steptoe','Morex']
}

#########################################
#      Accessory Functions
#########################################

def exportData(hddn, tdata, NP = None):
    for key in tdata.keys():
        _val, _var, _N = tdata[key].val, tdata[key].var, tdata[key].N
        if _val != None:
            hddn[key] = _val
            if _var != None:
                hddn['V'+key] = _var
            if NP and _N != None:
                hddn['N'+key] = _N

def genShortStrainName(RISet='', input_strainName=''):
    #aliasStrainDict = {'C57BL/6J':'B6','DBA/2J':'D2'}
    strainName = input_strainName
    if RISet != 'AXBXA':
        if RISet == 'BXD300':
            this_RISet = 'BXD'
        elif RISet == 'BDF2-2005':
            this_RISet = 'CASE05_'
        else:
            this_RISet = RISet
        strainName = string.replace(strainName,this_RISet,'')
        strainName = string.replace(strainName,'CASE','')
        try:
            strainName = "%02d" % int(strainName)
        except:
            pass
    else:
        strainName = string.replace(strainName,'AXB','A')
        strainName = string.replace(strainName,'BXA','B')
        try:
            strainName = strainName[0] + "%02d" % int(strainName[1:])
        except:
            pass
    return strainName

def genRandStr(prefix = "", length=8, chars=string.letters+string.digits):
    from random import choice
    _str = prefix[:]
    for i in range(length):
        _str += choice(chars)
    return _str

def StringAsFloat(str):
    'Converts string to float but catches any exception and returns None'
    try:
        return float(str)
    except:
        return None

def IntAsFloat(str):
    'Converts string to Int but catches any exception and returns None'
    try:
        return int(str)
    except:
        return None

def FloatAsFloat(flt):
    'Converts float to string but catches any exception and returns None'
    try:
        return float("%2.3f" % flt)
    except:
        return None

def RemoveZero(flt):
    'Converts string to float but catches any exception and returns None'
    try:
        if abs(flt) < 1e-6:
            return None
        else:
            return flt
    except:
        return None


def SciFloat(d):
    'Converts string to float but catches any exception and returns None'

    try:
        if abs(d) <= 1.0e-4:
            return "%1.2e" % d
        else:
            return "%1.5f" % d
    except:
        return None

###To be removed
def FloatList2String(lst):
    'Converts float list to string but catches any exception and returns None'
    tt=''
    try:
        for item in lst:
            if item == None:
                tt += 'X '
            else:
                tt += '%f ' % item
        return tt
    except:
        return ""

def ListNotNull(lst):
    '''Obsolete - Use built in function any (or all or whatever)


    Determine if the elements in a list are all null

    '''
    for item in lst:
        if item is not None:
            return 1
    return None

###To be removed
def FileDataProcess(str):
    'Remove the description text from the input file if theres any'
    i=0
    while i<len(str):
        if str[i]<'\x7f' and str[i]>'\x20':
            break
        else:
            i+=1
    str=str[i:]
    str=string.join(string.split(str,'\000'),'')
    i=string.find(str,"*****")
    if i>-1:
        return str[i+5:]
    else:
        return str

def rank(a,lst,offset=0):
    """Calculate the integer rank of a number in an array, can be used to calculate p-value"""
    n = len(lst)
    if n == 2:
        if a <lst[0]:
            return offset
        elif a > lst[1]:
            return offset + 2
        else:
            return offset +1
    elif n == 1:
        if a <lst[0]:
            return offset
        else:
            return offset +1
    elif n== 0:
        return offset
    else:
        mid = n/2
        if a < lst[mid]:
            return rank(a,lst[:mid-1],offset)
        else:
            return rank(a,lst[mid:],offset+mid)

def cmpScanResult(A,B):
    try:
        if A.LRS > B.LRS:
            return 1
        elif A.LRS == B.LRS:
            return 0
        else:
            return -1
    except:
        return 0


def cmpScanResult2(A,B):
    try:
        if A.LRS < B.LRS:
            return 1
        elif A.LRS == B.LRS:
            return 0
        else:
            return -1
    except:
        return 0

def cmpOrder(A,B):
    try:
        if A[1] < B[1]:
            return -1
        elif A[1] == B[1]:
            return 0
        else:
            return 1
    except:
        return 0

def cmpOrder2(A,B):
    try:
        if A[-1] < B[-1]:
            return -1
        elif A[-1] == B[-1]:
            return 0
        else:
            return 1
    except:
        return 0


def calRank(xVals, yVals, N): ###  Zach Sloan, February 4 2010
    """
    Returns a ranked set of X and Y values. These are used when generating
    a Spearman scatterplot. Bear in mind that this sets values equal to each
    other as the same rank.
    """
    XX = []
    YY = []
    X = [0]*len(xVals)
    Y = [0]*len(yVals)
    j = 0

    for i in range(len(xVals)):

        if xVals[i] != None and yVals[i] != None:
            XX.append((j, xVals[i]))
            YY.append((j, yVals[i]))
            j = j + 1

    NN = len(XX)

    XX.sort(cmpOrder2)
    YY.sort(cmpOrder2)

    j = 1
    rank = 0.0

    while j < NN:

        if XX[j][1] != XX[j-1][1]:
            X[XX[j-1][0]] = j
            j = j+1

        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (XX[jt][1] != XX[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                X[XX[ji][0]] = rank
            if (jt == NN-1):
                if (XX[jt][1] == XX[j-1][1]):
                    X[XX[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if X[XX[NN-1][0]] == 0:
            X[XX[NN-1][0]] = NN

    j = 1
    rank = 0.0

    while j < NN:

        if YY[j][1] != YY[j-1][1]:
            Y[YY[j-1][0]] = j
            j = j+1
        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (YY[jt][1] != YY[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                Y[YY[ji][0]] = rank
            if (jt == NN-1):
                if (YY[jt][1] == YY[j-1][1]):
                    Y[YY[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if Y[YY[NN-1][0]] == 0:
            Y[YY[NN-1][0]] = NN

    return (X,Y)

def calCorrelationRank(xVals,yVals,N):
    """
    Calculated Spearman Ranked Correlation. The algorithm works
    by setting all tied ranks to the average of those ranks (for
    example, if ranks 5-10 all have the same value, each will be set
    to rank 7.5).
    """

    XX = []
    YY = []
    j = 0

    for i in range(len(xVals)):
        if (xVals[i]!= None and yVals[i]!= None) and (xVals[i] != "None" and yVals[i] != "None"):
            XX.append((j,xVals[i]))
            YY.append((j,yVals[i]))
            j = j+1

    NN = len(XX)
    if NN <6:
        return (0.0,NN)
    XX.sort(cmpOrder2)
    YY.sort(cmpOrder2)
    X = [0]*NN
    Y = [0]*NN

    j = 1
    rank = 0.0
    t = 0.0
    sx = 0.0

    while j < NN:

        if XX[j][1] != XX[j-1][1]:
            X[XX[j-1][0]] = j
            j = j+1

        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (XX[jt][1] != XX[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                X[XX[ji][0]] = rank
            t = jt-j
            sx = sx + (t*t*t-t)
            if (jt == NN-1):
                if (XX[jt][1] == XX[j-1][1]):
                    X[XX[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if X[XX[NN-1][0]] == 0:
            X[XX[NN-1][0]] = NN

    j = 1
    rank = 0.0
    t = 0.0
    sy = 0.0

    while j < NN:

        if YY[j][1] != YY[j-1][1]:
            Y[YY[j-1][0]] = j
            j = j+1
        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (YY[jt][1] != YY[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                Y[YY[ji][0]] = rank
            t = jt - j
            sy = sy + (t*t*t-t)
            if (jt == NN-1):
                if (YY[jt][1] == YY[j-1][1]):
                    Y[YY[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if Y[YY[NN-1][0]] == 0:
            Y[YY[NN-1][0]] = NN

    D = 0.0

    for i in range(NN):
        D += (X[i]-Y[i])*(X[i]-Y[i])

    fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN))

    return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN)


def calCorrelationRankText(dbdata,userdata,N): ### dcrowell = David Crowell, July 2008
    """Calculates correlation ranks with data formatted from the text file.
    dbdata, userdata are lists of strings.  N is an int.  Returns a float.
    Used by correlationPage"""
    XX = []
    YY = []
    j = 0
    for i in range(N):
        if (dbdata[i]!= None and userdata[i]!=None) and (dbdata[i]!= 'None' and userdata[i]!='None'):
            XX.append((j,float(dbdata[i])))
            YY.append((j,float(userdata[i])))
            j += 1
    NN = len(XX)
    if NN <6:
        return (0.0,NN)
    XX.sort(cmpOrder2)
    YY.sort(cmpOrder2)
    X = [0]*NN
    Y = [0]*NN

    j = 1
    rank = 0.0
    t = 0.0
    sx = 0.0

    while j < NN:

        if XX[j][1] != XX[j-1][1]:
            X[XX[j-1][0]] = j
            j = j+1

        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (XX[jt][1] != XX[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                X[XX[ji][0]] = rank
            t = jt-j
            sx = sx + (t*t*t-t)
            if (jt == NN-1):
                if (XX[jt][1] == XX[j-1][1]):
                    X[XX[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if X[XX[NN-1][0]] == 0:
            X[XX[NN-1][0]] = NN

    j = 1
    rank = 0.0
    t = 0.0
    sy = 0.0

    while j < NN:

        if YY[j][1] != YY[j-1][1]:
            Y[YY[j-1][0]] = j
            j = j+1
        else:
            jt = j+1
            ji = j
            for jt in range(j+1, NN):
                if (YY[jt][1] != YY[j-1][1]):
                    break
            rank = 0.5*(j+jt)
            for ji in range(j-1, jt):
                Y[YY[ji][0]] = rank
            t = jt - j
            sy = sy + (t*t*t-t)
            if (jt == NN-1):
                if (YY[jt][1] == YY[j-1][1]):
                    Y[YY[NN-1][0]] = rank
            j = jt+1

    if j == NN:
        if Y[YY[NN-1][0]] == 0:
            Y[YY[NN-1][0]] = NN

    D = 0.0

    for i in range(NN):
        D += (X[i]-Y[i])*(X[i]-Y[i])

    fac = (1.0 -sx/(NN*NN*NN-NN))*(1.0-sy/(NN*NN*NN-NN))

    return ((1-(6.0/(NN*NN*NN-NN))*(D+(sx+sy)/12.0))/math.sqrt(fac),NN)


def calCorrelation(dbdata,userdata,N):
    X = []
    Y = []
    for i in range(N):
        if dbdata[i]!= None and userdata[i]!= None:
            X.append(dbdata[i])
            Y.append(userdata[i])
    NN = len(X)
    if NN <6:
        return (0.0,NN)
    sx = reduce(lambda x,y:x+y,X,0.0)
    sy = reduce(lambda x,y:x+y,Y,0.0)
    meanx = sx/NN
    meany = sy/NN
    xyd = 0.0
    sxd = 0.0
    syd = 0.0
    for i in range(NN):
        xyd += (X[i] - meanx)*(Y[i]-meany)
        sxd += (X[i] - meanx)*(X[i] - meanx)
        syd += (Y[i] - meany)*(Y[i] - meany)
    try:
        corr = xyd/(sqrt(sxd)*sqrt(syd))
    except:
        corr = 0
    return (corr,NN)

def calCorrelationText(dbdata,userdata,N): ### dcrowell July 2008
    """Calculates correlation coefficients with values formatted from text files. dbdata, userdata are lists of strings.  N is an int.  Returns a float
    Used by correlationPage"""
    X = []
    Y = []
    for i in range(N):
        #if (dbdata[i]!= None and userdata[i]!= None) and (dbdata[i]!= 'None' and userdata[i]!= 'None'):
        #               X.append(float(dbdata[i]))
        #               Y.append(float(userdata[i]))
        if dbdata[i] == None or dbdata[i] == 'None' or userdata[i] == None or userdata[i] == 'None':
            continue
        else:
            X.append(float(dbdata[i]))
            Y.append(float(userdata[i]))
    NN = len(X)
    if NN <6:
        return (0.0,NN)
    sx = sum(X)
    sy = sum(Y)
    meanx = sx/float(NN)
    meany = sy/float(NN)
    xyd = 0.0
    sxd = 0.0
    syd = 0.0
    for i in range(NN):
        x1 = X[i]-meanx
        y1 = Y[i]-meany
        xyd += x1*y1
        sxd += x1**2
        syd += y1**2
    try:
        corr = xyd/(sqrt(sxd)*sqrt(syd))
    except:
        corr = 0
    return (corr,NN)


def readLineCSV(line): ### dcrowell July 2008
    """Parses a CSV string of text and returns a list containing each element as a string.
    Used by correlationPage"""
    returnList = line.split('","')
    returnList[-1]=returnList[-1][:-2]
    returnList[0]=returnList[0][1:]
    return returnList


def cmpCorr(A,B):
    try:
        if abs(A[1]) < abs(B[1]):
            return 1
        elif abs(A[1]) == abs(B[1]):
            return 0
        else:
            return -1
    except:
        return 0

def cmpLitCorr(A,B):
    try:
        if abs(A[3]) < abs(B[3]): return 1
        elif abs(A[3]) == abs(B[3]):
            if abs(A[1]) < abs(B[1]): return 1
            elif abs(A[1]) == abs(B[1]): return 0
            else: return -1
        else: return -1
    except:
        return 0

def cmpPValue(A,B):
    try:
        if A.corrPValue < B.corrPValue:
            return -1
        elif A.corrPValue == B.corrPValue:
            if abs(A.corr) > abs(B.corr):
                return -1
            elif abs(A.corr) < abs(B.corr):
                return 1
            else:
                return 0
        else:
            return 1
    except:
        return 0

def cmpEigenValue(A,B):
    try:
        if A[0] > B[0]:
            return -1
        elif A[0] == B[0]:
            return 0
        else:
            return 1
    except:
        return 0


def cmpLRSFull(A,B):
    try:
        if A[0] < B[0]:
            return -1
        elif A[0] == B[0]:
            return 0
        else:
            return 1
    except:
        return 0

def cmpLRSInteract(A,B):
    try:
        if A[1] < B[1]:
            return -1
        elif A[1] == B[1]:
            return 0
        else:
            return 1
    except:
        return 0


def cmpPos(A,B):
    try:
        try:
            AChr = int(A.chr)
        except:
            AChr = 20
        try:
            BChr = int(B.chr)
        except:
            BChr = 20
        if AChr > BChr:
            return 1
        elif AChr == BChr:
            if A.mb > B.mb:
                return 1
            if A.mb == B.mb:
                return 0
            else:
                return -1
        else:
            return -1
    except:
        return 0

def cmpGenoPos(A,B):
    try:
        A1 = A.chr
        B1 = B.chr
        try:
            A1 = int(A1)
        except:
            A1 = 25
        try:
            B1 = int(B1)
        except:
            B1 = 25
        if A1 > B1:
            return 1
        elif A1 == B1:
            if A.mb > B.mb:
                return 1
            if A.mb == B.mb:
                return 0
            else:
                return -1
        else:
            return -1
    except:
        return 0

def hasAccessToConfidentialPhenotypeTrait(privilege, userName, authorized_users):
    access_to_confidential_phenotype_trait = 0
    if webqtlConfig.USERDICT[privilege] > webqtlConfig.USERDICT['user']:
        access_to_confidential_phenotype_trait = 1
    else:
        AuthorizedUsersList=map(string.strip, string.split(authorized_users, ','))
        if AuthorizedUsersList.__contains__(userName):
            access_to_confidential_phenotype_trait = 1
    return access_to_confidential_phenotype_trait


class VisualizeException(Exception):
    def __init__(self, message):
        self.message = message
    def __str__(self):
        return self.message

# safeConvert : (string -> A) -> A -> A
# to convert a string to type A, using the supplied default value
# if the given conversion function doesn't work
def safeConvert(f, value, default):
    try:
        return f(value)
    except:
        return default

# safeFloat : string -> float -> float
# to convert a string to a float safely
def safeFloat(value, default):
    return safeConvert(float, value, default)

# safeInt: string -> int -> int
# to convert a string to an int safely
def safeInt(value, default):
    return safeConvert(int, value, default)

# safeString : string -> (arrayof string) -> string -> string
# if a string is not in a list of strings to pick a default value
# for that string
def safeString(value, validChoices, default):
    if value in validChoices:
        return value
    else:
        return default

# yesNoToInt: string -> int
# map "yes" -> 1 and "no" -> 0
def yesNoToInt(value):
    if value == "yes":
        return 1
    elif value == "no":
        return 0
    else:
        return None

# IntToYesNo: int -> string
# map 1 -> "yes" and 0 -> "no"
def intToYesNo(value):
    if value == 1:
        return "yes"
    elif value == 0:
        return "no"
    else:
        return None

def formatField(name):
    name = name.replace("_", " ")
    name = name.title()
    #name = name.replace("Mb Mm6", "Mb");
    return name.replace("Id", "ID")

def natsort_key(string):
    r = []
    for c in string:
        try:
            c = int(c)
            try: r[-1] = r[-1] * 10 + c
            except: r.append(c)
        except:
            r.append(c)
    return r