aboutsummaryrefslogtreecommitdiff
path: root/gn3/correlation/correlation_functions.py
blob: be08c964251b520ee0cd6fb96c9395b663d4e177 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by NL 2011/03/23


"""

import rpy2.robjects
from gn3.base.mrna_assay_tissue_data import MrnaAssayTissueData


#####################################################################################
# Input: primaryValue(list): one list of expression values of one probeSet,
#        targetValue(list): one list of expression values of one probeSet,
#        method(string): the correlation method ('pearson' or 'spearman')
# Output: corr_result(list): first item is the correlation value, second item is
#         the tissue number, third item is the p-value
# Function: get the correlation value, tissue quantity and p-value by using R.
# Note: This function is a special case since both primaryValue and targetValue are
# from the same dataset, so the two parameters have the same length; they are pairs.
# Also, in the data table TissueProbeSetData, all tissue values are loaded based on
# the same tissue order.
#####################################################################################

def cal_zero_order_corr_for_tiss(primaryValue=None, targetValue=None, method='pearson'):
    """Compute the zero-order tissue correlation of two value lists using R.

    Both inputs are converted to R float vectors and passed to R's
    ``cor.test`` through rpy2.

    Args:
        primaryValue: expression values of one probeSet. ``None`` (the
            default) is treated as an empty list.
        targetValue: expression values of one probeSet, paired
            element-by-element with ``primaryValue``. ``None`` (the default)
            is treated as an empty list.
        method: ``'spearman'`` asks ``cor.test`` for a Spearman correlation;
            any other value leaves ``cor.test`` on its default method.

    Returns:
        A three-item list: the correlation value, the number of primary
        values (the tissue number), and the p-value.
    """
    # pylint: disable = E, W, R, C

    #nb disabled pylint until tests are written for this function

    # ``None`` stands in for the former mutable ``[]`` defaults, so no list
    # object is shared between calls.
    primary_values = [] if primaryValue is None else list(primaryValue)
    target_values = [] if targetValue is None else list(targetValue)

    # Build the R vectors straight from the values instead of pre-filling
    # them with placeholder indices and overwriting every element.
    r_primary = rpy2.robjects.FloatVector(primary_values)
    r_target = rpy2.robjects.FloatVector(target_values)

    cor_test = rpy2.robjects.r['cor.test']
    if method == 'spearman':
        r_result = cor_test(r_primary, r_target, method='spearman')
    else:
        r_result = cor_test(r_primary, r_target)

    # Element 3 of the cor.test result is reported as the correlation value
    # and element 2 as the p-value; the sample count goes in between.
    return [r_result[3][0], len(primary_values), r_result[2][0]]


####################################################
####################################################
# input: cursor, symbolList (list), dataIdDict (dict): key is symbol
# output: SymbolValuePairDict (dict): one dictionary of symbol and value pairs;
#        key is symbol, value is one list of expression values of one probeSet.
# function: wrapper function for the getSymbolValuePairDict function:
#          build the gene symbol list if necessary, cut it into small lists if
#          necessary, then call getSymbolValuePairDict and merge the results.
# NOTE(review): the function below takes only symbol_list; cursor and dataIdDict
#          are not among its parameters -- confirm and update this header.
###################################################
#####################################################

def get_trait_symbol_and_tissue_values(symbol_list=None):
    """Return the symbol/values pairs of the tissue data for ``symbol_list``.

    A ``MrnaAssayTissueData`` is built from ``symbol_list``. When it holds at
    least one gene symbol, the result of its ``get_symbol_values_pairs()``
    is returned; otherwise the function returns ``None``.
    """
    assay_data = MrnaAssayTissueData(gene_symbols=symbol_list)

    # Guard clause: without any gene symbol there is nothing to fetch.
    if len(assay_data.gene_symbols) < 1:
        return None

    return assay_data.get_symbol_values_pairs()