# path: root/gn3/computations/correlations2.py
# blob: d0222ae76336b300d50ca567ddb4bdf94be48827 (plain)
"""
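DESCRIPTION:
    Pearson correlation helpers ported from GeneNetwork1 (GN1), where they
    were mostly used for clustering and heatmap generation.

FUNCTIONS:
compute_correlation:
    Pairs up two data series, drops every pair in which either value is
    `None`, and returns the Pearson correlation coefficient of the remaining
    pairs together with the number of pairs that were used."""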

import math

from scipy import stats
## From GN1: mostly for clustering and heatmap generation

def __items_with_values(dbdata, userdata):
    """Pair up `dbdata` and `userdata` and drop the pairs with a missing value.

    The two sequences are walked in lockstep (stopping at the end of the
    shorter one) and a pair is retained only when neither value is `None`.

    Returns a pair `(db_values, user_values)` of equal-length sequences: two
    tuples when at least one pair is retained, two empty lists otherwise.

    This should probably be renamed to something sensible"""
    filtered = [
        (db_value, user_value)
        for db_value, user_value in zip(dbdata, userdata)
        if db_value is not None and user_value is not None]
    # `zip(*filtered)` transposes the retained pairs back into two sequences.
    # Transposing an empty list yields nothing to unpack, hence the explicit
    # fallback.
    return tuple(zip(*filtered)) if filtered else ([], [])

def compute_correlation(dbdata, userdata):
    """Compute the Pearson correlation coefficient.

    Only the positions at which both `dbdata` and `userdata` hold a value
    other than `None` take part in the computation.

    Returns a tuple `(correlation, count)` where `count` is the number of
    value pairs used.  `correlation` is 0 when fewer than six pairs remain, or
    when the coefficient is undefined (`stats.pearsonr` yields NaN, e.g.
    because one of the two series is constant).

    This is extracted from
    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/webqtlUtil.py#L622-L647
    """
    x_items, y_items = __items_with_values(dbdata, userdata)
    count = len(x_items)
    if count < 6:
        # Too few paired values for a meaningful coefficient.
        return (0, count)
    correlation = stats.pearsonr(x_items, y_items)[0]
    # NaN compares unequal to everything and breaks sorting of results, so
    # report an undefined coefficient the same way as the "too few values"
    # case instead of leaking it to the callers.
    if math.isnan(correlation):
        correlation = 0
    return (correlation, count)