aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/computations')
-rw-r--r-- gn3/computations/correlations.py 3
-rw-r--r-- gn3/computations/correlations2.py 37
-rw-r--r-- gn3/computations/slink.py 97
3 files changed, 86 insertions, 51 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index bc738a7..56f483c 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,5 @@
"""module contains code for correlations"""
+import math
import multiprocessing
from typing import List
@@ -90,7 +91,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
target_values=sanitized_target_vals,
corr_method=corr_method)
- if corr_coefficient is not None:
+ if corr_coefficient is not None and not math.isnan(corr_coefficient):
return (trait_name, corr_coefficient, p_value, num_overlap)
return None
diff --git a/gn3/computations/correlations2.py b/gn3/computations/correlations2.py
index 6c456db..93db3fa 100644
--- a/gn3/computations/correlations2.py
+++ b/gn3/computations/correlations2.py
@@ -1,15 +1,25 @@
+"""
+DESCRIPTION:
+ TODO: Add a description for the module
+
+FUNCTIONS:
+compute_correlation:
+ TODO: Describe what the function does..."""
+
from math import sqrt
from functools import reduce
## From GN1: mostly for clustering and heatmap generation
-def items_with_values(dbdata, userdata):
+def __items_with_values(dbdata, userdata):
"""Retains only corresponding items in the data items that are not `None` values.
-This should probably be renamed to something sensible"""
+ This should probably be renamed to something sensible"""
def both_not_none(item1, item2):
+ """Check that both items are not the value `None`."""
if (item1 is not None) and (item2 is not None):
return (item1, item2)
return None
def split_lists(accumulator, item):
+ """Separate the 'x' and 'y' items."""
return [accumulator[0] + [item[0]], accumulator[1] + [item[1]]]
return reduce(
split_lists,
@@ -17,19 +27,24 @@ This should probably be renamed to something sensible"""
[[], []])
def compute_correlation(dbdata, userdata):
- x, y = items_with_values(dbdata, userdata)
- if len(x) < 6:
- return (0.0, len(x))
- meanx = sum(x)/len(x)
- meany = sum(y)/len(y)
+ """Compute the Pearson correlation coefficient of the paired non-`None` items.
+
+ This is extracted from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/webqtlUtil.py#L622-L647
+ """
+ x_items, y_items = __items_with_values(dbdata, userdata)
+ if len(x_items) < 6:
+ return (0.0, len(x_items))
+ meanx = sum(x_items)/len(x_items)
+ meany = sum(y_items)/len(y_items)
def cal_corr_vals(acc, item):
xitem, yitem = item
return [
acc[0] + ((xitem - meanx) * (yitem - meany)),
acc[1] + ((xitem - meanx) * (xitem - meanx)),
acc[2] + ((yitem - meany) * (yitem - meany))]
- xyd, sxd, syd = reduce(cal_corr_vals, zip(x, y), [0.0, 0.0, 0.0])
+ xyd, sxd, syd = reduce(cal_corr_vals, zip(x_items, y_items), [0.0, 0.0, 0.0])
try:
- return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x))
- except ZeroDivisionError as zde:
- return(0, len(x))
+ return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x_items))
+ except ZeroDivisionError:
+ return(0, len(x_items))
diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
index 8d51f29..23d3d88 100644
--- a/gn3/computations/slink.py
+++ b/gn3/computations/slink.py
@@ -7,13 +7,18 @@ slink:
TODO: Describe what the function does...
"""
import logging
-from functools import partial
+from typing import List, Tuple, Union, Sequence
+
+NumType = Union[int, float]
+SeqOfNums = Sequence[NumType]
class LengthError(BaseException):
- pass
+ """Raised whenever child lists/tuples are not the same length as the parent
+ list or tuple."""
class MirrorError(BaseException):
- pass
+ """Raised if the distance from child A to child B is not the same as the
+ distance from child B to child A."""
def __is_list_or_tuple(item):
return type(item) in [list, tuple]
@@ -50,19 +55,20 @@ def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists):
def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists):
"""Check that the distance from A to B, is the same as the distance from B to A.
If the two distances are different, throw an exception."""
- for i in range(len(lists)):
- for j in range(len(lists)):
- if lists[i][j] != lists[j][i]:
- raise MirrorError(
- ("Distance from one child({}) to the other ({}) "
- "should be the same in both directions.").format(
- lists[i][j], lists[j][i]))
+ inner_coords = range(len(lists))
+ coords = ((i, j) for i in inner_coords for j in inner_coords)
+ def __is_same_reversed(coord):
+ return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]]
+ if not all(map(__is_same_reversed, coords)):
+ raise MirrorError((
+ "Distance from one child to the other should be the same in both "
+ "directions."))
def __raise_valueerror_on_negative_distances(lists):
"""Check that distances between 'somethings' are all positive, otherwise,
raise an exception."""
def zero_or_positive(val):
- return val >= 0;
+ return val >= 0
# flatten lists
flattened = __flatten_list_of_lists(lists)
if not all(map(zero_or_positive, flattened)):
@@ -71,12 +77,16 @@ raise an exception."""
def __flatten_list_of_lists(parent):
return [item for child in parent for item in child]
-def nearest(lists, i, j):
+# i and j are Union[SeqOfNums, NumType], but that leads to errors where the
+# values of i or j are indexed, since the NumType type is not indexable.
+# I don't know how to type this so that it does not fail on running `mypy .`
+def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType:
"""
Computes shortest distance between member(s) in `i` and member(s) in `j`.
Description:
- This is 'copied' over from genenetwork1, from https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.
+ This is 'copied' over from genenetwork1, from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.
This description should be updated to better describe what 'member' means in
the context where the function is used.
@@ -108,19 +118,25 @@ def nearest(lists, i, j):
__raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists)
__raise_valueerror_on_negative_distances(lists)
#### END: Guard Functions ####
- if type(i) == int and type(j) == int: # From member i to member j
+ if isinstance(i, int) and isinstance(j, int): # From member i to member j
return lists[i][j]
- elif type(i) == int and __is_list_or_tuple(j):
+
+ if isinstance(i, int) and __is_list_or_tuple(j):
return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1]))
- elif type(j) == int and __is_list_or_tuple(i):
+ if isinstance(j, int) and __is_list_or_tuple(i):
return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1]))
- elif __is_list_or_tuple(i) and __is_list_or_tuple(j):
+
+ if __is_list_or_tuple(i) and __is_list_or_tuple(j):
coordinate_pairs = __flatten_list_of_lists(
[[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]])
return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs))
- else:
- raise ValueError("member values (i or j) should be lists/tuples of integers or integers")
+ raise ValueError("member values (i or j) should be lists/tuples of integers or integers")
+
+# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not
+# understand down the line
+# Might have to re-implement the function especially since the errors are thrown
+# where `listindexcopy` is involved
def slink(lists):
"""
DESCRIPTION:
@@ -144,36 +160,39 @@ def slink(lists):
"""
try:
size = len(lists)
- listindex = range(size)
listindexcopy = list(range(size))
- listscopy = [[item for item in child] for child in lists]
- initSize = size
+ listscopy = [child[:] for child in lists]
+ init_size = size
candidate = []
- while initSize >2:
+ while init_size > 2:
mindist = 1e10
- for i in range(initSize):
- for j in range(i+1,initSize):
+ for i in range(init_size):
+ for j in range(i+1, init_size):
if listscopy[i][j] < mindist:
- mindist = listscopy[i][j]
- candidate=[[i,j]]
+ mindist = listscopy[i][j]
+ candidate = [[i, j]]
elif listscopy[i][j] == mindist:
- mindist = listscopy[i][j]
- candidate.append([i,j])
+ mindist = listscopy[i][j]
+ candidate.append([i, j])
else:
pass
- newmem = (listindexcopy[candidate[0][0]],listindexcopy[candidate[0][1]],mindist)
+ newmem = (
+ listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]],
+ mindist)
listindexcopy.pop(candidate[0][1])
listindexcopy[candidate[0][0]] = newmem
- initSize -= 1
- for i in range(initSize):
- for j in range(i+1,initSize):
- listscopy[i][j] = nearest(lists,listindexcopy[i],listindexcopy[j])
+ init_size -= 1
+ for i in range(init_size):
+ for j in range(i+1, init_size):
+ listscopy[i][j] = nearest(
+ lists, listindexcopy[i], listindexcopy[j])
listscopy[j][i] = listscopy[i][j]
- listindexcopy.append(nearest(lists,listindexcopy[0],listindexcopy[1]))
+ listindexcopy.append(
+ nearest(lists, listindexcopy[0], listindexcopy[1]))
return listindexcopy
- except Exception as e:
- # TODO: Look into making the logging log output to the system's
- # configured logger(s)
- logging.warning("Exception: {}, {}".format(type(e), e))
+ except (LengthError, MirrorError, TypeError, IndexError) as exc:
+ # Look into sending this log output to the system's
+ # configured logger(s)
+ logging.warning("Exception: %s, %s", type(exc), exc)
return []