From cbb8029746400a299b9c65c5cd7be9a38cade189 Mon Sep 17 00:00:00 2001 From: zsloan Date: Mon, 26 Jul 2021 01:08:19 -0500 Subject: Check if corr_coefficient is NaN, since apparently it's stored as NaN instead of None when it can't be calculated (which was messing up sorting); it may also be okay to remove the None check, but leaving it for now (#28) --- gn3/computations/correlations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index bc738a7..56f483c 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -1,4 +1,5 @@ """module contains code for correlations""" +import math import multiprocessing from typing import List @@ -90,7 +91,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals, target_values=sanitized_target_vals, corr_method=corr_method) - if corr_coefficient is not None: + if corr_coefficient is not None and not math.isnan(corr_coefficient): return (trait_name, corr_coefficient, p_value, num_overlap) return None -- cgit v1.2.3 From 4c8c13814a22fe6b40081ecfa1f957bc5bf99930 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Mon, 26 Jul 2021 15:39:42 +0300 Subject: Fix issues caught by pylint Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Fix a myriad of issues caught by pylint to ensure the code passes all tests. 
--- gn3/computations/correlations2.py | 37 ++- gn3/computations/slink.py | 84 +++--- tests/unit/computations/test_correlation.py | 32 +-- tests/unit/computations/test_slink.py | 401 +++++++++++++++++----------- 4 files changed, 335 insertions(+), 219 deletions(-) diff --git a/gn3/computations/correlations2.py b/gn3/computations/correlations2.py index 6c456db..93db3fa 100644 --- a/gn3/computations/correlations2.py +++ b/gn3/computations/correlations2.py @@ -1,15 +1,25 @@ +""" +DESCRIPTION: + TODO: Add a description for the module + +FUNCTIONS: +compute_correlation: + TODO: Describe what the function does...""" + from math import sqrt from functools import reduce ## From GN1: mostly for clustering and heatmap generation -def items_with_values(dbdata, userdata): +def __items_with_values(dbdata, userdata): """Retains only corresponding items in the data items that are not `None` values. -This should probably be renamed to something sensible""" + This should probably be renamed to something sensible""" def both_not_none(item1, item2): + """Check that both items are not the value `None`.""" if (item1 is not None) and (item2 is not None): return (item1, item2) return None def split_lists(accumulator, item): + """Separate the 'x' and 'y' items.""" return [accumulator[0] + [item[0]], accumulator[1] + [item[1]]] return reduce( split_lists, @@ -17,19 +27,24 @@ This should probably be renamed to something sensible""" [[], []]) def compute_correlation(dbdata, userdata): - x, y = items_with_values(dbdata, userdata) - if len(x) < 6: - return (0.0, len(x)) - meanx = sum(x)/len(x) - meany = sum(y)/len(y) + """Compute some form of correlation. 
+ + This is extracted from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/webqtlUtil.py#L622-L647 + """ + x_items, y_items = __items_with_values(dbdata, userdata) + if len(x_items) < 6: + return (0.0, len(x_items)) + meanx = sum(x_items)/len(x_items) + meany = sum(y_items)/len(y_items) def cal_corr_vals(acc, item): xitem, yitem = item return [ acc[0] + ((xitem - meanx) * (yitem - meany)), acc[1] + ((xitem - meanx) * (xitem - meanx)), acc[2] + ((yitem - meany) * (yitem - meany))] - xyd, sxd, syd = reduce(cal_corr_vals, zip(x, y), [0.0, 0.0, 0.0]) + xyd, sxd, syd = reduce(cal_corr_vals, zip(x_items, y_items), [0.0, 0.0, 0.0]) try: - return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x)) - except ZeroDivisionError as zde: - return(0, len(x)) + return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x_items)) + except ZeroDivisionError: + return(0, len(x_items)) diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py index 8d51f29..4aac6b3 100644 --- a/gn3/computations/slink.py +++ b/gn3/computations/slink.py @@ -7,13 +7,14 @@ slink: TODO: Describe what the function does... """ import logging -from functools import partial class LengthError(BaseException): - pass + """Raised whenever child lists/tuples are not the same length as the parent + list of tuple.""" class MirrorError(BaseException): - pass + """Raised if the distance from child A to child B is not the same as the + distance from child B to child A.""" def __is_list_or_tuple(item): return type(item) in [list, tuple] @@ -50,19 +51,20 @@ def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists): def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists): """Check that the distance from A to B, is the same as the distance from B to A. 
If the two distances are different, throw an exception.""" - for i in range(len(lists)): - for j in range(len(lists)): - if lists[i][j] != lists[j][i]: - raise MirrorError( - ("Distance from one child({}) to the other ({}) " - "should be the same in both directions.").format( - lists[i][j], lists[j][i])) + inner_coords = range(len(lists)) + coords = ((i, j) for i in inner_coords for j in inner_coords) + def __is_same_reversed(coord): + return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]] + if not all(map(__is_same_reversed, coords)): + raise MirrorError(( + "Distance from one child to the other should be the same in both " + "directions.")) def __raise_valueerror_on_negative_distances(lists): """Check that distances between 'somethings' are all positive, otherwise, raise an exception.""" def zero_or_positive(val): - return val >= 0; + return val >= 0 # flatten lists flattened = __flatten_list_of_lists(lists) if not all(map(zero_or_positive, flattened)): @@ -76,7 +78,8 @@ def nearest(lists, i, j): Computes shortest distance between member(s) in `i` and member(s) in `j`. Description: - This is 'copied' over from genenetwork1, from https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64. + This is 'copied' over from genenetwork1, from + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64. This description should be updated to better describe what 'member' means in the context where the function is used. 
@@ -108,18 +111,20 @@ def nearest(lists, i, j): __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists) __raise_valueerror_on_negative_distances(lists) #### END: Guard Functions #### - if type(i) == int and type(j) == int: # From member i to member j + if isinstance(i, int) and isinstance(j, int): # From member i to member j return lists[i][j] - elif type(i) == int and __is_list_or_tuple(j): + + if isinstance(i, int) and __is_list_or_tuple(j): return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1])) - elif type(j) == int and __is_list_or_tuple(i): + if isinstance(j, int) and __is_list_or_tuple(i): return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1])) - elif __is_list_or_tuple(i) and __is_list_or_tuple(j): + + if __is_list_or_tuple(i) and __is_list_or_tuple(j): coordinate_pairs = __flatten_list_of_lists( [[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]]) return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs)) - else: - raise ValueError("member values (i or j) should be lists/tuples of integers or integers") + + raise ValueError("member values (i or j) should be lists/tuples of integers or integers") def slink(lists): """ @@ -144,36 +149,39 @@ def slink(lists): """ try: size = len(lists) - listindex = range(size) listindexcopy = list(range(size)) - listscopy = [[item for item in child] for child in lists] - initSize = size + listscopy = [child[:] for child in lists] + init_size = size candidate = [] - while initSize >2: + while init_size > 2: mindist = 1e10 - for i in range(initSize): - for j in range(i+1,initSize): + for i in range(init_size): + for j in range(i+1, init_size): if listscopy[i][j] < mindist: - mindist = listscopy[i][j] - candidate=[[i,j]] + mindist = listscopy[i][j] + candidate = [[i, j]] elif listscopy[i][j] == mindist: - mindist = listscopy[i][j] - candidate.append([i,j]) + mindist = listscopy[i][j] + candidate.append([i, j]) else: pass - newmem = 
(listindexcopy[candidate[0][0]],listindexcopy[candidate[0][1]],mindist) + newmem = ( + listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]], + mindist) listindexcopy.pop(candidate[0][1]) listindexcopy[candidate[0][0]] = newmem - initSize -= 1 - for i in range(initSize): - for j in range(i+1,initSize): - listscopy[i][j] = nearest(lists,listindexcopy[i],listindexcopy[j]) + init_size -= 1 + for i in range(init_size): + for j in range(i+1, init_size): + listscopy[i][j] = nearest( + lists, listindexcopy[i], listindexcopy[j]) listscopy[j][i] = listscopy[i][j] - listindexcopy.append(nearest(lists,listindexcopy[0],listindexcopy[1])) + listindexcopy.append( + nearest(lists, listindexcopy[0], listindexcopy[1])) return listindexcopy - except Exception as e: - # TODO: Look into making the logging log output to the system's - # configured logger(s) - logging.warning("Exception: {}, {}".format(type(e), e)) + except (LengthError, MirrorError, TypeError, IndexError) as exc: + # Look into making the logging log output to the system's + # configured logger(s) + logging.warning("Exception: %s, %s", type(exc), exc) return [] diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index 6153c8a..9450094 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -467,26 +467,28 @@ class TestCorrelation(TestCase): self.assertEqual(results, [expected_results]) def test_compute_correlation(self): - for dbdata,userdata,expected in [ - [[None,None,None,None,None,None,None,None,None,None], - [None,None,None,None,None,None,None,None,None,None], + """Test that the new correlation function works the same as the original + from genenetwork1.""" + for dbdata, userdata, expected in [ + [[None, None, None, None, None, None, None, None, None, None], + [None, None, None, None, None, None, None, None, None, None], (0.0, 0)], - [[None,None,None,None,None,None,None,None,None,0], - 
[None,None,None,None,None,None,None,None,None,None], + [[None, None, None, None, None, None, None, None, None, 0], + [None, None, None, None, None, None, None, None, None, None], (0.0, 0)], - [[None,None,None,None,None,None,None,None,None,0], - [None,None,None,None,None,None,None,None,None,0], + [[None, None, None, None, None, None, None, None, None, 0], + [None, None, None, None, None, None, None, None, None, 0], (0.0, 1)], - [[0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0], + [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], (0, 10)], - [[9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87], - [9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87,9.87], + [[9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87], + [9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87, 9.87], (0.9999999999999998, 10)], - [[9.3,2.2,5.4,7.2,6.4,7.6,3.8,1.8,8.4,0.2], - [0.6,3.97,5.82,8.21,1.65,4.55,6.72,9.5,7.33,2.34], + [[9.3, 2.2, 5.4, 7.2, 6.4, 7.6, 3.8, 1.8, 8.4, 0.2], + [0.6, 3.97, 5.82, 8.21, 1.65, 4.55, 6.72, 9.5, 7.33, 2.34], (-0.12720361919462056, 10)], - [[0,1,2,3,4,5,6,7,8,9], - [None,None,None,None,2,None,None,3,None,None], + [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [None, None, None, None, 2, None, None, 3, None, None], (0.0, 2)]]: with self.subTest(dbdata=dbdata, userdata=userdata): - self.assertEqual(compute_correlation(dbdata,userdata), expected) + self.assertEqual(compute_correlation(dbdata, userdata), expected) diff --git a/tests/unit/computations/test_slink.py b/tests/unit/computations/test_slink.py index 5627767..995393b 100644 --- a/tests/unit/computations/test_slink.py +++ b/tests/unit/computations/test_slink.py @@ -1,5 +1,4 @@ """Module contains tests for slink""" -import unittest from unittest import TestCase from gn3.computations.slink import slink @@ -11,210 +10,302 @@ class TestSlink(TestCase): """Class for testing slink functions""" def test_nearest_expects_list_of_lists(self): + """Test that function only accepts a list of lists.""" # This might be better 
handled with type-hints and mypy for item in [9, "some string", 5.432, - [1,2,3], ["test", 7.4]]: + [1, 2, 3], ["test", 7.4]]: with self.subTest(item=item): with self.assertRaises(ValueError, msg="Expected list or tuple"): nearest(item, 1, 1) def test_nearest_does_not_allow_empty_lists(self): + """Test that function does not accept an empty list, or any of the child + lists to be empty.""" for lst in [[], - [[],[]], - [[],[],[]], - [[0, 1, 2],[],[1, 2, 0]]]: + [[], []], + [[], [], []], + [[0, 1, 2], [], [1, 2, 0]]]: with self.subTest(lst=lst): with self.assertRaises(ValueError): nearest(lst, 1, 1) - def test_nearest_expects_exception_if_all_child_lists_are_not_of_equal_length_to_length_of_parent_list(self): - for lst in [[[0,1]], - [[0,1,2],[3,4,5]], - [[0,1,2,3],[4,5,6],[7,8,9,0]], - [[0,1,2,3,4],[5,6,7,8,9],[1,2,3,4,5],[2,3],[3,4,5,6,7]]]: + def test_nearest_expects_children_are_same_length_as_parent(self): + """Test that children lists are same length as parent list.""" + for lst in [[[0, 1]], + [[0, 1, 2], [3, 4, 5]], + [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9, 0]], + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [1, 2, 3, 4, 5], [2, 3], + [3, 4, 5, 6, 7]]]: with self.subTest(lst=lst): with self.assertRaises(LengthError): nearest(lst, 1, 1) - def test_nearest_expects_exception_if_distance_of_child_from_itself_is_not_zero(self): + def test_nearest_expects_member_is_zero_distance_from_itself(self): + """Test that distance of a member from itself is zero""" for lst in [[[1]], - [[1,2],[3,4]], - [1,0,0],[0,0,5],[0,3,4], - [0,0,0,0],[0,0,3,3],[0,1,2,3],[0,3,2,0]]: + [[1, 2], [3, 4]], + [1, 0, 0], [0, 0, 5], [0, 3, 4], + [0, 0, 0, 0], [0, 0, 3, 3], [0, 1, 2, 3], [0, 3, 2, 0]]: with self.subTest(lst=lst): with self.assertRaises(ValueError): nearest(lst, 1, 1) - def test_nearest_expects_exception_if_distance_from_child_a_to_child_b_is_not_distance_from_child_b_to_child_a(self): - for lst in [[[0,1],[2,0]], - [[0,1,2],[1,0,3],[9,7,0]], - [[0,1,2,3],[7,0,2,3],[2,3,0,1],[8,9,5,0]]]: + def 
test_nearest_expects_distance_atob_is_equal_to_distance_btoa(self): + """Test that the distance from member A to member B is the same as that + from member B to member A.""" + for lst in [[[0, 1], [2, 0]], + [[0, 1, 2], [1, 0, 3], [9, 7, 0]], + [[0, 1, 2, 3], [7, 0, 2, 3], [2, 3, 0, 1], [8, 9, 5, 0]]]: with self.subTest(lst=lst): with self.assertRaises(MirrorError): nearest(lst, 1, 1) def test_nearest_expects_zero_or_positive_distances(self): + """Test that all distances are either zero, or greater than zero.""" # Based on: # https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L87-L89 - for lst in [[[0,-1,2,3],[-1,0,3,4],[2,3,0,5],[3,4,5,0]], - [[0,1,-2,3],[1,0,3,4],[-2,3,0,5],[3,4,5,0]], - [[0,1,2,3],[1,0,-3,4],[2,-3,0,5],[3,4,5,0]], - [[0,1,2,-3],[1,0,3,4],[2,3,0,5],[-3,4,5,0]], - [[0,1,2,3],[1,0,3,-4],[2,3,0,5],[3,-4,5,0]], - [[0,1,2,3],[1,0,3,4],[2,3,0,-5],[3,4,-5,0]]]: + for lst in [[[0, -1, 2, 3], [-1, 0, 3, 4], [2, 3, 0, 5], [3, 4, 5, 0]], + [[0, 1, -2, 3], [1, 0, 3, 4], [-2, 3, 0, 5], [3, 4, 5, 0]], + [[0, 1, 2, 3], [1, 0, -3, 4], [2, -3, 0, 5], [3, 4, 5, 0]], + [[0, 1, 2, -3], [1, 0, 3, 4], [2, 3, 0, 5], [-3, 4, 5, 0]], + [[0, 1, 2, 3], [1, 0, 3, -4], [2, 3, 0, 5], [3, -4, 5, 0]], + [[0, 1, 2, 3], [1, 0, 3, 4], [2, 3, 0, -5], [3, 4, -5, 0]]]: with self.subTest(lst=lst): with self.assertRaises(ValueError, msg="Distances should be positive."): nearest(lst, 1, 1) def test_nearest_returns_shortest_distance_given_coordinates_to_both_group_members(self): + """Test that the shortest distance is returned.""" # This test is named wrong - at least I think it is, from the expected results # This tests distance when both `i`, and `j` are integers # We still need to add tests for when (either one/both) (is/are) not (an) integer(s) # https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L39-L40 - for lst, i, j, expected in [[[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 0,0,0], - 
[[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 0,1,9], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 0,2,3], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 0,3,6], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 0,4,11], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 1,0,9], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 1,1,0], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 1,2,7], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 1,3,5], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 1,4,10], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 2,0,3], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 2,1,7], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 2,2,0], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 2,3,9], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 2,4,2], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 3,0,6], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 3,1,5], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 3,2,9], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 3,3,0], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 3,4,8], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 4,0,11], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 4,1,10], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 4,2,2], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 4,3,8], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - 4,4,0], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 
0,0,0], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 0,1,9], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 0,2,5.5], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 0,3,6], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 0,4,11], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 1,0,9], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 1,1,0], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 1,2,7], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 1,3,5], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 1,4,10], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 2,0,5.5], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 2,1,7], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 2,2,0], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 2,3,9], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 2,4,2], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 3,0,6], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 3,1,5], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 3,2,9], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 3,3,0], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 3,4,3], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 4,0,11], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 4,1,10], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 4,2,2], - [[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 4,3,3], - 
[[[0,9,5.5,6,11],[9,0,7,5,10],[5.5,7,0,9,2],[6,5,9,0,3],[11,10,2,3,0]], - 4,4,0]]: + for lst, i, j, expected in [ + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 0, 0, 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 0, 1, 9], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 0, 2, 3], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 0, 3, 6], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 0, 4, 11], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 1, 0, 9], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 1, 1, 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 1, 2, 7], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 1, 3, 5], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 1, 4, 10], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 2, 0, 3], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 2, 1, 7], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 2, 2, 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 2, 3, 9], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 2, 4, 2], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 3, 0, 6], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 3, 1, 5], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 
0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 3, 2, 9], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 3, 3, 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 3, 4, 8], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 4, 0, 11], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 4, 1, 10], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 4, 2, 2], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 4, 3, 8], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + 4, 4, 0], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 0, 0, 0], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 0, 1, 9], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 0, 2, 5.5], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 0, 3, 6], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 0, 4, 11], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 1, 0, 9], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 1, 1, 0], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 1, 2, 7], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 1, 3, 5], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 1, 4, 10], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 
2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 2, 0, 5.5], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 2, 1, 7], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 2, 2, 0], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 2, 3, 9], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 2, 4, 2], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 3, 0, 6], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 3, 1, 5], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 3, 2, 9], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 3, 3, 0], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 3, 4, 3], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 4, 0, 11], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 4, 1, 10], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 4, 2, 2], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 4, 3, 3], + [[[0, 9, 5.5, 6, 11], [9, 0, 7, 5, 10], [5.5, 7, 0, 9, 2], + [6, 5, 9, 0, 3], [11, 10, 2, 3, 0]], + 4, 4, 0]]: with self.subTest(lst=lst): self.assertEqual(nearest(lst, i, j), expected) - def test_given_a_list_or_tuple_of_members_distances_and_a_coordinate_find_closest_member_to_member_at_coordinate(self): - for md, ml, mc, ed in [ - [[[0,9,3],[9,0,7],[3,7,0]],(0,2,3),1,7], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]],[0,1,2,3,4],3,0], - 
[[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]],[0,1,2,4],3,5], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]],[0,2,4],3,6], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]],[2,4],3,9]]: + def test_nearest_gives_shortest_distance_between_list_of_members_and_member(self): + """Test that the shortest distance is returned.""" + for members_distances, members_list, member_coordinate, expected_distance in [ + [[[0, 9, 3], [9, 0, 7], [3, 7, 0]], (0, 2, 3), 1, 7], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], [0, 1, 2, 3, 4], 3, 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], [0, 1, 2, 4], 3, 5], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], [0, 2, 4], 3, 6], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], [2, 4], 3, 9]]: with self.subTest( - members_distances=md, members_list=ml, member_coordinate=mc, - expected_distance=ed): - self.assertEqual(nearest(md, ml, mc), ed) - self.assertEqual(nearest(md, mc, ml), ed) + members_distances=members_distances, + members_list=members_list, + member_coordinate=member_coordinate, + expected_distance=expected_distance): + self.assertEqual( + nearest( + members_distances, members_list, member_coordinate), + expected_distance) + self.assertEqual( + nearest( + members_distances, member_coordinate, members_list), + expected_distance) - def test_given_2_lists_or_tuples_of_members_distances_nearest_returns_shortest_distance(self): - for md, ml, mc, ed in [ - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - [0,1,2,3,4],[0,1,2,3,4],0], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - [0,1],[3,4],6], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - [0,1],[2,3,4],3], - 
[[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8],[11,10,2,8,0]], - [0,2],[3,4],6]]: + def test_nearest_returns_shortest_distance_given_two_lists_of_members(self): + """Test that the shortest distance is returned.""" + for members_distances, members_list, member_list2, expected_distance in [ + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], 0], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + [0, 1], [3, 4], 6], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + [0, 1], [2, 3, 4], 3], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], [11, 10, 2, 8, 0]], + [0, 2], [3, 4], 6]]: with self.subTest( - members_distances=md, members_list=ml, member_coordinate=mc, - expected_distance=ed): - self.assertEqual(nearest(md, ml, mc), ed) - self.assertEqual(nearest(md, mc, ml), ed) + members_distances=members_distances, + members_list=members_list, + member_list2=member_list2, + expected_distance=expected_distance): + self.assertEqual( + nearest( + members_distances, members_list, member_list2), + expected_distance) + self.assertEqual( + nearest( + members_distances, member_list2, members_list), + expected_distance) def test_slink_wrong_data_returns_empty_list(self): + """Test that empty list is returned for wrong data.""" for data in [1, "test", [], 2.945, nearest, [0]]: with self.subTest(data=data): self.assertEqual(slink(data), []) def test_slink_with_data(self): + """Test slink with example data, and expected results for each data + sample.""" for data, expected in [ - [[[0,9],[9,0]],[0,1,9]], - [[[0,9,3],[9,0,7],[3,7,0]],[(0,2,3),1,7]], - [[[0,9,3,6],[9,0,7,5],[3,7,0,9],[6,5,9,0]],[(0,2,3),(1,3,5),6]], - [[[0,9,3,6,11],[9,0,7,5,10],[3,7,0,9,2],[6,5,9,0,8], - [11,10,2,8,0]], - [(0,(2,4,2),3),(1,3,5),6]]]: + [[[0, 9], [9, 0]], [0, 1, 9]], + [[[0, 9, 3], [9, 0, 7], [3, 7, 
0]], [(0, 2, 3), 1, 7]], + [[[0, 9, 3, 6], [9, 0, 7, 5], [3, 7, 0, 9], [6, 5, 9, 0]], + [(0, 2, 3), (1, 3, 5), 6]], + [[[0, 9, 3, 6, 11], [9, 0, 7, 5, 10], [3, 7, 0, 9, 2], + [6, 5, 9, 0, 8], + [11, 10, 2, 8, 0]], + [(0, (2, 4, 2), 3), (1, 3, 5), 6]]]: with self.subTest(data=data): self.assertEqual(slink(data), expected) -- cgit v1.2.3 From 02791f15d6b4940ae8be07fe9d4f8487d8291c78 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 09:42:13 +0300 Subject: Retrieve 'ProbeSet' trait name Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: new function (retrieve_probeset_trait_name) * tests/unit/db/test_traits.py: test(s) for new function Add a function to retrieve the name of a 'ProbeSet' trait in a manner similar to genenetwork1's retrieval of the same, as implemented here https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154 Unlike in genenetwork1, we do not mutate an object, instead, we return the values as retrieved from the database, and the caller will deal with the returned values as appropriate. 
--- gn3/db/traits.py | 18 ++++++++++++++++++ tests/unit/db/test_traits.py | 22 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests/unit/db/test_traits.py diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 4860a07..37b111e 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -90,3 +90,21 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], ", ".join(['%s'] * len(publication)))) with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) + +def retrieve_probeset_trait_name(threshold, name, connection): + """ + Retrieve the name for a Probeset trait + + This is extracted from the `webqtlDataset.retrieveName` function, + specifically the section dealing with 'ProbeSet' type traits + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + query = ( + 'SELECT Id, Name, FullName, ShortName, DataScale ' + 'FROM ProbeSetFreeze ' + 'WHERE ' + 'public > %(threshold)s ' + 'AND ' + '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + with connection.cursor() as cursor: + cursor.execute(query, {"threshold": threshold, "name": name}) + return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py new file mode 100644 index 0000000..6d2ba4d --- /dev/null +++ b/tests/unit/db/test_traits.py @@ -0,0 +1,22 @@ +"""Tests for gn3/db/traits.py""" +from unittest import mock, TestCase +from gn3.db.traits import retrieve_probeset_trait_name + +class TestTraitsDBFunctions(TestCase): + "Test cases for traits functions" + + def test_retrieve_probeset_trait_name(self): + """Test that the function is called correctly.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", "dataScale") + self.assertEqual( + retrieve_probeset_trait_name(9, "testName", db_mock), + ("testName", "testNameFull", "testNameShort", "dataScale")) + 
cursor.execute.assert_called_once_with( + "SELECT Id, Name, FullName, ShortName, DataScale " + "FROM ProbeSetFreeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", + {"threshold": 9, "name": "testName"}) -- cgit v1.2.3 From 8d7f8eec5b5d84937e453c9b02de0bd1b1727265 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 10:20:18 +0300 Subject: Make name retrieval more general Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: make function more general * tests/unit/db/test_traits.py: parametrize the tests Make the name retrieval more general for the different types of traits by changing the column specification and table as appropriate. --- gn3/db/traits.py | 26 +++++++++++++++----------- tests/unit/db/test_traits.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 37b111e..fddb8be 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,20 +91,24 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_probeset_trait_name(threshold, name, connection): +def retrieve_type_trait_name(trait_type, threshold, name, connection): """ - Retrieve the name for a Probeset trait + Retrieve the name of a trait given the trait's name - This is extracted from the `webqtlDataset.retrieveName` function, - specifically the section dealing with 'ProbeSet' type traits - https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-154""" + This is extracted from the `webqtlDataset.retrieveName` function as is + implemented at + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlDataset.py#L140-L169 + """ + columns = "Id, Name, FullName, 
ShortName{}".format( + ", DataScale" if trait_type == "ProbeSet" else "") query = ( - 'SELECT Id, Name, FullName, ShortName, DataScale ' - 'FROM ProbeSetFreeze ' - 'WHERE ' - 'public > %(threshold)s ' - 'AND ' - '(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)') + "SELECT {columns} " + "FROM {trait_type}Freeze " + "WHERE " + "public > %(threshold)s " + "AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)").format( + columns=columns, trait_type=trait_type) with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 6d2ba4d..95c5b27 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,22 +1,34 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_probeset_trait_name +from gn3.db.traits import retrieve_type_trait_name class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" def test_retrieve_probeset_trait_name(self): """Test that the function is called correctly.""" - db_mock = mock.MagicMock() - with db_mock.cursor() as cursor: - cursor.fetchone.return_value = ( - "testName", "testNameFull", "testNameShort", "dataScale") - self.assertEqual( - retrieve_probeset_trait_name(9, "testName", db_mock), - ("testName", "testNameFull", "testNameShort", "dataScale")) - cursor.execute.assert_called_once_with( - "SELECT Id, Name, FullName, ShortName, DataScale " - "FROM ProbeSetFreeze " - "WHERE public > %(threshold)s AND " - "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)", - {"threshold": 9, "name": "testName"}) + for trait_type, thresh, trait_name, columns in [ + ["ProbeSet", 9, "testName", + "Id, Name, FullName, ShortName, DataScale"], + ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], + ["Publish", 6, "publishTraitName", + "Id, Name, FullName, ShortName"], + ["Temp", 
4, "tempTraitName", "Id, Name, FullName, ShortName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = ( + "testName", "testNameFull", "testNameShort", + "dataScale") + self.assertEqual( + retrieve_type_trait_name( + trait_type, thresh, trait_name, db_mock), + ("testName", "testNameFull", "testNameShort", + "dataScale")) + cursor.execute.assert_called_once_with( + "SELECT {cols} " + "FROM {ttype}Freeze " + "WHERE public > %(threshold)s AND " + "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( + cols=columns, ttype=trait_type), + {"threshold": thresh, "name": trait_name}) -- cgit v1.2.3 From 00579657abf5f9cadda1a9a479cae63ace28820c Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 28 Jul 2021 12:32:43 +0300 Subject: Retrieve trait information Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/traits.py: add functions to retrieve traits information * tests/unit/db/test_traits.py: add tests for new function Add functions to retrieve traits information as is done in genenetwork1 https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 At this point, the data retrieval functions are probably incomplete, as there is more of the `retrieveInfo` function in GN1 that has not been considered as of this commit. 
--- gn3/db/traits.py | 133 ++++++++++++++++++++++++++++++++++++++++++- tests/unit/db/test_traits.py | 92 ++++++++++++++++++++++++++++-- 2 files changed, 218 insertions(+), 7 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index fddb8be..3c62df8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,7 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_type_trait_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name(trait_type, threshold, name, connection): """ Retrieve the name of a trait given the trait's name @@ -112,3 +112,134 @@ def retrieve_type_trait_name(trait_type, threshold, name, connection): with connection.cursor() as cursor: cursor.execute(query, {"threshold": threshold, "name": name}) return cursor.fetchone() + +PUBLISH_TRAIT_INFO_QUERY = ( + "SELECT " + "PublishXRef.Id, Publication.PubMed_ID, " + "Phenotype.Pre_publication_description, " + "Phenotype.Post_publication_description, " + "Phenotype.Original_description, " + "Phenotype.Pre_publication_abbreviation, " + "Phenotype.Post_publication_abbreviation, " + "Phenotype.Lab_code, Phenotype.Submitter, Phenotype.Owner, " + "Phenotype.Authorized_Users, CAST(Publication.Authors AS BINARY), " + "Publication.Title, Publication.Abstract, Publication.Journal, " + "Publication.Volume, Publication.Pages, Publication.Month, " + "Publication.Year, PublishXRef.Sequence, Phenotype.Units, " + "PublishXRef.comments " + "FROM " + "PublishXRef, Publication, Phenotype, PublishFreeze " + "WHERE " + "PublishXRef.Id = %(trait_name)s AND " + "Phenotype.Id = PublishXRef.PhenotypeId AND " + "Publication.Id = PublishXRef.PublicationId AND " + "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " + "PublishFreeze.Id =%(trait_dataset_id)s") + +def retrieve_publish_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Publish` 
traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" + with conn.cursor() as cursor: + cursor.execute( + PUBLISH_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_id"] + }) + return cursor.fetchone() + +PROBESET_TRAIT_INFO_QUERY = ( + "SELECT " + "ProbeSet.name, ProbeSet.symbol, ProbeSet.description, " + "ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, " + "ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, " + "ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, " + "ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, " + "ProbeSet.strand_probe, ProbeSet.strand_gene, " + "ProbeSet.probe_set_target_region, ProbeSet.proteinid, " + "ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, " + "ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, " + "ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, " + "ProbeSet.flag " + "FROM " + "ProbeSet, ProbeSetFreeze, ProbeSetXRef " + "WHERE " + "ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND " + "ProbeSetXRef.ProbeSetId = ProbeSet.Id AND " + "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " + "ProbeSet.Name = %(trait_name)s") + +def retrieve_probeset_trait_info(trait_data_source, conn): + """Retrieve trait information for type `ProbeSet` traits. 
+ + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" + with conn.cursor() as cursor: + cursor.execute( + PROBESET_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +GENO_TRAIT_INFO_QUERY = ( + "SELECT " + "Geno.name, Geno.chr, Geno.mb, Geno.source2, Geno.sequence " + "FROM " + "Geno, GenoFreeze, GenoXRef " + "WHERE " + "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " + "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") + +def retrieve_geno_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Geno` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" + with conn.cursor() as cursor: + cursor.execute( + GENO_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name", "trait_dataset_name"] + }) + return cursor.fetchone() + +TEMP_TRAIT_INFO_QUERY = ( + "SELECT name, description FROM Temp " + "WHERE Name = %(trait_name)s") + +def retrieve_temp_trait_info(trait_data_source, conn): + """Retrieve trait information for type `Temp` traits. + + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" + with conn.cursor() as cursor: + cursor.execute( + TEMP_TRAIT_INFO_QUERY, + { + k:v for k, v in trait_data_source.items() + if k in ["trait_name"] + }) + return cursor.fetchone() + +def retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + """Retrieves the trait information. 
+ + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 + + This function, or the dependent functions, might be incomplete as they are + currently.""" + trait_info_function_table = { + "Publish": retrieve_publish_trait_info, + "ProbeSet": retrieve_probeset_trait_info, + "Geno": retrieve_geno_trait_info, + "Temp": retrieve_temp_trait_info + } + return trait_info_function_table[trait_type]( + { + "trait_name": trait_name, + "trait_dataset_id": trait_dataset_id, + "trait_dataset_name":trait_dataset_name + }, + conn) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index 95c5b27..e3c5c28 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -1,13 +1,24 @@ """Tests for gn3/db/traits.py""" from unittest import mock, TestCase -from gn3.db.traits import retrieve_type_trait_name +from gn3.db.traits import ( + GENO_TRAIT_INFO_QUERY, + TEMP_TRAIT_INFO_QUERY, + PUBLISH_TRAIT_INFO_QUERY, + PROBESET_TRAIT_INFO_QUERY) +from gn3.db.traits import ( + retrieve_trait_info, + retrieve_geno_trait_info, + retrieve_temp_trait_info, + retrieve_trait_dataset_name, + retrieve_publish_trait_info, + retrieve_probeset_trait_info) class TestTraitsDBFunctions(TestCase): "Test cases for traits functions" - def test_retrieve_probeset_trait_name(self): + def test_retrieve_trait_dataset_name(self): """Test that the function is called correctly.""" - for trait_type, thresh, trait_name, columns in [ + for trait_type, thresh, trait_dataset_name, columns in [ ["ProbeSet", 9, "testName", "Id, Name, FullName, ShortName, DataScale"], ["Geno", 3, "genoTraitName", "Id, Name, FullName, ShortName"], @@ -21,8 +32,8 @@ class TestTraitsDBFunctions(TestCase): "testName", "testNameFull", "testNameShort", "dataScale") self.assertEqual( - retrieve_type_trait_name( - trait_type, thresh, trait_name, db_mock), + retrieve_trait_dataset_name( + trait_type, thresh, trait_dataset_name, db_mock), ("testName", "testNameFull", 
"testNameShort", "dataScale")) cursor.execute.assert_called_once_with( @@ -31,4 +42,73 @@ class TestTraitsDBFunctions(TestCase): "WHERE public > %(threshold)s AND " "(Name = %(name)s OR FullName = %(name)s OR ShortName = %(name)s)".format( cols=columns, ttype=trait_type), - {"threshold": thresh, "name": trait_name}) + {"threshold": thresh, "name": trait_dataset_name}) + + def test_retrieve_publish_trait_info(self): + """Test retrieval of type `Publish` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "PublishTraitName", "trait_dataset_id": 1} + self.assertEqual( + retrieve_publish_trait_info( + trait_source, + db_mock), + tuple()) + cursor.execute.assert_called_once_with( + PUBLISH_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_probeset_trait_info(self): + """Test retrieval of type `Probeset` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "ProbeSetTraitName", + "trait_dataset_name": "ProbeSetDatasetTraitName"} + self.assertEqual( + retrieve_probeset_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + PROBESET_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_geno_trait_info(self): + """Test retrieval of type `Geno` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = { + "trait_name": "GenoTraitName", + "trait_dataset_name": "GenoDatasetTraitName"} + self.assertEqual( + retrieve_geno_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + GENO_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_temp_trait_info(self): + """Test retrieval of type `Temp` traits.""" + db_mock = mock.MagicMock() + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + trait_source = {"trait_name": 
"TempTraitName"} + self.assertEqual( + retrieve_temp_trait_info(trait_source, db_mock), tuple()) + cursor.execute.assert_called_once_with( + TEMP_TRAIT_INFO_QUERY, trait_source) + + def test_retrieve_trait_info(self): + """Test that information on traits is retrieved as appropriate.""" + for trait_type, trait_name, trait_dataset_id, trait_dataset_name, in [ + ["Publish", "PublishTraitName", 1, "PublishDatasetTraitName"], + ["ProbeSet", "ProbeSetTraitName", 2, "ProbeSetDatasetTraitName"], + ["Geno", "GenoTraitName", 3, "GenoDatasetTraitName"], + ["Temp", "TempTraitName", 4, "TempDatasetTraitName"]]: + db_mock = mock.MagicMock() + with self.subTest(trait_type=trait_type): + with db_mock.cursor() as cursor: + cursor.fetchone.return_value = tuple() + self.assertEqual( + retrieve_trait_info( + trait_type, trait_name, trait_dataset_id, + trait_dataset_name, db_mock), + tuple()) -- cgit v1.2.3 From 13680aa9206e2302760180bab3254182f11dde68 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 29 Jul 2021 12:28:21 +0300 Subject: Add type annotations to the function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add some type annotations to the functions to reduce the chances of bugs creeping into the code. 
--- gn3/db/traits.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gn3/db/traits.py b/gn3/db/traits.py index 3c62df8..f18e16a 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -91,7 +91,8 @@ def insert_publication(pubmed_id: int, publication: Optional[Dict], with conn.cursor() as cursor: cursor.execute(insert_query, tuple(publication.values())) -def retrieve_trait_dataset_name(trait_type, threshold, name, connection): +def retrieve_trait_dataset_name( + trait_type: str, threshold: int, name: str, connection: Any): """ Retrieve the name of a trait given the trait's name @@ -136,7 +137,7 @@ PUBLISH_TRAIT_INFO_QUERY = ( "PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND " "PublishFreeze.Id =%(trait_dataset_id)s") -def retrieve_publish_trait_info(trait_data_source, conn): +def retrieve_publish_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Publish` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L399-L421""" @@ -170,7 +171,7 @@ PROBESET_TRAIT_INFO_QUERY = ( "ProbeSetFreeze.Name = %(trait_dataset_name)s AND " "ProbeSet.Name = %(trait_name)s") -def retrieve_probeset_trait_info(trait_data_source, conn): +def retrieve_probeset_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `ProbeSet` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L424-L435""" @@ -192,7 +193,7 @@ GENO_TRAIT_INFO_QUERY = ( "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " "GenoFreeze.Name = %(trait_dataset_name)s AND Geno.Name = %(trait_name)s") -def retrieve_geno_trait_info(trait_data_source, conn): +def retrieve_geno_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Geno` traits. 
https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L438-L449""" @@ -209,7 +210,7 @@ TEMP_TRAIT_INFO_QUERY = ( "SELECT name, description FROM Temp " "WHERE Name = %(trait_name)s") -def retrieve_temp_trait_info(trait_data_source, conn): +def retrieve_temp_trait_info(trait_data_source: Dict[str, Any], conn: Any): """Retrieve trait information for type `Temp` traits. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L450-452""" @@ -223,7 +224,8 @@ def retrieve_temp_trait_info(trait_data_source, conn): return cursor.fetchone() def retrieve_trait_info( - trait_type, trait_name, trait_dataset_id, trait_dataset_name, conn): + trait_type: str, trait_name: str, trait_dataset_id: int, + trait_dataset_name: str, conn: Any): """Retrieves the trait information. https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L397-L456 -- cgit v1.2.3 From 2ccbf1844afe352e23af7ff958ec2b0694cd87ea Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Thu, 29 Jul 2021 14:09:49 +0300 Subject: Add partial type annotations for slink module Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Add some type annotations for the `nearest` function. * Leave some comments regarding the issues experienced when trying to add some typing annotations to the function to help with future endeavours of the same. --- gn3/computations/slink.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py index 4aac6b3..23d3d88 100644 --- a/gn3/computations/slink.py +++ b/gn3/computations/slink.py @@ -7,6 +7,10 @@ slink: TODO: Describe what the function does... 
""" import logging +from typing import List, Tuple, Union, Sequence + +NumType = Union[int, float] +SeqOfNums = Sequence[NumType] class LengthError(BaseException): """Raised whenever child lists/tuples are not the same length as the parent @@ -73,7 +77,10 @@ raise an exception.""" def __flatten_list_of_lists(parent): return [item for child in parent for item in child] -def nearest(lists, i, j): +# i and j are Union[SeqOfNums, NumType], but that leads to errors where the +# values of i or j are indexed, since the NumType type is not indexable. +# I don't know how to type this so that it does not fail on running `mypy .` +def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType: """ Computes shortest distance between member(s) in `i` and member(s) in `j`. @@ -126,6 +133,10 @@ def nearest(lists, i, j): raise ValueError("member values (i or j) should be lists/tuples of integers or integers") +# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not +# understand down the line +# Might have to re-implement the function especially since the errors are thrown +# where `listindexcopy` is involved def slink(lists): """ DESCRIPTION: -- cgit v1.2.3