5 files changed, 510 insertions, 28 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index bc738a7..bb13ff1 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,5 @@
 """module contains code for correlations"""
+import math
 import multiprocessing
 
 from typing import List
@@ -90,7 +91,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
                                        target_values=sanitized_target_vals,
                                        corr_method=corr_method)
 
-        if corr_coefficient is not None:
+        if corr_coefficient is not None and not math.isnan(corr_coefficient):
             return (trait_name, corr_coefficient, p_value, num_overlap)
     return None
 
@@ -123,11 +124,12 @@ def filter_shared_sample_keys(this_samplelist,
     return (this_vals, target_vals)
 
 
-def compute_all_sample_correlation(this_trait,
-                                   target_dataset,
-                                   corr_method="pearson") -> List:
+def fast_compute_all_sample_correlation(this_trait,
+                                        target_dataset,
+                                        corr_method="pearson") -> List:
     """Given a trait data sample-list and target__datasets compute all sample
     correlation
+    this functions uses multiprocessing if not use the normal fun
 
     """
     # xtodo fix trait_name currently returning single one
@@ -159,9 +161,9 @@ def compute_all_sample_correlation(this_trait,
         key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
 
 
-def benchmark_compute_all_sample(this_trait,
-                                 target_dataset,
-                                 corr_method="pearson") -> List:
+def compute_all_sample_correlation(this_trait,
+                                   target_dataset,
+                                   corr_method="pearson") -> List:
     """Temp function to benchmark with compute_all_sample_r alternative to
     compute_all_sample_r where we use multiprocessing
 
@@ -173,6 +175,7 @@ def benchmark_compute_all_sample(this_trait,
         target_trait_data = target_trait["trait_sample_data"]
         this_vals, target_vals = filter_shared_sample_keys(
             this_trait_samples, target_trait_data)
+
         sample_correlation = compute_sample_r_correlation(
             trait_name=trait_name,
             corr_method=corr_method,
@@ -189,7 +192,9 @@ def benchmark_compute_all_sample(this_trait,
             "num_overlap": num_overlap
         }
         corr_results.append({trait_name: corr_result})
-    return corr_results
+    return sorted(
+        corr_results,
+        key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
 
 
 def tissue_correlation_for_trait(
@@ -335,9 +340,9 @@ def compute_all_lit_correlation(conn, trait_lists: List,
     return sorted_lit_results
 
 
-def compute_all_tissue_correlation(primary_tissue_dict: dict,
-                                   target_tissues_data: dict,
-                                   corr_method: str):
+def compute_tissue_correlation(primary_tissue_dict: dict,
+                               target_tissues_data: dict,
+                               corr_method: str):
     """Function acts as an abstraction for tissue_correlation_for_trait\
     required input are target tissue object and primary tissue trait\
     target tissues data contains the trait_symbol_dict and symbol_tissue_vals
@@ -357,8 +362,7 @@ def compute_all_tissue_correlation(primary_tissue_dict: dict,
             target_tissues_values=target_tissue_vals,
             trait_id=trait_id,
             corr_method=corr_method)
-        tissue_result_dict = {trait_id: tissue_result}
-        tissues_results.append(tissue_result_dict)
+        tissues_results.append(tissue_result)
     return sorted(
         tissues_results,
         key=lambda trait_name: -abs(list(trait_name.values())[0]["tissue_corr"]))
@@ -381,9 +385,9 @@ def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> Lis
     return traits_tissue_vals
 
 
-def compute_tissue_correlation(primary_tissue_dict: dict,
-                               target_tissues_data: dict,
-                               corr_method: str):
+def fast_compute_tissue_correlation(primary_tissue_dict: dict,
+                                    target_tissues_data: dict,
+                                    corr_method: str):
     """Experimental function that uses multiprocessing for computing tissue
     correlation
 
diff --git a/gn3/computations/correlations2.py b/gn3/computations/correlations2.py
new file mode 100644
index 0000000..93db3fa
--- /dev/null
+++ b/gn3/computations/correlations2.py
@@ -0,0 +1,50 @@
+"""
+DESCRIPTION:
+    TODO: Add a description for the module
+
+FUNCTIONS:
+compute_correlation:
+    TODO: Describe what the function does..."""
+
+from math import sqrt
+from functools import reduce
+## From GN1: mostly for clustering and heatmap generation
+
+def __items_with_values(dbdata, userdata):
+    """Retains only corresponding items in the data items that are not `None` values.
+    This should probably be renamed to something sensible"""
+    def both_not_none(item1, item2):
+        """Check that both items are not the value `None`."""
+        if (item1 is not None) and (item2 is not None):
+            return (item1, item2)
+        return None
+    def split_lists(accumulator, item):
+        """Separate the 'x' and 'y' items."""
+        return [accumulator[0] + [item[0]], accumulator[1] + [item[1]]]
+    return reduce(
+        split_lists,
+        filter(lambda x: x is not None, map(both_not_none, dbdata, userdata)),
+        [[], []])
+
+def compute_correlation(dbdata, userdata):
+    """Compute some form of correlation.
+
+    This is extracted from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/webqtlUtil.py#L622-L647
+    """
+    x_items, y_items = __items_with_values(dbdata, userdata)
+    if len(x_items) < 6:
+        return (0.0, len(x_items))
+    meanx = sum(x_items)/len(x_items)
+    meany = sum(y_items)/len(y_items)
+    def cal_corr_vals(acc, item):
+        xitem, yitem = item
+        return [
+            acc[0] + ((xitem - meanx) * (yitem - meany)),
+            acc[1] + ((xitem - meanx) * (xitem - meanx)),
+            acc[2] + ((yitem - meany) * (yitem - meany))]
+    xyd, sxd, syd = reduce(cal_corr_vals, zip(x_items, y_items), [0.0, 0.0, 0.0])
+    try:
+        return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x_items))
+    except ZeroDivisionError:
+        return(0, len(x_items))
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
new file mode 100644
index 0000000..3c35029
--- /dev/null
+++ b/gn3/computations/heatmap.py
@@ -0,0 +1,177 @@
+"""
+This module will contain functions to be used in computation of the data used to
+generate various kinds of heatmaps.
+"""
+
+from functools import reduce
+from typing import Any, Dict, Sequence
+from gn3.computations.slink import slink
+from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
+from gn3.computations.correlations2 import compute_correlation
+
+def export_trait_data(
+        trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `strainlist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait: (dict)
+      The dictionary of key-value pairs representing a trait
+    strainlist: (list)
+      A list of strain names
+    type: (str)
+      ... verify what this is ...
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, strain):
+        sample_data = []
+        if tdata[strain]["value"]:
+            sample_data.append(tdata[strain]["value"])
+            if var_exists:
+                if tdata[strain]["variance"]:
+                    sample_data.append(tdata[strain]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[strain]["ndata"]:
+                    sample_data.append(tdata[strain]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, strain):
+        # pylint: disable=[R0911]
+        if strain in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][strain]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][strain]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][strain]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], strain)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, strainlist, tuple())
+
+def trait_display_name(trait: Dict):
+    """
+    Given a trait, return a name to use to display the trait on a heatmap.
+
+    DESCRIPTION
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
+    """
+    if trait.get("db", None) and trait.get("trait_name", None):
+        if trait["db"]["dataset_type"] == "Temp":
+            desc = trait["description"]
+            if desc.find("PCA") >= 0:
+                return "%s::%s" % (
+                    trait["db"]["displayname"],
+                    desc[desc.rindex(':')+1:].strip())
+            return "%s::%s" % (
+                trait["db"]["displayname"],
+                desc[:desc.index('entered')].strip())
+        prefix = "%s::%s" % (
+            trait["db"]["dataset_name"], trait["trait_name"])
+        if trait["cellid"]:
+            return "%s::%s" % (prefix, trait["cellid"])
+        return prefix
+    return trait["description"]
+
+def cluster_traits(traits_data_list: Sequence[Dict]):
+    """
+    Clusters the trait values.
+
+    DESCRIPTION
+    Attempts to replicate the clustering of the traits, as done at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
+    """
+    def __compute_corr(tdata_i, tdata_j):
+        if tdata_i[0] == tdata_j[0]:
+            return 0.0
+        corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+        corr = corr_vals[0]
+        if (1 - corr) < 0:
+            return 0.0
+        return 1 - corr
+
+    def __cluster(tdata_i):
+        return tuple(
+            __compute_corr(tdata_i, tdata_j)
+            for tdata_j in enumerate(traits_data_list))
+
+    return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
+
+def heatmap_data(formd, search_result, conn: Any):
+    """
+    heatmap function
+
+    DESCRIPTION
+    This function is an attempt to reproduce the initialisation at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
+    and also the clustering and slink computations at
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
+    with the help of the `gn3.computations.heatmap.cluster_traits` function.
+
+    It does not try to actually draw the heatmap image.
+
+    PARAMETERS:
+    TODO: Elaborate on the parameters here...
+    """
+    threshold = 0 # webqtlConfig.PUBLICTHRESH
+    cluster_checked = formd.formdata.getvalue("clusterCheck", "")
+    strainlist = [
+        strain for strain in formd.strainlist if strain not in formd.parlist]
+    genotype = formd.genotype
+
+    def __retrieve_traitlist_and_datalist(threshold, fullname):
+        trait = retrieve_trait_info(threshold, fullname, conn)
+        return (
+            trait,
+            export_trait_data(retrieve_trait_data(trait, conn), strainlist))
+
+    traits_details = [
+        __retrieve_traitlist_and_datalist(threshold, fullname)
+        for fullname in search_result]
+    traits_list = map(lambda x: x[0], traits_details)
+    traits_data_list = map(lambda x: x[1], traits_details)
+
+    return {
+        "target_description_checked": formd.formdata.getvalue(
+            "targetDescriptionCheck", ""),
+        "cluster_checked": cluster_checked,
+        "slink_data": (
+            slink(cluster_traits(traits_data_list))
+            if cluster_checked else False),
+        "sessionfile": formd.formdata.getvalue("session"),
+        "genotype": genotype,
+        "nLoci": sum(map(len, genotype)),
+        "strainlist": strainlist,
+        "ppolar": formd.ppolar,
+        "mpolar":formd.mpolar,
+        "traits_list": traits_list,
+        "traits_data_list": traits_data_list
+    }
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 00e1db9..38c5000 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,6 +1,7 @@
 """Procedures related rqtl computations"""
 import os
-from typing import Dict, List, Union
+from bisect import bisect
+from typing import Dict, List, Tuple, Union
 
 import numpy as np
 
@@ -80,15 +81,15 @@ def process_rqtl_mapping(file_name: str) -> List:
 
     return marker_obs
 
-def process_rqtl_pairscan(file_name: str) -> List:
+def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
     a list of both the JSON needed for the d3panels figure and a list of results
     to be used when generating the results table (which will include marker names)
 
     """
 
-    figure_data = pairscan_results_for_figure(file_name)
-    table_data = pairscan_results_for_table(file_name)
+    figure_data = pairscan_for_figure(file_name)
+    table_data = pairscan_for_table(file_name, geno_file)
 
     return [figure_data, table_data]
 
@@ -119,15 +120,14 @@ def pairscan_for_figure(file_name: str) -> Dict:
             if i == 0: # Skip first line
                 continue
             line_items = [item.rstrip('\n') for item in line.split(",")]
-            chr_list.append(line_items[1])
+            chr_list.append(line_items[1][1:-1])
             pos_list.append(line_items[2])
         figure_data['chr'] = chr_list
         figure_data['pos'] = pos_list
 
     return figure_data
 
-
-def pairscan_for_table(file_name: str) -> List:
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
     """Given an output file name, read in R/qtl pair-scan results and return
     a list of results to be used when generating the results table (which will include marker names)
 
@@ -148,32 +148,85 @@ def pairscan_for_table(file_name: str) -> List:
 
             marker_list.append(this_marker)
 
+    # Get the list of original markers from the .geno file
+    original_markers = build_marker_pos_dict(geno_file)
+
     # Open the file with the actual results and write the results as
     # they will be displayed in the results table
     with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
                            "output", file_name), "r") as the_file:
         for i, line in enumerate(the_file.readlines()[1:]):
             marker_1 = marker_list[i]
+            proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
             line_items = [item.rstrip('\n') for item in line.split(",")]
             for j, item in enumerate(line_items[1:]):
                 marker_2 = marker_list[j]
+                proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
                 try:
                     lod_score = f"{float(item):.3f}"
                 except:
                     lod_score = f"{item}"
                 this_line = {
-                    'marker1': f"{marker_1['name']}",
+                    'proximal1': proximal1,
+                    'distal1': distal1,
                     'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
                     'lod': lod_score,
-                    'marker2': f"{marker_2['name']}",
+                    'proximal2': proximal2,
+                    'distal2': distal2,
                     'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
                 }
-            table_data.append(this_line)
 
-    return table_data
+                table_data.append(this_line)
+
+    return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_dict(genotype_file: str) -> Dict:
+    """
+    Gets list of markers and their positions from .geno file
+
+    Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+    """
+
+    with open(genotype_file, "r") as infile:
+        contents = infile.readlines()
+
+    # Get all lines after the metadata
+    lines = tuple(line for line in contents if
+                  ((not line.strip().startswith("#")) and
+                   (not line.strip().startswith("@")) and
+                   (not line.strip() == "")))
+
+    header_items = lines[0].split("\t")
+    mb_exists = "Mb" in header_items
+    pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+    the_markers = {}
+    for line in lines[1:]: # The lines with markers
+        line_items = line.split("\t")
+        this_chr = line_items[0]
+        if this_chr not in the_markers:
+            the_markers[this_chr] = {}
+        the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+    return the_markers
+
+def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
+    """
+    Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+    return the nearest real marker
+    """
+
+    pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+    # Get the position of the pseudomarker in the list of markers for the chr
+    the_pos_index = bisect(pos_list, float(the_pos))
+
+    proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+    distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
 
+    return proximal_marker, distal_marker
 
-def process_perm_output(file_name: str):
+def process_perm_output(file_name: str) -> Tuple[List, float, float]:
     """Given base filename, read in R/qtl permutation output and calculate
     suggestive and significant thresholds
 
diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
new file mode 100644
index 0000000..3d7a576
--- /dev/null
+++ b/gn3/computations/slink.py
@@ -0,0 +1,198 @@
+"""
+DESCRIPTION:
+    TODO: Add a description for the module
+
+FUNCTIONS:
+slink:
+    TODO: Describe what the function does...
+"""
+import logging
+from typing import Union, Sequence
+
+NumType = Union[int, float]
+SeqOfNums = Sequence[NumType]
+
+class LengthError(BaseException):
+    """Raised whenever child lists/tuples are not the same length as the parent
+    list of tuple."""
+
+class MirrorError(BaseException):
+    """Raised if the distance from child A to child B is not the same as the
+    distance from child B to child A."""
+
+def __is_list_or_tuple(item):
+    return type(item) in [list, tuple]
+
+def __raise_valueerror_if_data_is_not_lists_or_tuples(lists):
+    """Check that `lists` is a list of lists: If not, raise an exception."""
+
+    if (not __is_list_or_tuple(lists)) or (not all(map(__is_list_or_tuple, lists))):
+        raise ValueError("Expected list or tuple")
+
+def __raise_valueerror_if_lists_empty(lists):
+    """Check that the list and its direct children are not empty."""
+    def empty(lst):
+        return len(lst) == 0
+    if (empty(lists)) or not all(map(lambda x: not empty(x), lists)):
+        raise ValueError("List/Tuple should NOT be empty!")
+
+def __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists):
+    def len_is_same_as_parent(lst):
+        return len(lst) == len(lists)
+    if not all(map(len_is_same_as_parent, lists)):
+        raise LengthError("All children lists should be same length as the parent.")
+
+def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists):
+    def get_child_distance(child):
+        idx = lists.index(child)
+        return lists[idx][idx]
+    def distance_is_zero(dist):
+        return dist == 0
+    children_distances = map(get_child_distance, lists)
+    if not all(map(distance_is_zero, children_distances)):
+        raise ValueError("Distance of each child list/tuple from itself should be zero!")
+
+def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists):
+    """Check that the distance from A to B, is the same as the distance from B to A.
+If the two distances are different, throw an exception."""
+    inner_coords = range(len(lists))
+    coords = ((i, j) for i in inner_coords for j in inner_coords)
+    def __is_same_reversed(coord):
+        return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]]
+    if not all(map(__is_same_reversed, coords)):
+        raise MirrorError((
+            "Distance from one child to the other should be the same in both "
+            "directions."))
+
+def __raise_valueerror_on_negative_distances(lists):
+    """Check that distances between 'somethings' are all positive, otherwise,
+raise an exception."""
+    def zero_or_positive(val):
+        return val >= 0
+    # flatten lists
+    flattened = __flatten_list_of_lists(lists)
+    if not all(map(zero_or_positive, flattened)):
+        raise ValueError("Distances should be positive.")
+
+def __flatten_list_of_lists(parent):
+    return [item for child in parent for item in child]
+
+# i and j are Union[SeqOfNums, NumType], but that leads to errors where the
+# values of i or j are indexed, since the NumType type is not indexable.
+# I don't know how to type this so that it does not fail on running `mypy .`
+def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType:
+    """
+    Computes shortest distance between member(s) in `i` and member(s) in `j`.
+
+    Description:
+    This is 'copied' over from genenetwork1, from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.
+
+    This description should be updated to better describe what 'member' means in
+    the context where the function is used.
+
+    Parameters:
+    lists (list of lists of distances): Represents a list of members and their
+        distances from each other.
+        Each inner list represents the distances the member at that coordinate
+        is from other members in the list: for example, a member at index 0 with
+        the values [0, 9, 1, 7] indicates that the member is:
+        - 0 units of distance away from itself
+        - 9 units of distance away from member at coordinate 1
+        - 1 unit of distance away from member at coordinate 2
+        - 7 units of distance away from member at coordinate 3
+    i (int or list of ints): Represents the coordinate of a member, or a list of
+        coordinates of members on the `lists` list.
+    j (int or list of ints): Represents the coordinate of a member, or a list of
+        coordinates of members on the `lists` list.
+
+    Returns:
+    int: Represents the shortest distance between member(s) in `i` and member(s)
+        in `j`."""
+
+    #### Guard Functions: Should we do this a different way? ####
+    __raise_valueerror_if_data_is_not_lists_or_tuples(lists)
+    __raise_valueerror_if_lists_empty(lists)
+    __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists)
+    __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists)
+    __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists)
+    __raise_valueerror_on_negative_distances(lists)
+    #### END: Guard Functions ####
+    if isinstance(i, int) and isinstance(j, int): # From member i to member j
+        return lists[i][j]
+
+    if isinstance(i, int) and __is_list_or_tuple(j):
+        return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1]))
+    if isinstance(j, int) and __is_list_or_tuple(i):
+        return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1]))
+
+    if __is_list_or_tuple(i) and __is_list_or_tuple(j):
+        coordinate_pairs = __flatten_list_of_lists(
+            [[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]])
+        return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs))
+
+    raise ValueError("member values (i or j) should be lists/tuples of integers or integers")
+
+# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not
+# understand down the line
+# Might have to re-implement the function especially since the errors are thrown
+# where `listindexcopy` is involved
+def slink(lists):
+    """
+    DESCRIPTION:
+    TODO: Not quite sure what this function does. Work through the code with a
+        fine tooth comb, once we understand the context of its use, so as to
+        give a better description
+
+        The name of the function does not clearly establish what the function
+        does either, meaning, once that is established, the function should be
+        renamed to give the user an idea of what it does without necessarily
+        reading through a ton of code.
+
+        We should also look into refactoring the function to reduce/eliminate
+        the multiple levels of nested-loops and conditionals
+
+    PARAMETERS:
+    lists (list of lists of numbers): Give this a better name.
+        Each item of this list is a list of coordinates of the members in the
+        group.
+        What 'member' and 'group' in this context means, is not yet established.
+    """
+    try:
+        size = len(lists)
+        listindexcopy = list(range(size))
+        listscopy = [list(child[:]) for child in lists]
+        init_size = size
+        candidate = []
+        while init_size > 2:
+            mindist = 1e10
+            for i in range(init_size):
+                for j in range(i+1, init_size):
+                    if listscopy[i][j] < mindist:
+                        mindist = listscopy[i][j]
+                        candidate = [[i, j]]
+                    elif listscopy[i][j] == mindist:
+                        mindist = listscopy[i][j]
+                        candidate.append([i, j])
+                    else:
+                        pass
+            newmem = (
+                listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]],
+                mindist)
+            listindexcopy.pop(candidate[0][1])
+            listindexcopy[candidate[0][0]] = newmem
+
+            init_size -= 1
+            for i in range(init_size):
+                for j in range(i+1, init_size):
+                    listscopy[i][j] = nearest(
+                        lists, listindexcopy[i], listindexcopy[j])
+                    listscopy[j][i] = listscopy[i][j]
+        listindexcopy.append(
+            nearest(lists, listindexcopy[0], listindexcopy[1]))
+        return listindexcopy
+    except (LengthError, MirrorError, TypeError, IndexError) as exc:
+        # Look into making the logging log output to the system's
+        #   configured logger(s)
+        logging.warning("Exception: %s, %s", type(exc), exc)
+        return []