aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/computations')
-rw-r--r--gn3/computations/correlations.py36
-rw-r--r--gn3/computations/correlations2.py50
-rw-r--r--gn3/computations/heatmap.py177
-rw-r--r--gn3/computations/rqtl.py77
-rw-r--r--gn3/computations/slink.py198
5 files changed, 510 insertions, 28 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index bc738a7..bb13ff1 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,5 @@
"""module contains code for correlations"""
+import math
import multiprocessing
from typing import List
@@ -90,7 +91,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
target_values=sanitized_target_vals,
corr_method=corr_method)
- if corr_coefficient is not None:
+ if corr_coefficient is not None and not math.isnan(corr_coefficient):
return (trait_name, corr_coefficient, p_value, num_overlap)
return None
@@ -123,11 +124,12 @@ def filter_shared_sample_keys(this_samplelist,
return (this_vals, target_vals)
-def compute_all_sample_correlation(this_trait,
- target_dataset,
- corr_method="pearson") -> List:
+def fast_compute_all_sample_correlation(this_trait,
+ target_dataset,
+ corr_method="pearson") -> List:
"""Given a trait data sample-list and target__datasets compute all sample
correlation
+ this functions uses multiprocessing if not use the normal fun
"""
# xtodo fix trait_name currently returning single one
@@ -159,9 +161,9 @@ def compute_all_sample_correlation(this_trait,
key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
-def benchmark_compute_all_sample(this_trait,
- target_dataset,
- corr_method="pearson") -> List:
+def compute_all_sample_correlation(this_trait,
+ target_dataset,
+ corr_method="pearson") -> List:
"""Temp function to benchmark with compute_all_sample_r alternative to
compute_all_sample_r where we use multiprocessing
@@ -173,6 +175,7 @@ def benchmark_compute_all_sample(this_trait,
target_trait_data = target_trait["trait_sample_data"]
this_vals, target_vals = filter_shared_sample_keys(
this_trait_samples, target_trait_data)
+
sample_correlation = compute_sample_r_correlation(
trait_name=trait_name,
corr_method=corr_method,
@@ -189,7 +192,9 @@ def benchmark_compute_all_sample(this_trait,
"num_overlap": num_overlap
}
corr_results.append({trait_name: corr_result})
- return corr_results
+ return sorted(
+ corr_results,
+ key=lambda trait_name: -abs(list(trait_name.values())[0]["corr_coefficient"]))
def tissue_correlation_for_trait(
@@ -335,9 +340,9 @@ def compute_all_lit_correlation(conn, trait_lists: List,
return sorted_lit_results
-def compute_all_tissue_correlation(primary_tissue_dict: dict,
- target_tissues_data: dict,
- corr_method: str):
+def compute_tissue_correlation(primary_tissue_dict: dict,
+ target_tissues_data: dict,
+ corr_method: str):
"""Function acts as an abstraction for tissue_correlation_for_trait\
required input are target tissue object and primary tissue trait\
target tissues data contains the trait_symbol_dict and symbol_tissue_vals
@@ -357,8 +362,7 @@ def compute_all_tissue_correlation(primary_tissue_dict: dict,
target_tissues_values=target_tissue_vals,
trait_id=trait_id,
corr_method=corr_method)
- tissue_result_dict = {trait_id: tissue_result}
- tissues_results.append(tissue_result_dict)
+ tissues_results.append(tissue_result)
return sorted(
tissues_results,
key=lambda trait_name: -abs(list(trait_name.values())[0]["tissue_corr"]))
@@ -381,9 +385,9 @@ def process_trait_symbol_dict(trait_symbol_dict, symbol_tissue_vals_dict) -> Lis
return traits_tissue_vals
-def compute_tissue_correlation(primary_tissue_dict: dict,
- target_tissues_data: dict,
- corr_method: str):
+def fast_compute_tissue_correlation(primary_tissue_dict: dict,
+ target_tissues_data: dict,
+ corr_method: str):
"""Experimental function that uses multiprocessing for computing tissue
correlation
diff --git a/gn3/computations/correlations2.py b/gn3/computations/correlations2.py
new file mode 100644
index 0000000..93db3fa
--- /dev/null
+++ b/gn3/computations/correlations2.py
@@ -0,0 +1,50 @@
+"""
+DESCRIPTION:
+ TODO: Add a description for the module
+
+FUNCTIONS:
+compute_correlation:
+ TODO: Describe what the function does..."""
+
+from math import sqrt
+from functools import reduce
+## From GN1: mostly for clustering and heatmap generation
+
+def __items_with_values(dbdata, userdata):
+ """Retains only corresponding items in the data items that are not `None` values.
+ This should probably be renamed to something sensible"""
+ def both_not_none(item1, item2):
+ """Check that both items are not the value `None`."""
+ if (item1 is not None) and (item2 is not None):
+ return (item1, item2)
+ return None
+ def split_lists(accumulator, item):
+ """Separate the 'x' and 'y' items."""
+ return [accumulator[0] + [item[0]], accumulator[1] + [item[1]]]
+ return reduce(
+ split_lists,
+ filter(lambda x: x is not None, map(both_not_none, dbdata, userdata)),
+ [[], []])
+
+def compute_correlation(dbdata, userdata):
+ """Compute some form of correlation.
+
+ This is extracted from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/webqtlUtil.py#L622-L647
+ """
+ x_items, y_items = __items_with_values(dbdata, userdata)
+ if len(x_items) < 6:
+ return (0.0, len(x_items))
+ meanx = sum(x_items)/len(x_items)
+ meany = sum(y_items)/len(y_items)
+ def cal_corr_vals(acc, item):
+ xitem, yitem = item
+ return [
+ acc[0] + ((xitem - meanx) * (yitem - meany)),
+ acc[1] + ((xitem - meanx) * (xitem - meanx)),
+ acc[2] + ((yitem - meany) * (yitem - meany))]
+ xyd, sxd, syd = reduce(cal_corr_vals, zip(x_items, y_items), [0.0, 0.0, 0.0])
+ try:
+ return ((xyd/(sqrt(sxd)*sqrt(syd))), len(x_items))
+ except ZeroDivisionError:
+ return(0, len(x_items))
diff --git a/gn3/computations/heatmap.py b/gn3/computations/heatmap.py
new file mode 100644
index 0000000..3c35029
--- /dev/null
+++ b/gn3/computations/heatmap.py
@@ -0,0 +1,177 @@
+"""
+This module will contain functions to be used in computation of the data used to
+generate various kinds of heatmaps.
+"""
+
+from functools import reduce
+from typing import Any, Dict, Sequence
+from gn3.computations.slink import slink
+from gn3.db.traits import retrieve_trait_data, retrieve_trait_info
+from gn3.computations.correlations2 import compute_correlation
+
+def export_trait_data(
+ trait_data: dict, strainlist: Sequence[str], dtype: str = "val",
+ var_exists: bool = False, n_exists: bool = False):
+ """
+ Export data according to `strainlist`. Mostly used in calculating
+ correlations.
+
+ DESCRIPTION:
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+ PARAMETERS
+ trait: (dict)
+ The dictionary of key-value pairs representing a trait
+ strainlist: (list)
+ A list of strain names
+ type: (str)
+ ... verify what this is ...
+ var_exists: (bool)
+ A flag indicating existence of variance
+ n_exists: (bool)
+ A flag indicating existence of ndata
+ """
+ def __export_all_types(tdata, strain):
+ sample_data = []
+ if tdata[strain]["value"]:
+ sample_data.append(tdata[strain]["value"])
+ if var_exists:
+ if tdata[strain]["variance"]:
+ sample_data.append(tdata[strain]["variance"])
+ else:
+ sample_data.append(None)
+ if n_exists:
+ if tdata[strain]["ndata"]:
+ sample_data.append(tdata[strain]["ndata"])
+ else:
+ sample_data.append(None)
+ else:
+ if var_exists and n_exists:
+ sample_data += [None, None, None]
+ elif var_exists or n_exists:
+ sample_data += [None, None]
+ else:
+ sample_data.append(None)
+
+ return tuple(sample_data)
+
+ def __exporter(accumulator, strain):
+ # pylint: disable=[R0911]
+ if strain in trait_data["data"]:
+ if dtype == "val":
+ return accumulator + (trait_data["data"][strain]["value"], )
+ if dtype == "var":
+ return accumulator + (trait_data["data"][strain]["variance"], )
+ if dtype == "N":
+ return accumulator + (trait_data["data"][strain]["ndata"], )
+ if dtype == "all":
+ return accumulator + __export_all_types(trait_data["data"], strain)
+ raise KeyError("Type `%s` is incorrect" % dtype)
+ if var_exists and n_exists:
+ return accumulator + (None, None, None)
+ if var_exists or n_exists:
+ return accumulator + (None, None)
+ return accumulator + (None,)
+
+ return reduce(__exporter, strainlist, tuple())
+
+def trait_display_name(trait: Dict):
+ """
+ Given a trait, return a name to use to display the trait on a heatmap.
+
+ DESCRIPTION
+ Migrated from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L141-L157
+ """
+ if trait.get("db", None) and trait.get("trait_name", None):
+ if trait["db"]["dataset_type"] == "Temp":
+ desc = trait["description"]
+ if desc.find("PCA") >= 0:
+ return "%s::%s" % (
+ trait["db"]["displayname"],
+ desc[desc.rindex(':')+1:].strip())
+ return "%s::%s" % (
+ trait["db"]["displayname"],
+ desc[:desc.index('entered')].strip())
+ prefix = "%s::%s" % (
+ trait["db"]["dataset_name"], trait["trait_name"])
+ if trait["cellid"]:
+ return "%s::%s" % (prefix, trait["cellid"])
+ return prefix
+ return trait["description"]
+
+def cluster_traits(traits_data_list: Sequence[Dict]):
+ """
+ Clusters the trait values.
+
+ DESCRIPTION
+ Attempts to replicate the clustering of the traits, as done at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L162
+ """
+ def __compute_corr(tdata_i, tdata_j):
+ if tdata_i[0] == tdata_j[0]:
+ return 0.0
+ corr_vals = compute_correlation(tdata_i[1], tdata_j[1])
+ corr = corr_vals[0]
+ if (1 - corr) < 0:
+ return 0.0
+ return 1 - corr
+
+ def __cluster(tdata_i):
+ return tuple(
+ __compute_corr(tdata_i, tdata_j)
+ for tdata_j in enumerate(traits_data_list))
+
+ return tuple(__cluster(tdata_i) for tdata_i in enumerate(traits_data_list))
+
+def heatmap_data(formd, search_result, conn: Any):
+ """
+ heatmap function
+
+ DESCRIPTION
+ This function is an attempt to reproduce the initialisation at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L46-L64
+ and also the clustering and slink computations at
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/Heatmap.py#L138-L165
+ with the help of the `gn3.computations.heatmap.cluster_traits` function.
+
+ It does not try to actually draw the heatmap image.
+
+ PARAMETERS:
+ TODO: Elaborate on the parameters here...
+ """
+ threshold = 0 # webqtlConfig.PUBLICTHRESH
+ cluster_checked = formd.formdata.getvalue("clusterCheck", "")
+ strainlist = [
+ strain for strain in formd.strainlist if strain not in formd.parlist]
+ genotype = formd.genotype
+
+ def __retrieve_traitlist_and_datalist(threshold, fullname):
+ trait = retrieve_trait_info(threshold, fullname, conn)
+ return (
+ trait,
+ export_trait_data(retrieve_trait_data(trait, conn), strainlist))
+
+ traits_details = [
+ __retrieve_traitlist_and_datalist(threshold, fullname)
+ for fullname in search_result]
+ traits_list = map(lambda x: x[0], traits_details)
+ traits_data_list = map(lambda x: x[1], traits_details)
+
+ return {
+ "target_description_checked": formd.formdata.getvalue(
+ "targetDescriptionCheck", ""),
+ "cluster_checked": cluster_checked,
+ "slink_data": (
+ slink(cluster_traits(traits_data_list))
+ if cluster_checked else False),
+ "sessionfile": formd.formdata.getvalue("session"),
+ "genotype": genotype,
+ "nLoci": sum(map(len, genotype)),
+ "strainlist": strainlist,
+ "ppolar": formd.ppolar,
+ "mpolar":formd.mpolar,
+ "traits_list": traits_list,
+ "traits_data_list": traits_data_list
+ }
diff --git a/gn3/computations/rqtl.py b/gn3/computations/rqtl.py
index 00e1db9..38c5000 100644
--- a/gn3/computations/rqtl.py
+++ b/gn3/computations/rqtl.py
@@ -1,6 +1,7 @@
"""Procedures related rqtl computations"""
import os
-from typing import Dict, List, Union
+from bisect import bisect
+from typing import Dict, List, Tuple, Union
import numpy as np
@@ -80,15 +81,15 @@ def process_rqtl_mapping(file_name: str) -> List:
return marker_obs
-def process_rqtl_pairscan(file_name: str) -> List:
+def process_rqtl_pairscan(file_name: str, geno_file: str) -> List:
"""Given an output file name, read in R/qtl pair-scan results and return
a list of both the JSON needed for the d3panels figure and a list of results
to be used when generating the results table (which will include marker names)
"""
- figure_data = pairscan_results_for_figure(file_name)
- table_data = pairscan_results_for_table(file_name)
+ figure_data = pairscan_for_figure(file_name)
+ table_data = pairscan_for_table(file_name, geno_file)
return [figure_data, table_data]
@@ -119,15 +120,14 @@ def pairscan_for_figure(file_name: str) -> Dict:
if i == 0: # Skip first line
continue
line_items = [item.rstrip('\n') for item in line.split(",")]
- chr_list.append(line_items[1])
+ chr_list.append(line_items[1][1:-1])
pos_list.append(line_items[2])
figure_data['chr'] = chr_list
figure_data['pos'] = pos_list
return figure_data
-
-def pairscan_for_table(file_name: str) -> List:
+def pairscan_for_table(file_name: str, geno_file: str) -> List:
"""Given an output file name, read in R/qtl pair-scan results and return
a list of results to be used when generating the results table (which will include marker names)
@@ -148,32 +148,85 @@ def pairscan_for_table(file_name: str) -> List:
marker_list.append(this_marker)
+ # Get the list of original markers from the .geno file
+ original_markers = build_marker_pos_dict(geno_file)
+
# Open the file with the actual results and write the results as
# they will be displayed in the results table
with open(os.path.join(current_app.config.get("TMPDIR", "/tmp"),
"output", file_name), "r") as the_file:
for i, line in enumerate(the_file.readlines()[1:]):
marker_1 = marker_list[i]
+ proximal1, distal1 = find_nearest_marker(marker_1['chr'], marker_1['pos'], original_markers)
line_items = [item.rstrip('\n') for item in line.split(",")]
for j, item in enumerate(line_items[1:]):
marker_2 = marker_list[j]
+ proximal2, distal2 = find_nearest_marker(marker_2['chr'], marker_2['pos'], original_markers)
try:
lod_score = f"{float(item):.3f}"
except:
lod_score = f"{item}"
this_line = {
- 'marker1': f"{marker_1['name']}",
+ 'proximal1': proximal1,
+ 'distal1': distal1,
'pos1': f"Chr {marker_1['chr']} @ {float(marker_1['pos']):.1f} cM",
'lod': lod_score,
- 'marker2': f"{marker_2['name']}",
+ 'proximal2': proximal2,
+ 'distal2': distal2,
'pos2': f"Chr {marker_2['chr']} @ {float(marker_2['pos']):.1f} cM"
}
- table_data.append(this_line)
- return table_data
+ table_data.append(this_line)
+
+ return sorted(table_data, key = lambda i: float(i['lod']), reverse=True)[:500]
+
+def build_marker_pos_dict(genotype_file: str) -> Dict:
+ """
+ Gets list of markers and their positions from .geno file
+
+ Basically a pared-down version of parse_genotype_file for R/qtl pair-scan
+ """
+
+ with open(genotype_file, "r") as infile:
+ contents = infile.readlines()
+
+ # Get all lines after the metadata
+ lines = tuple(line for line in contents if
+ ((not line.strip().startswith("#")) and
+ (not line.strip().startswith("@")) and
+ (not line.strip() == "")))
+
+ header_items = lines[0].split("\t")
+ mb_exists = "Mb" in header_items
+ pos_column = header_items.index("Mb") if mb_exists else header_items.index("cM")
+
+ the_markers = {}
+ for line in lines[1:]: # The lines with markers
+ line_items = line.split("\t")
+ this_chr = line_items[0]
+ if this_chr not in the_markers:
+ the_markers[this_chr] = {}
+ the_markers[this_chr][str(float(line_items[pos_column]))] = line_items[1]
+
+ return the_markers
+
+def find_nearest_marker(the_chr: str, the_pos: str, marker_list: Dict) -> Tuple[str, str]:
+ """
+ Given a chromosome and position of a pseudomarker (from R/qtl pair-scan results),
+ return the nearest real marker
+ """
+
+ pos_list = [float(pos) for pos in marker_list[the_chr]]
+
+ # Get the position of the pseudomarker in the list of markers for the chr
+ the_pos_index = bisect(pos_list, float(the_pos))
+
+ proximal_marker = marker_list[the_chr][str(pos_list[the_pos_index-1])]
+ distal_marker = marker_list[the_chr][str(pos_list[the_pos_index])]
+ return proximal_marker, distal_marker
-def process_perm_output(file_name: str):
+def process_perm_output(file_name: str) -> Tuple[List, float, float]:
"""Given base filename, read in R/qtl permutation output and calculate
suggestive and significant thresholds
diff --git a/gn3/computations/slink.py b/gn3/computations/slink.py
new file mode 100644
index 0000000..3d7a576
--- /dev/null
+++ b/gn3/computations/slink.py
@@ -0,0 +1,198 @@
+"""
+DESCRIPTION:
+ TODO: Add a description for the module
+
+FUNCTIONS:
+slink:
+ TODO: Describe what the function does...
+"""
+import logging
+from typing import Union, Sequence
+
+NumType = Union[int, float]
+SeqOfNums = Sequence[NumType]
+
+class LengthError(BaseException):
+ """Raised whenever child lists/tuples are not the same length as the parent
+ list of tuple."""
+
+class MirrorError(BaseException):
+ """Raised if the distance from child A to child B is not the same as the
+ distance from child B to child A."""
+
+def __is_list_or_tuple(item):
+ return type(item) in [list, tuple]
+
+def __raise_valueerror_if_data_is_not_lists_or_tuples(lists):
+ """Check that `lists` is a list of lists: If not, raise an exception."""
+
+ if (not __is_list_or_tuple(lists)) or (not all(map(__is_list_or_tuple, lists))):
+ raise ValueError("Expected list or tuple")
+
+def __raise_valueerror_if_lists_empty(lists):
+ """Check that the list and its direct children are not empty."""
+ def empty(lst):
+ return len(lst) == 0
+ if (empty(lists)) or not all(map(lambda x: not empty(x), lists)):
+ raise ValueError("List/Tuple should NOT be empty!")
+
+def __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists):
+ def len_is_same_as_parent(lst):
+ return len(lst) == len(lists)
+ if not all(map(len_is_same_as_parent, lists)):
+ raise LengthError("All children lists should be same length as the parent.")
+
+def __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists):
+ def get_child_distance(child):
+ idx = lists.index(child)
+ return lists[idx][idx]
+ def distance_is_zero(dist):
+ return dist == 0
+ children_distances = map(get_child_distance, lists)
+ if not all(map(distance_is_zero, children_distances)):
+ raise ValueError("Distance of each child list/tuple from itself should be zero!")
+
+def __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists):
+ """Check that the distance from A to B, is the same as the distance from B to A.
+If the two distances are different, throw an exception."""
+ inner_coords = range(len(lists))
+ coords = ((i, j) for i in inner_coords for j in inner_coords)
+ def __is_same_reversed(coord):
+ return lists[coord[0]][coord[1]] == lists[coord[1]][coord[0]]
+ if not all(map(__is_same_reversed, coords)):
+ raise MirrorError((
+ "Distance from one child to the other should be the same in both "
+ "directions."))
+
+def __raise_valueerror_on_negative_distances(lists):
+ """Check that distances between 'somethings' are all positive, otherwise,
+raise an exception."""
+ def zero_or_positive(val):
+ return val >= 0
+ # flatten lists
+ flattened = __flatten_list_of_lists(lists)
+ if not all(map(zero_or_positive, flattened)):
+ raise ValueError("Distances should be positive.")
+
+def __flatten_list_of_lists(parent):
+ return [item for child in parent for item in child]
+
+# i and j are Union[SeqOfNums, NumType], but that leads to errors where the
+# values of i or j are indexed, since the NumType type is not indexable.
+# I don't know how to type this so that it does not fail on running `mypy .`
+def nearest(lists: Sequence[SeqOfNums], i, j) -> NumType:
+ """
+ Computes shortest distance between member(s) in `i` and member(s) in `j`.
+
+ Description:
+ This is 'copied' over from genenetwork1, from
+ https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/heatmap/slink.py#L42-L64.
+
+ This description should be updated to better describe what 'member' means in
+ the context where the function is used.
+
+ Parameters:
+ lists (list of lists of distances): Represents a list of members and their
+ distances from each other.
+ Each inner list represents the distances the member at that coordinate
+ is from other members in the list: for example, a member at index 0 with
+ the values [0, 9, 1, 7] indicates that the member is:
+ - 0 units of distance away from itself
+ - 9 units of distance away from member at coordinate 1
+ - 1 unit of distance away from member at coordinate 2
+ - 7 units of distance away from member at coordinate 3
+ i (int or list of ints): Represents the coordinate of a member, or a list of
+ coordinates of members on the `lists` list.
+ j (int or list of ints): Represents the coordinate of a member, or a list of
+ coordinates of members on the `lists` list.
+
+ Returns:
+ int: Represents the shortest distance between member(s) in `i` and member(s)
+ in `j`."""
+
+ #### Guard Functions: Should we do this a different way? ####
+ __raise_valueerror_if_data_is_not_lists_or_tuples(lists)
+ __raise_valueerror_if_lists_empty(lists)
+ __raise_lengtherror_if_child_lists_are_not_same_as_parent(lists)
+ __raise_valueerror_if_child_list_distance_from_itself_is_not_zero(lists)
+ __raise_mirrorerror_of_distances_one_way_are_not_same_other_way(lists)
+ __raise_valueerror_on_negative_distances(lists)
+ #### END: Guard Functions ####
+ if isinstance(i, int) and isinstance(j, int): # From member i to member j
+ return lists[i][j]
+
+ if isinstance(i, int) and __is_list_or_tuple(j):
+ return min(map(lambda j_new: nearest(lists, i, j_new), j[:-1]))
+ if isinstance(j, int) and __is_list_or_tuple(i):
+ return min(map(lambda i_new: nearest(lists, i_new, j), i[:-1]))
+
+ if __is_list_or_tuple(i) and __is_list_or_tuple(j):
+ coordinate_pairs = __flatten_list_of_lists(
+ [[(itemi, itemj) for itemj in j[:-1]] for itemi in i[:-1]])
+ return min(map(lambda x: nearest(lists, x[0], x[1]), coordinate_pairs))
+
+ raise ValueError("member values (i or j) should be lists/tuples of integers or integers")
+
+# `lists` here could be Sequence[SeqOfNums], but that leads to errors I do not
+# understand down the line
+# Might have to re-implement the function especially since the errors are thrown
+# where `listindexcopy` is involved
+def slink(lists):
+ """
+ DESCRIPTION:
+ TODO: Not quite sure what this function does. Work through the code with a
+ fine tooth comb, once we understand the context of its use, so as to
+ give a better description
+
+ The name of the function does not clearly establish what the function
+ does either, meaning, once that is established, the function should be
+ renamed to give the user an idea of what it does without necessarily
+ reading through a ton of code.
+
+ We should also look into refactoring the function to reduce/eliminate
+ the multiple levels of nested-loops and conditionals
+
+ PARAMETERS:
+ lists (list of lists of numbers): Give this a better name.
+ Each item of this list is a list of coordinates of the members in the
+ group.
+ What 'member' and 'group' in this context means, is not yet established.
+ """
+ try:
+ size = len(lists)
+ listindexcopy = list(range(size))
+ listscopy = [list(child[:]) for child in lists]
+ init_size = size
+ candidate = []
+ while init_size > 2:
+ mindist = 1e10
+ for i in range(init_size):
+ for j in range(i+1, init_size):
+ if listscopy[i][j] < mindist:
+ mindist = listscopy[i][j]
+ candidate = [[i, j]]
+ elif listscopy[i][j] == mindist:
+ mindist = listscopy[i][j]
+ candidate.append([i, j])
+ else:
+ pass
+ newmem = (
+ listindexcopy[candidate[0][0]], listindexcopy[candidate[0][1]],
+ mindist)
+ listindexcopy.pop(candidate[0][1])
+ listindexcopy[candidate[0][0]] = newmem
+
+ init_size -= 1
+ for i in range(init_size):
+ for j in range(i+1, init_size):
+ listscopy[i][j] = nearest(
+ lists, listindexcopy[i], listindexcopy[j])
+ listscopy[j][i] = listscopy[i][j]
+ listindexcopy.append(
+ nearest(lists, listindexcopy[0], listindexcopy[1]))
+ return listindexcopy
+ except (LengthError, MirrorError, TypeError, IndexError) as exc:
+ # Look into making the logging log output to the system's
+ # configured logger(s)
+ logging.warning("Exception: %s, %s", type(exc), exc)
+ return []