From 37cf87e94a44ba2ab26a8e458e0c5a90fe5ecc7a Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 08:18:48 +0300 Subject: Retrieve indices of the selected samples Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * gn3/computations/partial_correlations.py: New function (good_dataset_samples_indexes). * tests/unit/computations/test_partial_correlations.py: Tests for new function (good_dataset_samples_indexes) Get the indices of the selected samples. This is a partial migration of the `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` function in GN1. --- gn3/computations/partial_correlations.py | 15 +++++++++++++++ tests/unit/computations/test_partial_correlations.py | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 151143a..ba4de9e 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -177,3 +177,18 @@ def correlations_of_all_tissue_traits( primary_trait_values = tuple(primary_trait_symbol_value_dict.values())[0] return batch_computed_tissue_correlation( primary_trait_values, symbol_value_dict, method) + +def good_dataset_samples_indexes( + samples: Tuple[str, ...], + samples_from_file: Tuple[str, ...]) -> Tuple[int, ...]: + """ + Return the indexes of the items in `samples_from_files` that are also found + in `samples`. + + This is a partial migration of the + `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` + function in GeneNetwork1. + """ + return tuple(sorted( + samples_from_file.index(good) for good in + set(samples).intersection(set(samples_from_file)))) diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py index c4ec79a..f7217a9 100644 --- a/tests/unit/computations/test_partial_correlations.py +++ b/tests/unit/computations/test_partial_correlations.py @@ -6,7 +6,8 @@ from gn3.computations.partial_correlations import ( control_samples, dictify_by_samples, tissue_correlation, - find_identical_traits) + find_identical_traits, + good_dataset_samples_indexes) sampleslist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] control_traits = ( @@ -260,3 +261,13 @@ class TestPartialCorrelations(TestCase): with self.subTest(primary=primary, target=target, method=method): self.assertEqual( tissue_correlation(primary, target, method), expected) + + def test_good_dataset_samples_indexes(self): + """ + Test that `good_dataset_samples_indexes` returns correct indices. + """ + self.assertEqual( + good_dataset_samples_indexes( + ("a", "e", "i", "k"), + ("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l")), + (0, 4, 8, 10)) -- cgit v1.2.3