diff options
author | Frederick Muriuki Muriithi | 2022-05-21 12:09:08 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-05-21 12:13:12 +0300 |
commit | 3e80ab10048d82069a86b29930d48738d4a6484f (patch) | |
tree | 059491d3616620d09ddb3a5a415117db365ad1f2 /gn3/computations | |
parent | 5a5d7e397401f98269cdc729f27ce917bac9280d (diff) | |
download | genenetwork3-3e80ab10048d82069a86b29930d48738d4a6484f.tar.gz |
Process primary, target and control traits in a single iteration
Rework the code to process the traits in a single iteration to improve
performance.
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/partial_correlations.py | 40 |
1 file changed, 25 insertions, 15 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 2720316..c12b4ec 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -266,21 +266,31 @@ def compute_trait_info(primary_vals, control_vals, target, method):
     """
     targ_vals = target[0]
     targ_name = target[1]
-    primary = [
-        prim for targ, prim in zip(targ_vals, primary_vals)
-        if targ is not None]
-
-    if len(primary) < 3:
+    def __remove_nones__(acc, items):
+        prim, targ, *conts = items
+        if targ is None:
+            return acc
+        old_conts = acc["controls"]
+        return {
+            "primary": acc["primary"] + [prim],
+            "targets": acc["targets"] + [targ],
+            "controls": [
+                old_conts[idx] + [cont]
+                for idx, cont in enumerate(conts)
+            ]
+        }
+    processed = reduce(
+        __remove_nones__, zip(primary_vals, targ_vals, *control_vals),
+        {
+            "primary":[], "targets": [],
+            "controls": [[] for idx in range(0, len(control_vals))]
+        })
+
+    if len(processed["primary"]) < 4:
         return None
 
-    def __remove_controls_for_target_nones(cont_targ):
-        return tuple(cont for cont, targ in cont_targ if targ is not None)
-
     datafrm = build_data_frame(
-        primary,
-        [targ for targ in targ_vals if targ is not None],
-        [__remove_controls_for_target_nones(tuple(zip(control, targ_vals)))
-         for control in control_vals])
+        processed["primary"], processed["targets"], processed["controls"])
     covariates = "z" if datafrm.shape[1] == 3 else [
         col for col in datafrm.columns if col not in ("x", "y")]
     ppc = pingouin.partial_corr(
@@ -294,10 +304,10 @@ def compute_trait_info(primary_vals, control_vals, target, method):
 
     if math.isnan(pc_coeff):
         return (
-            targ_name, len(primary), pc_coeff, 1, zero_order_corr["r"][0],
-            zero_order_corr["p-val"][0])
+            targ_name, len(processed["primary"]), pc_coeff, 1,
+            zero_order_corr["r"][0], zero_order_corr["p-val"][0])
     return (
-        targ_name, len(primary), pc_coeff,
+        targ_name, len(processed["primary"]), pc_coeff,
         (ppc["p-val"][0] if not math.isnan(ppc["p-val"][0]) else (
             0 if (abs(pc_coeff - 1) < 0.0000001) else 1)),
         zero_order_corr["r"][0], zero_order_corr["p-val"][0])