diff options
author | Frederick Muriuki Muriithi | 2022-03-25 08:45:47 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-03-29 04:17:36 +0300 |
commit | 5b0259c1d9c8735341e2ee19006c48aea44d7988 (patch) | |
tree | f79a2f1affdaa87d64ef4843bba661899361efbf | |
parent | b93b22386056347d8002dd2e403425beeb4657cd (diff) | |
download | genenetwork3-5b0259c1d9c8735341e2ee19006c48aea44d7988.tar.gz |
Remove unused module
* Remove a module that is no longer in use
-rw-r--r-- | gn3/computations/partial_correlations_optimised.py | 244 |
1 files changed, 0 insertions, 244 deletions
# Reconstructed from the deleted file
# gn3/computations/partial_correlations_optimised.py (blob 601289c).
"""
This contains an optimised version of the
 `gn3.computations.partial_correlations.partial_correlations_entry`
function.
"""
from functools import partial
from operator import itemgetter
from typing import Any, Callable, Tuple

from gn3.settings import TEXTDIR
from gn3.function_helpers import compose
from gn3.db.partial_correlations import traits_info, traits_data
from gn3.db.species import species_name, translate_to_mouse_gene_id
from gn3.db.traits import export_informative, retrieve_trait_dataset
from gn3.db.correlations import (
    get_filename,
    check_for_literature_info,
    check_symbol_for_tissue_correlation)
from gn3.computations.partial_correlations import (
    fix_samples,
    partial_corrs,
    control_samples,
    trait_for_output,
    find_identical_traits,
    tissue_correlation_by_list,
    literature_correlation_by_list)

# Lower-cased method names that get special handling below.
_LITERATURE_METHOD = "sgo literature correlation"
_TISSUE_METHODS = (
    "tissue correlation, pearson's r",
    "tissue correlation, spearman's rho")


def _error(message: str, error_type: str) -> dict:
    """Build the error payload returned by `partial_correlations_entry`."""
    return {"status": "error", "message": message, "error_type": error_type}


def _target_dataset_type(target_db_name: str) -> str:
    """Pick the dataset type from the marker found in `target_db_name`."""
    # Order matters: the first marker found in the name wins.
    for dataset_type in ("Temp", "Publish", "Geno"):
        if dataset_type in target_db_name:
            return dataset_type
    return "ProbeSet"


def _sort_key(method: str) -> Callable:
    """Return the key function used to order the correlation rows."""
    lowered = method.lower()
    # Literature and tissue methods sort on column 6, everything else on
    # column 3 (stored below as `partial_corr_p_value`).
    if "literature" in lowered or "tissue" in lowered:
        return itemgetter(6)
    return itemgetter(3)


def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
        conn: Any, primary_trait_name: str,
        control_trait_names: Tuple[str, ...], method: str,
        criteria: int, target_db_name: str) -> dict:
    """
    This is the 'orchestration' function for the partial-correlation feature.

    This function will dispatch the functions doing data fetches from the
    database (and various other places) and feed that data to the functions
    doing the conversions and computations. It will then return the results of
    all of that work.

    Arguments:
    conn: the database connection handed to every data-fetching helper.
    primary_trait_name: full name of the primary trait.
    control_trait_names: full names of the control traits.
    method: the correlation method name; it is compared case-insensitively.
    criteria: the maximum number of sorted correlation rows to keep.
    target_db_name: name of the dataset to correlate against.

    Returns a dict whose "status" key is either "error" (accompanied by the
    "message" and "error_type" keys) or "success" (accompanied by a "results"
    key holding the primary trait, the control traits, the correlations, the
    target dataset type and the method).

    This function is doing way too much. Look into splitting out the
    functionality into smaller functions that do fewer things.
    """
    threshold = 0
    corr_min_informative = 4
    method_lower = method.lower()

    # Fetch the information and data for all the traits in one go.
    all_traits = traits_info(
        conn, threshold, (primary_trait_name,) + control_trait_names)
    all_traits_data = traits_data(conn, all_traits)

    primary_trait = tuple(
        trait for trait in all_traits
        if trait["trait_fullname"] == primary_trait_name)[0]
    group = primary_trait["db"]["group"]
    primary_trait_data = all_traits_data[primary_trait["trait_name"]]
    primary_samples, primary_values, _primary_variances = export_informative(
        primary_trait_data)

    cntrl_traits = tuple(
        trait for trait in all_traits
        if trait["trait_fullname"] != primary_trait_name)
    cntrl_traits_data = tuple(
        data for trait_name, data in all_traits_data.items()
        if trait_name != primary_trait["trait_name"])
    species = species_name(conn, group)

    (cntrl_samples,
     cntrl_values,
     _cntrl_variances,
     _cntrl_ns) = control_samples(cntrl_traits_data, primary_samples)

    common_primary_control_samples = primary_samples
    fixed_primary_vals = primary_values
    fixed_control_vals = cntrl_values
    # Only recompute the samples and values when some control trait's samples
    # differ from those of the primary trait.
    if not all(cnt_smp == primary_samples for cnt_smp in cntrl_samples):
        (common_primary_control_samples,
         fixed_primary_vals,
         fixed_control_vals,
         _primary_variances,
         _cntrl_variances) = fix_samples(primary_trait, cntrl_traits)

    if len(common_primary_control_samples) < corr_min_informative:
        return _error(
            (f"Fewer than {corr_min_informative} samples data entered for "
             f"{group} dataset. No calculation of correlation has been "
             "attempted."),
            "Inadequate Samples")

    identical_traits_names = find_identical_traits(
        primary_trait_name, primary_values, control_trait_names, cntrl_values)
    if len(identical_traits_names) > 0:
        return _error(
            (f"{identical_traits_names[0]} and {identical_traits_names[1]} "
             f"have the same values for the {len(fixed_primary_vals)} "
             "samples that will be used to compute the partial correlation "
             "(common for all primary and control traits). In such cases, "
             "partial correlation cannot be computed. Please re-select your "
             "traits."),
            "Identical Traits")

    input_trait_geneid = primary_trait.get("geneid", 0)
    input_trait_symbol = primary_trait.get("symbol", "")
    input_trait_mouse_geneid = translate_to_mouse_gene_id(
        species, input_trait_geneid, conn)

    tissue_probeset_freeze_id = 1
    db_type = primary_trait["db"]["dataset_type"]

    # NOTE(review): this rejects literature/tissue methods when the primary
    # trait's dataset type IS "ProbeSet" -- confirm that `==` (rather than
    # `!=`) is the intended comparison.
    if db_type == "ProbeSet" and method_lower in (
            (_LITERATURE_METHOD,) + _TISSUE_METHODS):
        return _error(
            ("Wrong correlation type: It is not possible to compute the "
             f"{method} between your trait and data in the {target_db_name} "
             "database. Please try again after selecting another type of "
             "correlation."),
            "Correlation Type")

    # NOTE(review): the error fires when `check_for_literature_info` is
    # truthy -- confirm the helper's return convention; if it is truthy when
    # literature information exists, this condition is inverted.
    if (method_lower == _LITERATURE_METHOD and (
            bool(input_trait_geneid) is False or
            check_for_literature_info(conn, input_trait_mouse_geneid))):
        return _error(
            ("No Literature Information: This gene does not have any "
             "associated Literature Information."),
            "Literature Correlation")

    if method_lower in _TISSUE_METHODS:
        # NOTE(review): as above, the error fires when
        # `check_symbol_for_tissue_correlation` is truthy -- confirm the
        # helper's return convention. The symbol check short-circuits, so the
        # database is only queried when there is a symbol.
        if (bool(input_trait_symbol) is False or
                check_symbol_for_tissue_correlation(
                    conn, tissue_probeset_freeze_id, input_trait_symbol)):
            return _error(
                ("No Tissue Correlation Information: This gene does not have "
                 "any associated Tissue Correlation Information."),
                "Tissue Correlation")

    target_dataset = retrieve_trait_dataset(
        _target_dataset_type(target_db_name),
        {"db": {"dataset_name": target_db_name}, "trait_name": "_"},
        threshold,
        conn)

    database_filename = get_filename(conn, target_db_name, TEXTDIR)
    _total_traits, all_correlations = partial_corrs(
        conn, common_primary_control_samples, fixed_primary_vals,
        fixed_control_vals, len(fixed_primary_vals), species,
        input_trait_geneid, input_trait_symbol, tissue_probeset_freeze_id,
        method, {**target_dataset, "dataset_type": target_dataset["type"]},
        database_filename)

    add_lit_corr_and_tiss_corr = compose(
        partial(literature_correlation_by_list, conn, species),
        partial(
            tissue_correlation_by_list, conn, input_trait_symbol,
            tissue_probeset_freeze_id, method))

    def _fullname(item):
        # Same "<dataset>::<trait>" form that is passed to `traits_info`
        # below, so the lookup by "trait_fullname" can actually match.
        return f"{target_dataset['dataset_name']}::{item[0]}"

    # Slicing already clamps to the sequence length.
    selected_results = sorted(
        all_correlations, key=_sort_key(method))[:criteria]
    traits_list_corr_info = {
        _fullname(item): {
            "noverlap": item[1],
            "partial_corr": item[2],
            "partial_corr_p_value": item[3],
            "corr": item[4],
            "corr_p_value": item[5],
            "rank_order": (1 if "spearman" in method_lower else 0),
            # 8-column rows carry tissue values, 7-column rows carry the
            # literature correlation.
            **({
                "tissue_corr": item[6],
                "tissue_p_value": item[7]}
               if len(item) == 8 else {}),
            **({"l_corr": item[6]}
               if len(item) == 7 else {})
        } for item in selected_results}

    trait_list = add_lit_corr_and_tiss_corr(tuple(
        {**trait, **traits_list_corr_info.get(trait["trait_fullname"], {})}
        for trait in traits_info(
            conn, threshold,
            tuple(_fullname(item) for item in selected_results))))

    return {
        "status": "success",
        "results": {
            "primary_trait": trait_for_output(primary_trait),
            "control_traits": tuple(
                trait_for_output(trait) for trait in cntrl_traits),
            "correlations": tuple(
                trait_for_output(trait) for trait in trait_list),
            "dataset_type": target_dataset["type"],
            "method": "spearman" if "spearman" in method_lower else "pearson"
        }}