diff options
author | Frederick Muriuki Muriithi | 2022-05-24 14:19:57 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-05-24 14:19:57 +0300 |
commit | fdf9061981ce5d341d178951adeb19dd0376ee66 (patch) | |
tree | 0476c726f6ccd8af97423aa520566ddf9864e0d1 /scripts/partial_correlations.py | |
parent | bbc4b165761b359115fd1a249ea22e64d55db384 (diff) | |
download | genenetwork3-fdf9061981ce5d341d178951adeb19dd0376ee66.tar.gz |
Run partial correlations with external script
Use new external script to run the partial correlations for both cases,
i.e.
- against an entire dataset, or
- against selected traits
Diffstat (limited to 'scripts/partial_correlations.py')
-rw-r--r--[-rwxr-xr-x] | scripts/partial_correlations.py | 131 |
1 files changed, 99 insertions, 32 deletions
```diff
diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py
index 52bde4c..de364dc 100755..100644
--- a/scripts/partial_correlations.py
+++ b/scripts/partial_correlations.py
@@ -1,13 +1,99 @@
-import sys
+"""Script to run partial correlations"""
+
 import json
 import traceback
 from argparse import ArgumentParser
 
 from gn3.db_utils import database_connector
 from gn3.responses.pcorrs_responses import OutputEncoder
-from gn3.computations.partial_correlations import partial_correlations_with_target_db
+from gn3.computations.partial_correlations import (
+    partial_correlations_with_target_db,
+    partial_correlations_with_target_traits)
+
+def cleanup_string(the_str):
+    "Remove tab, newline and carriage return characters."
+    return the_str.strip('"\t\n\r ')
+
+def process_common_args(args):
+    "Process the common CLI arguments to a form usable by the functions"
+    return {
+        "primary_trait_name": cleanup_string(args.primary_trait),
+        "control_trait_names": tuple(
+            cleanup_string(args.control_traits).split(",")),
+        "method": cleanup_string(args.method)
+    }
+
+def process_trait_args(args):
+    """Process arguments to a form usable by the
+    `partial_correlations_with_target_traits` function."""
+    return {
+        **process_common_args(args),
+        "target_trait_names": tuple(
+            cleanup_string(args.target_traits).split(","))
+    }
+
+def process_db_args(args):
+    """Process arguments for the `partial_correlations_with_target_db`
+    function."""
+    return {
+        **process_common_args(args),
+        "target_db_name": cleanup_string(args.target_database),
+        "criteria": args.criteria
+    }
+
+def pcorrs_against_traits(dbconn, args):
+    """Run partial correlations agaist selected traits."""
+    return partial_correlations_with_target_traits(
+        dbconn, **process_trait_args(args))
+
+def pcorrs_against_db(dbconn, args):
+    """Run partial correlations agaist the entire dataset provided."""
+    return partial_correlations_with_target_db(dbconn, **process_db_args(args))
+
+def run_pcorrs(dbconn, args):
+    """Run the selected partial correlations function."""
+    try:
+        return args.func(dbconn, args)
+    except Exception as exc: # pylint: disable=[broad-except,unused-variable]
+        return {
+            "status": "exception",
+            "message": traceback.format_exc()
+        }
+
+def against_traits_parser(parent_parser):
+    """Parser for command to run partial correlations against selected traits"""
+    parser = parent_parser.add_parser(
+        "against-traits",
+        help="Run partial correlations against a select list of traits")
+    parser.add_argument(
+        "target_traits",
+        help=(
+            "The target traits to run the partial correlations against. "
+            "This is a comma-separated list of traits' fullnames, in the "
+            "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
+            "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
+        type=str)
+    parser.set_defaults(func=pcorrs_against_traits)
+    return parent_parser
+
+def against_db_parser(parent_parser):
+    """Parser for command to run partial correlations against entire dataset"""
+    parser = parent_parser.add_parser(
+        "against-db",
+        help="Run partial correlations against an entire dataset")
+    parser.add_argument(
+        "target_database",
+        help="The target database to run the partial correlations against",
+        type=str)
+    parser.add_argument(
+        "--criteria",
+        help="Number of results to return",
+        type=int, default=500)
+    parser.set_defaults(func=pcorrs_against_db)
+    return parent_parser
 
 def process_cli_arguments():
+    """Top level parser"""
     parser = ArgumentParser()
     parser.add_argument(
         "primary_trait",
@@ -21,39 +107,20 @@ def process_cli_arguments():
         "method",
         help="The correlation method to use",
         type=str)
-    parser.add_argument(
-        "target_database",
-        help="The target database to run the partial correlations against",
-        type=str)
-    parser.add_argument(
-        "--criteria",
-        help="Number of results to return",
-        type=int, default=500)
+    against_db_parser(against_traits_parser(
+        parser.add_subparsers(
+            title="subcommands",
+            description="valid subcommands",
+            required=True)))
     return parser.parse_args()
 
-def cleanup_string(the_str):
-    return the_str.strip('"\t\n\r ')
+def main():
+    """Entry point for the script"""
+    args = process_cli_arguments()
 
-def run_partial_corrs(args):
     with database_connector() as conn:
-        try:
-            return partial_correlations_with_target_db(
-                conn, cleanup_string(args.primary_trait),
-                tuple(cleanup_string(args.control_traits).split(",")),
-                cleanup_string(args.method), args.criteria,
-                cleanup_string(args.target_database))
-        except Exception as exc:
-            print(traceback.format_exc(), file=sys.stderr)
-            return {
-                "status": "exception",
-                "message": traceback.format_exc()
-            }
-
-def enter():
-    args = process_cli_arguments()
-    print(json.dumps(
-        run_partial_corrs(process_cli_arguments()),
-        cls = OutputEncoder))
+        print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))
+
 
 if __name__ == "__main__":
-    enter()
+    main()
```