diff options
author | Frederick Muriuki Muriithi | 2022-05-24 04:46:57 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2022-05-24 05:16:55 +0300 |
commit | 36f8421a8fe223189ab88ee1df3923719ffa4fc0 (patch) | |
tree | 7f79b4f594ec787190c9a7c7dcbdd4bec40a0db1 /scripts | |
parent | 63d9c9932721e98a9d6715686214157e276af105 (diff) | |
download | genenetwork3-36f8421a8fe223189ab88ee1df3923719ffa4fc0.tar.gz |
New script to compute partial correlations
* Add a new script to compute the partial correlations against:
- a select list of traits, or
- an entire dataset
depending on the specified subcommand. This new script is meant to supersede
the `scripts/partial_correlations.py` script.
* Fix the check for errors
* Reorganise the order of arguments for the
`partial_correlations_with_target_traits` function: move the `method`
argument before the `target_trait_names` argument so that the common
arguments in the partial correlation computation functions share the same
order.
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/pcorrs.py | 126 |
1 files changed, 126 insertions, 0 deletions
"""Script to run partial correlations"""

import json
import traceback
from argparse import ArgumentParser

from gn3.db_utils import database_connector
from gn3.responses.pcorrs_responses import OutputEncoder
from gn3.computations.partial_correlations import (
    partial_correlations_with_target_db,
    partial_correlations_with_target_traits)

def cleanup_string(the_str):
    """Strip double-quote, tab, newline, carriage-return and space characters
    from both ends of `the_str`. Characters inside the string are kept."""
    return the_str.strip('"\t\n\r ')

def _split_trait_names(the_str):
    """Split a comma-separated list of trait names into a tuple of names.

    Each name is cleaned up individually so that input such as
    `"A::1, B::2"` does not produce names with stray whitespace or quotes.
    Empty entries (e.g. from a trailing comma) are dropped."""
    cleaned = (cleanup_string(item) for item in the_str.split(","))
    return tuple(name for name in cleaned if name)

def process_common_args(args):
    """Process the CLI arguments shared by all subcommands into the keyword
    arguments common to the partial correlation functions."""
    return {
        "primary_trait_name": cleanup_string(args.primary_trait),
        "control_trait_names": _split_trait_names(args.control_traits),
        "method": cleanup_string(args.method)
    }

def process_trait_args(args):
    """Process arguments to a form usable by the
    `partial_correlations_with_target_traits` function."""
    return {
        **process_common_args(args),
        "target_trait_names": _split_trait_names(args.target_traits)
    }

def process_db_args(args):
    """Process arguments for the `partial_correlations_with_target_db`
    function."""
    return {
        **process_common_args(args),
        "target_db_name": cleanup_string(args.target_database),
        "criteria": args.criteria
    }

def pcorrs_against_traits(dbconn, args):
    """Run partial correlations against selected traits."""
    return partial_correlations_with_target_traits(
        dbconn, **process_trait_args(args))

def pcorrs_against_db(dbconn, args):
    """Run partial correlations against the entire dataset provided."""
    return partial_correlations_with_target_db(dbconn, **process_db_args(args))

def run_pcorrs(dbconn, args):
    """Run the partial correlations function selected by the subcommand.

    `args.func` is set by the subcommand parsers. Any exception raised by the
    computation is converted into a dict with `status` set to "exception" and
    `message` holding the formatted traceback, so that the script always
    emits a JSON document."""
    try:
        return args.func(dbconn, args)
    except Exception: # pylint: disable=broad-except
        return {
            "status": "exception",
            "message": traceback.format_exc()
        }

def against_traits_parser(parent_parser):
    """Parser for command to run partial correlations against selected traits"""
    parser = parent_parser.add_parser(
        "against-traits",
        help="Run partial correlations against a select list of traits")
    parser.add_argument(
        "target_traits",
        help=(
            "The target traits to run the partial correlations against. "
            "This is a comma-separated list of traits' fullnames, in the "
            "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
            "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
        type=str)
    parser.set_defaults(func=pcorrs_against_traits)
    return parent_parser

def against_db_parser(parent_parser):
    """Parser for command to run partial correlations against entire dataset"""
    parser = parent_parser.add_parser(
        "against-db",
        help="Run partial correlations against an entire dataset")
    parser.add_argument(
        "target_database",
        help="The target database to run the partial correlations against",
        type=str)
    parser.add_argument(
        "--criteria",
        help="Number of results to return",
        type=int, default=500)
    parser.set_defaults(func=pcorrs_against_db)
    return parent_parser

def process_cli_arguments(argv=None):
    """Top level parser.

    `argv` is the list of arguments to parse; when it is `None` (the default)
    the arguments are read from `sys.argv`, as before."""
    parser = ArgumentParser()
    parser.add_argument(
        "primary_trait",
        help="The primary trait's full name",
        type=str)
    parser.add_argument(
        "control_traits",
        help="A comma-separated list of traits' full names",
        type=str)
    parser.add_argument(
        "method",
        help="The correlation method to use",
        type=str)
    # `dest` is given explicitly: with `required=True` and no `dest`, some
    # Python versions raise a TypeError instead of printing a usage error
    # when the subcommand is missing.
    against_db_parser(against_traits_parser(
        parser.add_subparsers(
            title="subcommands",
            description="valid subcommands",
            dest="subcommand",
            required=True)))
    return parser.parse_args(argv)

def main():
    """Entry point for the script"""
    args = process_cli_arguments()

    with database_connector() as conn:
        print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))


if __name__ == "__main__":
    main()