From fdf9061981ce5d341d178951adeb19dd0376ee66 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Tue, 24 May 2022 14:19:57 +0300 Subject: Run partial correlations with external script Use new external script to run the partial correlations for both cases, i.e. - against an entire dataset, or - against selected traits --- gn3/api/correlation.py | 41 ++++++------- gn3/commands.py | 36 ++++++++--- scripts/partial_correlations.py | 131 ++++++++++++++++++++++++++++++---------- scripts/pcorrs.py | 126 -------------------------------------- 4 files changed, 145 insertions(+), 189 deletions(-) mode change 100755 => 100644 scripts/partial_correlations.py delete mode 100644 scripts/pcorrs.py diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index 3aadcb9..1667302 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -16,8 +16,6 @@ from gn3.computations.correlations import map_shared_keys_to_values from gn3.computations.correlations import compute_tissue_correlation from gn3.computations.correlations import compute_all_lit_correlation from gn3.computations.correlations import compute_all_sample_correlation -from gn3.computations.partial_correlations import ( - partial_correlations_with_target_traits) correlation = Blueprint("correlation", __name__) @@ -124,16 +122,25 @@ def partial_correlation(): "messages": request_errors, "error_type": "Client Error"}) - if with_target_db: - with redis.Redis() as conn: - queueing_results = run_async_cmd( + with redis.Redis() as conn: + if with_target_db: + command = compose_pcorrs_command( + trait_fullname(args["primary_trait"]), + tuple( + trait_fullname(trait) for trait in args["control_traits"]), + args["method"], target_database=args["target_db"], + criteria = int(args.get("criteria", 500))) + else: + command = compose_pcorrs_command( + trait_fullname(args["primary_trait"]), + tuple( + trait_fullname(trait) for trait in args["control_traits"]), + args["method"], target_traits=tuple( + trait_fullname(trait) for trait in args["target_traits"])) + + queueing_results = run_async_cmd( conn=conn, - cmd=compose_pcorrs_command( - trait_fullname(args["primary_trait"]), - tuple( - trait_fullname(trait) for trait in args["control_traits"]), - args["method"], args["target_db"], - int(args.get("criteria", 500))), + cmd=command, job_queue=current_app.config.get("REDIS_JOB_QUEUE"), env = {"PYTHONPATH": ":".join(sys.path), "SQL_URI": SQL_URI}) return build_response({ @@ -141,15 +148,3 @@ def partial_correlation(): "results": queueing_results, "queued": True }) - - with database_connector() as conn: - results = partial_correlations_with_target_traits( - conn, - trait_fullname(args["primary_trait"]), - tuple( - trait_fullname(trait) for trait in args["control_traits"]), - args["method"], - tuple( - trait_fullname(trait) for trait in args["target_traits"])) - - return build_response({"status": "success", "results": results}) diff --git a/gn3/commands.py b/gn3/commands.py index 9684eeb..b1b3fc7 100644 --- a/gn3/commands.py +++ b/gn3/commands.py @@ -1,6 +1,5 @@ """Procedures used to work with the various bio-informatics cli commands""" -import os import sys import json import subprocess @@ -51,15 +50,36 @@ def compose_rqtl_cmd(rqtl_wrapper_cmd: str, return cmd +def compose_pcorrs_command_for_selected_traits( + prefix_cmd: Tuple[str, ...], target_traits: Tuple[str, ...]) -> Tuple[ + str, ...]: + """Build command for partial correlations against selected traits.""" + return prefix_cmd + ("against-traits", ",".join(target_traits)) + +def compose_pcorrs_command_for_database( + prefix_cmd: Tuple[str, ...], target_database: str, + criteria: int = 500) -> Tuple[str, ...]: + """Build command for partial correlations against an entire dataset.""" + return prefix_cmd + ( + "against-db", f"{target_database}", f"--criteria={criteria}") + def compose_pcorrs_command( primary_trait: str, control_traits: Tuple[str, ...], method: str, - target_database: str, criteria: int = 500): + **kwargs): """Compose the command to run partias correlations""" - rundir = os.path.abspath(".") - return ( - f"{sys.executable}", f"{rundir}/scripts/partial_correlations.py", - primary_trait, ",".join(control_traits), f'"{method}"', - f"{target_database}", f"--criteria={criteria}") + print(f"KWARGS: {kwargs}") + prefix_cmd = ( + f"{sys.executable}", "-m", "scripts.partial_correlations", + primary_trait, ",".join(control_traits), f'"{method}"') + if ( + kwargs.get("target_database") is not None + and kwargs.get("target_traits") is None): + return compose_pcorrs_command_for_database(prefix_cmd, **kwargs) + if ( + kwargs.get("target_database") is None + and kwargs.get("target_traits") is not None): + return compose_pcorrs_command_for_selected_traits(prefix_cmd, **kwargs) + raise Exception("Invalid state: I don't know what command to generate!") def queue_cmd(conn: Redis, job_queue: str, @@ -111,5 +131,5 @@ def run_async_cmd( """A utility function to call `gn3.commands.queue_cmd` function and run the worker in the `one-shot` mode.""" cmd_id = queue_cmd(conn, job_queue, cmd, email, env) - subprocess.Popen(["python3", "sheepdog/worker.py"]) # pylint: disable=[consider-using-with] + subprocess.Popen([f"{sys.executable}", "-m", "sheepdog.worker"]) # pylint: disable=[consider-using-with] return cmd_id diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py old mode 100755 new mode 100644 index 52bde4c..de364dc --- a/scripts/partial_correlations.py +++ b/scripts/partial_correlations.py @@ -1,13 +1,99 @@ -import sys +"""Script to run partial correlations""" + import json import traceback from argparse import ArgumentParser from gn3.db_utils import database_connector from gn3.responses.pcorrs_responses import OutputEncoder -from gn3.computations.partial_correlations import partial_correlations_with_target_db +from gn3.computations.partial_correlations import ( + partial_correlations_with_target_db, + partial_correlations_with_target_traits) + +def cleanup_string(the_str): + "Remove tab, newline and carriage return characters." + return the_str.strip('"\t\n\r ') + +def process_common_args(args): + "Process the common CLI arguments to a form usable by the functions" + return { + "primary_trait_name": cleanup_string(args.primary_trait), + "control_trait_names": tuple( + cleanup_string(args.control_traits).split(",")), + "method": cleanup_string(args.method) + } + +def process_trait_args(args): + """Process arguments to a form usable by the + `partial_correlations_with_target_traits` function.""" + return { + **process_common_args(args), + "target_trait_names": tuple( + cleanup_string(args.target_traits).split(",")) + } + +def process_db_args(args): + """Process arguments for the `partial_correlations_with_target_db` + function.""" + return { + **process_common_args(args), + "target_db_name": cleanup_string(args.target_database), + "criteria": args.criteria + } + +def pcorrs_against_traits(dbconn, args): + """Run partial correlations agaist selected traits.""" + return partial_correlations_with_target_traits( + dbconn, **process_trait_args(args)) + +def pcorrs_against_db(dbconn, args): + """Run partial correlations agaist the entire dataset provided.""" + return partial_correlations_with_target_db(dbconn, **process_db_args(args)) + +def run_pcorrs(dbconn, args): + """Run the selected partial correlations function.""" + try: + return args.func(dbconn, args) + except Exception as exc: # pylint: disable=[broad-except,unused-variable] + return { + "status": "exception", + "message": traceback.format_exc() + } + +def against_traits_parser(parent_parser): + """Parser for command to run partial correlations against selected traits""" + parser = parent_parser.add_parser( + "against-traits", + help="Run partial correlations against a select list of traits") + parser.add_argument( + "target_traits", + help=( + "The target traits to run the partial correlations against. " + "This is a comma-separated list of traits' fullnames, in the " + "format :: e.g. " + "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"), + type=str) + parser.set_defaults(func=pcorrs_against_traits) + return parent_parser + +def against_db_parser(parent_parser): + """Parser for command to run partial correlations against entire dataset""" + parser = parent_parser.add_parser( + "against-db", + help="Run partial correlations against an entire dataset") + parser.add_argument( + "target_database", + help="The target database to run the partial correlations against", + type=str) + parser.add_argument( + "--criteria", + help="Number of results to return", + type=int, default=500) + parser.set_defaults(func=pcorrs_against_db) + return parent_parser def process_cli_arguments(): + """Top level parser""" parser = ArgumentParser() parser.add_argument( "primary_trait", @@ -21,39 +107,20 @@ def process_cli_arguments(): "method", help="The correlation method to use", type=str) - parser.add_argument( - "target_database", - help="The target database to run the partial correlations against", - type=str) - parser.add_argument( - "--criteria", - help="Number of results to return", - type=int, default=500) + against_db_parser(against_traits_parser( + parser.add_subparsers( + title="subcommands", + description="valid subcommands", + required=True))) return parser.parse_args() -def cleanup_string(the_str): - return the_str.strip('"\t\n\r ') +def main(): + """Entry point for the script""" + args = process_cli_arguments() -def run_partial_corrs(args): with database_connector() as conn: - try: - return partial_correlations_with_target_db( - conn, cleanup_string(args.primary_trait), - tuple(cleanup_string(args.control_traits).split(",")), - cleanup_string(args.method), args.criteria, - cleanup_string(args.target_database)) - except Exception as exc: - print(traceback.format_exc(), file=sys.stderr) - return { - "status": "exception", - "message": traceback.format_exc() - } - -def enter(): - args = process_cli_arguments() - print(json.dumps( - run_partial_corrs(process_cli_arguments()), - cls = OutputEncoder)) + print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder)) + if __name__ == "__main__": - enter() + main() diff --git a/scripts/pcorrs.py b/scripts/pcorrs.py deleted file mode 100644 index de364dc..0000000 --- a/scripts/pcorrs.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Script to run partial correlations""" - -import json -import traceback -from argparse import ArgumentParser - -from gn3.db_utils import database_connector -from gn3.responses.pcorrs_responses import OutputEncoder -from gn3.computations.partial_correlations import ( - partial_correlations_with_target_db, - partial_correlations_with_target_traits) - -def cleanup_string(the_str): - "Remove tab, newline and carriage return characters." - return the_str.strip('"\t\n\r ') - -def process_common_args(args): - "Process the common CLI arguments to a form usable by the functions" - return { - "primary_trait_name": cleanup_string(args.primary_trait), - "control_trait_names": tuple( - cleanup_string(args.control_traits).split(",")), - "method": cleanup_string(args.method) - } - -def process_trait_args(args): - """Process arguments to a form usable by the - `partial_correlations_with_target_traits` function.""" - return { - **process_common_args(args), - "target_trait_names": tuple( - cleanup_string(args.target_traits).split(",")) - } - -def process_db_args(args): - """Process arguments for the `partial_correlations_with_target_db` - function.""" - return { - **process_common_args(args), - "target_db_name": cleanup_string(args.target_database), - "criteria": args.criteria - } - -def pcorrs_against_traits(dbconn, args): - """Run partial correlations agaist selected traits.""" - return partial_correlations_with_target_traits( - dbconn, **process_trait_args(args)) - -def pcorrs_against_db(dbconn, args): - """Run partial correlations agaist the entire dataset provided.""" - return partial_correlations_with_target_db(dbconn, **process_db_args(args)) - -def run_pcorrs(dbconn, args): - """Run the selected partial correlations function.""" - try: - return args.func(dbconn, args) - except Exception as exc: # pylint: disable=[broad-except,unused-variable] - return { - "status": "exception", - "message": traceback.format_exc() - } - -def against_traits_parser(parent_parser): - """Parser for command to run partial correlations against selected traits""" - parser = parent_parser.add_parser( - "against-traits", - help="Run partial correlations against a select list of traits") - parser.add_argument( - "target_traits", - help=( - "The target traits to run the partial correlations against. " - "This is a comma-separated list of traits' fullnames, in the " - "format :: e.g. " - "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"), - type=str) - parser.set_defaults(func=pcorrs_against_traits) - return parent_parser - -def against_db_parser(parent_parser): - """Parser for command to run partial correlations against entire dataset""" - parser = parent_parser.add_parser( - "against-db", - help="Run partial correlations against an entire dataset") - parser.add_argument( - "target_database", - help="The target database to run the partial correlations against", - type=str) - parser.add_argument( - "--criteria", - help="Number of results to return", - type=int, default=500) - parser.set_defaults(func=pcorrs_against_db) - return parent_parser - -def process_cli_arguments(): - """Top level parser""" - parser = ArgumentParser() - parser.add_argument( - "primary_trait", - help="The primary trait's full name", - type=str) - parser.add_argument( - "control_traits", - help="A comma-separated list of traits' full names", - type=str) - parser.add_argument( - "method", - help="The correlation method to use", - type=str) - against_db_parser(against_traits_parser( - parser.add_subparsers( - title="subcommands", - description="valid subcommands", - required=True))) - return parser.parse_args() - -def main(): - """Entry point for the script""" - args = process_cli_arguments() - - with database_connector() as conn: - print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder)) - - -if __name__ == "__main__": - main() -- cgit v1.2.3