From 6d39c92fbc9a7b82cd8eef60c62cd5d83acb49a1 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 23 Feb 2022 14:51:47 +0300 Subject: Run partial correlations in an external process Run the partial correlations code in an external python process decoupling it from the server and making it asynchronous. Summary of changes: * gn3/api/correlation.py: - Remove response processing code - Queue partial corrs processing - Create new endpoint to get results * gn3/commands.py - Compose the pcorrs command to be run in an external process - Enable running of subprocess commands with list args * gn3/responses/__init__.py: new module indicator file * gn3/responses/pcorrs_responses.py: Hold response processing code extracted from ~gn3.api.correlations.py~ file * scripts/partial_correlations.py: CLI script to process the pcorrs * sheepdog/worker.py: - Add the *genenetwork3* path at the beginning of the ~sys.path~ list to override any GN3 in the site-packages - Add any environment variables to be set for the command to be run --- scripts/partial_correlations.py | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100755 scripts/partial_correlations.py (limited to 'scripts') diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py new file mode 100755 index 0000000..ee442df --- /dev/null +++ b/scripts/partial_correlations.py @@ -0,0 +1,59 @@ +import sys +import json +import traceback +from argparse import ArgumentParser + +from gn3.db_utils import database_connector +from gn3.responses.pcorrs_responses import OutputEncoder +from gn3.computations.partial_correlations import partial_correlations_entry + +def process_cli_arguments(): + parser = ArgumentParser() + parser.add_argument( + "primary_trait", + help="The primary trait's full name", + type=str) + parser.add_argument( + "control_traits", + help="A comma-separated list of traits' full names", + type=str) + parser.add_argument( + "method", + help="The correlation method to use", + type=str) + parser.add_argument( + "target_database", + help="The target database to run the partial correlations against", + type=str) + parser.add_argument( + "--criteria", + help="Number of results to return", + type=int, default=500) + return parser.parse_args() + +def cleanup_string(the_str): + return the_str.strip('"\t\n\r ') + +def run_partial_corrs(args): + try: + conn, _cursor_object = database_connector() + return partial_correlations_entry( + conn, cleanup_string(args.primary_trait), + tuple(cleanup_string(args.control_traits).split(",")), + cleanup_string(args.method), args.criteria, + cleanup_string(args.target_database)) + except Exception as exc: + print(traceback.format_exc(), file=sys.stderr) + return { + "status": "exception", + "message": traceback.format_exc() + } + +def enter(): + args = process_cli_arguments() + print(json.dumps( + run_partial_corrs(process_cli_arguments()), + cls = OutputEncoder)) + +if __name__ == "__main__": + enter() -- cgit v1.2.3