about summary refs log tree commit diff
path: root/scripts/partial_correlations.py
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-02-23 14:51:47 +0300
committer Frederick Muriuki Muriithi 2022-03-03 10:20:04 +0300
commit 6d39c92fbc9a7b82cd8eef60c62cd5d83acb49a1 (patch)
tree 7efab53cc8fc367f433ac01ece95b0fbecc858d9 /scripts/partial_correlations.py
parent 8e0fcfa78fcdb5bdd5b49e2b1ac918ae9cc0fc53 (diff)
download genenetwork3-6d39c92fbc9a7b82cd8eef60c62cd5d83acb49a1.tar.gz
Run partial correlations in an external process
Run the partial correlations code in an external Python process, decoupling it
from the server and making it asynchronous.

Summary of changes:
* gn3/api/correlation.py:
  - Remove response processing code
  - Queue partial corrs processing
  - Create new endpoint to get results
* gn3/commands.py
  - Compose the pcorrs command to be run in an external process
  - Enable running of subprocess commands with list args
* gn3/responses/__init__.py: new module indicator file
* gn3/responses/pcorrs_responses.py: Hold response processing code extracted
  from the ~gn3/api/correlation.py~ file
* scripts/partial_correlations.py: CLI script to process the pcorrs
* sheepdog/worker.py:
  - Add the *genenetwork3* path at the beginning of the ~sys.path~ list to
    override any GN3 in the site-packages
  - Add any environment variables to be set for the command to be run
Diffstat (limited to 'scripts/partial_correlations.py')
-rwxr-xr-x scripts/partial_correlations.py 59
1 files changed, 59 insertions, 0 deletions
diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py
new file mode 100755
index 0000000..ee442df
--- /dev/null
+++ b/scripts/partial_correlations.py
@@ -0,0 +1,59 @@
+import sys
+import json
+import traceback
+from argparse import ArgumentParser
+
+from gn3.db_utils import database_connector
+from gn3.responses.pcorrs_responses import OutputEncoder
+from gn3.computations.partial_correlations import partial_correlations_entry
+
+def process_cli_arguments():
+    """Parse the command-line arguments for the partial correlations script.
+
+    Four positional string arguments are required, in this order:
+    `primary_trait`, `control_traits`, `method` and `target_database`.
+    The optional `--criteria` integer defaults to 500.
+
+    Returns the namespace produced by `ArgumentParser.parse_args()`.
+    """
+    # (name, help text) pairs, listed in the order they must appear on the
+    # command line.
+    positional_args = (
+        ("primary_trait", "The primary trait's full name"),
+        ("control_traits", "A comma-separated list of traits' full names"),
+        ("method", "The correlation method to use"),
+        ("target_database",
+         "The target database to run the partial correlations against"))
+    cli_parser = ArgumentParser()
+    for arg_name, arg_help in positional_args:
+        cli_parser.add_argument(arg_name, help=arg_help, type=str)
+    cli_parser.add_argument(
+        "--criteria", help="Number of results to return", type=int,
+        default=500)
+    return cli_parser.parse_args()
+
+def cleanup_string(the_str):
+    """Return `the_str` without surrounding double quotes and whitespace.
+
+    Only the characters `"`, tab, newline, carriage return and space are
+    removed, and only from the two ends of the string.
+    """
+    strippable = '"\t\n\r '
+    return the_str.strip(strippable)
+
+def run_partial_corrs(args):
+    """Compute the partial correlations described by the parsed CLI `args`.
+
+    The string arguments are passed through `cleanup_string`, and
+    `control_traits` is split on commas into a tuple, before everything is
+    handed to `partial_correlations_entry` together with a connection
+    obtained from `database_connector`.
+
+    Returns the value of `partial_correlations_entry` on success. If any
+    `Exception` is raised, the traceback is printed to stderr and a dict with
+    `"status": "exception"` and the traceback text as `"message"` is returned.
+    """
+    try:
+        conn, _cursor_object = database_connector()
+        primary_trait = cleanup_string(args.primary_trait)
+        control_traits = tuple(cleanup_string(args.control_traits).split(","))
+        method = cleanup_string(args.method)
+        criteria = args.criteria
+        target_database = cleanup_string(args.target_database)
+        return partial_correlations_entry(
+            conn, primary_trait, control_traits, method, criteria,
+            target_database)
+    except Exception:
+        # Catch-all on purpose: the failure is handed back to the caller as
+        # data instead of propagating out of this function.
+        error_trace = traceback.format_exc()
+        print(error_trace, file=sys.stderr)
+        return {"status": "exception", "message": error_trace}
+
+def enter():
+    """Script entry point: parse the CLI, run the computation, print JSON.
+
+    The result of `run_partial_corrs` is serialised with `json.dumps` using
+    `OutputEncoder` and written to stdout.
+    """
+    args = process_cli_arguments()
+    # Pass the already-parsed arguments on; the command line was previously
+    # parsed a second time here and `args` was left unused.
+    print(json.dumps(run_partial_corrs(args), cls=OutputEncoder))
+
+# Run the CLI entry point only when this file is executed as a script.
+if __name__ == "__main__":
+    enter()