about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2022-05-24 04:46:57 +0300
committerFrederick Muriuki Muriithi2022-05-24 05:16:55 +0300
commit36f8421a8fe223189ab88ee1df3923719ffa4fc0 (patch)
tree7f79b4f594ec787190c9a7c7dcbdd4bec40a0db1
parent63d9c9932721e98a9d6715686214157e276af105 (diff)
downloadgenenetwork3-36f8421a8fe223189ab88ee1df3923719ffa4fc0.tar.gz
New script to compute partial correlations
* Add a new script to compute the partial correlations against:
  - a select list of traits, or
  - an entire dataset
  depending on the specified subcommand. This new script is meant to supersede
  the `scripts/partial_correlations.py` script.

* Fix the check for errors
* Reorganise the order of arguments for the
  `partial_correlations_with_target_traits` function: move the `method`
  argument before the `target_trait_names` argument so that the common
  arguments in the partial correlation computation functions share the same
  order.
-rw-r--r--README.md2
-rw-r--r--gn3/api/correlation.py4
-rw-r--r--gn3/computations/partial_correlations.py8
-rw-r--r--scripts/pcorrs.py126
4 files changed, 133 insertions, 7 deletions
diff --git a/README.md b/README.md
index b5dfcc8..e5f74da 100644
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ pytest -k unit_test
 Running pylint:
 
 ```bash
-pylint *py tests gn3
+pylint *py tests gn3 scripts sheepdog
 ```
 
 Running mypy(type-checker):
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 5f2d486..3aadcb9 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -148,8 +148,8 @@ def partial_correlation():
             trait_fullname(args["primary_trait"]),
             tuple(
                 trait_fullname(trait) for trait in args["control_traits"]),
+            args["method"],
             tuple(
-                trait_fullname(trait) for trait in args["target_traits"]),
-            args["method"])
+                trait_fullname(trait) for trait in args["target_traits"]))
 
     return build_response({"status": "success", "results": results})
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 530dd71..f316f67 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -681,7 +681,7 @@ def partial_correlations_with_target_db(# pylint: disable=[R0913, R0914, R0911]
 
     check_res = check_for_common_errors(
         conn, primary_trait_name, control_trait_names, threshold)
-    if check_res.get("status") == "error":
+    if check_res.get("status") != "success":
         return check_res
 
     primary_trait = check_res["primary_trait"]
@@ -819,15 +819,15 @@ def partial_correlations_with_target_db(# pylint: disable=[R0913, R0914, R0911]
 
 def partial_correlations_with_target_traits(
         conn: Any, primary_trait_name: str,
-        control_trait_names: Tuple[str, ...],
-        target_trait_names: Tuple[str, ...], method: str) -> dict:
+        control_trait_names: Tuple[str, ...], method: str,
+        target_trait_names: Tuple[str, ...]) -> dict:
     """
     Compute partial correlation against a specific selection of traits.
     """
     threshold = 0
     check_res = check_for_common_errors(
         conn, primary_trait_name, control_trait_names, threshold)
-    if check_res.get("status") == "error":
+    if check_res.get("status") != "success":
         return check_res
 
     target_traits = {
diff --git a/scripts/pcorrs.py b/scripts/pcorrs.py
new file mode 100644
index 0000000..de364dc
--- /dev/null
+++ b/scripts/pcorrs.py
@@ -0,0 +1,126 @@
+"""Script to run partial correlations"""
+
+import json
+import traceback
+from argparse import ArgumentParser
+
+from gn3.db_utils import database_connector
+from gn3.responses.pcorrs_responses import OutputEncoder
+from gn3.computations.partial_correlations import (
+    partial_correlations_with_target_db,
+    partial_correlations_with_target_traits)
+
+def cleanup_string(the_str):
+    "Strip double-quote, tab, newline, CR and space characters from both ends."
+    return the_str.strip('"\t\n\r ')
+
+def process_common_args(args):
+    "Process the common CLI arguments to a form usable by the functions"
+    return {
+        "primary_trait_name": cleanup_string(args.primary_trait),
+        "control_trait_names": tuple(
+            cleanup_string(args.control_traits).split(",")),
+        "method": cleanup_string(args.method)
+    }
+
+def process_trait_args(args):
+    """Process arguments to a form usable by the
+    `partial_correlations_with_target_traits` function."""
+    return {
+        **process_common_args(args),
+        "target_trait_names": tuple(
+            cleanup_string(args.target_traits).split(","))
+    }
+
+def process_db_args(args):
+    """Process arguments for the `partial_correlations_with_target_db`
+    function."""
+    return {
+        **process_common_args(args),
+        "target_db_name": cleanup_string(args.target_database),
+        "criteria": args.criteria
+    }
+
+def pcorrs_against_traits(dbconn, args):
+    """Run partial correlations against selected traits."""
+    return partial_correlations_with_target_traits(
+        dbconn, **process_trait_args(args))
+
+def pcorrs_against_db(dbconn, args):
+    """Run partial correlations against the entire dataset provided."""
+    return partial_correlations_with_target_db(dbconn, **process_db_args(args))
+
+def run_pcorrs(dbconn, args):
+    """Run the selected partial correlations function."""
+    try:
+        return args.func(dbconn, args)
+    except Exception as exc: # pylint: disable=[broad-except,unused-variable]
+        return {
+            "status": "exception",
+            "message": traceback.format_exc()
+        }
+
+def against_traits_parser(parent_parser):
+    """Parser for command to run partial correlations against selected traits"""
+    parser = parent_parser.add_parser(
+        "against-traits",
+        help="Run partial correlations against a select list of traits")
+    parser.add_argument(
+        "target_traits",
+        help=(
+            "The target traits to run the partial correlations against. "
+            "This is a comma-separated list of traits' fullnames, in the "
+            "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
+            "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
+        type=str)
+    parser.set_defaults(func=pcorrs_against_traits)
+    return parent_parser
+
+def against_db_parser(parent_parser):
+    """Parser for command to run partial correlations against entire dataset"""
+    parser = parent_parser.add_parser(
+        "against-db",
+        help="Run partial correlations against an entire dataset")
+    parser.add_argument(
+        "target_database",
+        help="The target database to run the partial correlations against",
+        type=str)
+    parser.add_argument(
+        "--criteria",
+        help="Number of results to return",
+        type=int, default=500)
+    parser.set_defaults(func=pcorrs_against_db)
+    return parent_parser
+
+def process_cli_arguments():
+    """Top level parser"""
+    parser = ArgumentParser()
+    parser.add_argument(
+        "primary_trait",
+        help="The primary trait's full name",
+        type=str)
+    parser.add_argument(
+        "control_traits",
+        help="A comma-separated list of traits' full names",
+        type=str)
+    parser.add_argument(
+        "method",
+        help="The correlation method to use",
+        type=str)
+    against_db_parser(against_traits_parser(
+        parser.add_subparsers(
+            title="subcommands",
+            description="valid subcommands",
+            required=True)))
+    return parser.parse_args()
+
+def main():
+    """Entry point for the script"""
+    args = process_cli_arguments()
+
+    with database_connector() as conn:
+        print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))
+
+
+if __name__ == "__main__":
+    main()