about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-05-24 14:19:57 +0300
committer Frederick Muriuki Muriithi 2022-05-24 14:19:57 +0300
commit fdf9061981ce5d341d178951adeb19dd0376ee66 (patch)
tree 0476c726f6ccd8af97423aa520566ddf9864e0d1
parent bbc4b165761b359115fd1a249ea22e64d55db384 (diff)
download genenetwork3-fdf9061981ce5d341d178951adeb19dd0376ee66.tar.gz
Run partial correlations with external script
Use the new external script to run the partial correlations in both cases, i.e. against an entire dataset, or against selected traits.
-rw-r--r-- gn3/api/correlation.py 41
-rw-r--r-- gn3/commands.py 36
-rw-r--r--[-rwxr-xr-x] scripts/partial_correlations.py 131
-rw-r--r-- scripts/pcorrs.py 126
4 files changed, 145 insertions, 189 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 3aadcb9..1667302 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -16,8 +16,6 @@ from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
from gn3.computations.correlations import compute_all_sample_correlation
-from gn3.computations.partial_correlations import (
- partial_correlations_with_target_traits)
correlation = Blueprint("correlation", __name__)
@@ -124,16 +122,25 @@ def partial_correlation():
"messages": request_errors,
"error_type": "Client Error"})
- if with_target_db:
- with redis.Redis() as conn:
- queueing_results = run_async_cmd(
+ with redis.Redis() as conn:
+ if with_target_db:
+ command = compose_pcorrs_command(
+ trait_fullname(args["primary_trait"]),
+ tuple(
+ trait_fullname(trait) for trait in args["control_traits"]),
+ args["method"], target_database=args["target_db"],
+ criteria = int(args.get("criteria", 500)))
+ else:
+ command = compose_pcorrs_command(
+ trait_fullname(args["primary_trait"]),
+ tuple(
+ trait_fullname(trait) for trait in args["control_traits"]),
+ args["method"], target_traits=tuple(
+ trait_fullname(trait) for trait in args["target_traits"]))
+
+ queueing_results = run_async_cmd(
conn=conn,
- cmd=compose_pcorrs_command(
- trait_fullname(args["primary_trait"]),
- tuple(
- trait_fullname(trait) for trait in args["control_traits"]),
- args["method"], args["target_db"],
- int(args.get("criteria", 500))),
+ cmd=command,
job_queue=current_app.config.get("REDIS_JOB_QUEUE"),
env = {"PYTHONPATH": ":".join(sys.path), "SQL_URI": SQL_URI})
return build_response({
@@ -141,15 +148,3 @@ def partial_correlation():
"results": queueing_results,
"queued": True
})
-
- with database_connector() as conn:
- results = partial_correlations_with_target_traits(
- conn,
- trait_fullname(args["primary_trait"]),
- tuple(
- trait_fullname(trait) for trait in args["control_traits"]),
- args["method"],
- tuple(
- trait_fullname(trait) for trait in args["target_traits"]))
-
- return build_response({"status": "success", "results": results})
diff --git a/gn3/commands.py b/gn3/commands.py
index 9684eeb..b1b3fc7 100644
--- a/gn3/commands.py
+++ b/gn3/commands.py
@@ -1,6 +1,5 @@
"""Procedures used to work with the various bio-informatics cli
commands"""
-import os
import sys
import json
import subprocess
@@ -51,15 +50,36 @@ def compose_rqtl_cmd(rqtl_wrapper_cmd: str,
return cmd
+def compose_pcorrs_command_for_selected_traits(
+ prefix_cmd: Tuple[str, ...], target_traits: Tuple[str, ...]) -> Tuple[
+ str, ...]:
+ """Build command for partial correlations against selected traits."""
+ return prefix_cmd + ("against-traits", ",".join(target_traits))
+
+def compose_pcorrs_command_for_database(
+ prefix_cmd: Tuple[str, ...], target_database: str,
+ criteria: int = 500) -> Tuple[str, ...]:
+ """Build command for partial correlations against an entire dataset."""
+ return prefix_cmd + (
+ "against-db", f"{target_database}", f"--criteria={criteria}")
+
def compose_pcorrs_command(
primary_trait: str, control_traits: Tuple[str, ...], method: str,
- target_database: str, criteria: int = 500):
+ **kwargs):
"""Compose the command to run partias correlations"""
- rundir = os.path.abspath(".")
- return (
- f"{sys.executable}", f"{rundir}/scripts/partial_correlations.py",
- primary_trait, ",".join(control_traits), f'"{method}"',
- f"{target_database}", f"--criteria={criteria}")
+ print(f"KWARGS: {kwargs}")
+ prefix_cmd = (
+ f"{sys.executable}", "-m", "scripts.partial_correlations",
+ primary_trait, ",".join(control_traits), f'"{method}"')
+ if (
+ kwargs.get("target_database") is not None
+ and kwargs.get("target_traits") is None):
+ return compose_pcorrs_command_for_database(prefix_cmd, **kwargs)
+ if (
+ kwargs.get("target_database") is None
+ and kwargs.get("target_traits") is not None):
+ return compose_pcorrs_command_for_selected_traits(prefix_cmd, **kwargs)
+ raise Exception("Invalid state: I don't know what command to generate!")
def queue_cmd(conn: Redis,
job_queue: str,
@@ -111,5 +131,5 @@ def run_async_cmd(
"""A utility function to call `gn3.commands.queue_cmd` function and run the
worker in the `one-shot` mode."""
cmd_id = queue_cmd(conn, job_queue, cmd, email, env)
- subprocess.Popen(["python3", "sheepdog/worker.py"]) # pylint: disable=[consider-using-with]
+ subprocess.Popen([f"{sys.executable}", "-m", "sheepdog.worker"]) # pylint: disable=[consider-using-with]
return cmd_id
diff --git a/scripts/partial_correlations.py b/scripts/partial_correlations.py
index 52bde4c..de364dc 100755..100644
--- a/scripts/partial_correlations.py
+++ b/scripts/partial_correlations.py
@@ -1,13 +1,99 @@
-import sys
+"""Script to run partial correlations"""
+
import json
import traceback
from argparse import ArgumentParser
from gn3.db_utils import database_connector
from gn3.responses.pcorrs_responses import OutputEncoder
-from gn3.computations.partial_correlations import partial_correlations_with_target_db
+from gn3.computations.partial_correlations import (
+ partial_correlations_with_target_db,
+ partial_correlations_with_target_traits)
+
+def cleanup_string(the_str):
+ "Remove tab, newline and carriage return characters."
+ return the_str.strip('"\t\n\r ')
+
+def process_common_args(args):
+ "Process the common CLI arguments to a form usable by the functions"
+ return {
+ "primary_trait_name": cleanup_string(args.primary_trait),
+ "control_trait_names": tuple(
+ cleanup_string(args.control_traits).split(",")),
+ "method": cleanup_string(args.method)
+ }
+
+def process_trait_args(args):
+ """Process arguments to a form usable by the
+ `partial_correlations_with_target_traits` function."""
+ return {
+ **process_common_args(args),
+ "target_trait_names": tuple(
+ cleanup_string(args.target_traits).split(","))
+ }
+
+def process_db_args(args):
+ """Process arguments for the `partial_correlations_with_target_db`
+ function."""
+ return {
+ **process_common_args(args),
+ "target_db_name": cleanup_string(args.target_database),
+ "criteria": args.criteria
+ }
+
+def pcorrs_against_traits(dbconn, args):
+ """Run partial correlations agaist selected traits."""
+ return partial_correlations_with_target_traits(
+ dbconn, **process_trait_args(args))
+
+def pcorrs_against_db(dbconn, args):
+ """Run partial correlations agaist the entire dataset provided."""
+ return partial_correlations_with_target_db(dbconn, **process_db_args(args))
+
+def run_pcorrs(dbconn, args):
+ """Run the selected partial correlations function."""
+ try:
+ return args.func(dbconn, args)
+ except Exception as exc: # pylint: disable=[broad-except,unused-variable]
+ return {
+ "status": "exception",
+ "message": traceback.format_exc()
+ }
+
+def against_traits_parser(parent_parser):
+ """Parser for command to run partial correlations against selected traits"""
+ parser = parent_parser.add_parser(
+ "against-traits",
+ help="Run partial correlations against a select list of traits")
+ parser.add_argument(
+ "target_traits",
+ help=(
+ "The target traits to run the partial correlations against. "
+ "This is a comma-separated list of traits' fullnames, in the "
+ "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
+ "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
+ type=str)
+ parser.set_defaults(func=pcorrs_against_traits)
+ return parent_parser
+
+def against_db_parser(parent_parser):
+ """Parser for command to run partial correlations against entire dataset"""
+ parser = parent_parser.add_parser(
+ "against-db",
+ help="Run partial correlations against an entire dataset")
+ parser.add_argument(
+ "target_database",
+ help="The target database to run the partial correlations against",
+ type=str)
+ parser.add_argument(
+ "--criteria",
+ help="Number of results to return",
+ type=int, default=500)
+ parser.set_defaults(func=pcorrs_against_db)
+ return parent_parser
def process_cli_arguments():
+ """Top level parser"""
parser = ArgumentParser()
parser.add_argument(
"primary_trait",
@@ -21,39 +107,20 @@ def process_cli_arguments():
"method",
help="The correlation method to use",
type=str)
- parser.add_argument(
- "target_database",
- help="The target database to run the partial correlations against",
- type=str)
- parser.add_argument(
- "--criteria",
- help="Number of results to return",
- type=int, default=500)
+ against_db_parser(against_traits_parser(
+ parser.add_subparsers(
+ title="subcommands",
+ description="valid subcommands",
+ required=True)))
return parser.parse_args()
-def cleanup_string(the_str):
- return the_str.strip('"\t\n\r ')
+def main():
+ """Entry point for the script"""
+ args = process_cli_arguments()
-def run_partial_corrs(args):
with database_connector() as conn:
- try:
- return partial_correlations_with_target_db(
- conn, cleanup_string(args.primary_trait),
- tuple(cleanup_string(args.control_traits).split(",")),
- cleanup_string(args.method), args.criteria,
- cleanup_string(args.target_database))
- except Exception as exc:
- print(traceback.format_exc(), file=sys.stderr)
- return {
- "status": "exception",
- "message": traceback.format_exc()
- }
-
-def enter():
- args = process_cli_arguments()
- print(json.dumps(
- run_partial_corrs(process_cli_arguments()),
- cls = OutputEncoder))
+ print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))
+
if __name__ == "__main__":
- enter()
+ main()
diff --git a/scripts/pcorrs.py b/scripts/pcorrs.py
deleted file mode 100644
index de364dc..0000000
--- a/scripts/pcorrs.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""Script to run partial correlations"""
-
-import json
-import traceback
-from argparse import ArgumentParser
-
-from gn3.db_utils import database_connector
-from gn3.responses.pcorrs_responses import OutputEncoder
-from gn3.computations.partial_correlations import (
- partial_correlations_with_target_db,
- partial_correlations_with_target_traits)
-
-def cleanup_string(the_str):
- "Remove tab, newline and carriage return characters."
- return the_str.strip('"\t\n\r ')
-
-def process_common_args(args):
- "Process the common CLI arguments to a form usable by the functions"
- return {
- "primary_trait_name": cleanup_string(args.primary_trait),
- "control_trait_names": tuple(
- cleanup_string(args.control_traits).split(",")),
- "method": cleanup_string(args.method)
- }
-
-def process_trait_args(args):
- """Process arguments to a form usable by the
- `partial_correlations_with_target_traits` function."""
- return {
- **process_common_args(args),
- "target_trait_names": tuple(
- cleanup_string(args.target_traits).split(","))
- }
-
-def process_db_args(args):
- """Process arguments for the `partial_correlations_with_target_db`
- function."""
- return {
- **process_common_args(args),
- "target_db_name": cleanup_string(args.target_database),
- "criteria": args.criteria
- }
-
-def pcorrs_against_traits(dbconn, args):
- """Run partial correlations agaist selected traits."""
- return partial_correlations_with_target_traits(
- dbconn, **process_trait_args(args))
-
-def pcorrs_against_db(dbconn, args):
- """Run partial correlations agaist the entire dataset provided."""
- return partial_correlations_with_target_db(dbconn, **process_db_args(args))
-
-def run_pcorrs(dbconn, args):
- """Run the selected partial correlations function."""
- try:
- return args.func(dbconn, args)
- except Exception as exc: # pylint: disable=[broad-except,unused-variable]
- return {
- "status": "exception",
- "message": traceback.format_exc()
- }
-
-def against_traits_parser(parent_parser):
- """Parser for command to run partial correlations against selected traits"""
- parser = parent_parser.add_parser(
- "against-traits",
- help="Run partial correlations against a select list of traits")
- parser.add_argument(
- "target_traits",
- help=(
- "The target traits to run the partial correlations against. "
- "This is a comma-separated list of traits' fullnames, in the "
- "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
- "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
- type=str)
- parser.set_defaults(func=pcorrs_against_traits)
- return parent_parser
-
-def against_db_parser(parent_parser):
- """Parser for command to run partial correlations against entire dataset"""
- parser = parent_parser.add_parser(
- "against-db",
- help="Run partial correlations against an entire dataset")
- parser.add_argument(
- "target_database",
- help="The target database to run the partial correlations against",
- type=str)
- parser.add_argument(
- "--criteria",
- help="Number of results to return",
- type=int, default=500)
- parser.set_defaults(func=pcorrs_against_db)
- return parent_parser
-
-def process_cli_arguments():
- """Top level parser"""
- parser = ArgumentParser()
- parser.add_argument(
- "primary_trait",
- help="The primary trait's full name",
- type=str)
- parser.add_argument(
- "control_traits",
- help="A comma-separated list of traits' full names",
- type=str)
- parser.add_argument(
- "method",
- help="The correlation method to use",
- type=str)
- against_db_parser(against_traits_parser(
- parser.add_subparsers(
- title="subcommands",
- description="valid subcommands",
- required=True)))
- return parser.parse_args()
-
-def main():
- """Entry point for the script"""
- args = process_cli_arguments()
-
- with database_connector() as conn:
- print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))
-
-
-if __name__ == "__main__":
- main()