"""Script to run partial correlations"""
import json
import traceback
from pathlib import Path
from argparse import ArgumentParser
from gn3.db_utils import database_connection
from gn3.responses.pcorrs_responses import OutputEncoder
from gn3.computations.partial_correlations import (
partial_correlations_with_target_db,
partial_correlations_with_target_traits)
def cleanup_string(the_str):
    """Strip surrounding quoting and whitespace from a CLI value.

    Double quotes, tabs, newlines, carriage returns and spaces are removed
    from both ends of `the_str`; characters in the interior are untouched.
    """
    unwanted = '"\t\n\r '
    return the_str.strip(unwanted)
def process_common_args(args):
    """Build the keyword arguments shared by both subcommands.

    Reads `primary_trait`, `control_traits` and `method` from the parsed CLI
    namespace, passes each through `cleanup_string`, and splits the control
    traits on commas into a tuple.
    """
    primary = cleanup_string(args.primary_trait)
    controls = cleanup_string(args.control_traits).split(",")
    method = cleanup_string(args.method)
    return {
        "primary_trait_name": primary,
        "control_trait_names": tuple(controls),
        "method": method,
    }
def process_trait_args(args):
    """Build the keyword arguments for
    `partial_correlations_with_target_traits`.

    Extends the common arguments with `target_trait_names`, a tuple obtained
    by cleaning `args.target_traits` and splitting it on commas.
    """
    common = process_common_args(args)
    targets = tuple(cleanup_string(args.target_traits).split(","))
    return {**common, "target_trait_names": targets}
def process_db_args(args):
    """Build the keyword arguments for `partial_correlations_with_target_db`.

    Extends the common arguments with the cleaned target database name and
    the `criteria` value taken as-is from the CLI namespace.
    """
    db_kwargs = dict(process_common_args(args))
    db_kwargs["target_db_name"] = cleanup_string(args.target_database)
    db_kwargs["criteria"] = args.criteria
    return db_kwargs
def pcorrs_against_traits(dbconn, args):
    """Run partial correlations against the selected target traits.

    Converts the CLI namespace with `process_trait_args` and forwards the
    result, together with the database connection, to
    `partial_correlations_with_target_traits`.
    """
    trait_kwargs = process_trait_args(args)
    return partial_correlations_with_target_traits(dbconn, **trait_kwargs)
def pcorrs_against_db(dbconn, args):
    """Run partial correlations against the entire dataset provided.

    Converts the CLI namespace with `process_db_args` and forwards the
    result, plus `args.textdir`, to `partial_correlations_with_target_db`.
    """
    db_kwargs = process_db_args(args)
    return partial_correlations_with_target_db(
        dbconn, **db_kwargs, textdir=args.textdir)
def run_pcorrs(dbconn, args):
    """Run the subcommand handler stored on `args.func`.

    Returns whatever the handler returns. If the handler raises any
    `Exception`, the error is not propagated: a dict with `status` set to
    "exception" and `message` holding the formatted traceback is returned
    instead.
    """
    try:
        results = args.func(dbconn, args)
    except Exception:  # pylint: disable=broad-except
        # Report the failure as data rather than letting it propagate.
        results = {
            "status": "exception",
            "message": traceback.format_exc(),
        }
    return results
def against_traits_parser(parent_parser):
    """Register the `against-traits` subcommand on `parent_parser`.

    `parent_parser` is the object whose `add_parser` method creates
    subcommands. The new subcommand takes one positional `target_traits`
    argument and sets `func` to `pcorrs_against_traits`. The same
    `parent_parser` is returned so registrations can be chained.
    """
    target_traits_help = (
        "The target traits to run the partial correlations against. "
        "This is a comma-separated list of traits' fullnames, in the "
        "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
        "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672")
    subcommand = parent_parser.add_parser(
        "against-traits",
        help="Run partial correlations against a select list of traits")
    subcommand.add_argument("target_traits", type=str, help=target_traits_help)
    subcommand.set_defaults(func=pcorrs_against_traits)
    return parent_parser
def against_db_parser(parent_parser):
    """Register the `against-db` subcommand on `parent_parser`.

    The subcommand takes a positional `target_database`, an integer
    `--criteria` option (default 500) and a `--textdir` path option
    (default `/tmp/`), and sets `func` to `pcorrs_against_db`. The same
    `parent_parser` is returned so registrations can be chained.
    """
    subcommand = parent_parser.add_parser(
        "against-db",
        help="Run partial correlations against an entire dataset")
    subcommand.add_argument(
        "target_database",
        type=str,
        help="The target database to run the partial correlations against")
    subcommand.add_argument(
        "--criteria",
        type=int,
        default=500,
        help="Number of results to return")
    subcommand.add_argument(
        "--textdir",
        type=Path,
        default=Path("/tmp/"),
        help="Directory to read text files from")
    subcommand.set_defaults(func=pcorrs_against_db)
    return parent_parser
def process_cli_arguments():
    """Build the top-level parser and parse the command line.

    Positional arguments, in order: `primary_trait`, `control_traits`,
    `method` (one of "pearsons" or "spearmans") and `sql_uri`, followed by a
    mandatory subcommand registered by `against_traits_parser` and
    `against_db_parser`.

    Returns the namespace produced by `ArgumentParser.parse_args()`. In
    addition to the arguments above it carries `subcommand`, the name of the
    subcommand that was chosen.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "primary_trait",
        help="The primary trait's full name",
        type=str)
    parser.add_argument(
        "control_traits",
        help="A comma-separated list of traits' full names",
        type=str)
    parser.add_argument(
        "method",
        help="The correlation method to use",
        type=str,
        choices=("pearsons", "spearmans"))
    parser.add_argument(
        "sql_uri",
        help="The uri to use to connect to the database",
        type=str)
    # A required subparsers group needs a `dest`: without one, older Python
    # versions raise TypeError instead of a usage error when the subcommand
    # is omitted, because argparse cannot name the missing argument.
    subparsers = parser.add_subparsers(
        title="subcommands",
        description="valid subcommands",
        dest="subcommand",
        required=True)
    # Each registration function returns the subparsers object it was given.
    subparsers = against_traits_parser(subparsers)
    against_db_parser(subparsers)
    return parser.parse_args()
def main():
    """Entry point for the script.

    Parses the command line, opens a database connection using the given
    `sql_uri`, runs the selected partial correlations and prints the result
    to standard output as JSON encoded with `OutputEncoder`.
    """
    cli_args = process_cli_arguments()
    with database_connection(cli_args.sql_uri) as dbconn:
        results = run_pcorrs(dbconn, cli_args)
        print(json.dumps(results, cls=OutputEncoder))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()