# scripts/partial_correlations.py - genenetwork3 - GeneNetwork3 REST API for data science and machine learning

"""Script to run partial correlations"""
import json
import traceback
from pathlib import Path
from argparse import ArgumentParser

from gn3.db_utils import database_connection
from gn3.responses.pcorrs_responses import OutputEncoder
from gn3.computations.partial_correlations import (
    partial_correlations_with_target_db,
    partial_correlations_with_target_traits)

def cleanup_string(the_str):
    """Strip surrounding double quotes and whitespace from `the_str`.

    Only leading and trailing double-quote, tab, newline, carriage-return and
    space characters are removed; anything inside the string is left alone.
    """
    unwanted = '"\t\n\r '
    return the_str.strip(unwanted)

def process_common_args(args):
    """Build the keyword arguments shared by both subcommands.

    Returns a dict holding the cleaned-up primary trait name, the control
    trait names (the cleaned-up string split on commas, as a tuple) and the
    cleaned-up correlation method.
    """
    primary = cleanup_string(args.primary_trait)
    controls = cleanup_string(args.control_traits).split(",")
    method = cleanup_string(args.method)
    return {
        "primary_trait_name": primary,
        "control_trait_names": tuple(controls),
        "method": method
    }

def process_trait_args(args):
    """Build the keyword arguments for
    `partial_correlations_with_target_traits`.

    Extends the common arguments with the target trait names, obtained by
    cleaning up `args.target_traits` and splitting it on commas.
    """
    common = process_common_args(args)
    targets = cleanup_string(args.target_traits).split(",")
    return dict(common, target_trait_names=tuple(targets))

def process_db_args(args):
    """Build the keyword arguments for `partial_correlations_with_target_db`.

    Extends the common arguments with the cleaned-up target database name and
    the `criteria` value taken unchanged from the CLI arguments.
    """
    db_kwargs = dict(process_common_args(args))
    db_kwargs["target_db_name"] = cleanup_string(args.target_database)
    db_kwargs["criteria"] = args.criteria
    return db_kwargs

def pcorrs_against_traits(dbconn, args):
    """Run partial correlations against the selected target traits."""
    trait_kwargs = process_trait_args(args)
    return partial_correlations_with_target_traits(dbconn, **trait_kwargs)

def pcorrs_against_db(dbconn, args):
    """Run partial correlations against the entire dataset provided.

    `args.textdir` is forwarded as the `textdir` keyword argument alongside
    the processed database arguments.
    """
    db_kwargs = process_db_args(args)
    return partial_correlations_with_target_db(
        dbconn, textdir=args.textdir, **db_kwargs)

def run_pcorrs(dbconn, args):
    """Run the selected partial correlations function.

    Calls `args.func(dbconn, args)` and returns its result. If the call
    raises any `Exception`, the error is not propagated; instead a dict with
    `"status": "exception"` and the formatted traceback under `"message"` is
    returned, so the caller always gets a value it can serialise.
    """
    try:
        return args.func(dbconn, args)
    # Deliberately broad: any failure is reported in the returned payload
    # rather than crashing the script.
    except Exception: # pylint: disable=broad-except
        return {
            "status": "exception",
            "message": traceback.format_exc()
        }

def against_traits_parser(parent_parser):
    """Register the `against-traits` subcommand on `parent_parser`.

    The subcommand takes one positional argument, `target_traits`, and sets
    `func` to `pcorrs_against_traits`. The same `parent_parser` is returned
    so that registrations can be chained.
    """
    target_traits_help = (
        "The target traits to run the partial correlations against. "
        "This is a comma-separated list of traits' fullnames, in the "
        "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
        "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672")
    subcommand = parent_parser.add_parser(
        "against-traits",
        help="Run partial correlations against a select list of traits")
    subcommand.add_argument(
        "target_traits", type=str, help=target_traits_help)
    subcommand.set_defaults(func=pcorrs_against_traits)
    return parent_parser

def against_db_parser(parent_parser):
    """Register the `against-db` subcommand on `parent_parser`.

    The subcommand takes the positional `target_database` plus two options:
    `--criteria` (an int, 500 by default) and `--textdir` (a `Path`,
    `/tmp/` by default). It sets `func` to `pcorrs_against_db`. The same
    `parent_parser` is returned so that registrations can be chained.
    """
    subcommand = parent_parser.add_parser(
        "against-db",
        help="Run partial correlations against an entire dataset")
    subcommand.add_argument(
        "target_database",
        type=str,
        help="The target database to run the partial correlations against")
    subcommand.add_argument(
        "--criteria",
        type=int,
        default=500,
        help="Number of results to return")
    subcommand.add_argument(
        "--textdir",
        type=Path,
        default=Path("/tmp/"),
        help="Directory to read text files from")
    subcommand.set_defaults(func=pcorrs_against_db)
    return parent_parser

def process_cli_arguments():
    """Build the top-level parser and parse the command line.

    The parser takes four positional arguments (`primary_trait`,
    `control_traits`, `method` and `sql_uri`) followed by a required
    subcommand: `against-traits` or `against-db`. `method` must be one of
    "pearsons" or "spearmans".

    Returns the namespace produced by `ArgumentParser.parse_args()`. Besides
    the arguments above it carries `func` (set by the chosen subcommand) and
    `subcommand` (the name of the chosen subcommand).
    """
    parser = ArgumentParser()
    parser.add_argument(
        "primary_trait",
        help="The primary trait's full name",
        type=str)
    parser.add_argument(
        "control_traits",
        help="A comma-separated list of traits' full names",
        type=str)
    parser.add_argument(
        "method",
        help="The correlation method to use",
        type=str,
        choices=("pearsons", "spearmans"))
    parser.add_argument(
        "sql_uri",
        help="The uri to use to connect to the database",
        type=str)
    # Give the subparsers action a `dest`: argparse needs a name for it when
    # reporting a missing required subcommand, and without one some Python
    # releases fail with a TypeError instead of printing a usage error.
    subcommands = parser.add_subparsers(
        title="subcommands",
        description="valid subcommands",
        dest="subcommand",
        required=True)
    against_db_parser(against_traits_parser(subcommands))
    return parser.parse_args()

def main():
    """Entry point for the script.

    Parses the CLI arguments, opens a database connection to `sql_uri`, runs
    the selected partial correlations and prints the result as JSON encoded
    with `OutputEncoder`.
    """
    cli_args = process_cli_arguments()

    with database_connection(cli_args.sql_uri) as dbconn:
        results = run_pcorrs(dbconn, cli_args)
        print(json.dumps(results, cls=OutputEncoder))


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()