scripts/partial_correlations.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

"""Script to run partial correlations"""

import json
import traceback
from argparse import ArgumentParser

from gn3.db_utils import database_connection
from gn3.responses.pcorrs_responses import OutputEncoder
from gn3.computations.partial_correlations import (
    partial_correlations_with_target_db,
    partial_correlations_with_target_traits)

def cleanup_string(the_str):
    "Remove tab, newline and carriage return characters."
    return the_str.strip('"\t\n\r ')

def process_common_args(args):
    "Process the common CLI arguments to a form usable by the functions"
    return {
        "primary_trait_name": cleanup_string(args.primary_trait),
        "control_trait_names": tuple(
            cleanup_string(args.control_traits).split(",")),
        "method": cleanup_string(args.method)
    }

def process_trait_args(args):
    """Process arguments to a form usable by the
    `partial_correlations_with_target_traits` function."""
    return {
        **process_common_args(args),
        "target_trait_names": tuple(
            cleanup_string(args.target_traits).split(","))
    }

def process_db_args(args):
    """Process arguments for the `partial_correlations_with_target_db`
    function."""
    return {
        **process_common_args(args),
        "target_db_name": cleanup_string(args.target_database),
        "criteria": args.criteria
    }

def pcorrs_against_traits(dbconn, args):
    """Run partial correlations agaist selected traits."""
    return partial_correlations_with_target_traits(
        dbconn, **process_trait_args(args))

def pcorrs_against_db(dbconn, args):
    """Run partial correlations agaist the entire dataset provided."""
    return partial_correlations_with_target_db(dbconn, **process_db_args(args))

def run_pcorrs(dbconn, args):
    """Run the selected partial correlations function."""
    try:
        return args.func(dbconn, args)
    except Exception as exc: # pylint: disable=[broad-except,unused-variable]
        return {
            "status": "exception",
            "message": traceback.format_exc()
        }

def against_traits_parser(parent_parser):
    """Parser for command to run partial correlations against selected traits"""
    parser = parent_parser.add_parser(
        "against-traits",
        help="Run partial correlations against a select list of traits")
    parser.add_argument(
        "target_traits",
        help=(
            "The target traits to run the partial correlations against. "
            "This is a comma-separated list of traits' fullnames, in the "
            "format <DATASET-NAME>::<TRAIT-NAME> e.g. "
            "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672"),
        type=str)
    parser.set_defaults(func=pcorrs_against_traits)
    return parent_parser

def against_db_parser(parent_parser):
    """Parser for command to run partial correlations against entire dataset"""
    parser = parent_parser.add_parser(
        "against-db",
        help="Run partial correlations against an entire dataset")
    parser.add_argument(
        "target_database",
        help="The target database to run the partial correlations against",
        type=str)
    parser.add_argument(
        "--criteria",
        help="Number of results to return",
        type=int, default=500)
    parser.set_defaults(func=pcorrs_against_db)
    return parent_parser

def process_cli_arguments():
    """Top level parser"""
    parser = ArgumentParser()
    parser.add_argument(
        "primary_trait",
        help="The primary trait's full name",
        type=str)
    parser.add_argument(
        "control_traits",
        help="A comma-separated list of traits' full names",
        type=str)
    parser.add_argument(
        "method",
        help="The correlation method to use",
        type=str,
        choices=("pearsons", "spearmans"))
    parser.add_argument(
        "sql_uri",
        help="The uri to use to connect to the database",
        type=str)
    against_db_parser(against_traits_parser(
        parser.add_subparsers(
            title="subcommands",
            description="valid subcommands",
            required=True)))
    return parser.parse_args()

def main():
    """Entry point for the script"""
    args = process_cli_arguments()

    with database_connection(args.sql_uri) as conn:
        print(json.dumps(run_pcorrs(conn, args), cls=OutputEncoder))


if __name__ == "__main__":
    main()