# about summary refs log tree commit diff
# path: root/scripts/compute-phenotype-means.py
# blob: 3b876b7e6eebd1b8b4b7defef4084339edf75ea9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""Compute phenotype means."""
import sys
import logging
from pathlib import Path
from typing import TypeVar
from argparse import Namespace, ArgumentParser

import MySQLdb

from gn_libs import mysqldb
from uploader import setup_modules_logging
from .load_phenotypes_to_db import update_means

# Module-level logger, named after this module ("__main__" when the file is
# executed as a script).
logger = logging.getLogger(__name__)
# Configure the root logger at import time with INFO as the initial level.
logging.basicConfig(
    encoding="utf-8",
    # Keep a separator between the level name and the message; without it the
    # two run together in the output (e.g. "INFOSuccessfully computed ...").
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO)


def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
    """Retrieve the cross-reference IDs linked to a population.

    Selects the `Id` column of every `PublishXRef` row whose `InbredSetId`
    equals `population_id` and returns those values, converted to `int`, as a
    tuple. The tuple is empty when no row matches.
    """
    logger.debug("Fetching the xref IDs.")
    params = {"population_id": population_id}
    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
        cursor.execute(
            "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s",
            params)
        rows = cursor.fetchall()
        # DictCursor rows are keyed by column name.
        return tuple(int(row["Id"]) for row in rows)


def run(args) -> int:
    """Compute the means for the selected phenotypes.

    The cross-reference IDs in `args.cross_ref_ids` are used when that value
    is non-empty; otherwise every cross-reference ID found for
    `args.population_id` is used.

    Returns 0 once the means have been updated, or 1 when there are no
    cross-reference IDs to run against.
    """
    logger.debug("Running the script!")
    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
        xref_ids = args.cross_ref_ids or fetch_xref_id(
            mariadb_conn, args.population_id)

        if not xref_ids:
            # Nothing to compute against: log the likely causes and fail.
            causes = " OR ".join((
                "no population exists with the ID '%s'",
                "the population exists but it has no phenotypes linked to it yet"))
            logger.error(
                "No cross-reference IDs to run against. Likely causes are: "
                + causes + ".",
                args.population_id)
            return 1

        update_means(mariadb_conn, args.population_id, xref_ids)
        logger.debug("Successfully computed means for %02d phenotypes.",
                     len(xref_ids))
        return 0


T = TypeVar("T")


def comma_separated_list(val: str, itemstype: type[T] = str) -> tuple[T, ...]:
    """Split 'val' on commas and convert each item with 'itemstype'.

    Whitespace surrounding each item is stripped before the item is passed to
    'itemstype'. The converted items are returned as a tuple, in their
    original order.

    Any exception raised by 'itemstype' propagates to the caller; for example
    an empty item (as in "1,,2" or "") raises ValueError when 'itemstype' is
    `int`.
    """
    return tuple(itemstype(item.strip()) for item in val.split(","))


def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
    """Parse a comma-separated string such as "1,2,3" into a tuple of ints."""
    return comma_separated_list(val, itemstype=int)


if __name__ == "__main__":
    def parse_args() -> Namespace:
        """Define and parse the CLI arguments accepted by this script.

        Returns the `Namespace` produced by parsing the process's command
        line. It carries `db_uri`, `jobs_db_path`, `population_id`,
        `log_level` and `cross_ref_ids`.
        """
        parser = ArgumentParser(
            "compute-phenotype-means",
            description="Compute/Recompute the phenotype means.")
        parser.add_argument("db_uri",
                            metavar="db-uri",
                            type=str,
                            help="MariaDB/MySQL connection URL")
        # NOTE(review): `jobs_db_path` is parsed but not read by any code
        # visible in this file -- confirm whether it is still needed.
        parser.add_argument("jobs_db_path",
                            metavar="jobs-db-path",
                            type=Path,
                            help="Path to jobs' SQLite database.")
        parser.add_argument("population_id",
                            metavar="population-id",
                            type=int,
                            help=("Identifier for the InbredSet group/"
                                  "population to run means against."))
        ## Optional arguments
        parser.add_argument(
            "--log-level",
            type=str,
            help="Determines what is logged out.",
            choices=("debug", "info", "warning", "error", "critical"),
            default="info")
        parser.add_argument(
            "--cross-ref-ids",
            type=comma_separated_list_of_integers,
            help=("Provide cross-reference IDs to narrow the number of "
                  "phenotypes that the means are computed against."),
            # Immutable empty default, the same type (tuple) the converter
            # returns when the option is given.
            default=())

        return parser.parse_args()

    def main() -> int:
        """Entry point for compute-phenotype-means.

        Parses the command line, applies the requested log level to this
        module's logger, sets up logging for the
        "scripts.load_phenotypes_to_db" module, and returns the status code
        produced by `run`.
        """
        cli_args = parse_args()
        # The choice names map onto the `logging` level constants when
        # upper-cased (e.g. "debug" -> logging.DEBUG).
        level = getattr(logging, cli_args.log_level.upper())
        logger.setLevel(level)
        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
        return run(cli_args)

    sys.exit(main())