1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
"""Compute phenotype means."""
import sys
import logging
from pathlib import Path
from typing import TypeVar
from argparse import Namespace, ArgumentParser
import MySQLdb
from gn_libs import mysqldb
from uploader import setup_modules_logging
from .load_phenotypes_to_db import update_means
# Module-level logger; its level is adjusted later from the `--log-level`
# CLI option when the file is run as a script.
logger = logging.getLogger(__name__)

# Configure the root logger once, at import time. `basicConfig` is a no-op
# if the root logger already has handlers attached.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s — %(message)s",
    encoding="utf-8")
def fetch_xref_id(conn: mysqldb.Connection, population_id: int) -> tuple[int, ...]:
    """Retrieve the cross-reference IDs recorded for a population.

    Selects the `Id` column of every `PublishXRef` row whose `InbredSetId`
    equals `population_id` and returns those values, converted to `int`, as
    a tuple. The tuple is empty when no row matches.
    """
    logger.debug("Fetching the xref IDs.")
    params = {"population_id": population_id}
    with conn.cursor(cursorclass=MySQLdb.cursors.DictCursor) as cursor:
        # The value is bound by the driver, never interpolated into the SQL.
        cursor.execute(
            "SELECT Id FROM PublishXRef WHERE InbredSetId=%(population_id)s",
            params)
        ids = [int(row["Id"]) for row in cursor.fetchall()]
        return tuple(ids)
def run(args) -> int:
    """Compute the phenotype means and report an exit status.

    The cross-reference IDs come from `args.cross_ref_ids` when that is
    non-empty; otherwise they are looked up in the database for
    `args.population_id`.

    Returns 0 once `update_means` has been called for the IDs, or 1 (after
    logging an error) when there are no IDs to work with.
    """
    logger.debug("Running the script!")
    with mysqldb.database_connection(args.db_uri) as mariadb_conn:
        xref_ids = args.cross_ref_ids
        if not xref_ids:
            # Nothing given on the command line: use the whole population.
            xref_ids = fetch_xref_id(mariadb_conn, args.population_id)

        if not xref_ids:
            # The '%s' placeholder inside the joined text is filled in by the
            # logging call with the population ID.
            likely_causes = " OR ".join((
                "no population exists with the ID '%s'",
                "the population exists but it has no phenotypes linked to it yet"))
            logger.error(
                "No cross-reference IDs to run against. Likely causes are: "
                + likely_causes + ".",
                args.population_id)
            return 1

        update_means(mariadb_conn, args.population_id, xref_ids)
        logger.debug("Successfully computed means for %02d phenotypes.",
                     len(xref_ids))
        return 0
T = TypeVar("T")


def comma_separated_list(val: str, itemstype: type[T] = str) -> tuple[T, ...]:
    """Convert a comma-separated string into a tuple of `itemstype` items.

    Each item has surrounding whitespace stripped before it is converted.
    Blank items (from an empty string, a trailing comma or doubled commas)
    are skipped rather than handed to `itemstype`, so `""` yields an empty
    tuple instead of `("",)` or a conversion error.

    Parameters:
        val: The comma-separated text to split.
        itemstype: Type (or other callable) applied to each stripped,
            non-blank item. Defaults to `str`.

    Returns:
        A tuple of the converted items, in their original order.

    Raises:
        Whatever `itemstype` raises for an item it cannot convert, e.g.
        `ValueError` from `int` for non-numeric text.
    """
    stripped_items = (item.strip() for item in val.split(","))
    return tuple(itemstype(item) for item in stripped_items if item)
def comma_separated_list_of_integers(val: str) -> tuple[int, ...]:
    """Parse the comma-separated string 'val' into a tuple of integers.

    Thin wrapper over `comma_separated_list` with `int` as the item type,
    so that it can be used directly as an argparse `type=` callable.
    """
    return comma_separated_list(val, itemstype=int)
if __name__ == "__main__":
    def parse_args() -> Namespace:
        """Build the command-line parser and parse the process arguments.

        Positional arguments, in order: the database URI, the path to the
        jobs SQLite database and the population ID. `--log-level` and
        `--cross-ref-ids` are optional.
        """
        cli = ArgumentParser(
            "compute-phenotype-means",
            description="Compute/Recompute the phenotype means.")

        ## Positional arguments: declaration order is command-line order.
        cli.add_argument(
            "db_uri",
            metavar="db-uri",
            type=str,
            help="MariaDB/MySQL connection URL")
        cli.add_argument(
            "jobs_db_path",
            metavar="jobs-db-path",
            type=Path,
            help="Path to jobs' SQLite database.")
        cli.add_argument(
            "population_id",
            metavar="population-id",
            type=int,
            help=("Identifier for the InbredSet group/"
                  "population to run means against."))

        ## Optional arguments
        cli.add_argument(
            "--log-level",
            type=str,
            help="Determines what is logged out.",
            choices=("debug", "info", "warning", "error", "critical"),
            default="info")
        cli.add_argument(
            "--cross-ref-ids",
            type=comma_separated_list_of_integers,
            help=("Provide cross-reference IDs to narrow the number of "
                  "phenotypes that the means are computed against."),
            default=[])

        return cli.parse_args()

    def main() -> int:
        """compute-phenotype-means: Entry-point function.

        Parses the CLI arguments, applies the requested log level to this
        module's logger, sets up logging for the phenotype-loading module,
        and returns the status produced by `run`.
        """
        args = parse_args()
        # The choices are lower-case; the `logging` constants are upper-case.
        level = getattr(logging, args.log_level.upper())
        logger.setLevel(level)
        setup_modules_logging(logger, ("scripts.load_phenotypes_to_db",))
        return run(args)

    sys.exit(main())
|