Diffstat (limited to 'scripts')
 scripts/cli/options.py                  |   2
 scripts/compute_phenotype_means.py      |   2
 scripts/insert_samples.py               |  16
 scripts/load_phenotypes_to_db.py        |  17
 scripts/phenotypes/__init__.py          |   1
 scripts/phenotypes/delete_phenotypes.py | 173
 scripts/qc_on_rqtl2_bundle.py           |   9
 scripts/rqtl2/entry.py                  |   2
 scripts/rqtl2/install_genotypes.py      |   6
 scripts/rqtl2/install_phenos.py         |   7
 scripts/rqtl2/phenotypes_qc.py          |   3
 scripts/run_qtlreaper.py                |  44
 12 files changed, 225 insertions(+), 57 deletions(-)
diff --git a/scripts/cli/options.py b/scripts/cli/options.py
index 67f35dc..70d2a27 100644
--- a/scripts/cli/options.py
+++ b/scripts/cli/options.py
@@ -13,7 +13,7 @@ def add_logging(parser: ArgumentParser) -> ArgumentParser:
         type=str,
         default="INFO",
         choices=loglevels,
-        help=(f"Controls the severity of events to log. Valid values are: " +
+        help=("Controls the severity of events to log. Valid values are: " +
               ", ".join(f"'{level}'" for level in loglevels)))
     return parser
 
diff --git a/scripts/compute_phenotype_means.py b/scripts/compute_phenotype_means.py
index ef2fabc..6d39ace 100644
--- a/scripts/compute_phenotype_means.py
+++ b/scripts/compute_phenotype_means.py
@@ -51,7 +51,7 @@ def run(args) -> int:
 
 T = TypeVar("T")
 
-def comma_separated_list(val: str, itemstype: T = str) -> tuple[T, ...]:
+def comma_separated_list(val: str, itemstype: type = str) -> tuple[T, ...]:
     """Convert val into a list of items of type 'itemstype'."""
     return tuple(itemstype(item.strip()) for item in val.split(","))
 
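A note on the compute_phenotype_means.py hunk above: the old annotation used the TypeVar `T` directly as a parameter annotation with a concrete default, which type checkers reject, so the patch relaxes it to `type`. A stricter (hypothetical) alternative, not what the patch does, would tie the parameter to the return type via `Callable`:

    from typing import Callable, TypeVar

    T = TypeVar("T")

    def comma_separated_list(val: str,
                             itemstype: Callable[[str], T]) -> tuple[T, ...]:
        """Convert 'val' into a tuple of items built by calling 'itemstype'."""
        return tuple(itemstype(item.strip()) for item in val.split(","))

    assert comma_separated_list("1, 2, 3", int) == (1, 2, 3)

Dropping the `= str` default is what makes the binding of `T` fully sound; keeping it would need an overload.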
the value in " "each column.")) - # == Script-specific extras == - parser.add_argument("--redisuri", - help="URL to initialise connection to redis", - default="redis:///") - args = parser.parse_args() return args @@ -132,17 +126,13 @@ if __name__ == "__main__": status_code = 1 # Exit with an Exception args = cli_args() check_db(args.databaseuri) - check_redis(args.redisuri) if not args.samplesfile.exists(): logging.error("File not found: '%s'.", args.samplesfile) return 2 - with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, - database_connection(args.databaseuri) as dbconn): - + with database_connection(args.databaseuri) as dbconn: try: status_code = insert_samples(dbconn, - rconn, args.speciesid, args.populationid, args.samplesfile, diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index e449b82..e303bb3 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -6,9 +6,9 @@ import time import logging import argparse import datetime -from typing import Any from pathlib import Path from zipfile import ZipFile +from typing import Any, Iterable from urllib.parse import urljoin from functools import reduce, partial @@ -55,7 +55,7 @@ def save_phenotypes( if control_data["phenocovar_transposed"]: logger.info("Undoing transposition of the files rows and columns.") - phenofiles = ( + phenofiles = tuple( rqtl2.transpose_csv_with_rename( _file, build_line_splitter(control_data), @@ -86,7 +86,7 @@ def __row_to_dataitems__( dataidmap: dict, pheno_name2id: dict[str, int], samples: dict -) -> tuple[dict, ...]: +) -> Iterable[dict]: samplename = sample_row["id"] return ({ @@ -134,7 +134,7 @@ def save_numeric_data(# pylint: disable=[too-many-positional-arguments,too-many- conn: mysqldb.Connection, dataidmap: dict, pheno_name2id: dict[str, int], - samples: tuple[dict, ...], + samples: dict, control_data: dict, filesdir: Path, filetype: str, @@ -311,7 +311,9 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments ).either(__handle_error__, __handle_success__) -def load_data(conn: mysqldb.Connection, job: dict) -> int:#pylint: disable=[too-many-locals] +def load_data(# pylint: disable=[too-many-locals] + conn: mysqldb.Connection, job: dict +) -> tuple[dict, dict, dict, tuple[int, ...]]: """Load the data attached in the given job.""" _job_metadata = job["metadata"] # Steps @@ -365,9 +367,8 @@ def load_data(conn: mysqldb.Connection, job: dict) -> int:#pylint: disable=[too- "publication_id": row["publication_id"], "data_id": row["data_id"] },))) - dataidmap, pheno_name2id, _xrefs = reduce(__build_phenos_maps__, - _phenos, - ({},{}, tuple())) + dataidmap, pheno_name2id, _xrefs = reduce(# type: ignore[var-annotated] + __build_phenos_maps__, _phenos, ({},{}, tuple())) # 3. a. Fetch the strain names and IDS: create name->ID map samples = { row["Name"]: row diff --git a/scripts/phenotypes/__init__.py b/scripts/phenotypes/__init__.py new file mode 100644 index 0000000..73ad839 --- /dev/null +++ b/scripts/phenotypes/__init__.py @@ -0,0 +1 @@ +"Scripts for dealing with phenotypes." 
diff --git a/scripts/phenotypes/delete_phenotypes.py b/scripts/phenotypes/delete_phenotypes.py
new file mode 100644
index 0000000..461f3ec
--- /dev/null
+++ b/scripts/phenotypes/delete_phenotypes.py
@@ -0,0 +1,173 @@
+"""Delete phenotypes."""
+import sys
+import logging
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urljoin
+from argparse import Namespace, ArgumentParser
+
+import requests
+from MySQLdb.cursors import DictCursor, BaseCursor
+
+from gn_libs.mysqldb import database_connection
+
+from uploader.phenotypes.models import delete_phenotypes
+from scripts.cli.logging import setup_logging
+from scripts.cli.options import (add_logging,
+                                 add_mariadb_uri,
+                                 add_population_id)
+
+logger = logging.getLogger(__name__)
+
+
+def read_xref_ids_file(filepath: Optional[Path]) -> tuple[int, ...]:
+    """Read the phenotypes' cross-reference IDs from file."""
+    if filepath is None:
+        return tuple()
+
+    logger.debug("Using file '%s' to retrieve XREF IDs for deletion.",
+                 filepath.name)
+    _ids: tuple[int, ...] = tuple()
+    with filepath.open(mode="r") as infile:
+        for line in infile.readlines():
+            try:
+                _ids += (int(line.strip()),)
+            except ValueError:
+                pass
+
+    return _ids
+
+
+def fetch_all_xref_ids(
+        cursor: BaseCursor, population_id: int) -> tuple[int, ...]:
+    """Fetch all cross-reference IDs."""
+    cursor.execute("SELECT Id FROM PublishXRef WHERE InbredSetId=%s",
+                   (population_id,))
+    return tuple(int(row["Id"]) for row in cursor.fetchall())
+
+
+def update_auth(
+        auth_details: tuple[str, str],
+        species_id: int,
+        population_id: int,
+        dataset_id: int,
+        xref_ids: tuple[int, ...] = tuple()
+):
+    """Update the authorisation server: remove items to delete."""
+    authserver, token = auth_details
+    resp = requests.post(
+        urljoin(authserver,
+                (f"/auth/data/phenotypes/{species_id}/{population_id}"
+                 f"/{dataset_id}/delete")),
+        timeout=(9.13, 20),
+        headers={
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json"
+        },
+        json={"xref_ids": xref_ids})
+    resp.raise_for_status()
+
+
+def delete_the_phenotypes(
+        cursor: BaseCursor,
+        population_id: int,
+        xref_ids: tuple[int, ...] = tuple()) -> int:
+    """Process and delete the phenotypes."""
+    delete_phenotypes(cursor, population_id, xref_ids)
+
+    return 0
+
+
+if __name__ == "__main__":
+    def parse_args() -> Namespace:
+        """Parse CLI arguments."""
+        parser = add_logging(
+            add_population_id(
+                add_mariadb_uri(
+                    ArgumentParser(
+                        prog="delete-phenotypes",
+                        description=(
+                            "Script to delete phenotypes from the database.")))))
+        parser.add_argument(
+            "dataset_id",
+            metavar="DATASET-ID",
+            type=int,
+            help="The dataset identifier for phenotypes to delete.")
+        parser.add_argument(
+            "auth_server_uri",
+            metavar="AUTH-SERVER-URI",
+            type=str,
+            help="URI to the authorisation server.")
+        parser.add_argument(
+            "auth_token",
+            metavar="AUTH-TOKEN",
+            type=str,
+            help=("Token to use to update the authorisation system with the "
+                  "deletions done."))
+        parser.add_argument(
+            "--xref_ids_file",
+            metavar="XREF-IDS-FILE",
+            type=Path,
+            help=("Path to a file with phenotypes cross-reference IDs to "
+                  "delete."))
+        parser.add_argument(
+            "--delete-all",
+            action="store_true",
+            help=("If no 'XREF-IDS-FILE' is provided, this flag determines "
+                  "whether or not all the phenotypes for the given population "
+                  "will be deleted."))
+        return parser.parse_args()
+
+
+    def main():
+        """The `delete-phenotypes` script's entry point."""
+        args = parse_args()
+        setup_logging(logger, args.log_level.upper(), tuple())
+        with (database_connection(args.db_uri) as conn,
+              conn.cursor(cursorclass=DictCursor) as cursor):
+            xref_ids = read_xref_ids_file(args.xref_ids_file)
+            try:
+                assert not (len(xref_ids) > 0 and args.delete_all)
+                xref_ids = (fetch_all_xref_ids(cursor, args.population_id)
+                            if args.delete_all else xref_ids)
+                logger.debug("Will delete %s phenotypes and related data",
+                             len(xref_ids))
+                if len(xref_ids) == 0:
+                    print("No cross-reference IDs were provided. Aborting.")
+                    return 0
+
+                print("Updating authorisations: ", end="")
+                update_auth((args.auth_server_uri, args.auth_token),
+                            args.species_id,
+                            args.population_id,
+                            args.dataset_id,
+                            xref_ids)
+                print("OK.")
+                print("Deleting the data: ", end="")
+                delete_phenotypes(cursor, args.population_id, xref_ids=xref_ids)
+                print("OK.")
+                if args.xref_ids_file is not None:
+                    print("Deleting temporary file: ", end="")
+                    args.xref_ids_file.unlink()
+                    print("OK.")
+
+                return 0
+            except AssertionError:
+                logger.error(
+                    "'DELETE-ALL' and 'XREF-IDS' are mutually exclusive. "
+                    "If you specify the list of XREF-IDS (in a file) to delete "
+                    "and also specify to 'DELETE-ALL' phenotypes in the "
+                    "population, we have no way of knowing what it is you want.")
+                return 1
+            except requests.exceptions.HTTPError as _exc:
+                resp = _exc.response
+                resp_data = resp.json()
+                logger.debug("%s: %s",
+                             resp_data["error"],
+                             resp_data["error_description"],
+                             exc_info=True)
+                return 1
+            except Exception as _exc:# pylint: disable=[broad-exception-caught]
+                logger.debug("Failed while attempting to delete phenotypes.",
+                             exc_info=True)
+                return 1
+
+    sys.exit(main())
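On `update_auth` in the new script above: `timeout=(9.13, 20)` is the standard requests (connect, read) timeout pair, and `resp.raise_for_status()` is what surfaces the `requests.exceptions.HTTPError` handled in `main()`. A standalone sketch (the URL is a placeholder, not the real auth endpoint):

    import requests

    # Fail if establishing the TCP connection takes longer than ~9 seconds,
    # or if the server takes longer than 20 seconds to start responding.
    resp = requests.get("https://auth.example.org/", timeout=(9.13, 20))
    resp.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx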
" + "If you specify the list of XREF-IDS (in a file) to delete " + "and also specify to 'DELETE-ALL' phenotypes in the " + "population, we have no way of knowing what it is you want.") + return 1 + except requests.exceptions.HTTPError as _exc: + resp = _exc.response + resp_data = resp.json() + logger.debug("%s: %s", + resp_data["error"], + resp_data["error_description"], + exc_info=True) + return 1 + except Exception as _exc:# pylint: disable=[broad-exception-caught] + logger.debug("Failed while attempting to delete phenotypes.", + exc_info=True) + return 1 + + sys.exit(main()) diff --git a/scripts/qc_on_rqtl2_bundle.py b/scripts/qc_on_rqtl2_bundle.py index 0207938..4e6ef00 100644 --- a/scripts/qc_on_rqtl2_bundle.py +++ b/scripts/qc_on_rqtl2_bundle.py @@ -40,7 +40,7 @@ def add_to_errors(rconn: Redis, """Add `errors` to a given list of errors""" errs = tuple(dict(item) for item in set( [dict2tuple(old) for old in - json.loads(rconn.hget(fqjobid, key) or "[]")] + + json.loads(rconn.hget(fqjobid, key) or "[]")] +# type: ignore[arg-type] [dict2tuple({"type": type(error).__name__, **error._asdict()}) for error in errors])) rconn.hset(fqjobid, key, json.dumps(errs)) @@ -83,7 +83,8 @@ def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals count = 0 checked = 0 cdata = rqtl2.control_data(zfile) - rconn.hset(fqjobid, f"{filetype}-filesize", compute_filesize(zfile, filetype)) + rconn.hset( + fqjobid, f"{filetype}-filesize", str(compute_filesize(zfile, filetype))) def __update_processed__(value): nonlocal checked checked = checked + len(value) @@ -104,7 +105,7 @@ def retrieve_errors_with_progress(rconn: Redis,#pylint: disable=[too-many-locals yield error __update_processed__(value) - rconn.hset(fqjobid, f"{filetype}-linecount", count) + rconn.hset(fqjobid, f"{filetype}-linecount", count)# type: ignore[arg-type] except rqe.MissingFileException: fname = cdata.get(filetype) yield rqfe.MissingFile(filetype, fname, ( @@ -295,7 +296,7 @@ def run_qc(rconn: Redis, return 1 def __fetch_errors__(rkey: str) -> tuple: - return tuple(json.loads(rconn.hget(fqjobid, rkey) or "[]")) + return tuple(json.loads(rconn.hget(fqjobid, rkey) or "[]")) # type: ignore[arg-type] return (1 if any(( bool(__fetch_errors__(key)) diff --git a/scripts/rqtl2/entry.py b/scripts/rqtl2/entry.py index e0e00e7..7423a4b 100644 --- a/scripts/rqtl2/entry.py +++ b/scripts/rqtl2/entry.py @@ -26,7 +26,7 @@ def build_main( def main(): with (Redis.from_url(args.redisuri, decode_responses=True) as rconn, database_connection(args.databaseuri) as dbconn): - logger.setLevel(args.loglevel.upper()) + logger.setLevel(args.log_level.upper()) fqjobid = jobs.job_key(args.redisprefix, args.jobid) try: diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py index 8762655..5e6abb0 100644 --- a/scripts/rqtl2/install_genotypes.py +++ b/scripts/rqtl2/install_genotypes.py @@ -20,7 +20,7 @@ from scripts.rqtl2.entry import build_main from scripts.rqtl2.cli_parser import add_common_arguments from scripts.cli_parser import init_cli_parser, add_global_data_arguments -__MODULE__ = "scripts.rqtl2.install_genotypes" +logger = getLogger(__name__) def insert_markers( dbconn: mdb.Connection, @@ -191,7 +191,7 @@ def install_genotypes(#pylint: disable=[too-many-locals] dbconn: mdb.Connection, fullyqualifiedjobid: str,#pylint: disable=[unused-argument] args: argparse.Namespace, - logger: Logger = getLogger(__name__) + logger: Logger = logger # pylint: disable=[redefined-outer-name] ) -> int: """Load any existing genotypes 
diff --git a/scripts/rqtl2/install_genotypes.py b/scripts/rqtl2/install_genotypes.py
index 8762655..5e6abb0 100644
--- a/scripts/rqtl2/install_genotypes.py
+++ b/scripts/rqtl2/install_genotypes.py
@@ -20,7 +20,7 @@ from scripts.rqtl2.entry import build_main
 from scripts.rqtl2.cli_parser import add_common_arguments
 from scripts.cli_parser import init_cli_parser, add_global_data_arguments
 
-__MODULE__ = "scripts.rqtl2.install_genotypes"
+logger = getLogger(__name__)
 
 def insert_markers(
         dbconn: mdb.Connection,
@@ -191,7 +191,7 @@ def install_genotypes(#pylint: disable=[too-many-locals]
         dbconn: mdb.Connection,
         fullyqualifiedjobid: str,#pylint: disable=[unused-argument]
         args: argparse.Namespace,
-        logger: Logger = getLogger(__name__)
+        logger: Logger = logger # pylint: disable=[redefined-outer-name]
 ) -> int:
     """Load any existing genotypes into the database."""
     (speciesid, populationid, datasetid, rqtl2bundle) = (
@@ -257,5 +257,5 @@ if __name__ == "__main__":
 
         return parser.parse_args()
 
-    main = build_main(cli_args(), install_genotypes, __MODULE__)
+    main = build_main(cli_args(), install_genotypes, logger)
     sys.exit(main())
diff --git a/scripts/rqtl2/install_phenos.py b/scripts/rqtl2/install_phenos.py
index 9059cd6..11ac8a4 100644
--- a/scripts/rqtl2/install_phenos.py
+++ b/scripts/rqtl2/install_phenos.py
@@ -19,7 +19,7 @@ from r_qtl import r_qtl2_qc as rqc
 
 from functional_tools import take
 
-__MODULE__ = "scripts.rqtl2.install_phenos"
+logger = getLogger(__name__)
 
 def insert_probesets(dbconn: mdb.Connection,
                      platformid: int,
@@ -101,7 +101,8 @@ def install_pheno_files(#pylint: disable=[too-many-locals]
         dbconn: mdb.Connection,
         fullyqualifiedjobid: str,#pylint: disable=[unused-argument]
         args: argparse.Namespace,
-        logger: Logger = getLogger()) -> int:
+        logger: Logger = logger # pylint: disable=[redefined-outer-name]
+) -> int:
     """Load data in `pheno` files and other related files into the database."""
     (speciesid, platformid, datasetid, rqtl2bundle) = (
         args.speciesid, args.platformid, args.datasetid, args.rqtl2bundle)
@@ -159,5 +160,5 @@ if __name__ == "__main__":
 
         return parser.parse_args()
 
-    main = build_main(cli_args(), install_pheno_files, __MODULE__)
+    main = build_main(cli_args(), install_pheno_files, logger)
     sys.exit(main())
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 9f11f57..72d6c83 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -376,7 +376,8 @@ def run_qc(# pylint: disable=[too-many-locals]
         rconn: Redis,
         dbconn: mdb.Connection,
         fullyqualifiedjobid: str,
-        args: Namespace
+        args: Namespace,
+        logger: Logger = logger # pylint: disable=[redefined-outer-name]
 ) -> int:
     """Run quality control checks on the bundle."""
     print("Beginning the quality assurance checks.")
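The pattern these three hunks converge on: a single module-level logger, used both as the object handed to `build_main` and as the default for an injectable `logger` parameter. A minimal sketch; `do_work` is a hypothetical stand-in for the install/QC functions:

    import logging
    from logging import Logger

    logger = logging.getLogger(__name__)

    def do_work(logger: Logger = logger):  # pylint: disable=[redefined-outer-name]
        logger.debug("working…")

Because defaults are evaluated at definition time, the parameter binds the module logger exactly once, while callers such as `build_main` stay free to inject a job-specific logger.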
diff --git a/scripts/run_qtlreaper.py b/scripts/run_qtlreaper.py
index ab58203..54e5d45 100644
--- a/scripts/run_qtlreaper.py
+++ b/scripts/run_qtlreaper.py
@@ -4,11 +4,10 @@ import csv
 import time
 import secrets
 import logging
-import traceback
 import subprocess
 from pathlib import Path
-from typing import Union
 from functools import reduce
+from typing import Union, Iterator
 from argparse import Namespace, ArgumentParser
 
 from gn_libs import mysqldb
@@ -57,7 +56,7 @@ def reconcile_samples(
 def generate_qtlreaper_traits_file(
         outdir: Path,
         samples: tuple[str, ...],
-        traits_data: dict[str, Union[int, float]],
+        traits_data: tuple[dict[str, Union[int, float]], ...],
         filename_prefix: str = ""
 ) -> Path:
     """Generate a file for use with qtlreaper that contains the traits' data."""
@@ -66,7 +65,7 @@ def generate_qtlreaper_traits_file(
     _dialect.quoting=0
 
     _traitsfile = outdir.joinpath(
-        f"{filename_prefix}_{secrets.token_urlsafe(15)}.tsv")
+        f"{filename_prefix}_{secrets.token_urlsafe(15)}.tsv")#type: ignore[attr-defined]
     with _traitsfile.open(mode="w", encoding="utf-8") as outptr:
         writer = csv.DictWriter(
             outptr, fieldnames=("Trait",) + samples, dialect=_dialect)
@@ -80,14 +79,13 @@ def generate_qtlreaper_traits_file(
     return _traitsfile
 
 
-def parse_tsv_file(results_file: Path) -> list[dict]:
+def parse_tsv_file(results_file: Path) -> Iterator[dict]:
     """Parse the rust-qtlreaper output into usable python objects."""
     with results_file.open("r", encoding="utf-8") as readptr:
         _dialect = csv.unix_dialect()
         _dialect.delimiter = "\t"
         reader = csv.DictReader(readptr, dialect=_dialect)
-        for row in reader:
-            yield row
+        yield from reader
 
 
 def __qtls_by_trait__(qtls, current):
@@ -98,7 +96,8 @@ def __qtls_by_trait__(qtls, current):
     }
 
 
-def save_qtl_values_to_db(conn, qtls: dict):
+def save_qtl_values_to_db(conn, qtls: tuple[dict, ...]):
+    """Save computed QTLs to the database."""
     with conn.cursor() as cursor:
         cursor.executemany(
             "UPDATE PublishXRef SET "
@@ -132,11 +131,11 @@ def dispatch(args: Namespace) -> int:
                     ", ".join(_samples_not_in_genofile))
 
         # Fetch traits data: provided list, or all traits in db
-        _traitsdata = phenotypes_vector_data(
+        _traitsdata = tuple(phenotypes_vector_data(
             conn,
             args.species_id,
             args.population_id,
-            xref_ids=tuple(args.xref_ids)).values()
+            xref_ids=tuple(args.xref_ids)).values())
         logger.debug("Successfully got traits data. Generating the QTLReaper's traits file…")
         _traitsfile = generate_qtlreaper_traits_file(
             args.working_dir,
@@ -146,7 +145,7 @@ def dispatch(args: Namespace) -> int:
         logger.debug("QTLReaper's Traits file: %s", _traitsfile)
 
         _qtlreaper_main_output = args.working_dir.joinpath(
-            f"main-output-{secrets.token_urlsafe(15)}.tsv")
+            f"main-output-{secrets.token_urlsafe(15)}.tsv")#type: ignore[attr-defined]
         logger.debug("Main output filename: %s", _qtlreaper_main_output)
         with subprocess.Popen(
                 ("qtlreaper",
@@ -157,26 +156,27 @@ def dispatch(args: Namespace) -> int:
             while _qtlreaper.poll() is None:
                 logger.debug("QTLReaper process running…")
                 time.sleep(1)
-            results = tuple(max(qtls, key=lambda qtl: qtl["LRS"])
-                            for qtls in
-                            reduce(__qtls_by_trait__,
-                                   parse_tsv_file(_qtlreaper_main_output),
-                                   {}).values())
+            results = tuple(#type: ignore[var-annotated]
+                max(qtls, key=lambda qtl: qtl["LRS"])
+                for qtls in
+                reduce(__qtls_by_trait__,
+                       parse_tsv_file(_qtlreaper_main_output),
+                       {}).values())
             save_qtl_values_to_db(conn, results)
             logger.debug("Cleaning up temporary files.")
             _traitsfile.unlink()
             _qtlreaper_main_output.unlink()
             logger.info("Successfully computed p values for %s traits.",
                         len(_traitsdata))
-        exitcode = 0
+        return 0
     except FileNotFoundError as fnf:
-        logger.error(", ".join(fnf.args), exc_info=False)
+        logger.error(", ".join(str(arg) for arg in fnf.args), exc_info=False)
     except AssertionError as aserr:
         logger.error(", ".join(aserr.args), exc_info=False)
-    except Exception as _exc:
+    except Exception as _exc:# pylint: disable=[broad-exception-caught]
         logger.debug("Type of exception: %s", type(_exc))
         logger.error("General exception!", exc_info=True)
-    finally:
-        return exitcode
+
+    return exitcode
@@ -205,7 +205,7 @@ if __name__ == "__main__":
                              "in the population."))
         args = parser.parse_args()
         setup_logging(logger, args.log_level)
-
+
         return dispatch(args)
 
     sys.exit(main())
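The `results = tuple(...)` expression in `dispatch` keeps, for each trait, the QTL row with the highest LRS. The body of `__qtls_by_trait__` is elided in the hunk above, so the folding step below is an assumption consistent with how `dispatch` consumes it (the "ID" key is likewise hypothetical):

    from functools import reduce

    def __qtls_by_trait__(qtls: dict, current: dict) -> dict:
        # assumed folding step: append 'current' to its trait's group
        trait = current["ID"]
        return {**qtls, trait: qtls.get(trait, tuple()) + (current,)}

    rows = ({"ID": "t1", "LRS": 3.0}, {"ID": "t1", "LRS": 9.2},
            {"ID": "t2", "LRS": 5.5})
    results = tuple(max(group, key=lambda qtl: qtl["LRS"])
                    for group in reduce(__qtls_by_trait__, rows, {}).values())
    assert results == ({"ID": "t1", "LRS": 9.2}, {"ID": "t2", "LRS": 5.5})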