about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/cli/options.py10
-rw-r--r--scripts/insert_samples.py16
-rw-r--r--scripts/load_phenotypes_to_db.py52
-rw-r--r--scripts/phenotypes/__init__.py1
-rw-r--r--scripts/phenotypes/delete_phenotypes.py173
-rw-r--r--scripts/rqtl2/phenotypes_qc.py7
-rw-r--r--scripts/run_qtlreaper.py49
7 files changed, 266 insertions, 42 deletions
diff --git a/scripts/cli/options.py b/scripts/cli/options.py
index 70d2a27..58d3df4 100644
--- a/scripts/cli/options.py
+++ b/scripts/cli/options.py
@@ -44,3 +44,13 @@ def add_population_id(parser: ArgumentParser) -> ArgumentParser:
                         type=int,
                         help="The ID for the population to operate on.")
     return parser
+
+
+def add_dataset_id(parser: ArgumentParser) -> ArgumentParser:
+    """Add dataset-id as a mandatory argument."""
+    parser = add_population_id(parser)
+    parser.add_argument("dataset_id",
+                        metavar="DATASET-ID",
+                        type=int,
+                        help="The ID for the dataset to operate on.")
+    return parser
diff --git a/scripts/insert_samples.py b/scripts/insert_samples.py
index fc029f9..96ae8e2 100644
--- a/scripts/insert_samples.py
+++ b/scripts/insert_samples.py
@@ -6,10 +6,10 @@ import argparse
 import traceback
 
 import MySQLdb as mdb
-from redis import Redis
+
 from gn_libs.mysqldb import database_connection
 
-from uploader.check_connections import check_db, check_redis
+from uploader.check_connections import check_db
 from uploader.species.models import species_by_id
 from uploader.population.models import population_by_id
 from uploader.samples.models import (
@@ -35,7 +35,6 @@ class SeparatorAction(argparse.Action):
         setattr(namespace, self.dest, (chr(9) if values == "\\t" else values))
 
 def insert_samples(conn: mdb.Connection,# pylint: disable=[too-many-arguments, too-many-positional-arguments]
-                   rconn: Redis,# pylint: disable=[unused-argument]
                    speciesid: int,
                    populationid: int,
                    samplesfile: pathlib.Path,
@@ -119,11 +118,6 @@ if __name__ == "__main__":
             help=("The character used to delimit (surround?) the value in "
                   "each column."))
 
-        # == Script-specific extras ==
-        parser.add_argument("--redisuri",
-                            help="URL to initialise connection to redis",
-                            default="redis:///")
-
         args = parser.parse_args()
         return args
 
@@ -132,17 +126,13 @@ if __name__ == "__main__":
         status_code = 1 # Exit with an Exception
         args = cli_args()
         check_db(args.databaseuri)
-        check_redis(args.redisuri)
         if not args.samplesfile.exists():
             logging.error("File not found: '%s'.", args.samplesfile)
             return 2
 
-        with (Redis.from_url(args.redisuri, decode_responses=True) as rconn,
-              database_connection(args.databaseuri) as dbconn):
-
+        with database_connection(args.databaseuri) as dbconn:
             try:
                 status_code = insert_samples(dbconn,
-                                             rconn,
                                              args.speciesid,
                                              args.populationid,
                                              args.samplesfile,
diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py
index e303bb3..31eb715 100644
--- a/scripts/load_phenotypes_to_db.py
+++ b/scripts/load_phenotypes_to_db.py
@@ -5,9 +5,9 @@ import json
 import time
 import logging
 import argparse
-import datetime
 from pathlib import Path
 from zipfile import ZipFile
+from datetime import datetime
 from typing import Any, Iterable
 from urllib.parse import urljoin
 from functools import reduce, partial
@@ -198,13 +198,16 @@ save_phenotypes_n = partial(save_numeric_data,
 
 
 def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments,too-many-arguments]
-        authserver,
-        token,
+        auth_details,
+        resource_details,
         species,
         population,
         dataset,
         xrefdata):
     """Grant the user access to their data."""
+    logger.info("Updating authorisation for the data.")
+    logger.debug("Resource details for the authorisation: %s", resource_details)
+    authserver, token = auth_details
     _tries = 0
     _delay = 1
     headers = {
@@ -215,14 +218,14 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments
         return urljoin(authserver, endpoint)
 
     def __fetch_user_details__():
-        logger.debug("… Fetching user details")
+        logger.info("… Fetching user details")
         return mrequests.get(
             authserveruri("/auth/user/"),
             headers=headers
         )
 
     def __link_data__(user):
-        logger.debug("… linking uploaded data to user's group")
+        logger.info("… linking uploaded data to user's group")
         return mrequests.post(
             authserveruri("/auth/data/link/phenotype"),
             headers=headers,
@@ -245,7 +248,7 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments
             }).then(lambda ld_results: (user, ld_results))
 
     def __fetch_phenotype_category_details__(user, linkeddata):
-        logger.debug("… fetching phenotype category details")
+        logger.info("… fetching phenotype category details")
         return mrequests.get(
             authserveruri("/auth/resource/categories"),
             headers=headers
@@ -258,20 +261,18 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments
         )
 
     def __create_resource__(user, linkeddata, category):
-        logger.debug("… creating authorisation resource object")
-        now = datetime.datetime.now().isoformat()
+        logger.info("… creating authorisation resource object")
         return mrequests.post(
             authserveruri("/auth/resource/create"),
             headers=headers,
             json={
+                **resource_details,
                 "resource_category": category["resource_category_id"],
-                "resource_name": (f"{user['email']}—{dataset['Name']}—{now}—"
-                                  f"{len(xrefdata)} phenotypes"),
                 "public": "off"
             }).then(lambda cr_results: (user, linkeddata, cr_results))
 
     def __attach_data_to_resource__(user, linkeddata, resource):
-        logger.debug("… attaching data to authorisation resource object")
+        logger.info("… attaching data to authorisation resource object")
         return mrequests.post(
             authserveruri("/auth/resource/data/link"),
             headers=headers,
@@ -288,8 +289,8 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments
             # This is hacky. If the auth already exists, something went wrong
             # somewhere.
             # This needs investigation to recover correctly.
-            logger.info(
-                "The authorisation for the data was already set up.")
+            logger.error(
+                "Error: The authorisation for the data was already set up.")
             return 0
         logger.error("ERROR: Updating the authorisation for the data failed.")
         logger.debug(
@@ -461,6 +462,25 @@ if __name__ == "__main__":
         logging.getLogger("uploader.phenotypes.models").setLevel(log_level)
 
 
+    def __parse_resource_details__(meta) -> dict:
+        """Parse out details regarding the wrapper resource from the metadata."""
+        _key_mappings_ = {
+            # allow both 'data_*' and 'data*' for the metadata.
+            "data_description": "description",
+            "datadescription": "description"
+        }
+        return {
+            "resource_name": meta.get(
+                "dataname",
+                meta.get("data_name",
+                         "Unnamed phenotypes - " + datetime.now().isoformat())),
+            "resource_metadata": {
+                rkey: meta[mkey]
+                for mkey, rkey in _key_mappings_.items() if mkey in meta
+            }
+        }
+
+
     def main():
         """Entry-point for this script."""
         args = parse_args()
@@ -516,8 +536,10 @@ if __name__ == "__main__":
         # Update authorisations (break this down) — maybe loop until it works?
         logger.info("Updating authorisation.")
         _job_metadata = job["metadata"]
-        return update_auth(_job_metadata["authserver"],
-                           _job_metadata["token"],
+
+        return update_auth((_job_metadata["authserver"],
+                            _job_metadata["token"]),
+                           __parse_resource_details__(_job_metadata),
                            *db_results)
 
 
diff --git a/scripts/phenotypes/__init__.py b/scripts/phenotypes/__init__.py
new file mode 100644
index 0000000..73ad839
--- /dev/null
+++ b/scripts/phenotypes/__init__.py
@@ -0,0 +1 @@
+"Scripts for dealing with phenotypes."
diff --git a/scripts/phenotypes/delete_phenotypes.py b/scripts/phenotypes/delete_phenotypes.py
new file mode 100644
index 0000000..461f3ec
--- /dev/null
+++ b/scripts/phenotypes/delete_phenotypes.py
@@ -0,0 +1,173 @@
+"""Delete phenotypes."""
+import sys
+import logging
+from pathlib import Path
+from typing import Optional
+from urllib.parse import urljoin
+from argparse import Namespace, ArgumentParser
+
+import requests
+from MySQLdb.cursors import DictCursor, BaseCursor
+
+from gn_libs.mysqldb import database_connection
+
+from uploader.phenotypes.models import delete_phenotypes
+from scripts.cli.logging import setup_logging
+from scripts.cli.options import (add_logging,
+                                 add_mariadb_uri,
+                                 add_population_id)
+
+logger = logging.getLogger(__name__)
+
+def read_xref_ids_file(filepath: Optional[Path]) -> tuple[int, ...]:
+    """Read the phenotypes' cross-reference IDS from file."""
+    if filepath is None:
+        return tuple()
+
+    logger.debug("Using file '%s' to retrieve XREF IDs for deletion.",
+                 filepath.name)
+    _ids: tuple[int, ...] = tuple()
+    with filepath.open(mode="r") as infile:
+        for line in infile.readlines():
+            try:
+                _ids += (int(line.strip()),)
+            except TypeError:
+                pass
+
+    return _ids
+
+
+def fetch_all_xref_ids(
+        cursor: BaseCursor, population_id: int) -> tuple[int, ...]:
+    """Fetch all cross-reference IDs."""
+    cursor.execute("SELECT Id FROM PublishXRef WHERE InbredSetId=%s",
+                   (population_id,))
+    return tuple(int(row["Id"]) for row in cursor.fetchall())
+
+
+def update_auth(
+        auth_details: tuple[str, str],
+        species_id: int,
+        population_id: int,
+        dataset_id: int,
+        xref_ids: tuple[int, ...] = tuple()
+):
+    """Update the authorisation server: remove items to delete."""
+    authserver, token = auth_details
+    resp = requests.post(
+        urljoin(authserver,
+                (f"/auth/data/phenotypes/{species_id}/{population_id}"
+                 f"/{dataset_id}/delete")),
+        timeout=(9.13, 20),
+        headers={
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json"
+        },
+        json={"xref_ids": xref_ids})
+    resp.raise_for_status()
+
+
+def delete_the_phenotypes(
+        cursor: BaseCursor,
+        population_id: int,
+        xref_ids: tuple[int, ...] = tuple()) -> int:
+    """Process and delete the phenotypes."""
+    delete_phenotypes(cursor, population_id, xref_ids)
+
+    return 0
+
+if __name__ == "__main__":
+    def parse_args() -> Namespace:
+        """Parse CLI arguments."""
+        parser = add_logging(
+            add_population_id(
+                add_mariadb_uri(
+                    ArgumentParser(
+                        prog="delete-phenotypes",
+                        description=(
+                            "Script to delete phenotypes from the database.")))))
+        parser.add_argument(
+            "dataset_id",
+            metavar="DATASET-ID",
+            type=int,
+            help="The dataset identifier for phenotypes to delete.")
+        parser.add_argument(
+            "auth_server_uri",
+            metavar="AUTH-SERVER-URI",
+            type=str,
+            help="URI to the authorisation server.")
+        parser.add_argument(
+            "auth_token",
+            metavar="AUTH-TOKEN",
+            type=str,
+            help=("Token to use to update the authorisation system with the "
+                  "deletions done."))
+        parser.add_argument(
+            "--xref_ids_file",
+            metavar="XREF-IDS-FILE",
+            type=Path,
+            help=("Path to a file with phenotypes cross-reference IDs to "
+                  "delete."))
+        parser.add_argument(
+            "--delete-all",
+            action="store_true",
+            help=("If no 'XREF-IDS-FILE' is provided, this flag determines "
+                  "whether or not all the phenotypes for the given population "
+                  "will be deleted."))
+        return parser.parse_args()
+
+
+    def main():
+        """The `delete-phenotypes` script's entry point."""
+        args = parse_args()
+        setup_logging(logger, args.log_level.upper(), tuple())
+        with (database_connection(args.db_uri) as conn,
+              conn.cursor(cursorclass=DictCursor) as cursor):
+            xref_ids = read_xref_ids_file(args.xref_ids_file)
+            try:
+                assert not (len(xref_ids) > 0 and args.delete_all)
+                xref_ids = (fetch_all_xref_ids(cursor, args.population_id)
+                            if args.delete_all else xref_ids)
+                logger.debug("Will delete %s phenotypes and related data",
+                             len(xref_ids))
+                if len(xref_ids) == 0:
+                    print("No cross-reference IDs were provided. Aborting.")
+                    return 0
+
+                print("Updating authorisations: ", end="")
+                update_auth((args.auth_server_uri, args.auth_token),
+                            args.species_id,
+                            args.population_id,
+                            args.dataset_id,
+                            xref_ids)
+                print("OK.")
+                print("Deleting the data: ", end="")
+                delete_phenotypes(cursor, args.population_id, xref_ids=xref_ids)
+                print("OK.")
+                if args.xref_ids_file is not None:
+                    print("Deleting temporary file: ", end="")
+                    args.xref_ids_file.unlink()
+                    print("OK.")
+
+                return 0
+            except AssertionError:
+                logger.error(
+                    "'DELETE-ALL' and 'XREF-IDS' are mutually exclusive. "
+                    "If you specify the list of XREF-IDS (in a file) to delete "
+                    "and also specify to 'DELETE-ALL' phenotypes in the "
+                    "population, we have no way of knowing what it is you want.")
+                return 1
+            except requests.exceptions.HTTPError as _exc:
+                resp = _exc.response
+                resp_data = resp.json()
+                logger.debug("%s: %s",
+                             resp_data["error"],
+                             resp_data["error_description"],
+                             exc_info=True)
+                return 1
+            except Exception as _exc:# pylint: disable=[broad-exception-caught]
+                logger.debug("Failed while attempting to delete phenotypes.",
+                             exc_info=True)
+                return 1
+
+    sys.exit(main())
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 72d6c83..084c876 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -198,7 +198,7 @@ def qc_phenocovar_file(
                     "-",
                     "-",
                     (f"File {filepath.name} is missing the {heading} heading "
-                     "in the header line."))),)
+                     "in the header row/line."))),)
 
         def collect_errors(errors_and_linecount, line):
             _errs, _lc = errors_and_linecount
@@ -312,8 +312,9 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-ma
                 "header row",
                 "-",
                 ", ".join(_absent),
-                ("The following phenotype names do not exist in any of the "
-                 f"provided phenocovar files: ({', '.join(_absent)})"))),)
+                ("The following trait names/identifiers do not exist in any of "
+                 "the provided descriptions/covariates files: "
+                 f"({', '.join(_absent)})"))),)
 
         def collect_errors(errors_and_linecount, line):
             _errs, _lc = errors_and_linecount
diff --git a/scripts/run_qtlreaper.py b/scripts/run_qtlreaper.py
index 7d58402..2269ea6 100644
--- a/scripts/run_qtlreaper.py
+++ b/scripts/run_qtlreaper.py
@@ -1,10 +1,12 @@
 """Script to run rust-qtlreaper and update database with results."""
+import os
 import sys
 import csv
 import time
 import secrets
 import logging
 import subprocess
+import multiprocessing
 from pathlib import Path
 from functools import reduce
 from typing import Union, Iterator
@@ -146,30 +148,55 @@ def dispatch(args: Namespace) -> int:
 
             _qtlreaper_main_output = args.working_dir.joinpath(
                 f"main-output-{secrets.token_urlsafe(15)}.tsv")#type: ignore[attr-defined]
+            _qtlreaper_permu_output = args.working_dir.joinpath(
+                f"permu-output-{secrets.token_urlsafe(15)}.tsv")
             logger.debug("Main output filename: %s", _qtlreaper_main_output)
             with subprocess.Popen(
                     ("qtlreaper",
                      "--n_permutations", "1000",
                      "--geno", _genofile,
                      "--traits", _traitsfile,
-                     "--main_output", _qtlreaper_main_output)) as _qtlreaper:
+                     "--main_output", _qtlreaper_main_output,
+                     "--permu_output", _qtlreaper_permu_output,
+                     "--threads", str(int(1+(multiprocessing.cpu_count()/2)))),
+                    env=({**os.environ, "RUST_BACKTRACE": "full"}
+                         if logger.getEffectiveLevel() == logging.DEBUG
+                         else dict(os.environ))) as _qtlreaper:
                 while _qtlreaper.poll() is None:
                     logger.debug("QTLReaper process running…")
                     time.sleep(1)
-                    results = tuple(#type: ignore[var-annotated]
-                        max(qtls, key=lambda qtl: qtl["LRS"])
-                        for qtls in
-                        reduce(__qtls_by_trait__,
-                               parse_tsv_file(_qtlreaper_main_output),
-                               {}).values())
-            save_qtl_values_to_db(conn, results)
+                    results = (
+                        tuple(#type: ignore[var-annotated]
+                            max(qtls, key=lambda qtl: qtl["LRS"])
+                            for qtls in
+                            reduce(__qtls_by_trait__,
+                                   parse_tsv_file(_qtlreaper_main_output),
+                                   {}).values())
+                        if _qtlreaper_main_output.exists()
+                        else tuple())
             logger.debug("Cleaning up temporary files.")
-            _traitsfile.unlink()
-            _qtlreaper_main_output.unlink()
+
+            # short-circuits to delete file if exists
+            if _traitsfile.exists():
+                _traitsfile.unlink()
+                logger.info("Deleted generated traits' file for QTLReaper.")
+
+            if _qtlreaper_main_output.exists():
+                _qtlreaper_main_output.unlink()
+                logger.info("Deleted QTLReaper's main output file.")
+
+            if _qtlreaper_permu_output.exists():
+                _qtlreaper_permu_output.unlink()
+                logger.info("Deleted QTLReaper's permutations file.")
+
+            if _qtlreaper.returncode != 0:
+                return _qtlreaper.returncode
+
+            save_qtl_values_to_db(conn, results)
             logger.info("Successfully computed p values for %s traits.", len(_traitsdata))
             return 0
         except FileNotFoundError as fnf:
-            logger.error(", ".join(fnf.args), exc_info=False)
+            logger.error(", ".join(str(arg) for arg in fnf.args), exc_info=False)
         except AssertionError as aserr:
             logger.error(", ".join(aserr.args), exc_info=False)
         except Exception as _exc:# pylint: disable=[broad-exception-caught]