diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/cli/options.py | 10 | ||||
| -rw-r--r-- | scripts/load_phenotypes_to_db.py | 52 | ||||
| -rw-r--r-- | scripts/phenotypes/delete_phenotypes.py | 30 | ||||
| -rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 7 | ||||
| -rw-r--r-- | scripts/run_qtlreaper.py | 49 |
5 files changed, 115 insertions, 33 deletions
diff --git a/scripts/cli/options.py b/scripts/cli/options.py index 70d2a27..58d3df4 100644 --- a/scripts/cli/options.py +++ b/scripts/cli/options.py @@ -44,3 +44,13 @@ def add_population_id(parser: ArgumentParser) -> ArgumentParser: type=int, help="The ID for the population to operate on.") return parser + + +def add_dataset_id(parser: ArgumentParser) -> ArgumentParser: + """Add dataset-id as a mandatory argument.""" + parser = add_population_id(parser) + parser.add_argument("dataset_id", + metavar="DATASET-ID", + type=int, + help="The ID for the dataset to operate on.") + return parser diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index e303bb3..31eb715 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -5,9 +5,9 @@ import json import time import logging import argparse -import datetime from pathlib import Path from zipfile import ZipFile +from datetime import datetime from typing import Any, Iterable from urllib.parse import urljoin from functools import reduce, partial @@ -198,13 +198,16 @@ save_phenotypes_n = partial(save_numeric_data, def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments,too-many-arguments] - authserver, - token, + auth_details, + resource_details, species, population, dataset, xrefdata): """Grant the user access to their data.""" + logger.info("Updating authorisation for the data.") + logger.debug("Resource details for the authorisation: %s", resource_details) + authserver, token = auth_details _tries = 0 _delay = 1 headers = { @@ -215,14 +218,14 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments return urljoin(authserver, endpoint) def __fetch_user_details__(): - logger.debug("… Fetching user details") + logger.info("… Fetching user details") return mrequests.get( authserveruri("/auth/user/"), headers=headers ) def __link_data__(user): - logger.debug("… linking uploaded data to user's group") + logger.info("… linking uploaded data to user's group") return mrequests.post( authserveruri("/auth/data/link/phenotype"), headers=headers, @@ -245,7 +248,7 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments }).then(lambda ld_results: (user, ld_results)) def __fetch_phenotype_category_details__(user, linkeddata): - logger.debug("… fetching phenotype category details") + logger.info("… fetching phenotype category details") return mrequests.get( authserveruri("/auth/resource/categories"), headers=headers @@ -258,20 +261,18 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments ) def __create_resource__(user, linkeddata, category): - logger.debug("… creating authorisation resource object") - now = datetime.datetime.now().isoformat() + logger.info("… creating authorisation resource object") return mrequests.post( authserveruri("/auth/resource/create"), headers=headers, json={ + **resource_details, "resource_category": category["resource_category_id"], - "resource_name": (f"{user['email']}—{dataset['Name']}—{now}—" - f"{len(xrefdata)} phenotypes"), "public": "off" }).then(lambda cr_results: (user, linkeddata, cr_results)) def __attach_data_to_resource__(user, linkeddata, resource): - logger.debug("… attaching data to authorisation resource object") + logger.info("… attaching data to authorisation resource object") return mrequests.post( authserveruri("/auth/resource/data/link"), headers=headers, @@ -288,8 +289,8 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments # This is hacky. If the auth already exists, something went wrong # somewhere. # This needs investigation to recover correctly. - logger.info( - "The authorisation for the data was already set up.") + logger.error( + "Error: The authorisation for the data was already set up.") return 0 logger.error("ERROR: Updating the authorisation for the data failed.") logger.debug( @@ -461,6 +462,25 @@ if __name__ == "__main__": logging.getLogger("uploader.phenotypes.models").setLevel(log_level) + def __parse_resource_details__(meta) -> dict: + """Parse out details regarding the wrapper resource from the metadata.""" + _key_mappings_ = { + # allow both 'data_*' and 'data*' for the metadata. + "data_description": "description", + "datadescription": "description" + } + return { + "resource_name": meta.get( + "dataname", + meta.get("data_name", + "Unnamed phenotypes - " + datetime.now().isoformat())), + "resource_metadata": { + rkey: meta[mkey] + for mkey, rkey in _key_mappings_.items() if mkey in meta + } + } + + def main(): """Entry-point for this script.""" args = parse_args() @@ -516,8 +536,10 @@ if __name__ == "__main__": # Update authorisations (break this down) — maybe loop until it works? logger.info("Updating authorisation.") _job_metadata = job["metadata"] - return update_auth(_job_metadata["authserver"], - _job_metadata["token"], + + return update_auth((_job_metadata["authserver"], + _job_metadata["token"]), + __parse_resource_details__(_job_metadata), *db_results) diff --git a/scripts/phenotypes/delete_phenotypes.py b/scripts/phenotypes/delete_phenotypes.py index 028f061..461f3ec 100644 --- a/scripts/phenotypes/delete_phenotypes.py +++ b/scripts/phenotypes/delete_phenotypes.py @@ -24,12 +24,15 @@ def read_xref_ids_file(filepath: Optional[Path]) -> tuple[int, ...]: if filepath is None: return tuple() + logger.debug("Using file '%s' to retrieve XREF IDs for deletion.", + filepath.name) _ids: tuple[int, ...] = tuple() with filepath.open(mode="r") as infile: - try: - _ids += (int(infile.readline().strip()),) - except TypeError: - pass + for line in infile.readlines(): + try: + _ids += (int(line.strip()),) + except TypeError: + pass return _ids @@ -125,16 +128,27 @@ if __name__ == "__main__": assert not (len(xref_ids) > 0 and args.delete_all) xref_ids = (fetch_all_xref_ids(cursor, args.population_id) if args.delete_all else xref_ids) + logger.debug("Will delete %s phenotypes and related data", + len(xref_ids)) if len(xref_ids) == 0: print("No cross-reference IDs were provided. Aborting.") return 0 + print("Updating authorisations: ", end="") update_auth((args.auth_server_uri, args.auth_token), args.species_id, args.population_id, args.dataset_id, xref_ids) + print("OK.") + print("Deleting the data: ", end="") delete_phenotypes(cursor, args.population_id, xref_ids=xref_ids) + print("OK.") + if args.xref_ids_file is not None: + print("Deleting temporary file: ", end="") + args.xref_ids_file.unlink() + print("OK.") + return 0 except AssertionError: logger.error( @@ -143,6 +157,14 @@ if __name__ == "__main__": "and also specify to 'DELETE-ALL' phenotypes in the " "population, we have no way of knowing what it is you want.") return 1 + except requests.exceptions.HTTPError as _exc: + resp = _exc.response + resp_data = resp.json() + logger.debug("%s: %s", + resp_data["error"], + resp_data["error_description"], + exc_info=True) + return 1 except Exception as _exc:# pylint: disable=[broad-exception-caught] logger.debug("Failed while attempting to delete phenotypes.", exc_info=True) diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 72d6c83..084c876 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -198,7 +198,7 @@ def qc_phenocovar_file( "-", "-", (f"File {filepath.name} is missing the {heading} heading " - "in the header line."))),) + "in the header row/line."))),) def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount @@ -312,8 +312,9 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-ma "header row", "-", ", ".join(_absent), - ("The following phenotype names do not exist in any of the " - f"provided phenocovar files: ({', '.join(_absent)})"))),) + ("The following trait names/identifiers do not exist in any of " + "the provided descriptions/covariates files: " + f"({', '.join(_absent)})"))),) def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount diff --git a/scripts/run_qtlreaper.py b/scripts/run_qtlreaper.py index 7d58402..2269ea6 100644 --- a/scripts/run_qtlreaper.py +++ b/scripts/run_qtlreaper.py @@ -1,10 +1,12 @@ """Script to run rust-qtlreaper and update database with results.""" +import os import sys import csv import time import secrets import logging import subprocess +import multiprocessing from pathlib import Path from functools import reduce from typing import Union, Iterator @@ -146,30 +148,55 @@ def dispatch(args: Namespace) -> int: _qtlreaper_main_output = args.working_dir.joinpath( f"main-output-{secrets.token_urlsafe(15)}.tsv")#type: ignore[attr-defined] + _qtlreaper_permu_output = args.working_dir.joinpath( + f"permu-output-{secrets.token_urlsafe(15)}.tsv") logger.debug("Main output filename: %s", _qtlreaper_main_output) with subprocess.Popen( ("qtlreaper", "--n_permutations", "1000", "--geno", _genofile, "--traits", _traitsfile, - "--main_output", _qtlreaper_main_output)) as _qtlreaper: + "--main_output", _qtlreaper_main_output, + "--permu_output", _qtlreaper_permu_output, + "--threads", str(int(1+(multiprocessing.cpu_count()/2)))), + env=({**os.environ, "RUST_BACKTRACE": "full"} + if logger.getEffectiveLevel() == logging.DEBUG + else dict(os.environ))) as _qtlreaper: while _qtlreaper.poll() is None: logger.debug("QTLReaper process running…") time.sleep(1) - results = tuple(#type: ignore[var-annotated] - max(qtls, key=lambda qtl: qtl["LRS"]) - for qtls in - reduce(__qtls_by_trait__, - parse_tsv_file(_qtlreaper_main_output), - {}).values()) - save_qtl_values_to_db(conn, results) + results = ( + tuple(#type: ignore[var-annotated] + max(qtls, key=lambda qtl: qtl["LRS"]) + for qtls in + reduce(__qtls_by_trait__, + parse_tsv_file(_qtlreaper_main_output), + {}).values()) + if _qtlreaper_main_output.exists() + else tuple()) logger.debug("Cleaning up temporary files.") - _traitsfile.unlink() - _qtlreaper_main_output.unlink() + + # short-circuits to delete file if exists + if _traitsfile.exists(): + _traitsfile.unlink() + logger.info("Deleted generated traits' file for QTLReaper.") + + if _qtlreaper_main_output.exists(): + _qtlreaper_main_output.unlink() + logger.info("Deleted QTLReaper's main output file.") + + if _qtlreaper_permu_output.exists(): + _qtlreaper_permu_output.unlink() + logger.info("Deleted QTLReaper's permutations file.") + + if _qtlreaper.returncode != 0: + return _qtlreaper.returncode + + save_qtl_values_to_db(conn, results) logger.info("Successfully computed p values for %s traits.", len(_traitsdata)) return 0 except FileNotFoundError as fnf: - logger.error(", ".join(fnf.args), exc_info=False) + logger.error(", ".join(str(arg) for arg in fnf.args), exc_info=False) except AssertionError as aserr: logger.error(", ".join(aserr.args), exc_info=False) except Exception as _exc:# pylint: disable=[broad-exception-caught] |
