diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/cli/options.py | 10 | ||||
| -rw-r--r-- | scripts/load_phenotypes_to_db.py | 52 | ||||
| -rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 7 | ||||
| -rw-r--r-- | scripts/run_qtlreaper.py | 20 |
4 files changed, 68 insertions, 21 deletions
diff --git a/scripts/cli/options.py b/scripts/cli/options.py index 70d2a27..58d3df4 100644 --- a/scripts/cli/options.py +++ b/scripts/cli/options.py @@ -44,3 +44,13 @@ def add_population_id(parser: ArgumentParser) -> ArgumentParser: type=int, help="The ID for the population to operate on.") return parser + + +def add_dataset_id(parser: ArgumentParser) -> ArgumentParser: + """Add dataset-id as a mandatory argument.""" + parser = add_population_id(parser) + parser.add_argument("dataset_id", + metavar="DATASET-ID", + type=int, + help="The ID for the dataset to operate on.") + return parser diff --git a/scripts/load_phenotypes_to_db.py b/scripts/load_phenotypes_to_db.py index e303bb3..31eb715 100644 --- a/scripts/load_phenotypes_to_db.py +++ b/scripts/load_phenotypes_to_db.py @@ -5,9 +5,9 @@ import json import time import logging import argparse -import datetime from pathlib import Path from zipfile import ZipFile +from datetime import datetime from typing import Any, Iterable from urllib.parse import urljoin from functools import reduce, partial @@ -198,13 +198,16 @@ save_phenotypes_n = partial(save_numeric_data, def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments,too-many-arguments] - authserver, - token, + auth_details, + resource_details, species, population, dataset, xrefdata): """Grant the user access to their data.""" + logger.info("Updating authorisation for the data.") + logger.debug("Resource details for the authorisation: %s", resource_details) + authserver, token = auth_details _tries = 0 _delay = 1 headers = { @@ -215,14 +218,14 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments return urljoin(authserver, endpoint) def __fetch_user_details__(): - logger.debug("… Fetching user details") + logger.info("… Fetching user details") return mrequests.get( authserveruri("/auth/user/"), headers=headers ) def __link_data__(user): - logger.debug("… linking uploaded data to user's group") + logger.info("… linking uploaded data to user's group") return mrequests.post( authserveruri("/auth/data/link/phenotype"), headers=headers, @@ -245,7 +248,7 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments }).then(lambda ld_results: (user, ld_results)) def __fetch_phenotype_category_details__(user, linkeddata): - logger.debug("… fetching phenotype category details") + logger.info("… fetching phenotype category details") return mrequests.get( authserveruri("/auth/resource/categories"), headers=headers @@ -258,20 +261,18 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments ) def __create_resource__(user, linkeddata, category): - logger.debug("… creating authorisation resource object") - now = datetime.datetime.now().isoformat() + logger.info("… creating authorisation resource object") return mrequests.post( authserveruri("/auth/resource/create"), headers=headers, json={ + **resource_details, "resource_category": category["resource_category_id"], - "resource_name": (f"{user['email']}—{dataset['Name']}—{now}—" - f"{len(xrefdata)} phenotypes"), "public": "off" }).then(lambda cr_results: (user, linkeddata, cr_results)) def __attach_data_to_resource__(user, linkeddata, resource): - logger.debug("… attaching data to authorisation resource object") + logger.info("… attaching data to authorisation resource object") return mrequests.post( authserveruri("/auth/resource/data/link"), headers=headers, @@ -288,8 +289,8 @@ def update_auth(# pylint: disable=[too-many-locals,too-many-positional-arguments # This is hacky. If the auth already exists, something went wrong # somewhere. # This needs investigation to recover correctly. - logger.info( - "The authorisation for the data was already set up.") + logger.error( + "Error: The authorisation for the data was already set up.") return 0 logger.error("ERROR: Updating the authorisation for the data failed.") logger.debug( @@ -461,6 +462,25 @@ if __name__ == "__main__": logging.getLogger("uploader.phenotypes.models").setLevel(log_level) + def __parse_resource_details__(meta) -> dict: + """Parse out details regarding the wrapper resource from the metadata.""" + _key_mappings_ = { + # allow both 'data_*' and 'data*' for the metadata. + "data_description": "description", + "datadescription": "description" + } + return { + "resource_name": meta.get( + "dataname", + meta.get("data_name", + "Unnamed phenotypes - " + datetime.now().isoformat())), + "resource_metadata": { + rkey: meta[mkey] + for mkey, rkey in _key_mappings_.items() if mkey in meta + } + } + + def main(): """Entry-point for this script.""" args = parse_args() @@ -516,8 +536,10 @@ if __name__ == "__main__": # Update authorisations (break this down) — maybe loop until it works? logger.info("Updating authorisation.") _job_metadata = job["metadata"] - return update_auth(_job_metadata["authserver"], - _job_metadata["token"], + + return update_auth((_job_metadata["authserver"], + _job_metadata["token"]), + __parse_resource_details__(_job_metadata), *db_results) diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 72d6c83..084c876 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -198,7 +198,7 @@ def qc_phenocovar_file( "-", "-", (f"File {filepath.name} is missing the {heading} heading " - "in the header line."))),) + "in the header row/line."))),) def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount @@ -312,8 +312,9 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-ma "header row", "-", ", ".join(_absent), - ("The following phenotype names do not exist in any of the " - f"provided phenocovar files: ({', '.join(_absent)})"))),) + ("The following trait names/identifiers do not exist in any of " + "the provided descriptions/covariates files: " + f"({', '.join(_absent)})"))),) def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount diff --git a/scripts/run_qtlreaper.py b/scripts/run_qtlreaper.py index ab19da0..2269ea6 100644 --- a/scripts/run_qtlreaper.py +++ b/scripts/run_qtlreaper.py @@ -6,6 +6,7 @@ import time import secrets import logging import subprocess +import multiprocessing from pathlib import Path from functools import reduce from typing import Union, Iterator @@ -147,13 +148,17 @@ def dispatch(args: Namespace) -> int: _qtlreaper_main_output = args.working_dir.joinpath( f"main-output-{secrets.token_urlsafe(15)}.tsv")#type: ignore[attr-defined] + _qtlreaper_permu_output = args.working_dir.joinpath( + f"permu-output-{secrets.token_urlsafe(15)}.tsv") logger.debug("Main output filename: %s", _qtlreaper_main_output) with subprocess.Popen( ("qtlreaper", "--n_permutations", "1000", "--geno", _genofile, "--traits", _traitsfile, - "--main_output", _qtlreaper_main_output), + "--main_output", _qtlreaper_main_output, + "--permu_output", _qtlreaper_permu_output, + "--threads", str(int(1+(multiprocessing.cpu_count()/2)))), env=({**os.environ, "RUST_BACKTRACE": "full"} if logger.getEffectiveLevel() == logging.DEBUG else dict(os.environ))) as _qtlreaper: @@ -172,8 +177,17 @@ def dispatch(args: Namespace) -> int: logger.debug("Cleaning up temporary files.") # short-circuits to delete file if exists - _traitsfile.exists() and _traitsfile.unlink() - _qtlreaper_main_output.exists() and _qtlreaper_main_output.unlink() + if _traitsfile.exists(): + _traitsfile.unlink() + logger.info("Deleted generated traits' file for QTLReaper.") + + if _qtlreaper_main_output.exists(): + _qtlreaper_main_output.unlink() + logger.info("Deleted QTLReaper's main output file.") + + if _qtlreaper_permu_output.exists(): + _qtlreaper_permu_output.unlink() + logger.info("Deleted QTLReaper's permutations file.") if _qtlreaper.returncode != 0: return _qtlreaper.returncode |
