diff options
Diffstat (limited to 'scripts/rqtl2/phenotypes_qc.py')
| -rw-r--r-- | scripts/rqtl2/phenotypes_qc.py | 58 |
1 files changed, 32 insertions, 26 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py index 76ecb8d..9f11f57 100644 --- a/scripts/rqtl2/phenotypes_qc.py +++ b/scripts/rqtl2/phenotypes_qc.py @@ -36,8 +36,15 @@ from scripts.cli_parser import init_cli_parser, add_global_data_arguments from scripts.rqtl2.bundleutils import build_line_joiner, build_line_splitter __MODULE__ = "scripts.rqtl2.phenotypes_qc" +logging.basicConfig( + format=("%(asctime)s - %(levelname)s %(name)s: " + "(%(pathname)s: %(lineno)d) %(message)s")) +logger = logging.getLogger(__MODULE__) -def validate(phenobundle: Path, logger: Logger) -> dict: +def validate( + phenobundle: Path, + logger: Logger# pylint: disable=[redefined-outer-name] +) -> dict: """Check that the bundle is generally valid""" try: rqc.validate_bundle(phenobundle) @@ -59,7 +66,7 @@ def validate(phenobundle: Path, logger: Logger) -> dict: def check_for_mandatory_pheno_keys( phenobundle: Path, - logger: Logger, + logger: Logger,# pylint: disable=[redefined-outer-name] **kwargs ) -> dict: """Check that the mandatory keys exist for phenotypes.""" @@ -86,7 +93,7 @@ def check_for_mandatory_pheno_keys( def check_for_averages_files( phenobundle: Path, - logger: Logger, + logger: Logger,# pylint: disable=[redefined-outer-name] **kwargs ) -> dict: """Check that averages files appear together""" @@ -140,15 +147,15 @@ def redis_logger( ) -> Iterator[logging.Logger]: """Build a Redis message-list logger.""" rconn = Redis.from_url(redisuri, decode_responses=True) - logger = logging.getLogger(loggername) - logger.propagate = False + _logger = logging.getLogger(loggername) + _logger.propagate = False handler = RedisMessageListHandler( rconn, fullyqualifiedkey(fqkey, filename))#type: ignore[arg-type] handler.setFormatter(logging.getLogger().handlers[0].formatter) - logger.addHandler(handler) + _logger.addHandler(handler) try: - yield logger + yield _logger finally: rconn.close() @@ -175,9 +182,9 @@ def qc_phenocovar_file( redisuri, f"{__MODULE__}.qc_phenocovar_file", filepath.name, - f"{fqkey}:logs") as logger, + f"{fqkey}:logs") as _logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) _headings = tuple(heading.lower() for heading in next(_csvfile)) _errors: tuple[InvalidValue, ...] = tuple() @@ -195,7 +202,7 @@ def qc_phenocovar_file( def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount - logger.info("Testing record '%s'", line[0]) + _logger.info("Testing record '%s'", line[0]) if len(line) != len(_headings): _errs = _errs + (save_error(InvalidValue( filepath.name, @@ -205,12 +212,12 @@ def qc_phenocovar_file( (f"Record {_lc} in file {filepath.name} has a different " "number of columns than the number of headings"))),) _line = dict(zip(_headings, line)) - if not bool(_line["description"]): + if not bool(_line.get("description")): _errs = _errs + ( save_error(InvalidValue(filepath.name, _line[_headings[0]], "description", - _line["description"], + _line.get("description"), "The description is not provided!")),) rconn.hset(file_fqkey(fqkey, "metadata", filepath), @@ -236,7 +243,7 @@ def merge_dicts(*dicts): return reduce(lambda merged, dct: {**merged, **dct}, dicts, {}) -def decimal_points_error(# pylint: disable=[too-many-arguments] +def decimal_points_error(# pylint: disable=[too-many-arguments,too-many-positional-arguments] filename: str, rowtitle: str, coltitle: str, @@ -267,7 +274,7 @@ def integer_error( return InvalidValue(filename, rowtitle, coltitle, cellvalue, message) -def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments] +def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments, too-many-positional-arguments] filepath: Path, redisuri: str, fqkey: str, @@ -283,9 +290,9 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments] redisuri, f"{__MODULE__}.qc_pheno_file", filepath.name, - f"{fqkey}:logs") as logger, + f"{fqkey}:logs") as _logger, Redis.from_url(redisuri, decode_responses=True) as rconn): - logger.info("Running QC on file: %s", filepath.name) + print("Running QC on file: ", filepath.name) save_error = partial( push_error, rconn, file_fqkey(fqkey, "errors", filepath)) _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char) @@ -310,7 +317,7 @@ def qc_pheno_file(# pylint: disable=[too-many-locals, too-many-arguments] def collect_errors(errors_and_linecount, line): _errs, _lc = errors_and_linecount - logger.debug("Checking row %s", line[0]) + _logger.debug("Checking row %s", line[0]) if line[0] not in samples: _errs = _errs + (save_error(InvalidValue( filepath.name, @@ -369,11 +376,10 @@ def run_qc(# pylint: disable=[too-many-locals] rconn: Redis, dbconn: mdb.Connection, fullyqualifiedjobid: str, - args: Namespace, - logger: Logger + args: Namespace ) -> int: """Run quality control checks on the bundle.""" - logger.debug("Beginning the quality assurance checks.") + print("Beginning the quality assurance checks.") results = check_for_averages_files( **check_for_mandatory_pheno_keys( **validate(args.rqtl2bundle, logger))) @@ -398,7 +404,7 @@ def run_qc(# pylint: disable=[too-many-locals] for ftype in ("pheno", "phenocovar", "phenose", "phenonum"))) # - Fetch samples/individuals from database. - logger.debug("Fetching samples/individuals from the database.") + print("Fetching samples/individuals from the database.") samples = tuple(#type: ignore[var-annotated] item for item in set(reduce( lambda acc, item: acc + ( @@ -415,7 +421,7 @@ def run_qc(# pylint: disable=[too-many-locals] json.dumps(tuple(f"{fullyqualifiedjobid}:phenocovar:{_file}" for _file in cdata.get("phenocovar", [])))) with mproc.Pool(mproc.cpu_count() - 1) as pool: - logger.debug("Check for errors in 'phenocovar' file(s).") + print("Check for errors in 'phenocovar' file(s).") _phenocovar_qc_res = merge_dicts(*pool.starmap(qc_phenocovar_file, tuple( (extractiondir.joinpath(_file), args.redisuri, @@ -437,7 +443,7 @@ def run_qc(# pylint: disable=[too-many-locals] "Expected a non-negative number with at least one decimal " "place.")) - logger.debug("Check for errors in 'pheno' file(s).") + print("Check for errors in 'pheno' file(s).") _pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -456,7 +462,7 @@ def run_qc(# pylint: disable=[too-many-locals] # - Check the 3 checks above for phenose and phenonum values too # qc_phenose_files(…) # qc_phenonum_files(…) - logger.debug("Check for errors in 'phenose' file(s).") + print("Check for errors in 'phenose' file(s).") _phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -472,7 +478,7 @@ def run_qc(# pylint: disable=[too-many-locals] dec_err_fn ) for _file in cdata.get("phenose", [])))) - logger.debug("Check for errors in 'phenonum' file(s).") + print("Check for errors in 'phenonum' file(s).") _phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple(( extractiondir.joinpath(_file), args.redisuri, @@ -509,5 +515,5 @@ if __name__ == "__main__": type=Path) return parser.parse_args() - main = build_main(cli_args(), run_qc, __MODULE__) + main = build_main(cli_args(), run_qc, logger) sys.exit(main()) |
