From d2605cb72d7cdbc7d3cc633b94a451c0acd2edbb Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 15 Jun 2022 09:36:18 +0300 Subject: Fix linting and type errors --- README.org | 2 +- mypy.ini | 9 +++---- qc_app/__init__.py | 4 ++-- qc_app/entry.py | 50 +++++++++++++++++++-------------------- qc_app/jobs.py | 20 +++++++++------- qc_app/parse.py | 21 +++++----------- quality_control/average.py | 15 +++++------- quality_control/headers.py | 5 ++-- quality_control/parsing.py | 12 ++++------ quality_control/standard_error.py | 15 +++++------- quality_control/utils.py | 12 +++++++++- scripts/worker.py | 25 +++++++++++++------- setup.py | 1 + 13 files changed, 97 insertions(+), 94 deletions(-) diff --git a/README.org b/README.org index 2897d91..5ec53f0 100644 --- a/README.org +++ b/README.org @@ -72,7 +72,7 @@ Run tests with: To run the linter over the code base, run: #+BEGIN_SRC shell - pylint tests quality_control qc_app + pylint *.py tests quality_control qc_app scripts #+END_SRC To check for correct type usage in the application, run: diff --git a/mypy.ini b/mypy.ini index f61431a..74e2d91 100644 --- a/mypy.ini +++ b/mypy.ini @@ -9,17 +9,14 @@ ignore_missing_imports = True [mypy-hypothesis.*] ignore_missing_imports = True -[mypy-rq.*] -ignore_missing_imports = True - [mypy-redis.*] ignore_missing_imports = True -[mypy-magic.*] -ignore_missing_imports = True - [mypy-werkzeug.*] ignore_missing_imports = True [mypy-setuptools.*] +ignore_missing_imports = True + +[mypy-jsonpickle.*] ignore_missing_imports = True \ No newline at end of file diff --git a/qc_app/__init__.py b/qc_app/__init__.py index 7f423c2..08b56c9 100644 --- a/qc_app/__init__.py +++ b/qc_app/__init__.py @@ -17,10 +17,10 @@ def instance_path(): return path -def create_app(instance_path): +def create_app(instance_dir): """The application factory""" app = Flask( - __name__, instance_path=instance_path, instance_relative_config=True) + __name__, instance_path=instance_dir, instance_relative_config=True) app.config.from_pyfile(os.path.join(os.getcwd(), "etc/default_config.py")) app.config.from_pyfile("config.py") # Override defaults with instance path diff --git a/qc_app/entry.py b/qc_app/entry.py index 7f67a33..c53648a 100644 --- a/qc_app/entry.py +++ b/qc_app/entry.py @@ -1,7 +1,5 @@ """Entry-point module""" import os -import random -import string import mimetypes from typing import Tuple from zipfile import ZipFile, is_zipfile @@ -18,24 +16,24 @@ from flask import ( entrybp = Blueprint("entry", __name__) -def errors(request) -> Tuple[str, ...]: - """Return a tuple of the errors found in the `request`. If no error is +def errors(rqst) -> Tuple[str, ...]: + """Return a tuple of the errors found in the request `rqst`. If no error is found, then an empty tuple is returned.""" def __filetype_error__(): return ( ("Invalid file type provided.",) - if request.form.get("filetype") not in ("average", "standard-error") + if rqst.form.get("filetype") not in ("average", "standard-error") else tuple()) def __file_missing_error__(): return ( ("No file was uploaded.",) - if ("qc_text_file" not in request.files or - request.files["qc_text_file"].filename == "") + if ("qc_text_file" not in rqst.files or + rqst.files["qc_text_file"].filename == "") else tuple()) def __file_mimetype_error__(): - text_file = request.files["qc_text_file"] + text_file = rqst.files["qc_text_file"] return ( ( ("Invalid file! Expected a tab-separated-values file, or a zip " @@ -50,26 +48,26 @@ def errors(request) -> Tuple[str, ...]: def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]: """Check the uploaded zip file for errors.""" - zfile_errors = tuple() + zfile_errors: Tuple[str, ...] = tuple() if is_zipfile(filepath): - zfile = ZipFile(filepath, "r") - infolist = zfile.infolist() - if len(infolist) != 1: - zfile_errors = zfile_errors + ( - ("Expected exactly one (1) member file within the uploaded zip " - "file. Got {len(infolist)} member files.")) - if len(infolist) == 1 and infolist[0].is_dir(): - zfile_errors = zfile_errors + ( - ("Expected a member text file in the uploaded zip file. Got a " - "directory/folder.")) - - if len(infolist) == 1 and not infolist[0].is_dir(): - zfile.extract(infolist[0], path=upload_dir) - mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}") - if mime[0] != "text/tab-separated-values": + with ZipFile(filepath, "r") as zfile: + infolist = zfile.infolist() + if len(infolist) != 1: + zfile_errors = zfile_errors + ( + ("Expected exactly one (1) member file within the uploaded zip " + "file. Got {len(infolist)} member files."),) + if len(infolist) == 1 and infolist[0].is_dir(): zfile_errors = zfile_errors + ( - ("Expected the member text file in the uploaded zip file to" - " be a tab-separated file.")) + ("Expected a member text file in the uploaded zip file. Got a " + "directory/folder."),) + + if len(infolist) == 1 and not infolist[0].is_dir(): + zfile.extract(infolist[0], path=upload_dir) + mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}") + if mime[0] != "text/tab-separated-values": + zfile_errors = zfile_errors + ( + ("Expected the member text file in the uploaded zip file to" + " be a tab-separated file."),) return zfile_errors diff --git a/qc_app/jobs.py b/qc_app/jobs.py index e97d175..4e6a11e 100644 --- a/qc_app/jobs.py +++ b/qc_app/jobs.py @@ -1,3 +1,4 @@ +"""Handle jobs""" import os import shlex import subprocess @@ -7,29 +8,32 @@ from datetime import timedelta from redis import Redis def error_filename(job_id, error_dir): + "Compute the path of the file where errors will be dumped." return f"{error_dir}/job_{job_id}.error" -def launch_job( +def launch_job(# pylint: disable=[too-many-arguments] redis_conn: Redis, filepath: str, filetype, redisurl, error_dir, ttl_seconds: int): """Launch a job in the background""" job_id = str(uuid4()) command = [ - "python3", "-m" "scripts.worker", filetype, filepath, redisurl, job_id] - job = { + "python3", "-m", "scripts.worker", filetype, filepath, redisurl, job_id] + the_job = { "job_id": job_id, "command": shlex.join(command), "status": "pending", "filename": os.path.basename(filepath), "percent": 0 } - redis_conn.hset(name=job["job_id"], mapping=job) - redis_conn.expire(name=job["job_id"], time=timedelta(seconds=ttl_seconds)) + redis_conn.hset(name=the_job["job_id"], mapping=the_job) + redis_conn.expire(name=the_job["job_id"], time=timedelta(seconds=ttl_seconds)) if not os.path.exists(error_dir): os.mkdir(error_dir) - with open(error_filename(job_id, error_dir), "w") as errorfile: - subprocess.Popen(command, stderr=errorfile) + with open(error_filename(job_id, error_dir), + "w", + encoding="utf-8") as errorfile: + subprocess.Popen(command, stderr=errorfile) # pylint: disable=[consider-using-with] - return job + return the_job def job(redis_conn, job_id: str): "Retrieve the job" diff --git a/qc_app/parse.py b/qc_app/parse.py index b2a0156..e72a163 100644 --- a/qc_app/parse.py +++ b/qc_app/parse.py @@ -1,23 +1,13 @@ """File parsing module""" import os -from functools import reduce import jsonpickle from redis import Redis -from flask import ( - flash, - request, - url_for, - redirect, - Blueprint, - render_template, - current_app as app) +from flask import flash, request, url_for, redirect, Blueprint, render_template +from flask import current_app as app -from . import jobs from quality_control.errors import InvalidValue -from quality_control.parsing import ( - FileType, - strain_names) +from . import jobs parsebp = Blueprint("parse", __name__) isinvalidvalue = lambda item: isinstance(item, InvalidValue) @@ -25,7 +15,6 @@ isinvalidvalue = lambda item: isinstance(item, InvalidValue) @parsebp.route("/parse", methods=["GET"]) def parse(): """Trigger file parsing""" - # TODO: Maybe implement external process to parse the files errors = False filename = request.args.get("filename") filetype = request.args.get("filetype") @@ -59,6 +48,7 @@ def parse(): @parsebp.route("/status/", methods=["GET"]) def parse_status(job_id: str): + "Retrieve the status of the job" with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: job = jobs.job(rconn, job_id) @@ -76,7 +66,7 @@ def parse_status(job_id: str): filename = job.get("filename", "uploaded file") errors = jsonpickle.decode( job.get("errors", jsonpickle.encode(tuple()))) - if status == "success" or status == "aborted": + if status in ("success", "aborted"): return redirect(url_for("parse.results", job_id=job_id)) if status == "parse-error": @@ -133,6 +123,7 @@ def fail(job_id: str): @parsebp.route("/abort", methods=["POST"]) def abort(): + """Handle user request to abort file processing""" job_id = request.form["job_id"] with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn: diff --git a/quality_control/average.py b/quality_control/average.py index 06b0a47..2b098db 100644 --- a/quality_control/average.py +++ b/quality_control/average.py @@ -1,19 +1,16 @@ """Contain logic for checking average files""" -import re from typing import Union +from .utils import cell_error from .errors import InvalidValue def invalid_value(line_number: int, column_number: int, val: str) -> Union[ InvalidValue, None]: """Return an `InvalidValue` object if `val` is not a valid "averages" value.""" - if re.search(r"^[0-9]+\.[0-9]{3}$", val): - return None - if re.search(r"^0\.0+$", val) or re.search("^0+$", val): - return None - return InvalidValue( - line_number, column_number, val, ( + return cell_error( + r"^[0-9]+\.[0-9]{3}$", val, line=line_number, column=column_number, + value=val, message=( f"Invalid value '{val}'. " - "Expected string representing a number with exactly three decimal " - "places.")) + "Expected string representing a number with exactly three " + "decimal places.")) diff --git a/quality_control/headers.py b/quality_control/headers.py index 79d7e43..f4f4dad 100644 --- a/quality_control/headers.py +++ b/quality_control/headers.py @@ -27,14 +27,15 @@ def invalid_headings( enumerate(headings, start=2) if header not in strains) def duplicate_headings( - line_number: int, headers: Sequence[str]) -> Union[InvalidValue, None]: + line_number: int, + headers: Sequence[str]) -> Tuple[DuplicateHeading, ...]: """Return a tuple of `DuplicateHeading` objects for each column heading that is a duplicate of another column heading.""" def __update_columns__(acc, item): if item[1] in acc.keys(): return {**acc, item[1]: acc[item[1]] + (item[0],)} return {**acc, item[1]: (item[0],)} - repeated = { + repeated = {# type: ignore[var-annotated] heading: columns for heading, columns in reduce(__update_columns__, enumerate(headers, start=1), {}).items() if len(columns) > 1 diff --git a/quality_control/parsing.py b/quality_control/parsing.py index f1f4f79..ba22e0c 100644 --- a/quality_control/parsing.py +++ b/quality_control/parsing.py @@ -1,15 +1,13 @@ """Module handling the high-level parsing of the files""" - -import os import collections from enum import Enum from functools import partial from zipfile import ZipFile, is_zipfile -from typing import Iterable, Generator, Callable, Optional +from typing import Tuple, Union, Iterable, Generator, Callable, Optional import quality_control.average as avg import quality_control.standard_error as se -from quality_control.errors import InvalidValue +from quality_control.errors import InvalidValue, DuplicateHeading from quality_control.headers import ( invalid_header, invalid_headings, duplicate_headings) @@ -67,16 +65,16 @@ def se_errors(line_number, fields): def collect_errors( filepath: str, filetype: FileType, strains: list, update_progress: Optional[Callable] = None, - user_aborted: Optional[Callable] = lambda: False) -> Generator: + user_aborted: Callable = lambda: False) -> Generator: """Run checks against file and collect all the errors""" - errors = tuple() + errors:Tuple[Union[InvalidValue, DuplicateHeading], ...] = tuple() def __process_errors__(line_number, line, error_checker_fn, errors = tuple()): errs = error_checker_fn( line_number, tuple(field.strip() for field in line.split("\t"))) if errs is None: return errors - if isinstance(errs, collections.Sequence): + if isinstance(errs, collections.abc.Sequence): return errors + tuple(error for error in errs if error is not None) return errors + (errs,) diff --git a/quality_control/standard_error.py b/quality_control/standard_error.py index aa7df3c..7e059ad 100644 --- a/quality_control/standard_error.py +++ b/quality_control/standard_error.py @@ -1,7 +1,7 @@ """Contain logic for checking standard error files""" -import re from typing import Union +from .utils import cell_error from .errors import InvalidValue def invalid_value( @@ -12,12 +12,9 @@ def invalid_value( `val` is not a valid input for standard error files, otherwise, it returns `None`. """ - if re.search(r"^[0-9]+\.[0-9]{6,}$", val): - return None - if re.search(r"^0\.0+$", val) or re.search("^0+$", val): - return None - return InvalidValue( - line_number, column_number, val, ( + return cell_error( + r"^[0-9]+\.[0-9]{6,}$", val, line=line_number, column=column_number, + value=val, message=( f"Invalid value '{val}'. " - "Expected string representing a number with at least six decimal " - "places.")) + "Expected string representing a number with at least six " + "decimal places.")) diff --git a/quality_control/utils.py b/quality_control/utils.py index 0072608..67301d6 100644 --- a/quality_control/utils.py +++ b/quality_control/utils.py @@ -1,7 +1,9 @@ """Utilities that might be useful elsewhere.""" - +import re from collections import namedtuple +from .errors import InvalidValue + ProgressIndicator = namedtuple( "ProgressIndicator", ("filesize", "processedsize", "currentline", "percent")) @@ -19,3 +21,11 @@ def make_progress_calculator(filesize: int): ((processedsize/filesize) * 100)) return __calculator__ + +def cell_error(pattern, val, **error_kwargs): + "Return the error in the cell" + if re.search(pattern, val): + return None + if re.search(r"^0\.0+$", val) or re.search("^0+$", val): + return None + return InvalidValue(**error_kwargs) diff --git a/scripts/worker.py b/scripts/worker.py index ecdfaa2..c6e989f 100644 --- a/scripts/worker.py +++ b/scripts/worker.py @@ -1,20 +1,23 @@ +"""External worker script""" import os import sys +import traceback from typing import Callable -from zipfile import Path, ZipFile, is_zipfile +from zipfile import ZipFile, is_zipfile import jsonpickle from redis import Redis -from redis.exceptions import ConnectionError +from redis.exceptions import ConnectionError # pylint: disable=[redefined-builtin] -from .qc import cli_argument_parser from quality_control.utils import make_progress_calculator -from quality_control.parsing import ( - take, FileType, strain_names, collect_errors) +from quality_control.parsing import FileType, strain_names, collect_errors +from .qc import cli_argument_parser def make_progress_indicator( - redis_connection: Redis, job_id: str, progress_calc_fn: Callable) -> Callable: + redis_connection: Redis, job_id: str, + progress_calc_fn: Callable) -> Callable: + """Make function that will compute the progress and update redis""" def __indicator__(linenumber, linetext): progress = progress_calc_fn(linenumber, linetext) redis_connection.hset(name=job_id, mapping=progress._asdict()) @@ -24,6 +27,7 @@ def make_progress_indicator( return __indicator__ def cli_args_valid(args): + "Check that the command-line arguments are provided and correct" if not os.path.exists(args.filepath): print(f"The file '{args.filepath}' does not exist.", file=sys.stderr) return None @@ -33,14 +37,15 @@ def cli_args_valid(args): return None try: - conn = Redis.from_url(args.redisurl) - except ConnectionError as ce: + conn = Redis.from_url(args.redisurl) # pylint: disable=[unused-variable] + except ConnectionError as conn_err: # pylint: disable=[unused-variable] print(traceback.format_exc(), file=sys.stderr) return None return args def process_cli_arguments(): + """Setup command-line parser""" parser = cli_argument_parser() parser.prog = "worker" parser.add_argument( @@ -50,12 +55,14 @@ def process_cli_arguments(): return cli_args_valid(parser.parse_args()) def stream_error(redis_conn, job_id, error): + """Update redis with the most current error(s) found""" errors = jsonpickle.decode( redis_conn.hget(job_id, key="errors") or jsonpickle.encode(tuple())) redis_conn.hset( job_id, key="errors", value=jsonpickle.encode(errors + (error,))) def make_user_aborted(redis_conn, job_id): + """Mkae function that checks whether the user aborted the process""" def __aborted__(): user_aborted = bool(int( redis_conn.hget(name=job_id, key="user_aborted") or "0")) @@ -66,10 +73,12 @@ def make_user_aborted(redis_conn, job_id): return __aborted__ def get_zipfile_size(filepath): + "Compute size of given zipfile" with ZipFile(filepath, "r") as zfile: return zfile.infolist()[0].file_size def main(): + "entry point to the script" args = process_cli_arguments() if args is None: print("Quiting due to errors!", file=sys.stderr) diff --git a/setup.py b/setup.py index 6068493..24c0a25 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,4 @@ +"""The setup module""" from setuptools import setup setup() -- cgit v1.2.3