-rw-r--r--  README.org                        |  2
-rw-r--r--  mypy.ini                          |  9
-rw-r--r--  qc_app/__init__.py                |  4
-rw-r--r--  qc_app/entry.py                   | 50
-rw-r--r--  qc_app/jobs.py                    | 20
-rw-r--r--  qc_app/parse.py                   | 21
-rw-r--r--  quality_control/average.py        | 15
-rw-r--r--  quality_control/headers.py        |  5
-rw-r--r--  quality_control/parsing.py        | 12
-rw-r--r--  quality_control/standard_error.py | 15
-rw-r--r--  quality_control/utils.py          | 12
-rw-r--r--  scripts/worker.py                 | 25
-rw-r--r--  setup.py                          |  1
13 files changed, 97 insertions, 94 deletions
diff --git a/README.org b/README.org
index 2897d91..5ec53f0 100644
--- a/README.org
+++ b/README.org
@@ -72,7 +72,7 @@ Run tests with:
To run the linter over the code base, run:
#+BEGIN_SRC shell
- pylint tests quality_control qc_app
+ pylint *.py tests quality_control qc_app scripts
#+END_SRC
To check for correct type usage in the application, run:
diff --git a/mypy.ini b/mypy.ini
index f61431a..74e2d91 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -9,17 +9,14 @@ ignore_missing_imports = True
[mypy-hypothesis.*]
ignore_missing_imports = True
-[mypy-rq.*]
-ignore_missing_imports = True
-
[mypy-redis.*]
ignore_missing_imports = True
-[mypy-magic.*]
-ignore_missing_imports = True
-
[mypy-werkzeug.*]
ignore_missing_imports = True
[mypy-setuptools.*]
+ignore_missing_imports = True
+
+[mypy-jsonpickle.*]
ignore_missing_imports = True
\ No newline at end of file
diff --git a/qc_app/__init__.py b/qc_app/__init__.py
index 7f423c2..08b56c9 100644
--- a/qc_app/__init__.py
+++ b/qc_app/__init__.py
@@ -17,10 +17,10 @@ def instance_path():
return path
-def create_app(instance_path):
+def create_app(instance_dir):
"""The application factory"""
app = Flask(
- __name__, instance_path=instance_path, instance_relative_config=True)
+ __name__, instance_path=instance_dir, instance_relative_config=True)
app.config.from_pyfile(os.path.join(os.getcwd(), "etc/default_config.py"))
app.config.from_pyfile("config.py") # Override defaults with instance path
diff --git a/qc_app/entry.py b/qc_app/entry.py
index 7f67a33..c53648a 100644
--- a/qc_app/entry.py
+++ b/qc_app/entry.py
@@ -1,7 +1,5 @@
"""Entry-point module"""
import os
-import random
-import string
import mimetypes
from typing import Tuple
from zipfile import ZipFile, is_zipfile
@@ -18,24 +16,24 @@ from flask import (
entrybp = Blueprint("entry", __name__)
-def errors(request) -> Tuple[str, ...]:
- """Return a tuple of the errors found in the `request`. If no error is
+def errors(rqst) -> Tuple[str, ...]:
+ """Return a tuple of the errors found in the request `rqst`. If no error is
found, then an empty tuple is returned."""
def __filetype_error__():
return (
("Invalid file type provided.",)
- if request.form.get("filetype") not in ("average", "standard-error")
+ if rqst.form.get("filetype") not in ("average", "standard-error")
else tuple())
def __file_missing_error__():
return (
("No file was uploaded.",)
- if ("qc_text_file" not in request.files or
- request.files["qc_text_file"].filename == "")
+ if ("qc_text_file" not in rqst.files or
+ rqst.files["qc_text_file"].filename == "")
else tuple())
def __file_mimetype_error__():
- text_file = request.files["qc_text_file"]
+ text_file = rqst.files["qc_text_file"]
return (
(
("Invalid file! Expected a tab-separated-values file, or a zip "
@@ -50,26 +48,26 @@ def errors(request) -> Tuple[str, ...]:
def zip_file_errors(filepath, upload_dir) -> Tuple[str, ...]:
"""Check the uploaded zip file for errors."""
- zfile_errors = tuple()
+ zfile_errors: Tuple[str, ...] = tuple()
if is_zipfile(filepath):
- zfile = ZipFile(filepath, "r")
- infolist = zfile.infolist()
- if len(infolist) != 1:
- zfile_errors = zfile_errors + (
- ("Expected exactly one (1) member file within the uploaded zip "
- "file. Got {len(infolist)} member files."))
- if len(infolist) == 1 and infolist[0].is_dir():
- zfile_errors = zfile_errors + (
- ("Expected a member text file in the uploaded zip file. Got a "
- "directory/folder."))
-
- if len(infolist) == 1 and not infolist[0].is_dir():
- zfile.extract(infolist[0], path=upload_dir)
- mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
- if mime[0] != "text/tab-separated-values":
+ with ZipFile(filepath, "r") as zfile:
+ infolist = zfile.infolist()
+ if len(infolist) != 1:
+ zfile_errors = zfile_errors + (
+ ("Expected exactly one (1) member file within the uploaded zip "
+ "file. Got {len(infolist)} member files."),)
+ if len(infolist) == 1 and infolist[0].is_dir():
zfile_errors = zfile_errors + (
- ("Expected the member text file in the uploaded zip file to"
- " be a tab-separated file."))
+ ("Expected a member text file in the uploaded zip file. Got a "
+ "directory/folder."),)
+
+ if len(infolist) == 1 and not infolist[0].is_dir():
+ zfile.extract(infolist[0], path=upload_dir)
+ mime = mimetypes.guess_type(f"{upload_dir}/{infolist[0].filename}")
+ if mime[0] != "text/tab-separated-values":
+ zfile_errors = zfile_errors + (
+ ("Expected the member text file in the uploaded zip file to"
+ " be a tab-separated file."),)
return zfile_errors
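
For anyone wiring the refactored check into a view, a minimal usage sketch follows; the paths and printed messages are made up for illustration, not values from the project:

from qc_app.entry import zip_file_errors

# Hypothetical caller; the paths below are illustrative only.
problems = zip_file_errors("/tmp/uploads/dataset.zip", "/tmp/uploads")
if problems:
    for message in problems:
        print(message)  # each entry is one human-readable error string
else:
    print("exactly one tab-separated member file found")
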
diff --git a/qc_app/jobs.py b/qc_app/jobs.py
index e97d175..4e6a11e 100644
--- a/qc_app/jobs.py
+++ b/qc_app/jobs.py
@@ -1,3 +1,4 @@
+"""Handle jobs"""
import os
import shlex
import subprocess
@@ -7,29 +8,32 @@ from datetime import timedelta
from redis import Redis
def error_filename(job_id, error_dir):
+ "Compute the path of the file where errors will be dumped."
return f"{error_dir}/job_{job_id}.error"
-def launch_job(
+def launch_job(# pylint: disable=[too-many-arguments]
redis_conn: Redis, filepath: str, filetype, redisurl, error_dir,
ttl_seconds: int):
"""Launch a job in the background"""
job_id = str(uuid4())
command = [
- "python3", "-m" "scripts.worker", filetype, filepath, redisurl, job_id]
- job = {
+ "python3", "-m", "scripts.worker", filetype, filepath, redisurl, job_id]
+ the_job = {
"job_id": job_id, "command": shlex.join(command), "status": "pending",
"filename": os.path.basename(filepath), "percent": 0
}
- redis_conn.hset(name=job["job_id"], mapping=job)
- redis_conn.expire(name=job["job_id"], time=timedelta(seconds=ttl_seconds))
+ redis_conn.hset(name=the_job["job_id"], mapping=the_job)
+ redis_conn.expire(name=the_job["job_id"], time=timedelta(seconds=ttl_seconds))
if not os.path.exists(error_dir):
os.mkdir(error_dir)
- with open(error_filename(job_id, error_dir), "w") as errorfile:
- subprocess.Popen(command, stderr=errorfile)
+ with open(error_filename(job_id, error_dir),
+ "w",
+ encoding="utf-8") as errorfile:
+ subprocess.Popen(command, stderr=errorfile) # pylint: disable=[consider-using-with]
- return job
+ return the_job
def job(redis_conn, job_id: str):
"Retrieve the job"
diff --git a/qc_app/parse.py b/qc_app/parse.py
index b2a0156..e72a163 100644
--- a/qc_app/parse.py
+++ b/qc_app/parse.py
@@ -1,23 +1,13 @@
"""File parsing module"""
import os
-from functools import reduce
import jsonpickle
from redis import Redis
-from flask import (
- flash,
- request,
- url_for,
- redirect,
- Blueprint,
- render_template,
- current_app as app)
+from flask import flash, request, url_for, redirect, Blueprint, render_template
+from flask import current_app as app
-from . import jobs
from quality_control.errors import InvalidValue
-from quality_control.parsing import (
- FileType,
- strain_names)
+from . import jobs
parsebp = Blueprint("parse", __name__)
isinvalidvalue = lambda item: isinstance(item, InvalidValue)
@@ -25,7 +15,6 @@ isinvalidvalue = lambda item: isinstance(item, InvalidValue)
@parsebp.route("/parse", methods=["GET"])
def parse():
"""Trigger file parsing"""
- # TODO: Maybe implement external process to parse the files
errors = False
filename = request.args.get("filename")
filetype = request.args.get("filetype")
@@ -59,6 +48,7 @@ def parse():
@parsebp.route("/status/<job_id>", methods=["GET"])
def parse_status(job_id: str):
+ "Retrieve the status of the job"
with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
job = jobs.job(rconn, job_id)
@@ -76,7 +66,7 @@ def parse_status(job_id: str):
filename = job.get("filename", "uploaded file")
errors = jsonpickle.decode(
job.get("errors", jsonpickle.encode(tuple())))
- if status == "success" or status == "aborted":
+ if status in ("success", "aborted"):
return redirect(url_for("parse.results", job_id=job_id))
if status == "parse-error":
@@ -133,6 +123,7 @@ def fail(job_id: str):
@parsebp.route("/abort", methods=["POST"])
def abort():
+ """Handle user request to abort file processing"""
job_id = request.form["job_id"]
with Redis.from_url(app.config["REDIS_URL"], decode_responses=True) as rconn:
diff --git a/quality_control/average.py b/quality_control/average.py
index 06b0a47..2b098db 100644
--- a/quality_control/average.py
+++ b/quality_control/average.py
@@ -1,19 +1,16 @@
"""Contain logic for checking average files"""
-import re
from typing import Union
+from .utils import cell_error
from .errors import InvalidValue
def invalid_value(line_number: int, column_number: int, val: str) -> Union[
InvalidValue, None]:
"""Return an `InvalidValue` object if `val` is not a valid "averages"
value."""
- if re.search(r"^[0-9]+\.[0-9]{3}$", val):
- return None
- if re.search(r"^0\.0+$", val) or re.search("^0+$", val):
- return None
- return InvalidValue(
- line_number, column_number, val, (
+ return cell_error(
+ r"^[0-9]+\.[0-9]{3}$", val, line=line_number, column=column_number,
+ value=val, message=(
f"Invalid value '{val}'. "
- "Expected string representing a number with exactly three decimal "
- "places."))
+ "Expected string representing a number with exactly three "
+ "decimal places."))
diff --git a/quality_control/headers.py b/quality_control/headers.py
index 79d7e43..f4f4dad 100644
--- a/quality_control/headers.py
+++ b/quality_control/headers.py
@@ -27,14 +27,15 @@ def invalid_headings(
enumerate(headings, start=2) if header not in strains)
def duplicate_headings(
- line_number: int, headers: Sequence[str]) -> Union[InvalidValue, None]:
+ line_number: int,
+ headers: Sequence[str]) -> Tuple[DuplicateHeading, ...]:
"""Return a tuple of `DuplicateHeading` objects for each column heading that
is a duplicate of another column heading."""
def __update_columns__(acc, item):
if item[1] in acc.keys():
return {**acc, item[1]: acc[item[1]] + (item[0],)}
return {**acc, item[1]: (item[0],)}
- repeated = {
+ repeated = {# type: ignore[var-annotated]
heading: columns for heading, columns in
reduce(__update_columns__, enumerate(headers, start=1), {}).items()
if len(columns) > 1
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index f1f4f79..ba22e0c 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -1,15 +1,13 @@
"""Module handling the high-level parsing of the files"""
-
-import os
import collections
from enum import Enum
from functools import partial
from zipfile import ZipFile, is_zipfile
-from typing import Iterable, Generator, Callable, Optional
+from typing import Tuple, Union, Iterable, Generator, Callable, Optional
import quality_control.average as avg
import quality_control.standard_error as se
-from quality_control.errors import InvalidValue
+from quality_control.errors import InvalidValue, DuplicateHeading
from quality_control.headers import (
invalid_header, invalid_headings, duplicate_headings)
@@ -67,16 +65,16 @@ def se_errors(line_number, fields):
def collect_errors(
filepath: str, filetype: FileType, strains: list,
update_progress: Optional[Callable] = None,
- user_aborted: Optional[Callable] = lambda: False) -> Generator:
+ user_aborted: Callable = lambda: False) -> Generator:
"""Run checks against file and collect all the errors"""
- errors = tuple()
+ errors:Tuple[Union[InvalidValue, DuplicateHeading], ...] = tuple()
def __process_errors__(line_number, line, error_checker_fn, errors = tuple()):
errs = error_checker_fn(
line_number,
tuple(field.strip() for field in line.split("\t")))
if errs is None:
return errors
- if isinstance(errs, collections.Sequence):
+ if isinstance(errs, collections.abc.Sequence):
return errors + tuple(error for error in errs if error is not None)
return errors + (errs,)
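
The switch from collections.Sequence to collections.abc.Sequence is more than lint appeasement: the ABC aliases were removed from the top-level collections module in Python 3.10, so only the abc path keeps working. A two-line check:

import collections.abc

# True on every supported Python; collections.Sequence raises AttributeError on 3.10+.
print(isinstance(("a", "b"), collections.abc.Sequence))
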
diff --git a/quality_control/standard_error.py b/quality_control/standard_error.py
index aa7df3c..7e059ad 100644
--- a/quality_control/standard_error.py
+++ b/quality_control/standard_error.py
@@ -1,7 +1,7 @@
"""Contain logic for checking standard error files"""
-import re
from typing import Union
+from .utils import cell_error
from .errors import InvalidValue
def invalid_value(
@@ -12,12 +12,9 @@ def invalid_value(
`val` is not a valid input for standard error files, otherwise, it returns
`None`.
"""
- if re.search(r"^[0-9]+\.[0-9]{6,}$", val):
- return None
- if re.search(r"^0\.0+$", val) or re.search("^0+$", val):
- return None
- return InvalidValue(
- line_number, column_number, val, (
+ return cell_error(
+ r"^[0-9]+\.[0-9]{6,}$", val, line=line_number, column=column_number,
+ value=val, message=(
f"Invalid value '{val}'. "
- "Expected string representing a number with at least six decimal "
- "places."))
+ "Expected string representing a number with at least six "
+ "decimal places."))
diff --git a/quality_control/utils.py b/quality_control/utils.py
index 0072608..67301d6 100644
--- a/quality_control/utils.py
+++ b/quality_control/utils.py
@@ -1,7 +1,9 @@
"""Utilities that might be useful elsewhere."""
-
+import re
from collections import namedtuple
+from .errors import InvalidValue
+
ProgressIndicator = namedtuple(
"ProgressIndicator", ("filesize", "processedsize", "currentline", "percent"))
@@ -19,3 +21,11 @@ def make_progress_calculator(filesize: int):
((processedsize/filesize) * 100))
return __calculator__
+
+def cell_error(pattern, val, **error_kwargs):
+ "Return the error in the cell"
+ if re.search(pattern, val):
+ return None
+ if re.search(r"^0\.0+$", val) or re.search("^0+$", val):
+ return None
+ return InvalidValue(**error_kwargs)
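
The new cell_error helper centralises the regex and all-zero checks that average.py and standard_error.py previously duplicated. A minimal usage sketch; the cell values and positions are made up, and it assumes InvalidValue accepts these keyword fields, as the new call sites imply:

from quality_control.utils import cell_error

# A value matching the pattern (or an all-zero value) is accepted: None is returned.
ok = cell_error(r"^[0-9]+\.[0-9]{3}$", "12.345",
                line=3, column=2, value="12.345", message="unused here")
assert ok is None

# Anything else is wrapped in an InvalidValue built from the keyword arguments.
err = cell_error(r"^[0-9]+\.[0-9]{3}$", "12.34",
                 line=3, column=2, value="12.34",
                 message="Expected a number with exactly three decimal places.")
print(err)
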
diff --git a/scripts/worker.py b/scripts/worker.py
index ecdfaa2..c6e989f 100644
--- a/scripts/worker.py
+++ b/scripts/worker.py
@@ -1,20 +1,23 @@
+"""External worker script"""
import os
import sys
+import traceback
from typing import Callable
-from zipfile import Path, ZipFile, is_zipfile
+from zipfile import ZipFile, is_zipfile
import jsonpickle
from redis import Redis
-from redis.exceptions import ConnectionError
+from redis.exceptions import ConnectionError # pylint: disable=[redefined-builtin]
-from .qc import cli_argument_parser
from quality_control.utils import make_progress_calculator
-from quality_control.parsing import (
- take, FileType, strain_names, collect_errors)
+from quality_control.parsing import FileType, strain_names, collect_errors
+from .qc import cli_argument_parser
def make_progress_indicator(
- redis_connection: Redis, job_id: str, progress_calc_fn: Callable) -> Callable:
+ redis_connection: Redis, job_id: str,
+ progress_calc_fn: Callable) -> Callable:
+ """Make function that will compute the progress and update redis"""
def __indicator__(linenumber, linetext):
progress = progress_calc_fn(linenumber, linetext)
redis_connection.hset(name=job_id, mapping=progress._asdict())
@@ -24,6 +27,7 @@ def make_progress_indicator(
return __indicator__
def cli_args_valid(args):
+ "Check that the command-line arguments are provided and correct"
if not os.path.exists(args.filepath):
print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
return None
@@ -33,14 +37,15 @@ def cli_args_valid(args):
return None
try:
- conn = Redis.from_url(args.redisurl)
- except ConnectionError as ce:
+ conn = Redis.from_url(args.redisurl) # pylint: disable=[unused-variable]
+ except ConnectionError as conn_err: # pylint: disable=[unused-variable]
print(traceback.format_exc(), file=sys.stderr)
return None
return args
def process_cli_arguments():
+ """Setup command-line parser"""
parser = cli_argument_parser()
parser.prog = "worker"
parser.add_argument(
@@ -50,12 +55,14 @@ def process_cli_arguments():
return cli_args_valid(parser.parse_args())
def stream_error(redis_conn, job_id, error):
+ """Update redis with the most current error(s) found"""
errors = jsonpickle.decode(
redis_conn.hget(job_id, key="errors") or jsonpickle.encode(tuple()))
redis_conn.hset(
job_id, key="errors", value=jsonpickle.encode(errors + (error,)))
def make_user_aborted(redis_conn, job_id):
+ """Mkae function that checks whether the user aborted the process"""
def __aborted__():
user_aborted = bool(int(
redis_conn.hget(name=job_id, key="user_aborted") or "0"))
@@ -66,10 +73,12 @@ def make_user_aborted(redis_conn, job_id):
return __aborted__
def get_zipfile_size(filepath):
+ "Compute size of given zipfile"
with ZipFile(filepath, "r") as zfile:
return zfile.infolist()[0].file_size
def main():
+ "entry point to the script"
args = process_cli_arguments()
if args is None:
print("Quiting due to errors!", file=sys.stderr)
diff --git a/setup.py b/setup.py
index 6068493..24c0a25 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,4 @@
+"""The setup module"""
from setuptools import setup
setup()