about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- scripts/qc.py 66
1 files changed, 45 insertions, 21 deletions
diff --git a/scripts/qc.py b/scripts/qc.py
index 688147c..9d57f4b 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -5,12 +5,12 @@ import argparse
 
 import magic
 
-from quality_control.errors import ParseError
+from quality_control.errors import InvalidValue
 from quality_control.parsing import (
+    take,
     FileType,
-    parse_file,
     strain_names,
-    parse_errors)
+    collect_errors)
 
 
 def is_file_mime(filepath, mimetype):
@@ -42,28 +42,51 @@ def cli_argument_parser():
         default=default_strains_file)
 
     parser.add_argument(
+        "-c", "--count", type=int,
+        help=(
+            "Number of errors to display. "
+            "A negative number means display all errors."),
+        default=20)
+
+    parser.add_argument(
         "-v", "--verbose",
         help="Controls whether to show extra output",
         default=False, action="store_true")
     return parser
 
-def check(filepath, filetype, strains, verbose=False):
+def progress_indicator(msg):
+    """Print the line number and percent from `msg` in place; return `msg`."""
+    print(f"LINE: {msg['line_number']} ({msg['percent']:.2f}%)", end="\r")
+    return msg
+
+def print_errors(errors, verbose):
+    """Print one line/column(s)/message row per error and return `errors`."""
+    errors_exist = False
+    starter = "\n" if verbose else ""  # verbose: each row is preceded by a newline
+    print(f"{starter}line(s)\tcolumn(s)\terrors")  # header is printed even when no rows follow
+    for error in errors:
+        cols = (  # InvalidValue carries one `column`; other errors carry `columns`
+            error.column if isinstance(error, InvalidValue)
+            else ", ".join(str(col) for col in error.columns))
+        errors_exist = True
+        print(f"{starter}{error.line}\t{cols}\t{error.message}")
+
+    if not errors_exist:
+        print("No errors were found!")
+
+    return errors  # NOTE(review): a one-shot iterator is already exhausted here
+
+def check(filepath, filetype, strains, count, verbose=False):
     """Check the file and print out results"""
-    try:
-        for line_num, line in enumerate(parse_file(
-                filepath, filetype, strains), start=1):
-            if verbose:
-                print(f"Checked line: {line_num}")
-
-        print(f"Successfully checked the file. No errors found.")
-    except ParseError as pe:
-        print("line\terrors")
-        for line_num, error in enumerate(
-                parse_errors(filepath, filetype, strains,
-                             pe.args[0]["line_number"]),
-                start = pe.args[0]["line_number"] + 1):
-            print(f"{line_num}\t{' '.join(error['message'])}")
-            
+    # Always bind `updater`: it used to be assigned only when `verbose` was
+    # set, so a non-verbose run hit UnboundLocalError at the calls below.
+    updater = progress_indicator if verbose else (lambda msg: msg)
+    errors = collect_errors(filepath, filetype, strains, updater)
+
+    # A positive `count` caps the errors shown; zero or negative shows all.
+    if count > 0:
+        errors = take(errors, count)
+    return print_errors(errors, verbose)
 
 def main():
     """Entry point function"""
@@ -98,10 +121,11 @@ def main():
     if args.verbose:
         print(f"Checking '{filepath}' for errors")
 
-    check(
+    return check(
         filepath, (
             FileType.AVERAGE if args.filetype == "average"
-            else FileType.STANDARD_ERROR), strains)
+            else FileType.STANDARD_ERROR),
+        strains, args.count, verbose=args.verbose)
 
 if __name__ == "__main__":
     main()