about summary refs log tree commit diff
path: root/qc.py
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2022-04-27 17:37:22 +0300
committer Frederick Muriuki Muriithi 2022-04-27 17:37:22 +0300
commit 903af1c0b1f2cc695ea4e0c31438f9205571d15d (patch)
tree 08b32ee9676bd8ed2456b39895333fdff45b0dd2 /qc.py
parent ea70b7a7db42d51fa7f22f3dcb6d2aca6d8a795d (diff)
download gn-uploader-903af1c0b1f2cc695ea4e0c31438f9205571d15d.tar.gz
Implement command-line interface for QC of files
Diffstat (limited to 'qc.py')
-rw-r--r-- qc.py 108
1 files changed, 108 insertions, 0 deletions
diff --git a/qc.py b/qc.py
new file mode 100644
index 0000000..fee74cb
--- /dev/null
+++ b/qc.py
@@ -0,0 +1,108 @@
+"""Implements the command-line interface for the qc application"""
+import os
+import sys
+import argparse
+
+import magic
+
+from quality_control.errors import ParseError
+from quality_control.parsing import (
+    FileType,
+    parse_file,
+    strain_names,
+    parse_errors,
+    parse_strains)
+
+
+def is_file_mime(filepath, mimetype):
+    """Tell whether the detected MIME type of `filepath` is acceptable.
+
+    The type reported by `magic.from_file` is accepted when it equals
+    `text/plain` or the given `mimetype`.
+    """
+    detected = magic.from_file(filepath, mime=True)
+    return detected in ("text/plain", mimetype)
+
+def cli_argument_parser():
+    """Create the parser for the CLI arguments.
+
+    Returns an `argparse.ArgumentParser` that accepts two positional
+    arguments (`filetype`, `filepath`) and two options
+    (`-s/--strainsfile`, `-v/--verbose`).
+    """
+    default_strains_file = os.path.join(
+        os.path.dirname(__file__), "strains.csv")
+    # argparse %-formats help strings, so a literal '%' inside an
+    # interpolated path has to be doubled or rendering the help fails.
+    cwd_for_help = os.getcwd().replace("%", "%%")
+    strains_for_help = default_strains_file.replace("%", "%%")
+
+    parser = argparse.ArgumentParser(
+        prog="qc",
+        description=(
+            "Command-Line Interface program for quality control of data files"))
+    parser.add_argument(
+        "filetype",
+        help="The type of file to check",
+        choices=("average", "standard-error"))
+    parser.add_argument(
+        "filepath",
+        help=(
+            "The path to the file to be checked. "
+            "If an absolute path is not provided, then the file will be "
+            f"relative to '{cwd_for_help}'"))
+    parser.add_argument(
+        "-s", "--strainsfile",
+        help=(
+            "Path to the file containing allowable strains/samples. "
+            f"[default '{strains_for_help}']"),
+        # The default itself keeps the unescaped path.
+        default=default_strains_file)
+    parser.add_argument(
+        "-v", "--verbose",
+        help="Controls whether to show extra output",
+        default=False, action="store_true")
+    return parser
+
+def check(filepath, filetype, strains, verbose=False):
+    """Check the file and print out results.
+
+    Iterates over everything `parse_file` yields for the file. If that
+    completes without raising, a success message is printed. If it raises
+    `ParseError`, a tab-separated table headed `line<TAB>errors` is printed,
+    with one row per item yielded by `parse_errors`.
+
+    filepath: path of the file to check.
+    filetype: passed through unchanged to `parse_file`/`parse_errors`.
+    strains: passed through unchanged to `parse_file`/`parse_errors`.
+    verbose: when true, print a progress message for each parsed line.
+    """
+    try:
+        for line_num, _line in enumerate(
+                parse_file(filepath, filetype, strains), start=1):
+            if verbose:
+                print(f"Checked line: {line_num}")
+
+        print("Successfully checked the file. No errors found.")
+    except ParseError as pe:
+        print("line\terrors")
+        # The exception's first argument is a mapping with a "line_number"
+        # entry; it is handed to `parse_errors` and also seeds the row
+        # numbering. NOTE(review): presumably the line where parsing
+        # stopped -- confirm against `quality_control.parsing`.
+        failed_at = pe.args[0]["line_number"]
+        for line_num, error in enumerate(
+                parse_errors(filepath, filetype, strains, failed_at),
+                start=failed_at + 1):
+            print(f"{line_num}\t{' '.join(error['message'])}")
+
+
+def main():
+    """Entry point function.
+
+    Parses the command-line arguments, validates both input files, loads the
+    strain names and then runs `check` on the data file.
+
+    Returns a non-zero integer when validation fails, after printing a
+    message to stderr:
+      1 - the file to check does not exist
+      2 - the strains file does not exist
+      3 - the file to check is neither `text/tab-separated-values` nor
+          `text/plain`
+      4 - the strains file is neither `text/csv` nor `text/plain`
+    Returns None when the check was run.
+    """
+    argparser = cli_argument_parser()
+    args = argparser.parse_args()
+    if not os.path.exists(args.filepath):
+        print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
+        return 1
+
+    if not os.path.exists(args.strainsfile):
+        print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
+        return 2
+
+    if not is_file_mime(args.filepath, "text/tab-separated-values"):
+        print(
+            f"The file '{args.filepath}' MUST be a tab-separated file.",
+            file=sys.stderr)
+        return 3
+
+    # NOTE(review): the message says "tab-separated" while the check is
+    # against "text/csv" -- confirm which format the strains file uses.
+    if not is_file_mime(args.strainsfile, "text/csv"):
+        print(
+            f"The file '{args.strainsfile}' MUST be a tab-separated file.",
+            file=sys.stderr)
+        return 4
+
+    if args.verbose:
+        print(f"Parsing the strain names from '{args.strainsfile}'")
+
+    strains = strain_names(parse_strains(os.path.realpath(args.strainsfile)))
+
+    filepath = os.path.realpath(args.filepath)
+    if args.verbose:
+        print(f"Checking '{filepath}' for errors")
+
+    # Forward the verbosity flag so `-v` also enables per-line progress.
+    check(
+        filepath,
+        (FileType.AVERAGE if args.filetype == "average"
+         else FileType.STANDARD_ERROR),
+        strains,
+        verbose=args.verbose)
+
+if __name__ == "__main__":
+    # Propagate the code returned by main() as the process exit status;
+    # a None return exits with status 0.
+    sys.exit(main())