From aadd9aa5dd4c552b573828ddac581a8b7064b0e2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 27 Apr 2022 19:06:45 +0300
Subject: Enable managing app via setup.py

While the application is developed with GNU Guix, the end user might
not be using it, and therefore, this commit provides a way for the
user to install the application with the usual python package
management systems.
---
 scripts/__init__.py |   0
 scripts/qc.py       | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)
 create mode 100644 scripts/__init__.py
 create mode 100644 scripts/qc.py

(limited to 'scripts')

diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/qc.py b/scripts/qc.py
new file mode 100644
index 0000000..09758cb
--- /dev/null
+++ b/scripts/qc.py
@@ -0,0 +1,108 @@
+"""Implements the command-line interface for the qc application"""
+import os
+import sys
+import argparse
+
+import magic
+
+from quality_control.errors import ParseError
+from quality_control.parsing import (
+    FileType,
+    parse_file,
+    strain_names,
+    parse_errors,
+    parse_strains)
+
+
+def is_file_mime(filepath, mimetype):
+    """Return whether `magic` reports `filepath` as `mimetype` or `text/plain`."""
+    return magic.from_file(filepath, mime=True) in (mimetype, "text/plain")
+
+def cli_argument_parser():
+    """Build the `argparse` parser for the `qc` command-line interface.
+
+    Positionals: `filetype` ("average" or "standard-error") and `filepath`.
+    Options: `-s/--strainsfile` (defaults to `etc/strains.csv` one directory
+    above this module's directory) and the `-v/--verbose` flag.
+    """
+    parser = argparse.ArgumentParser(
+        prog="qc", description=(
+            "Command-Line Interface program for quality control of data files"))
+    parser.add_argument(
+        "filetype", help="The type of file to check",
+        choices=("average", "standard-error"))
+    # Adjacent literals concatenate verbatim: keep a space between sentences.
+    parser.add_argument(
+        "filepath", help=(
+            "The path to the file to be checked. "
+            "If an absolute path is not provided, then the file will be "
+            f"relative to '{os.getcwd()}'"))
+    default_strains_file = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)), "etc/strains.csv")
+    parser.add_argument(
+        "-s", "--strainsfile", default=default_strains_file, help=(
+            "Path to the file containing allowable strains/samples. "
+            f"[default '{default_strains_file}']"))
+    parser.add_argument(
+        "-v", "--verbose", default=False, action="store_true",
+        help="Controls whether to show extra output")
+    return parser
+
+def check(filepath, filetype, strains, verbose=False):
+    """Parse `filepath`, printing a success note or a table of its errors.
+
+    On `ParseError`, rows come from `parse_errors` given its `line_number`.
+    """
+    try:
+        parsed = parse_file(filepath, filetype, strains)
+        for count, _line in enumerate(parsed, start=1):
+            if verbose:
+                print(f"Checked line: {count}")
+        print("Successfully checked the file. No errors found.")
+    except ParseError as exc:
+        print("line\terrors")
+        first_bad = exc.args[0]["line_number"]
+        failures = parse_errors(filepath, filetype, strains, first_bad)
+        for number, failure in enumerate(failures, start=first_bad + 1):
+            print(f"{number}\t{' '.join(failure['message'])}")
+
+def main():
+    """Run the `qc` CLI: validate both input files, then check the data file.
+
+    Returns 1-4 for the failed validations below, and None after checking.
+    """
+    args = cli_argument_parser().parse_args()
+    if not os.path.exists(args.filepath):
+        print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
+        return 1
+
+    if not os.path.exists(args.strainsfile):
+        print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
+        return 2
+
+    if not is_file_mime(args.filepath, "text/tab-separated-values"):
+        print(
+            f"The file '{args.filepath}' MUST be a tab-separated file.",
+            file=sys.stderr)
+        return 3
+
+    # NOTE(review): tests for "text/csv" but reports tab-separated -- confirm.
+    if not is_file_mime(args.strainsfile, "text/csv"):
+        print(
+            f"The file '{args.strainsfile}' MUST be a tab-separated file.",
+            file=sys.stderr)
+        return 4
+
+    if args.verbose:
+        print(f"Parsing the strain names from '{args.strainsfile}'")
+    strains = strain_names(parse_strains(os.path.realpath(args.strainsfile)))
+
+    filepath = os.path.realpath(args.filepath)
+    if args.verbose:
+        print(f"Checking '{filepath}' for errors")
+    filetype = (FileType.AVERAGE if args.filetype == "average"
+                else FileType.STANDARD_ERROR)
+    check(filepath, filetype, strains, verbose=args.verbose)
+
+if __name__ == "__main__":
+    sys.exit(main())  # surface main()'s 1-4 status codes as the exit status
-- 
cgit v1.2.3