about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- README.org 29
-rw-r--r-- manifest.scm 1
-rw-r--r-- qc.py 108
-rw-r--r-- quality_control/average.py 2
-rw-r--r-- quality_control/parsing.py 3
-rw-r--r-- quality_control/standard_error.py 2
6 files changed, 141 insertions, 4 deletions
diff --git a/README.org b/README.org
index aef1a08..53ac63c 100644
--- a/README.org
+++ b/README.org
@@ -53,3 +53,32 @@ To check for correct type usage in the application, run:
 #+BEGIN_SRC shell
   mypy --show-error-codes .
 #+END_SRC
+
+** Running QC
+
+*** Command-Line Version
+
+To run qc against a file, the syntax is:
+#+BEGIN_SRC shell
+  python3 -m qc [--strainsfile <strainsfile-path>] [--verbose] <filetype> <filepath>
+#+END_SRC
+where
+- ~<filetype>~ is one of "*average*" or "*standard-error*"
+- ~<filepath>~ is either an absolute path to the file, or a path relative to the
+  current working directory
+- if the ~--strainsfile~ option is not provided, it will default to the one in
+  the root directory of this repository
+- the ~--verbose~ option is a flag, defaulting to ~False~, that controls the
+  display of optional progress messages
+
+To view the usage information for the application, run
+#+BEGIN_SRC shell
+  python3 -m qc --help
+#+END_SRC
+
+**** TODO Figure out how to put qc.py in a /bin or /scripts directory and still be able to import the modules in the repo
+**** TODO Reduce the command to simply ~qc [--strainsfile <strainsfile-path>] [--verbose] <filetype> <filepath>~
+
+*** Web Version
+
+Coming soon...
diff --git a/manifest.scm b/manifest.scm
index d93902a..9ec50a6 100644
--- a/manifest.scm
+++ b/manifest.scm
@@ -4,6 +4,7 @@
        "python-mypy"
        "python-redis"
        "python-flask"
+       "python-magic"
        "python-pylint"
        "python-pytest"
        "python-hypothesis"))
diff --git a/qc.py b/qc.py
new file mode 100644
index 0000000..fee74cb
--- /dev/null
+++ b/qc.py
@@ -0,0 +1,108 @@
+"""Implements the command-line interface for the qc application"""
+import os
+import sys
+import argparse
+
+import magic
+
+from quality_control.errors import ParseError
+from quality_control.parsing import (
+    FileType,
+    parse_file,
+    strain_names,
+    parse_errors,
+    parse_strains)
+
+
+def is_file_mime(filepath, mimetype):
+    """Tell whether `filepath` is detected as `mimetype` or as `text/plain`"""
+    return magic.from_file(filepath, mime=True) in (mimetype, "text/plain")
+
+def cli_argument_parser():
+    """Build the `argparse` parser for qc's positional arguments and options"""
+    parser = argparse.ArgumentParser(
+        prog="qc", description=(
+            "Command-Line Interface program for quality control of data files"))
+    parser.add_argument(
+        "filetype",
+        help="The type of file to check",
+        choices=("average", "standard-error"))
+    parser.add_argument(
+        "filepath",
+        help=(  # adjacent literals are concatenated: keep the trailing spaces
+            "The path to the file to be checked. "
+            "If an absolute path is not provided, then the file will be relative to "
+            f"'{os.getcwd()}'"))
+    default_strains_file = os.path.join(
+        os.path.dirname(__file__), "strains.csv")
+    parser.add_argument(
+        "-s", "--strainsfile",
+        help=(
+            "Path to the file containing allowable strains/samples. "
+            f"[default '{default_strains_file}']"),
+        default=default_strains_file)
+
+    parser.add_argument(
+        "-v", "--verbose",
+        help="Controls whether to show extra output",
+        default=False, action="store_true")
+    return parser
+
+def check(filepath, filetype, strains, verbose=False):
+    """Parse `filepath` and print either a success message or the errors found"""
+    try:
+        for line_num, _line in enumerate(parse_file(
+                filepath, filetype, strains), start=1):
+            if verbose:
+                print(f"Checked line: {line_num}")
+
+        print("Successfully checked the file. No errors found.")
+    except ParseError as pe:
+        print("line\terrors")
+        # NOTE(review): parse_errors names this argument `seek_pos`; confirm that
+        # the line number carried by the ParseError is what it expects here.
+        first_bad = pe.args[0]["line_number"]
+        for line_num, error in enumerate(parse_errors(
+                filepath, filetype, strains, first_bad), start=first_bad + 1):
+            print(f"{line_num}\t{' '.join(error['message'])}")
+
+def main():
+    """Validate the CLI arguments and check the file; return 0, or 1-4 on bad input"""
+    argparser = cli_argument_parser()
+    args = argparser.parse_args()
+    if not os.path.exists(args.filepath):
+        print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
+        return 1
+
+    if not os.path.exists(args.strainsfile):
+        print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
+        return 2
+
+    if not is_file_mime(args.filepath, "text/tab-separated-values"):
+        print(
+            f"The file '{args.filepath}' MUST be a tab-separated file.",
+            file=sys.stderr)
+        return 3
+
+    if not is_file_mime(args.strainsfile, "text/csv"):
+        print(
+            f"The file '{args.strainsfile}' MUST be a comma-separated (CSV) file.",
+            file=sys.stderr)
+        return 4
+
+    if args.verbose:
+        print(f"Parsing the strain names from '{args.strainsfile}'")
+
+    strains = strain_names(parse_strains(os.path.realpath(args.strainsfile)))
+    filepath = os.path.realpath(args.filepath)
+    if args.verbose:
+        print(f"Checking '{filepath}' for errors")
+
+    filetype = (FileType.AVERAGE if args.filetype == "average"
+                else FileType.STANDARD_ERROR)
+    # Forward the flag so `--verbose` also enables check()'s per-line progress
+    check(filepath, filetype, strains, verbose=args.verbose)
+    return 0
+
+if __name__ == "__main__":
+    sys.exit(main())  # hand main()'s status code to the shell (None means 0)
diff --git a/quality_control/average.py b/quality_control/average.py
index 3261e1c..2907b9c 100644
--- a/quality_control/average.py
+++ b/quality_control/average.py
@@ -8,6 +8,6 @@ def valid_value(val):
     if re.search(r"^[0-9]+\.[0-9]{3}$", val):
         return float(val)
     raise InvalidCellValue(
-        f"Invalid value '{val}'.\n"
+        f"Invalid value '{val}'. "
         "Expected string representing a number with exactly three decimal "
         "places.")
diff --git a/quality_control/parsing.py b/quality_control/parsing.py
index a4edb0f..9fe88f1 100644
--- a/quality_control/parsing.py
+++ b/quality_control/parsing.py
@@ -62,7 +62,7 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
         with open(filepath, encoding="utf-8") as input_file:
             for line_number, line in enumerate(input_file):
                 if line_number == 0:
-                    yield __parse_header(line, strains)
+                    yield __parse_header(line, strains), seek_pos + len(line)
                     seek_pos = seek_pos + len(line)
                     continue
 
@@ -83,7 +83,6 @@ def parse_file(filepath: str, filetype: FileType, strains: list):
 def parse_errors(filepath: str, filetype: FileType, strains: list,
                  seek_pos: int = 0) -> Generator:
     """Retrieve ALL the parse errors"""
-    print(f"seek_pos: {seek_pos}, {type(seek_pos)}")
     assert seek_pos >= 0, "The seek position must be at least zero (0)"
 
     def __error_type(error):
diff --git a/quality_control/standard_error.py b/quality_control/standard_error.py
index 805c30e..f1e33c4 100644
--- a/quality_control/standard_error.py
+++ b/quality_control/standard_error.py
@@ -8,6 +8,6 @@ def valid_value(val):
     if re.search(r"^[0-9]+\.[0-9]{6,}$", val):
         return float(val)
     raise InvalidCellValue(
-        f"Invalid value '{val}'.\n"
+        f"Invalid value '{val}'. "
         "Expected string representing a number with at least six decimal "
         "places.")