diff options
Diffstat (limited to 'scripts/qc.py')
-rw-r--r-- | scripts/qc.py | 108 |
1 files changed, 108 insertions, 0 deletions
# Origin: scripts/qc.py (new file, mode 100644, blob 09758cb).
"""Implements the command-line interface for the qc application"""
import os
import sys
import argparse

import magic

from quality_control.errors import ParseError
from quality_control.parsing import (
    FileType,
    parse_file,
    strain_names,
    parse_errors,
    parse_strains)


def is_file_mime(filepath, mimetype):
    """Check that `filepath` has a mimetype of `mimetype` or `text/plain`"""
    return magic.from_file(filepath, mime=True) in ("text/plain", mimetype)


def cli_argument_parser():
    """Create the parser for the CLI arguments.

    The parser accepts two positional arguments (`filetype`, `filepath`) and
    two options (`-s/--strainsfile`, `-v/--verbose`).
    """
    parser = argparse.ArgumentParser(
        prog="qc", description=(
            "Command-Line Interface program for quality control of data files"))
    parser.add_argument(
        "filetype",
        help="The type of file to check",
        choices=("average", "standard-error"))
    # argparse applies %-formatting to help strings, so any literal '%' that
    # comes from a filesystem path must be doubled or `--help` would fail.
    cwd_for_help = os.getcwd().replace("%", "%%")
    parser.add_argument(
        "filepath",
        help=(
            "The path to the file to be checked. "
            "If an absolute path is not provided, then the file will be relative to"
            f"\t'{cwd_for_help}'"))
    default_strains_file = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), "etc/strains.csv")
    strains_for_help = default_strains_file.replace("%", "%%")
    parser.add_argument(
        "-s", "--strainsfile",
        help=(
            "Path to the file containing allowable strains/samples. "
            f"[default '{strains_for_help}']"),
        default=default_strains_file)

    parser.add_argument(
        "-v", "--verbose",
        help="Controls whether to show extra output",
        default=False, action="store_true")
    return parser


def check(filepath, filetype, strains, verbose=False):
    """Check the file and print out results.

    Iterates over `parse_file(filepath, filetype, strains)`; when `verbose` is
    true, each checked line number is printed. If a `ParseError` is raised,
    the errors yielded by `parse_errors` are printed as a tab-separated
    `line`/`errors` table instead of the success message.
    """
    try:
        # The parsed values are not needed here; the iteration itself is what
        # drives the parser and surfaces any `ParseError`.
        for line_num, _line in enumerate(
                parse_file(filepath, filetype, strains), start=1):
            if verbose:
                print(f"Checked line: {line_num}")

        print("Successfully checked the file. No errors found.")
    except ParseError as pe:
        # The first exception argument is a mapping that carries the line
        # number at which parsing stopped.
        failed_line = pe.args[0]["line_number"]
        print("line\terrors")
        for line_num, error in enumerate(
                parse_errors(filepath, filetype, strains, failed_line),
                start=failed_line + 1):
            print(f"{line_num}\t{' '.join(error['message'])}")


def main():
    """Entry point function.

    Returns the process exit status:
    0 - the check ran to completion,
    1 - the file to check does not exist,
    2 - the strains file does not exist,
    3 - the file to check has an unexpected mimetype,
    4 - the strains file has an unexpected mimetype.
    """
    argparser = cli_argument_parser()
    args = argparser.parse_args()
    if not os.path.exists(args.filepath):
        print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
        return 1

    if not os.path.exists(args.strainsfile):
        print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
        return 2

    if not is_file_mime(args.filepath, "text/tab-separated-values"):
        print(
            f"The file '{args.filepath}' MUST be a tab-separated file.",
            file=sys.stderr)
        return 3

    if not is_file_mime(args.strainsfile, "text/csv"):
        # NOTE(review): the mimetype checked is "text/csv" while the message
        # says "tab-separated" -- confirm the strains file format before
        # rewording the message.
        print(
            f"The file '{args.strainsfile}' MUST be a tab-separated file.",
            file=sys.stderr)
        return 4

    if args.verbose:
        print(f"Parsing the strain names from '{args.strainsfile}'")

    strains = strain_names(parse_strains(os.path.realpath(args.strainsfile)))

    filepath = os.path.realpath(args.filepath)
    if args.verbose:
        print(f"Checking '{filepath}' for errors")

    filetype = (
        FileType.AVERAGE if args.filetype == "average"
        else FileType.STANDARD_ERROR)
    # Forward the verbosity flag so `--verbose` also enables per-line output.
    check(filepath, filetype, strains, verbose=args.verbose)
    return 0


if __name__ == "__main__":
    # Propagate the status returned by `main` to the calling shell.
    sys.exit(main())