From cdd4dc456e56bb4eb055e1cb7f2518d45fb3bfb9 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Sat, 20 Jan 2024 09:57:23 +0300
Subject: Fetch sample/case names from database

Fetch the sample/case names from the database rather than from a static
file in the repository.

Issue: https://issues.genenetwork.org/issues/quality-control/read-samples-from-database-by-species
---
 scripts/qc.py | 45 +++++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 28 deletions(-)

(limited to 'scripts/qc.py')

diff --git a/scripts/qc.py b/scripts/qc.py
index 7d41d6c..e8573a9 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -1,9 +1,9 @@
 """Implements the command-line interface for the qc application"""
 import os
 import sys
-import argparse
 import mimetypes
 from typing import Union, Callable
+from argparse import ArgumentParser
 
 from functional_tools import take
 
@@ -11,17 +11,18 @@ from quality_control.utils import make_progress_calculator
 from quality_control.errors import InvalidValue, DuplicateHeading
 from quality_control.parsing import FileType, strain_names, collect_errors
 
+from qc_app.db_utils import database_connection
+
+from .cli_parser import init_cli_parser
+
 
 def is_file_mime(filepath:str, mimetype:str) -> bool:
     """Check that `filepath` has a mimetype of `mimetype` or `text/plain`"""
     the_type = mimetypes.guess_type(filepath)[0]
     return the_type in ("text/plain", mimetype)
 
-def cli_argument_parser():
-    """Create the parser for the CLI arguments"""
-    parser = argparse.ArgumentParser(
-        prog="qc", description = (
-            "Command-Line Interface program for quality control of data files"))
+def add_file_validation_arguments(parser: ArgumentParser) -> ArgumentParser:
+    """File validation specific CLI arguments."""
     parser.add_argument(
         "filetype",
         help="The type of file to check",
@@ -32,14 +33,6 @@ def cli_argument_parser():
             "The path to the file to be checked."
             "If an absolute path is not provided, then the file will be relative to"
             f"\t'{os.getcwd()}'"))
-    default_strains_file = os.path.join(
-        os.path.dirname(os.path.dirname(__file__)), "etc/strains.csv")
-    parser.add_argument(
-        "-s", "--strainsfile",
-        help=(
-            "Path to the file containing allowable strains/samples. "
-            f"[default '{default_strains_file}']"),
-        default=default_strains_file)
     parser.add_argument(
         "-c", "--count",
         type=int,
@@ -54,6 +47,14 @@ def cli_argument_parser():
         default=False, action="store_true")
     return parser
 
+def cli_argument_parser():
+    """Create the parser for the CLI arguments"""
+    theparser = init_cli_parser(
+        "qc",
+        "Command-Line Interface program for quality control of data files")
+    theparser.add_argument("speciesid", type=int, help="ID of the species.")
+    return add_file_validation_arguments(theparser)
+
 def make_progress_indicator(
         verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]:
     """Utility to display the progress"""
@@ -106,26 +107,14 @@ def main():
         print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
         return 1
 
-    if not os.path.exists(args.strainsfile):
-        print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
-        return 2
-
     if not is_file_mime(args.filepath, "text/tab-separated-values"):
         print(
             f"The file '{args.filepath}' MUST be a tab-separated file.",
             file=sys.stderr)
         return 3
 
-    if not is_file_mime(args.strainsfile, "text/csv"):
-        print(
-            f"The file '{args.strainsfile}' MUST be a tab-separated file.",
-            file=sys.stderr)
-        return 4
-
-    if args.verbose:
-        print(f"Parsing the strain names from '{args.strainsfile}'")
-
-    strains = strain_names(os.path.realpath(args.strainsfile))
+    with database_connection(args.databaseuri) as dbconn:
+        strains = strain_names(dbconn, args.speciesid)
     filepath = os.path.realpath(args.filepath)
 
     if args.verbose:
-- 
cgit v1.2.3