about | summary | refs | log | tree | commit | diff
path: root/scripts/qc.py
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-20 09:57:23 +0300
committer Frederick Muriuki Muriithi 2024-01-20 09:57:23 +0300
commit cdd4dc456e56bb4eb055e1cb7f2518d45fb3bfb9 (patch)
tree 73248acbadd5014f2b26da41da3098f1ac5ecc1e /scripts/qc.py
parent 53b1e7cb181380a24aab4cbc7a9634b2d8dd2d29 (diff)
download gn-uploader-cdd4dc456e56bb4eb055e1cb7f2518d45fb3bfb9.tar.gz
Fetch sample/case names from database
Fetch the sample/case names from the database rather than from a static file in the repository. Issue: https://issues.genenetwork.org/issues/quality-control/read-samples-from-database-by-species
Diffstat (limited to 'scripts/qc.py')
-rw-r--r-- scripts/qc.py 45
1 files changed, 17 insertions, 28 deletions
diff --git a/scripts/qc.py b/scripts/qc.py
index 7d41d6c..e8573a9 100644
--- a/scripts/qc.py
+++ b/scripts/qc.py
@@ -1,9 +1,9 @@
"""Implements the command-line interface for the qc application"""
import os
import sys
-import argparse
import mimetypes
from typing import Union, Callable
+from argparse import ArgumentParser
from functional_tools import take
@@ -11,17 +11,18 @@ from quality_control.utils import make_progress_calculator
from quality_control.errors import InvalidValue, DuplicateHeading
from quality_control.parsing import FileType, strain_names, collect_errors
+from qc_app.db_utils import database_connection
+
+from .cli_parser import init_cli_parser
+
def is_file_mime(filepath:str, mimetype:str) -> bool:
"""Check that `filepath` has a mimetype of `mimetype` or `text/plain`.

The type is guessed with `mimetypes.guess_type`, which works from the
file name (its extension), not from the file's contents.

Returns True when the guessed type is either `text/plain` or exactly
`mimetype`; returns False otherwise, including when no type could be
guessed for `filepath`.
"""
# `guess_type` returns a `(type, encoding)` pair; only the type is needed.
# The type is `None` when it cannot be guessed from the name.
the_type = mimetypes.guess_type(filepath)[0]
return the_type in ("text/plain", mimetype)
-def cli_argument_parser():
- """Create the parser for the CLI arguments"""
- parser = argparse.ArgumentParser(
- prog="qc", description = (
- "Command-Line Interface program for quality control of data files"))
+def add_file_validation_arguments(parser: ArgumentParser) -> ArgumentParser:
+ """File validation specific CLI arguments."""
parser.add_argument(
"filetype",
help="The type of file to check",
@@ -32,14 +33,6 @@ def cli_argument_parser():
"The path to the file to be checked."
"If an absolute path is not provided, then the file will be relative to"
f"\t'{os.getcwd()}'"))
- default_strains_file = os.path.join(
- os.path.dirname(os.path.dirname(__file__)), "etc/strains.csv")
- parser.add_argument(
- "-s", "--strainsfile",
- help=(
- "Path to the file containing allowable strains/samples. "
- f"[default '{default_strains_file}']"),
- default=default_strains_file)
parser.add_argument(
"-c", "--count", type=int,
@@ -54,6 +47,14 @@ def cli_argument_parser():
default=False, action="store_true")
return parser
+def cli_argument_parser():
+ """Create the parser for the CLI arguments
+
+ The base parser is obtained from `init_cli_parser` with the program name
+ "qc" and the program description. A positional integer argument,
+ `speciesid`, is then added, and the parser is passed through
+ `add_file_validation_arguments`, whose return value is returned.
+
+ NOTE(review): `main` reads `args.databaseuri`, which is not added here;
+ presumably `init_cli_parser` registers it -- confirm.
+ """
+ theparser = init_cli_parser(
+ "qc",
+ "Command-Line Interface program for quality control of data files")
+ theparser.add_argument("speciesid", type=int, help="ID of the species.")
+ return add_file_validation_arguments(theparser)
+
def make_progress_indicator(
verbose: bool, progress_calc_fn: Callable) -> Union[Callable, None]:
"""Utility to display the progress"""
@@ -106,26 +107,14 @@ def main():
print(f"The file '{args.filepath}' does not exist.", file=sys.stderr)
return 1
- if not os.path.exists(args.strainsfile):
- print(f"The file '{args.strainsfile}' does not exist.", file=sys.stderr)
- return 2
-
if not is_file_mime(args.filepath, "text/tab-separated-values"):
print(
f"The file '{args.filepath}' MUST be a tab-separated file.",
file=sys.stderr)
return 3
- if not is_file_mime(args.strainsfile, "text/csv"):
- print(
- f"The file '{args.strainsfile}' MUST be a tab-separated file.",
- file=sys.stderr)
- return 4
-
- if args.verbose:
- print(f"Parsing the strain names from '{args.strainsfile}'")
-
- strains = strain_names(os.path.realpath(args.strainsfile))
+ with database_connection(args.databaseuri) as dbconn:
+ strains = strain_names(dbconn, args.speciesid)
filepath = os.path.realpath(args.filepath)
if args.verbose: