about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-10-22 13:22:17 -0500
committer: Frederick Muriuki Muriithi, 2024-10-22 13:22:17 -0500
commit: 01dadbc45d3bc4ae184d8b4a5f64c1cc6538b2e9 (patch)
tree: 1118c351d212e6310af33fea51155913b67ca44a /scripts
parent: 84bbf8736ed017b24affb7e931207dafc4ae4780 (diff)
download: gn-uploader-01dadbc45d3bc4ae184d8b4a5f64c1cc6538b2e9.tar.gz
Refactor `qc_pheno_file` and reuse it for different file types.
The QC/QA steps taken by the `qc_pheno_file` function are very
similar across the "pheno", "phenose" and "phenonum" files. This
commit makes the `qc_pheno_file` function a higher-order function and
we pass the file-type specific check(s) as a callable (function) to be
used for the QC/QA process.
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/rqtl2/phenotypes_qc.py 63
1 files changed, 52 insertions, 11 deletions
diff --git a/scripts/rqtl2/phenotypes_qc.py b/scripts/rqtl2/phenotypes_qc.py
index 668fca0..ccd2110 100644
--- a/scripts/rqtl2/phenotypes_qc.py
+++ b/scripts/rqtl2/phenotypes_qc.py
@@ -4,9 +4,9 @@ import shutil
 import tempfile
 from pathlib import Path
 from zipfile import ZipFile
-from functools import reduce
 import multiprocessing as mproc
-from typing import Optional, Sequence
+from functools import reduce, partial
+from typing import Callable, Optional, Sequence
 from logging import Logger, getLogger, StreamHandler
 
 import MySQLdb as mdb
@@ -183,13 +183,32 @@ def decimal_points_error(
     return None
 
 
+def integer_error(
+        filename: str,
+        rowtitle: str,
+        coltitle: str,
+        cellvalue: str,
+        message: str,
+        decimal_places: int = 1
+) -> Optional[InvalidValue]:
+    """Returns an error if the value does not meet the checks."""
+    try:
+        value = int(cellvalue)
+        if value <= 0:
+            raise ValueError("Must be a non-zero, positive number.")
+        return None
+    except ValueError as _verr:
+        return InvalidValue(filename, rowtitle, coltitle, cellvalue, message)
+
+
 def qc_pheno_file(
         filepath: Path,
         samples: tuple[str, ...],
         phenonames: tuple[str, ...],
         separator: str,
         comment_char: str,
-        na_strings: Sequence[str]
+        na_strings: Sequence[str],
+        error_fn: Callable = decimal_points_error
 ):
     """Run QC/QA on a `pheno` file."""
     _csvfile = rqtl2.read_csv_file(filepath, separator, comment_char)
@@ -220,13 +239,11 @@ def qc_pheno_file(
         for field, value in zip(_headings[1:], line[1:]):
             if value in na_strings:
                 continue
-            _err = decimal_points_error(
+            _err = error_fn(
                 filepath.name,
                 line[0],
                 field,
-                value,
-                ("Expected a non-negative number with at least one decimal "
-                 "place."))
+                value)
             _errs = _errs + ((_err,) if bool(_err) else tuple())
 
         return _errs, _lc+1
@@ -303,18 +320,42 @@ def run_qc(# pylint: disable=[too-many-arguments]
             for _file in cdata.get("phenocovar", [])))
             for name in names))
 
+        dec_err_fn = partial(decimal_points_error, message=(
+            "Expected a non-negative number with at least one decimal "
+            "place."))
         pheno_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
             extractiondir.joinpath(_file),
             samples,
             phenonames,
             cdata["sep"],
             cdata["comment.char"],
-            cdata["na.strings"]
+            cdata["na.strings"],
+            dec_err_fn
         ) for _file in cdata.get("pheno", []))))
 
-    #       - Check the 3 checks above for phenose and phenonum values too
-    # qc_phenose_files(…)
-    # qc_phenonum_files(…)
+        #       - Check the 3 checks above for phenose and phenonum values too
+        # qc_phenose_files(…)
+        # qc_phenonum_files(…)
+        phenose_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            dec_err_fn
+        ) for _file in cdata.get("phenose", []))))
+
+        phenonum_qc_res = merge_dicts(*pool.starmap(qc_pheno_file, tuple((
+            extractiondir.joinpath(_file),
+            samples,
+            phenonames,
+            cdata["sep"],
+            cdata["comment.char"],
+            cdata["na.strings"],
+            partial(integer_error, message=(
+                "Expected a non-negative, non-zero integer value."))
+        ) for _file in cdata.get("phenonum", []))))
 
     #       - Delete all extracted files
     shutil.rmtree(extractiondir)