about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-03 16:53:40 +0300
committer Frederick Muriuki Muriithi 2024-01-03 16:53:40 +0300
commit 3e51fb24fd34bab2164a5f4056bc78d21827ca29 (patch)
tree 690a4f7453dd276ccb90c7101140117fdeb6f9e7
parent 95d2b868adebbc7ebbc2435f9184c30c014ec513 (diff)
download gn-uploader-3e51fb24fd34bab2164a5f4056bc78d21827ca29.tar.gz
Use generic parser. Remove obsoleted functions.
-rw-r--r-- r_qtl/r_qtl2.py 72
-rw-r--r-- tests/r_qtl/test_r_qtl2_geno.py 8
-rw-r--r-- tests/r_qtl/test_r_qtl2_gmap.py 12
-rw-r--r-- tests/r_qtl/test_r_qtl2_pheno.py 4
4 files changed, 14 insertions, 82 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4dac24b..da7db22 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -134,78 +134,6 @@ def make_process_data_geno(cdata) -> tuple[
             for items in zip(ids, vals[1:]))
     return (__non_transposed__, __transposed__)
 
-def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
-    """Load the genotype file, making use of the control data."""
-    def replace_genotype_codes(val):
-        return cdata["genotypes"].get(val, val)
-
-    def replace_na_strings(val):
-        nastrings = cdata.get("na.strings")
-        if bool(nastrings):
-            return (None if val in nastrings else val)
-        return val
-
-    if not cdata.get("geno_transposed", False):
-        for line in with_non_transposed(
-                zfile,
-                "geno",
-                cdata,
-                lambda row: {
-                    key: thread_op(value, replace_genotype_codes, replace_na_strings)
-                    for key,value in row.items()
-                }):
-            yield line
-        return None
-
-    def __merge__(key, samples, line):
-        marker = line[0]
-        return tuple(
-            dict(zip(
-                [key, marker],
-                (thread_op(item, replace_genotype_codes, replace_na_strings)
-                 for item in items)))
-            for items in zip(samples, line[1:]))
-
-    for row in with_transposed(zfile, "geno", cdata, __merge__):
-        yield row
-
-def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> Iterator[dict]:
-    """Read gmap files to get the genome mapping data"""
-    assert map_type in ("genetic-map", "physical-map"), "Invalid map type"
-    map_file_key = {
-        "genetic-map": "gmap",
-        "physical-map": "pmap"
-    }[map_type]
-    transposed_dict = {
-        "genetic-map": "gmap_transposed",
-        "physical-map": "pmap_transposed"
-    }
-    if not cdata.get(transposed_dict[map_type], False):
-        for row in with_non_transposed(zfile, map_file_key, cdata):
-            yield row
-        return None
-
-    def __merge__(key, samples, line):
-        marker = line[0]
-        return tuple(dict(zip([key, marker], items))
-                     for items in zip(samples, line[1:]))
-
-    for row in with_transposed(zfile, map_file_key, cdata, __merge__):
-        yield row
-
-def phenotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
-    """Load phenotype file data."""
-    if not cdata.get("pheno_transposed", False):
-        for row in with_non_transposed(zfile, "pheno", cdata, lambda val: val):
-            yield row
-        return
-
-    def __merge__(id_key, ids, vals):
-        return tuple(dict(zip([id_key, vals[0]], items))
-                     for items in zip(ids, vals[1:]))
-    for row in with_transposed(zfile, "pheno", cdata, __merge__):
-        yield row
-
 def __default_process_value_transposed__(
         id_key: str,
         ids: tuple[str, ...],
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
index 787d13a..c33984e 100644
--- a/tests/r_qtl/test_r_qtl2_geno.py
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -171,8 +171,12 @@ def test_parse_geno_files(relpath, expected):
     THEN: ensure that the data we get is as expected
     """
     with ZipFile(Path(relpath).absolute(), "r") as zfile:
-        assert tuple(
-            rqtl2.genotype_data(zfile, rqtl2.control_data(zfile))) == expected
+        cdata = rqtl2.control_data(zfile)
+        assert tuple(rqtl2.file_data(
+            zfile,
+            "geno",
+            cdata,
+            *rqtl2.make_process_data_geno(cdata))) == expected
 
 @pytest.mark.unit_test
 @pytest.mark.parametrize(
diff --git a/tests/r_qtl/test_r_qtl2_gmap.py b/tests/r_qtl/test_r_qtl2_gmap.py
index ba46c42..5c8ca6a 100644
--- a/tests/r_qtl/test_r_qtl2_gmap.py
+++ b/tests/r_qtl/test_r_qtl2_gmap.py
@@ -10,28 +10,28 @@ from r_qtl import r_qtl2 as rqtl2
 @pytest.mark.parametrize(
     "relpath,mapfiletype,expected",
     (("tests/r_qtl/test_files/test_gmap.zip",
-      "genetic-map",
+      "gmap",
       ({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
        {"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
        {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
        {"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
        {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
      ("tests/r_qtl/test_files/test_gmap_transposed.zip",
-      "genetic-map",
+      "gmap",
       ({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
        {"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
        {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
        {"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
        {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
      ("tests/r_qtl/test_files/test_pmap.zip",
-      "physical-map",
+      "pmap",
       ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
        {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
        {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
        {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"},
        {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})),
      ("tests/r_qtl/test_files/test_pmap_transposed.zip",
-      "physical-map",
+      "pmap",
       ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
        {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
        {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
@@ -44,5 +44,5 @@ def test_parse_map_files(relpath, mapfiletype, expected):
     THEN: ensure the parsed data is as expected.
     """
     with ZipFile(Path(relpath).absolute(), "r") as zfile:
-        assert tuple(rqtl2.map_data(
-            zfile, mapfiletype, rqtl2.control_data(zfile))) == expected
+        cdata = rqtl2.control_data(zfile)
+        assert tuple(rqtl2.file_data(zfile, mapfiletype, cdata)) == expected
diff --git a/tests/r_qtl/test_r_qtl2_pheno.py b/tests/r_qtl/test_r_qtl2_pheno.py
index 554d9c8..46be469 100644
--- a/tests/r_qtl/test_r_qtl2_pheno.py
+++ b/tests/r_qtl/test_r_qtl2_pheno.py
@@ -29,5 +29,5 @@ def test_parse_pheno_files(filepath, expected):
     THEN: verify the parsed data is as expected
     """
     with ZipFile(Path(filepath).absolute(), "r") as zfile:
-        assert tuple(
-            rqtl2.phenotype_data(zfile, rqtl2.control_data(zfile))) == expected
+        cdata = rqtl2.control_data(zfile)
+        assert tuple(rqtl2.file_data(zfile, "pheno", cdata)) == expected