about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-03 16:53:40 +0300
committer Frederick Muriuki Muriithi 2024-01-03 16:53:40 +0300
commit 3e51fb24fd34bab2164a5f4056bc78d21827ca29 (patch)
tree 690a4f7453dd276ccb90c7101140117fdeb6f9e7
parent 95d2b868adebbc7ebbc2435f9184c30c014ec513 (diff)
download gn-uploader-3e51fb24fd34bab2164a5f4056bc78d21827ca29.tar.gz
Use generic parser. Remove obsoleted functions.
-rw-r--r-- r_qtl/r_qtl2.py 72
-rw-r--r-- tests/r_qtl/test_r_qtl2_geno.py 8
-rw-r--r-- tests/r_qtl/test_r_qtl2_gmap.py 12
-rw-r--r-- tests/r_qtl/test_r_qtl2_pheno.py 4
4 files changed, 14 insertions, 82 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 4dac24b..da7db22 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -134,78 +134,6 @@ def make_process_data_geno(cdata) -> tuple[
for items in zip(ids, vals[1:]))
return (__non_transposed__, __transposed__)
-def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
- """Load the genotype file, making use of the control data."""
- def replace_genotype_codes(val):
- return cdata["genotypes"].get(val, val)
-
- def replace_na_strings(val):
- nastrings = cdata.get("na.strings")
- if bool(nastrings):
- return (None if val in nastrings else val)
- return val
-
- if not cdata.get("geno_transposed", False):
- for line in with_non_transposed(
- zfile,
- "geno",
- cdata,
- lambda row: {
- key: thread_op(value, replace_genotype_codes, replace_na_strings)
- for key,value in row.items()
- }):
- yield line
- return None
-
- def __merge__(key, samples, line):
- marker = line[0]
- return tuple(
- dict(zip(
- [key, marker],
- (thread_op(item, replace_genotype_codes, replace_na_strings)
- for item in items)))
- for items in zip(samples, line[1:]))
-
- for row in with_transposed(zfile, "geno", cdata, __merge__):
- yield row
-
-def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> Iterator[dict]:
- """Read gmap files to get the genome mapping data"""
- assert map_type in ("genetic-map", "physical-map"), "Invalid map type"
- map_file_key = {
- "genetic-map": "gmap",
- "physical-map": "pmap"
- }[map_type]
- transposed_dict = {
- "genetic-map": "gmap_transposed",
- "physical-map": "pmap_transposed"
- }
- if not cdata.get(transposed_dict[map_type], False):
- for row in with_non_transposed(zfile, map_file_key, cdata):
- yield row
- return None
-
- def __merge__(key, samples, line):
- marker = line[0]
- return tuple(dict(zip([key, marker], items))
- for items in zip(samples, line[1:]))
-
- for row in with_transposed(zfile, map_file_key, cdata, __merge__):
- yield row
-
-def phenotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
- """Load phenotype file data."""
- if not cdata.get("pheno_transposed", False):
- for row in with_non_transposed(zfile, "pheno", cdata, lambda val: val):
- yield row
- return
-
- def __merge__(id_key, ids, vals):
- return tuple(dict(zip([id_key, vals[0]], items))
- for items in zip(ids, vals[1:]))
- for row in with_transposed(zfile, "pheno", cdata, __merge__):
- yield row
-
def __default_process_value_transposed__(
id_key: str,
ids: tuple[str, ...],
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
index 787d13a..c33984e 100644
--- a/tests/r_qtl/test_r_qtl2_geno.py
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -171,8 +171,12 @@ def test_parse_geno_files(relpath, expected):
THEN: ensure that the data we get is as expected
"""
with ZipFile(Path(relpath).absolute(), "r") as zfile:
- assert tuple(
- rqtl2.genotype_data(zfile, rqtl2.control_data(zfile))) == expected
+ cdata = rqtl2.control_data(zfile)
+ assert tuple(rqtl2.file_data(
+ zfile,
+ "geno",
+ cdata,
+ *rqtl2.make_process_data_geno(cdata))) == expected
@pytest.mark.unit_test
@pytest.mark.parametrize(
diff --git a/tests/r_qtl/test_r_qtl2_gmap.py b/tests/r_qtl/test_r_qtl2_gmap.py
index ba46c42..5c8ca6a 100644
--- a/tests/r_qtl/test_r_qtl2_gmap.py
+++ b/tests/r_qtl/test_r_qtl2_gmap.py
@@ -10,28 +10,28 @@ from r_qtl import r_qtl2 as rqtl2
@pytest.mark.parametrize(
"relpath,mapfiletype,expected",
(("tests/r_qtl/test_files/test_gmap.zip",
- "genetic-map",
+ "gmap",
({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
{"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
{"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
{"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
{"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
("tests/r_qtl/test_files/test_gmap_transposed.zip",
- "genetic-map",
+ "gmap",
({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
{"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
{"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
{"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
{"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
("tests/r_qtl/test_files/test_pmap.zip",
- "physical-map",
+ "pmap",
({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
{"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
{"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
{"marker": "D2Mit379", "chr": "2", "pos": "37.451062"},
{"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})),
("tests/r_qtl/test_files/test_pmap_transposed.zip",
- "physical-map",
+ "pmap",
({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
{"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
{"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
@@ -44,5 +44,5 @@ def test_parse_map_files(relpath, mapfiletype, expected):
THEN: ensure the parsed data is as expected.
"""
with ZipFile(Path(relpath).absolute(), "r") as zfile:
- assert tuple(rqtl2.map_data(
- zfile, mapfiletype, rqtl2.control_data(zfile))) == expected
+ cdata = rqtl2.control_data(zfile)
+ assert tuple(rqtl2.file_data(zfile, mapfiletype, cdata)) == expected
diff --git a/tests/r_qtl/test_r_qtl2_pheno.py b/tests/r_qtl/test_r_qtl2_pheno.py
index 554d9c8..46be469 100644
--- a/tests/r_qtl/test_r_qtl2_pheno.py
+++ b/tests/r_qtl/test_r_qtl2_pheno.py
@@ -29,5 +29,5 @@ def test_parse_pheno_files(filepath, expected):
THEN: verify the parsed data is as expected
"""
with ZipFile(Path(filepath).absolute(), "r") as zfile:
- assert tuple(
- rqtl2.phenotype_data(zfile, rqtl2.control_data(zfile))) == expected
+ cdata = rqtl2.control_data(zfile)
+ assert tuple(rqtl2.file_data(zfile, "pheno", cdata)) == expected