From 9322da0f79dfa4c3f9f899f5a861ce302ce21e9c Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 10 Jan 2024 04:44:53 +0300 Subject: Make identifier column name explicit Since the R/qtl2 bundle generator could name the identifier column anything, this commit converts the incoming identifier column name into something explicit that we know and can use. --- r_qtl/r_qtl2.py | 30 ++++++++++++++++++++++----- tests/r_qtl/test_r_qtl2_map_files.py | 40 ++++++++++++++++++------------------ tests/r_qtl/test_r_qtl2_pheno.py | 20 +++++++++--------- 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index d3a3805..d8231bb 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -48,12 +48,26 @@ def with_non_transposed(zfile: ZipFile, def not_comment_line(line): return not line.startswith(cdata.get("comment.char", "#")) + sep = cdata.get("sep", ",") with zfile.open(cdata[member_key]) as innerfile: - reader = csv.DictReader( - filter(not_comment_line, io.TextIOWrapper(innerfile)), - delimiter=cdata.get("sep", ",")) + wrapped_file = io.TextIOWrapper(innerfile) + firstrow = tuple( + field.strip() for field in + next(filter(not_comment_line, wrapped_file)).strip().split(sep)) + id_key = firstrow[0] + wrapped_file.seek(0) + reader = csv.DictReader(filter(not_comment_line, wrapped_file), + delimiter=sep) for row in reader: - yield process_value(row) + processed = process_value(row) + yield { + "id": processed[id_key], + **{ + key: value + for key, value in processed.items() + if key != id_key + } + } def __make_organise_by_id__(id_key): """Return a function to use with `reduce` to organise values by some @@ -101,7 +115,13 @@ def with_transposed(zfile: ZipFile, for line in batch for row in process_value(id_key, headers, line)), {}).items(): - yield row + yield { + "id": row[id_key], + **{ + key: value + for key, value in row.items() + if key != id_key + }} except StopIteration: pass diff --git a/tests/r_qtl/test_r_qtl2_map_files.py b/tests/r_qtl/test_r_qtl2_map_files.py index 5c8ca6a..3427ffb 100644 --- a/tests/r_qtl/test_r_qtl2_map_files.py +++ b/tests/r_qtl/test_r_qtl2_map_files.py @@ -11,32 +11,32 @@ from r_qtl import r_qtl2 as rqtl2 "relpath,mapfiletype,expected", (("tests/r_qtl/test_files/test_gmap.zip", "gmap", - ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, - {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, - {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, - {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, - {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), + ({"id": "PVV4", "chr": "1", "pos": "0.000000"}, + {"id": "AXR-1", "chr": "1", "pos": "6.250674"}, + {"id": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, + {"id": "EC.480C", "chr": "1", "pos": "12.577629"}, + {"id": "EC.66C", "chr": "1", "pos": "18.392830"})), ("tests/r_qtl/test_files/test_gmap_transposed.zip", "gmap", - ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, - {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, - {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, - {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, - {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), + ({"id": "PVV4", "chr": "1", "pos": "0.000000"}, + {"id": "AXR-1", "chr": "1", "pos": "6.250674"}, + {"id": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, + {"id": "EC.480C", "chr": "1", "pos": "12.577629"}, + {"id": "EC.66C", "chr": "1", "pos": "18.392830"})), ("tests/r_qtl/test_files/test_pmap.zip", "pmap", - ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"}, - {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, - {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, - {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, - {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})), + ({"id": "D1Mit18", "chr": "1", "pos": "52.418656"}, + {"id": "D1Mit80", "chr": "1", "pos": "86.377953"}, + {"id": "D1Mit17", "chr": "1", "pos": "189.571337"}, + {"id": "D2Mit379", "chr": "2", "pos": "37.451062"}, + {"id": "D2Mit75", "chr": "2", "pos": "80.584782"})), ("tests/r_qtl/test_files/test_pmap_transposed.zip", "pmap", - ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"}, - {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, - {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, - {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, - {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})))) + ({"id": "D1Mit18", "chr": "1", "pos": "52.418656"}, + {"id": "D1Mit80", "chr": "1", "pos": "86.377953"}, + {"id": "D1Mit17", "chr": "1", "pos": "189.571337"}, + {"id": "D2Mit379", "chr": "2", "pos": "37.451062"}, + {"id": "D2Mit75", "chr": "2", "pos": "80.584782"})))) def test_parse_map_files(relpath, mapfiletype, expected): """ GIVEN: A path to a zip file, `relpath` and the type of the map file, diff --git a/tests/r_qtl/test_r_qtl2_pheno.py b/tests/r_qtl/test_r_qtl2_pheno.py index 66850a0..a7de675 100644 --- a/tests/r_qtl/test_r_qtl2_pheno.py +++ b/tests/r_qtl/test_r_qtl2_pheno.py @@ -36,17 +36,17 @@ def test_parse_pheno_files(filepath, expected): @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/test_phenocovar.zip", - ({"pheno": "T0", "time (hrs)": "0"}, - {"pheno": "T2", "time (hrs)": "0.0333333333333333"}, - {"pheno": "T4", "time (hrs)": "0.0666666666666667"}, - {"pheno": "T6", "time (hrs)": "0.1"}, - {"pheno": "T8", "time (hrs)": "0.133333333333333"})), + ({"id": "T0", "time (hrs)": "0"}, + {"id": "T2", "time (hrs)": "0.0333333333333333"}, + {"id": "T4", "time (hrs)": "0.0666666666666667"}, + {"id": "T6", "time (hrs)": "0.1"}, + {"id": "T8", "time (hrs)": "0.133333333333333"})), ("tests/r_qtl/test_files/test_phenocovar_transposed.zip", - ({"pheno": "T0", "time (hrs)": "0"}, - {"pheno": "T2", "time (hrs)": "0.0333333333333333"}, - {"pheno": "T4", "time (hrs)": "0.0666666666666667"}, - {"pheno": "T6", "time (hrs)": "0.1"}, - {"pheno": "T8", "time (hrs)": "0.133333333333333"})))) + ({"id": "T0", "time (hrs)": "0"}, + {"id": "T2", "time (hrs)": "0.0333333333333333"}, + {"id": "T4", "time (hrs)": "0.0666666666666667"}, + {"id": "T6", "time (hrs)": "0.1"}, + {"id": "T8", "time (hrs)": "0.133333333333333"})))) def test_parse_phenocovar_files(filepath, expected): """Test parsing of 'phenocovar' files from the R/qtl2 bundle. -- cgit v1.2.3