about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-10 04:44:53 +0300
committer Frederick Muriuki Muriithi 2024-01-10 04:44:53 +0300
commit 9322da0f79dfa4c3f9f899f5a861ce302ce21e9c (patch)
tree ad3ac4b8d5791219392eba77067c2c0ce71e25e1
parent 21078fecb698972062af3157a9a0f6e84bb8fd0d (diff)
download gn-uploader-9322da0f79dfa4c3f9f899f5a861ce302ce21e9c.tar.gz
Make identifier column name explicit
Since the R/qtl2 bundle generator could name the identifier column
anything, this commit converts the incoming identifier column name
into something explicit that we know and can use.
-rw-r--r-- r_qtl/r_qtl2.py 30
-rw-r--r-- tests/r_qtl/test_r_qtl2_map_files.py 40
-rw-r--r-- tests/r_qtl/test_r_qtl2_pheno.py 20
3 files changed, 55 insertions, 35 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index d3a3805..d8231bb 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -48,12 +48,26 @@ def with_non_transposed(zfile: ZipFile,
     def not_comment_line(line):
         return not line.startswith(cdata.get("comment.char", "#"))
 
+    sep = cdata.get("sep", ",")
     with zfile.open(cdata[member_key]) as innerfile:
-        reader = csv.DictReader(
-            filter(not_comment_line, io.TextIOWrapper(innerfile)),
-            delimiter=cdata.get("sep", ","))
+        wrapped_file = io.TextIOWrapper(innerfile)
+        firstrow = tuple(
+            field.strip() for field in
+            next(filter(not_comment_line, wrapped_file)).strip().split(sep))
+        id_key = firstrow[0]
+        wrapped_file.seek(0)
+        reader = csv.DictReader(filter(not_comment_line, wrapped_file),
+                                delimiter=sep)
         for row in reader:
-            yield process_value(row)
+            processed = process_value(row)
+            yield {
+                "id": processed[id_key],
+                **{
+                    key: value
+                    for key, value in processed.items()
+                    if key != id_key
+                }
+            }
 
 def __make_organise_by_id__(id_key):
     """Return a function to use with `reduce` to organise values by some
@@ -101,7 +115,13 @@ def with_transposed(zfile: ZipFile,
                      for line in batch
                      for row in process_value(id_key, headers, line)),
                     {}).items():
-                yield row
+                yield {
+                    "id": row[id_key],
+                    **{
+                        key: value
+                        for key, value in row.items()
+                        if key != id_key
+                    }}
         except StopIteration:
             pass
 
diff --git a/tests/r_qtl/test_r_qtl2_map_files.py b/tests/r_qtl/test_r_qtl2_map_files.py
index 5c8ca6a..3427ffb 100644
--- a/tests/r_qtl/test_r_qtl2_map_files.py
+++ b/tests/r_qtl/test_r_qtl2_map_files.py
@@ -11,32 +11,32 @@ from r_qtl import r_qtl2 as rqtl2
     "relpath,mapfiletype,expected",
     (("tests/r_qtl/test_files/test_gmap.zip",
       "gmap",
-      ({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
-       {"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
-       {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
-       {"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
-       {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
+      ({"id": "PVV4", "chr": "1", "pos": "0.000000"},
+       {"id": "AXR-1", "chr": "1", "pos": "6.250674"},
+       {"id": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
+       {"id": "EC.480C", "chr": "1", "pos": "12.577629"},
+       {"id": "EC.66C", "chr": "1", "pos": "18.392830"})),
      ("tests/r_qtl/test_files/test_gmap_transposed.zip",
       "gmap",
-      ({"marker": "PVV4", "chr": "1", "pos": "0.000000"},
-       {"marker": "AXR-1", "chr": "1", "pos": "6.250674"},
-       {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
-       {"marker": "EC.480C", "chr": "1", "pos": "12.577629"},
-       {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})),
+      ({"id": "PVV4", "chr": "1", "pos": "0.000000"},
+       {"id": "AXR-1", "chr": "1", "pos": "6.250674"},
+       {"id": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"},
+       {"id": "EC.480C", "chr": "1", "pos": "12.577629"},
+       {"id": "EC.66C", "chr": "1", "pos": "18.392830"})),
      ("tests/r_qtl/test_files/test_pmap.zip",
       "pmap",
-      ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
-       {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
-       {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
-       {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"},
-       {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})),
+      ({"id": "D1Mit18", "chr": "1", "pos": "52.418656"},
+       {"id": "D1Mit80", "chr": "1", "pos": "86.377953"},
+       {"id": "D1Mit17", "chr": "1", "pos": "189.571337"},
+       {"id": "D2Mit379", "chr": "2", "pos": "37.451062"},
+       {"id": "D2Mit75", "chr": "2", "pos": "80.584782"})),
      ("tests/r_qtl/test_files/test_pmap_transposed.zip",
       "pmap",
-      ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"},
-       {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"},
-       {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"},
-       {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"},
-       {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"}))))
+      ({"id": "D1Mit18", "chr": "1", "pos": "52.418656"},
+       {"id": "D1Mit80", "chr": "1", "pos": "86.377953"},
+       {"id": "D1Mit17", "chr": "1", "pos": "189.571337"},
+       {"id": "D2Mit379", "chr": "2", "pos": "37.451062"},
+       {"id": "D2Mit75", "chr": "2", "pos": "80.584782"}))))
 def test_parse_map_files(relpath, mapfiletype, expected):
     """
     GIVEN: A path to a zip file, `relpath` and the type of the map file,
diff --git a/tests/r_qtl/test_r_qtl2_pheno.py b/tests/r_qtl/test_r_qtl2_pheno.py
index 66850a0..a7de675 100644
--- a/tests/r_qtl/test_r_qtl2_pheno.py
+++ b/tests/r_qtl/test_r_qtl2_pheno.py
@@ -36,17 +36,17 @@ def test_parse_pheno_files(filepath, expected):
 @pytest.mark.parametrize(
     "filepath,expected",
     (("tests/r_qtl/test_files/test_phenocovar.zip",
-      ({"pheno": "T0", "time (hrs)": "0"},
-       {"pheno": "T2", "time (hrs)": "0.0333333333333333"},
-       {"pheno": "T4", "time (hrs)": "0.0666666666666667"},
-       {"pheno": "T6", "time (hrs)": "0.1"},
-       {"pheno": "T8", "time (hrs)": "0.133333333333333"})),
+      ({"id": "T0", "time (hrs)": "0"},
+       {"id": "T2", "time (hrs)": "0.0333333333333333"},
+       {"id": "T4", "time (hrs)": "0.0666666666666667"},
+       {"id": "T6", "time (hrs)": "0.1"},
+       {"id": "T8", "time (hrs)": "0.133333333333333"})),
      ("tests/r_qtl/test_files/test_phenocovar_transposed.zip",
-      ({"pheno": "T0", "time (hrs)": "0"},
-       {"pheno": "T2", "time (hrs)": "0.0333333333333333"},
-       {"pheno": "T4", "time (hrs)": "0.0666666666666667"},
-       {"pheno": "T6", "time (hrs)": "0.1"},
-       {"pheno": "T8", "time (hrs)": "0.133333333333333"}))))
+      ({"id": "T0", "time (hrs)": "0"},
+       {"id": "T2", "time (hrs)": "0.0333333333333333"},
+       {"id": "T4", "time (hrs)": "0.0666666666666667"},
+       {"id": "T6", "time (hrs)": "0.1"},
+       {"id": "T8", "time (hrs)": "0.133333333333333"}))))
 def test_parse_phenocovar_files(filepath, expected):
     """Test parsing of 'phenocovar' files from the R/qtl2 bundle.