about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-08-08 17:15:40 -0500
committerFrederick Muriuki Muriithi2024-08-08 17:15:40 -0500
commit4468d5b2ba238975c67e71f41dc23f96f8811d00 (patch)
treea4dae03336c57086f95ef943a6b78e47feb4a75e
parent72f64a5a916221e079f2e06f85c50828dd41bf59 (diff)
downloadgn-uploader-4468d5b2ba238975c67e71f41dc23f96f8811d00.tar.gz
Function to transpose CSV files.
Some files come in a transposed form, so we need to transpose them
again in order to use the same processing code for all files.
-rw-r--r--r_qtl/r_qtl2.py31
1 files changed, 31 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index a34caac..23c016d 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -50,6 +50,37 @@ def extract(zfile: ZipFile, outputdir: Path) -> tuple[Path, ...]:
                  if not __special_file__(member))
 
 
+def transpose_csv(
+        inpath: Path,
+        linesplitterfn: Callable,
+        linejoinerfn: Callable,
+        outpath: Path):
+    """Transpose a file: Make its rows into columns and its columns into rows.
+
+    Writes a new file, `outpath`, holding the content of `inpath` transposed:
+    rows of `inpath` become columns of `outpath` and vice versa. Every row is
+    read before writing starts, and output is cut to the shortest row's length.
+
+    Parameters
+    ----------
+    inpath: The CSV file to transpose.
+    linesplitterfn: Splits one line (its line ending still attached) into columns
+    linejoinerfn: Rebuilds one output line from its items; a newline is appended
+    outpath: The path where the transposed data is written
+    """
+    def __read_by_line__(_path):
+        with open(_path, "r", encoding="utf8") as infile:
+            for line in infile:
+                yield line
+
+    transposed_data= (f"{linejoinerfn(items)}\n" for items in zip(*(
+        linesplitterfn(line) for line in __read_by_line__(inpath))))
+
+    with open(outpath, "w", encoding="utf8") as outfile:
+        for line in transposed_data:
+            outfile.write(line)
+
+
 def control_data(zfile: ZipFile) -> dict:
     """Retrieve the control file from the zip file info."""
     files = tuple(filename