aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2024-08-08 17:15:40 -0500
committer: Frederick Muriuki Muriithi, 2024-08-08 17:15:40 -0500
commit: 4468d5b2ba238975c67e71f41dc23f96f8811d00 (patch)
tree: a4dae03336c57086f95ef943a6b78e47feb4a75e
parent: 72f64a5a916221e079f2e06f85c50828dd41bf59 (diff)
download: gn-uploader-4468d5b2ba238975c67e71f41dc23f96f8811d00.tar.gz
Function to transpose CSV files.
Some files come in a transposed form, so we need to transpose them again in order to use the same processing code for all files.
-rw-r--r-- r_qtl/r_qtl2.py 31
1 files changed, 31 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index a34caac..23c016d 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -50,6 +50,37 @@ def extract(zfile: ZipFile, outputdir: Path) -> tuple[Path, ...]:
if not __special_file__(member))
def transpose_csv(
        inpath: Path,
        linesplitterfn: Callable,
        linejoinerfn: Callable,
        outpath: Path) -> None:
    """Transpose a file: Make its rows into columns and its columns into rows.

    This function writes a new file at `outpath` with the same content as the
    file at `inpath`, except transposed, i.e. the rows of the input are the
    columns of the output and the columns of the input are the rows of the
    output.

    The line terminator is removed from each line before it is handed to
    `linesplitterfn`, so it never ends up inside the last cell of a row.
    Rows that are shorter than the longest row are padded with empty strings
    rather than causing the longer rows to be silently truncated.

    The whole input is read before `outpath` is opened for writing, so a
    failure while reading does not leave a truncated output file behind, and
    `outpath` may be the same file as `inpath`.

    Parameters
    ----------
    inpath: The CSV file to transpose.
    linesplitterfn: A function to use for splitting each line into columns.
        It is called with one line of text (without its trailing newline) and
        must return an iterable of cells.
    linejoinerfn: A function to use to rebuild the lines. It is called with a
        tuple of cells and its result is written followed by a newline.
    outpath: The path where the transposed data is stored.
    """
    # Local import: keeps this function self-contained regardless of what the
    # module already imports at the top of the file.
    from itertools import zip_longest

    def __read_rows__(_path):
        with open(_path, "r", encoding="utf8") as infile:
            for line in infile:
                # Text mode normalises every line ending to "\n"; drop it so
                # it is not mistaken for cell content.
                yield linesplitterfn(line.rstrip("\n"))

    # The `*` unpacking consumes the reader completely (and closes the input
    # file) right here, before the output file is touched.
    # NOTE(review): padding with "" assumes the splitter yields strings.
    transposed_rows = zip_longest(*__read_rows__(inpath), fillvalue="")

    with open(outpath, "w", encoding="utf8") as outfile:
        outfile.writelines(
            f"{linejoinerfn(items)}\n" for items in transposed_rows)
+
+
def control_data(zfile: ZipFile) -> dict:
"""Retrieve the control file from the zip file info."""
files = tuple(filename