From 4468d5b2ba238975c67e71f41dc23f96f8811d00 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 8 Aug 2024 17:15:40 -0500 Subject: Function to transpose CSV files. Some files come in a transposed form, so we need to transpose them again in order to use the same processing code for all files. --- r_qtl/r_qtl2.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'r_qtl') diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index a34caac..23c016d 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -50,6 +50,37 @@ def extract(zfile: ZipFile, outputdir: Path) -> tuple[Path, ...]: if not __special_file__(member)) +def transpose_csv( + inpath: Path, + linesplitterfn: Callable, + linejoinerfn: Callable, + outpath: Path): + """Transpose a file: Make its rows into columns and its columns into rows. + + This function will create a new file, `outfile`, with the same content as + the original, `infile`, except transposed i.e. The rows of `infile` are the + columns of `outfile` and the columns of `infile` are the rows of `outfile`. + + Parameters + ---------- + inpath: The CSV file to transpose. + linesplitterfn: A function to use for splitting each line into columns + linejoinerfn: A function to use to rebuild the lines + outpath: The path where the transposed data is stored + """ + def __read_by_line__(_path): + with open(_path, "r", encoding="utf8") as infile: + for line in infile: + yield line + + transposed_data= (f"{linejoinerfn(items)}\n" for items in zip(*( + linesplitterfn(line) for line in __read_by_line__(inpath)))) + + with open(outpath, "w", encoding="utf8") as outfile: + for line in transposed_data: + outfile.write(line) + + def control_data(zfile: ZipFile) -> dict: """Retrieve the control file from the zip file info.""" files = tuple(filename -- cgit v1.2.3