diff options
author | Frederick Muriuki Muriithi | 2024-08-08 17:15:40 -0500 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-08-08 17:15:40 -0500 |
commit | 4468d5b2ba238975c67e71f41dc23f96f8811d00 (patch) | |
tree | a4dae03336c57086f95ef943a6b78e47feb4a75e /r_qtl | |
parent | 72f64a5a916221e079f2e06f85c50828dd41bf59 (diff) | |
download | gn-uploader-4468d5b2ba238975c67e71f41dc23f96f8811d00.tar.gz |
Function to transpose CSV files.
Some files come in a transposed form, so we need to transpose them
again in order to use the same processing code for all files.
Diffstat (limited to 'r_qtl')
-rw-r--r-- | r_qtl/r_qtl2.py | 31 |
1 files changed, 31 insertions, 0 deletions
def transpose_csv(
        inpath: Path,
        linesplitterfn: Callable,
        linejoinerfn: Callable,
        outpath: Path) -> None:
    """Transpose a file: Make its rows into columns and its columns into rows.

    This function will create a new file at `outpath`, with the same content
    as the original file at `inpath`, except transposed, i.e. the rows of
    `inpath` are the columns of `outpath` and the columns of `inpath` are the
    rows of `outpath`.

    The line terminator is removed from every input line before the line is
    handed to `linesplitterfn`, so the last field of a row never carries a
    stray newline into the transposed output. Each output line is terminated
    with a single newline character.

    Rows of unequal length are not padded: the output is truncated to the
    length of the shortest input row. The whole input is read into memory
    before the output file is opened.

    Parameters
    ----------
    inpath: The CSV file to transpose.
    linesplitterfn: A function to use for splitting each line into columns
    linejoinerfn: A function to use to rebuild the lines
    outpath: The path where the transposed data is stored
    """
    def __read_by_line__(_path):
        with open(_path, "r", encoding="utf8") as infile:
            for line in infile:
                # Text mode normalises every line ending to "\n"; drop it so
                # that the splitter only ever sees the row's actual content.
                yield line.rstrip("\n")

    # `zip(*rows)` is what performs the transposition. The star-unpacking
    # consumes every row up front, i.e. the input is read in full here.
    transposed_data = (
        f"{linejoinerfn(items)}\n"
        for items in zip(*(
            linesplitterfn(line) for line in __read_by_line__(inpath))))

    with open(outpath, "w", encoding="utf8") as outfile:
        outfile.writelines(transposed_data)