diff options
author | zsloan | 2021-11-11 11:23:39 -0600 |
---|---|---|
committer | GitHub | 2021-11-11 11:23:39 -0600 |
commit | 8c77af63efae6f06d7c7c3269fc0e41811a8037a (patch) | |
tree | 9ffa4b84fd36f09e772db3e218bc980999324c41 /gn3/data_helpers.py | |
parent | 607c6e627c23c1bce3b199b145855182ab51b211 (diff) | |
parent | 249b85102063debfeeb1b0565956059b8a3af1cf (diff) | |
download | genenetwork3-8c77af63efae6f06d7c7c3269fc0e41811a8037a.tar.gz |
Merge branch 'main' into feature/add_rqtl_pairscan
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r-- | gn3/data_helpers.py | 37 |
1 files changed, 37 insertions, 0 deletions
# Source: gn3/data_helpers.py (new file, mode 100644, index 0000000..d3f942b)
"""
This module will hold generic functions that can operate on a wide-array of
data structures.
"""

# NOTE(review): `ceil` and `reduce` are no longer used by the functions below;
# the imports are kept so nothing importing them from here breaks -- remove
# once that is confirmed.
from math import ceil
from functools import reduce
from typing import Any, Tuple, Sequence, Optional

def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
    """
    Split the sequence `items` into consecutive partitions of at most `num`
    items each.

    The result is always a tuple of tuples, whatever the concrete type of
    `items` is. The final partition holds the remainder and may therefore
    contain fewer than `num` items. An empty `items` gives an empty tuple.

    This is an approximation of clojure's `partition-all` function.

    Raises `ValueError` if `num` is less than 1, since no partitioning is
    possible with a non-positive partition size.
    """
    if num < 1:
        raise ValueError(
            "partition size `num` must be a positive integer, got {!r}".format(
                num))

    # Stepping through the start offsets directly avoids accumulating the
    # (start, stop) pairs into an ever-growing tuple first.
    return tuple(
        tuple(items[start:start + num])
        for start in range(0, len(items), num))

def parse_csv_line(
        line: str, delimiter: str = ",",
        quoting: Optional[str] = '"') -> Tuple[str, ...]:
    """
    Parses a line from a CSV file into a tuple of strings.

    The line is split on every occurrence of `delimiter`; a delimiter inside
    a quoted field is NOT treated specially. Each resulting column then has
    any leading/trailing spaces, tabs, newlines and `quoting` characters
    removed. Pass `quoting=None` (or an empty string) to strip whitespace
    only.

    This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV`
    function in GeneNetwork1.
    """
    # `quoting` may be None: formatting None into the strip set would add the
    # letters of the word "None" to it and eat those letters off column ends.
    strip_chars = "{} \t\n".format(quoting or "")
    return tuple(col.strip(strip_chars) for col in line.split(delimiter))