diff options
author | Frederick Muriuki Muriithi | 2021-12-06 14:04:59 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2021-12-06 14:04:59 +0300 |
commit | 66406115f41594ba40e3fbbc6f69aace2d11800f (patch) | |
tree | 0f3de09b74a3f47918dd4a192665c8a06c508144 /gn3/data_helpers.py | |
parent | 77099cac68e8f4792bf54d8e1f7ce6f315bedfa7 (diff) | |
parent | 5d2248f1dabbc7dd04f48aafcc9f327817a9c92c (diff) | |
download | genenetwork3-66406115f41594ba40e3fbbc6f69aace2d11800f.tar.gz |
Merge branch 'partial-correlations'
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r-- | gn3/data_helpers.py | 52 |
1 files changed, 52 insertions, 0 deletions
# gn3/data_helpers.py (new file, blob b72fbc5)
"""
This module will hold generic functions that can operate on a wide array of
data structures.
"""

from math import ceil
from functools import reduce
from typing import Any, Callable, Iterable, Optional, Sequence, Tuple


def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
    """
    Split `items` into consecutive partitions of `num` items each.

    The result is always a tuple of tuples, regardless of the type of `items`.
    The last partition holds the remainder and may be shorter than `num`.
    An empty `items` gives an empty tuple.

    This is an approximation of clojure's `partition-all` function.

    Raises `ZeroDivisionError` if `num` is zero.
    """
    # Number of partitions needed, rounding up so that a trailing short
    # partition is kept rather than dropped.
    partition_count = ceil(len(items) / num)
    return tuple(
        tuple(items[index * num:(index + 1) * num])
        for index in range(partition_count))


def partition_by(
        partition_fn: Callable[[Any], Any],
        items: Iterable[Any]) -> Tuple[Tuple[Any, ...], ...]:
    """
    Split `items` into a tuple of tuples, starting a new inner tuple at every
    item for which `partition_fn` returns a truthy value.

    Items for which `partition_fn` is falsy are appended to the partition that
    is currently open. If `items` begins with such items, they are collected
    into a leading partition of their own (the only partition whose first
    item does not satisfy `partition_fn`).

    This is an approximation of Clojure's `partition-by` function.
    """
    def __partitioner__(accumulator, item):
        # `not accumulator` covers leading items that do not satisfy
        # `partition_fn`: there is no open partition to extend yet, so one
        # is started instead of indexing into an empty tuple.
        if partition_fn(item) or not accumulator:
            return accumulator + ((item,),)
        return accumulator[:-1] + (accumulator[-1] + (item,),)

    return reduce(__partitioner__, items, tuple())


def parse_csv_line(
        line: str, delimiter: str = ",",
        quoting: Optional[str] = '"') -> Tuple[str, ...]:
    """
    Parses a line from a CSV file into a tuple of strings.

    The line is split on every occurrence of `delimiter`; each column then has
    the `quoting` character(s), spaces, tabs and newlines stripped from both
    ends. Passing `quoting=None` (or an empty string) strips whitespace only.

    Note that the split is purely textual: a `delimiter` that appears inside a
    quoted field still splits that field.

    This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV`
    function in GeneNetwork1.
    """
    # Build the strip set explicitly: formatting `None` into the string would
    # add the letters of "None" to the characters being stripped.
    strip_chars = (quoting or "") + " \t\n"
    return tuple(col.strip(strip_chars) for col in line.split(delimiter))