aboutsummaryrefslogtreecommitdiff
path: root/gn3/data_helpers.py
blob: 268a0bb6342c1d4d403ea578364ce7a20bd50d2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
This module holds generic functions that can operate on a wide array of
data structures.
"""

from math import ceil
from functools import reduce
from typing import Any, Tuple, Sequence, Optional, Generator

def partition_all(num: int, items: Sequence[Any]) -> Generator:
    """
    Lazily split `items` into consecutive chunks of at most `num` elements.

    Each chunk is yielded as a tuple, whatever the type of `items` is; the
    final chunk holds the leftover elements and may be shorter than `num`.
    `items` must support `len()` and slicing. An empty `items`, or a negative
    `num`, yields no chunks at all.

    Raises `ValueError` when `num` is zero. Because this is a generator, the
    error surfaces when iteration starts, not when the function is called.

    This is an approximation of clojure's `partition-all` function.
    """
    if num == 0:
        raise ValueError("partition_all: `num` must not be zero")
    # Walk the chunk start offsets directly instead of pre-computing every
    # (start, stop) pair; slicing clamps the last chunk to the end of `items`.
    for start in range(0, len(items), num):
        yield tuple(items[start:start + num])

def partition_by(partition_fn, items):
    """
    Split `items` into a tuple of tuples, starting a new inner tuple at every
    item for which `partition_fn(item)` is truthy.

    Items for which `partition_fn` is falsy are appended to the partition
    currently being built. Any such items that appear before the first
    truthy item are collected into an initial partition of their own, so no
    input is dropped. An empty `items` gives an empty tuple.

    `partition_fn` is called exactly once per item, in order.

    This is an approximation of Clojure's `partition-by` function.
    """
    # Accumulate in lists and freeze at the end: appending is O(1), whereas
    # rebuilding the nested tuple for every item recopies the whole result.
    groups = []
    for item in items:
        # `not groups` covers leading items that do not open a partition;
        # without it there would be no current partition to append to.
        if partition_fn(item) or not groups:
            groups.append([item])
        else:
            groups[-1].append(item)
    return tuple(tuple(group) for group in groups)

def parse_csv_line(
        line: str, delimiter: str = ",",
        quoting: Optional[str] = '"') -> Tuple[str, ...]:
    """
    Parses a line from a CSV file into a tuple of strings.

    The line is split on every occurrence of `delimiter`, then each column
    has spaces, tabs, newlines and any `quoting` characters stripped from
    both ends. Pass `quoting=None` (or an empty string) to strip whitespace
    only.

    NOTE: this is a plain split, so a `delimiter` that appears inside a
    quoted field still splits that field.

    This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV`
    function in GeneNetwork1.
    """
    # Interpolating `None` directly would put the letters of "None" into the
    # strip set and eat them from the ends of columns.
    strip_chars = f"{quoting or ''} \t\n"
    return tuple(col.strip(strip_chars) for col in line.split(delimiter))