aboutsummaryrefslogtreecommitdiff
path: root/gn3/data_helpers.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-12-06 14:04:59 +0300
committerFrederick Muriuki Muriithi2021-12-06 14:04:59 +0300
commit66406115f41594ba40e3fbbc6f69aace2d11800f (patch)
tree0f3de09b74a3f47918dd4a192665c8a06c508144 /gn3/data_helpers.py
parent77099cac68e8f4792bf54d8e1f7ce6f315bedfa7 (diff)
parent5d2248f1dabbc7dd04f48aafcc9f327817a9c92c (diff)
downloadgenenetwork3-66406115f41594ba40e3fbbc6f69aace2d11800f.tar.gz
Merge branch 'partial-correlations'
Diffstat (limited to 'gn3/data_helpers.py')
-rw-r--r--gn3/data_helpers.py52
1 files changed, 52 insertions, 0 deletions
diff --git a/gn3/data_helpers.py b/gn3/data_helpers.py
new file mode 100644
index 0000000..b72fbc5
--- /dev/null
+++ b/gn3/data_helpers.py
@@ -0,0 +1,52 @@
+"""
+This module holds generic functions that can operate on a wide array of
+data structures.
+"""
+
+from math import ceil
+from functools import reduce
+from typing import Any, Tuple, Sequence, Optional
+
def partition_all(num: int, items: Sequence[Any]) -> Tuple[Tuple[Any, ...], ...]:
    """
    Split `items` into consecutive partitions of at most `num` items each.

    The result is always a tuple of tuples, regardless of the concrete type of
    `items`. Every partition holds exactly `num` items except, possibly, the
    last one, which holds whatever remains. An empty `items` gives an empty
    tuple.

    `items` must support `len()` and slicing.

    Raises `ValueError` if `num` is less than 1, since no meaningful
    partitioning exists for such a size.

    This is an approximation of clojure's `partition-all` function.
    """
    if num < 1:
        raise ValueError(
            "partition size must be a positive integer, got {!r}".format(num))

    # Step through the start offsets directly; slicing clamps the final
    # partition to the end of `items`, so no explicit bounds check is needed.
    return tuple(
        tuple(items[start:start + num])
        for start in range(0, len(items), num))
+
def partition_by(partition_fn, items):
    """
    Split `items` into a tuple of tuples, starting a new inner tuple at every
    item for which `partition_fn(item)` is truthy.

    `partition_fn` is called exactly once per item, in order. Items for which
    it returns a falsy value are appended to the partition currently being
    built. If the leading items do not satisfy `partition_fn`, they are
    collected into a first partition of their own rather than causing an
    error. An empty `items` gives an empty tuple.

    This is an approximation of Clojure's `partition-by` function.
    """
    partitions = []
    for item in items:
        # Evaluate `partition_fn` first so it is invoked for every item, even
        # when a new partition is forced because none exists yet.
        if partition_fn(item) or not partitions:
            partitions.append([item])
        else:
            partitions[-1].append(item)

    return tuple(tuple(partition) for partition in partitions)
+
def parse_csv_line(
        line: str, delimiter: str = ",",
        quoting: Optional[str] = '"') -> Tuple[str, ...]:
    """
    Parses a line from a CSV file into a tuple of strings.

    The line is split on every occurrence of `delimiter`, and each resulting
    column has leading and trailing spaces, tabs, newlines and `quoting`
    characters removed. Passing `None` (or an empty string) for `quoting`
    disables the removal of quote characters.

    NOTE: the split is purely textual; a `delimiter` that appears inside a
    quoted field still separates columns.

    This is a migration of the `web.webqtl.utility.webqtlUtil.readLineCSV`
    function in GeneNetwork1.
    """
    # Formatting `None` into the strip set would yield the literal text "None"
    # and wrongly strip the letters N, o, n and e from the column ends.
    strip_chars = "{} \t\n".format(quoting or "")
    return tuple(col.strip(strip_chars) for col in line.split(delimiter))