diff options
author | Zachary Sloan | 2013-04-17 21:24:04 +0000 |
---|---|---|
committer | Zachary Sloan | 2013-04-17 21:24:04 +0000 |
commit | fb0a8ff87d6a8cbe9e1961220c451f04fddfe8f7 (patch) | |
tree | 05bb0e5b564de0c51bb79ad03686f29bf7a06948 | |
parent | a03a4b6c7d4c76c1b02fa3e179af9b00a6817818 (diff) | |
download | genenetwork2-fb0a8ff87d6a8cbe9e1961220c451f04fddfe8f7.tar.gz |
Added chunk_test.py, which compares my and Sam's split-into-chunks
algorithms for accuracy and speed
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/chunk_test.py | 107 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 9 |
2 files changed, 111 insertions, 5 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/chunk_test.py b/wqflask/wqflask/my_pylmm/pyLMM/chunk_test.py new file mode 100644 index 00000000..58900778 --- /dev/null +++ b/wqflask/wqflask/my_pylmm/pyLMM/chunk_test.py @@ -0,0 +1,107 @@ +from __future__ import absolute_import, print_function, division + +import math +import time +import collections + +""" +558 exact out of 1000 [Total amount off: 1580] +""" + + + +def zach_divide_into_chunks(the_list, number_chunks): + length = len(the_list) + if length == 0: + return [[]] + + if number_chunks > length: + number_chunks = length + + chunksize = int(math.ceil(length / number_chunks)) + #if length % number_chunks > 0: + # chunksize += 1 + # while (chunksize * number_chunks) > length: + # number_chunks -= 1 + + chunks = [] + for counter in range(0, length, chunksize): + chunks.append(the_list[counter:counter+chunksize]) + + return chunks + + +def sam_divide_into_chunks(the_list, number_chunks): + length = len(the_list) + + if length == 0: + return [[]] + else: + if length <= number_chunks: + number_chunks = length + + chunksize = int(math.ceil(length / number_chunks)) + + chunks = [] + for counter in range(0, length, chunksize): + chunks.append(the_list[counter:counter+chunksize]) + + return chunks + +def confirm_chunk(original, result): + all_chunked = [] + for chunk in result: + all_chunked.extend(chunk) + print("length of all chunked:", len(all_chunked)) + assert original == all_chunked, "You didn't chunk right" + + +def chunk_test(divide_func): + import random + random.seed(7) + + number_exact = 0 + total_amount_off = 0 + + for test in range(1, 1001): + print("\n\ntest:", test) + number_chunks = random.randint(1, 20) + number_elements = random.randint(0, 100) + the_list = list(range(1, number_elements)) + result = divide_func(the_list, number_chunks) + + print("Dividing list of length {} into approximately {} chunks - got {} chunks".format( + len(the_list), number_chunks, len(result))) + print("result:", result) + + 
confirm_chunk(the_list, result) + + amount_off = abs(number_chunks - len(result)) + if amount_off == 0: + number_exact += 1 + else: + total_amount_off += amount_off + + + print("\n{} exact out of {} [Total amount off: {}]".format(number_exact, + test, + total_amount_off)) + + return number_exact, total_amount_off + + +def main(): + info = dict() + funcs = (("sam", sam_divide_into_chunks), ("zach", zach_divide_into_chunks)) + for name, func in funcs: + start = time.time() + number_exact, total_amount_off = chunk_test(func) + took = time.time() - start + info[name] = dict(number_exact=number_exact, + total_amount_off=total_amount_off, + took=took) + + print("info is:", info) + +if __name__ == '__main__': + main()
\ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index db617b23..09cbdc2c 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -111,13 +111,12 @@ def divide_into_chunks(the_list, number_chunks): if number_chunks > length: number_chunks = length - remainder = length - (int(length / number_chunks) * number_chunks) - if remainder > 0: - chunksize = int(length / number_chunks) + 1 + chunksize = int(length / number_chunks) + if length % number_chunks > 0: + chunksize += 1 while (chunksize * number_chunks) > length: + print("in while") number_chunks -= 1 - else: - chunksize = int(length / number_chunks) chunks = [] for counter in range(0, length, chunksize): |