diff options
author | Zachary Sloan | 2013-04-16 23:13:14 +0000 |
---|---|---|
committer | Zachary Sloan | 2013-04-16 23:13:14 +0000 |
commit | db7d051ad6e6fbfd986c552ed7e075cce58a04ab (patch) | |
tree | ee82b9d18c1669a925c990b7817f814b82b4c17b /wqflask | |
parent | aca3001513959116fdbe93eadaa4c2041a49d093 (diff) | |
download | genenetwork2-db7d051ad6e6fbfd986c552ed7e075cce58a04ab.tar.gz |
Wrote functions that divide a list into a specified number of chunks
(to be used on the list of SNPs for the LMM code).
Diffstat (limited to 'wqflask')
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 59 |
1 file changed, 58 insertions, 1 deletion
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index d9189f30..59a89ce9 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -63,8 +63,19 @@ def run_human(pheno_vector, plink_input.getSNPIterator() total_snps = plink_input.numSNPs + number_chunks = 63 + with Bench("snp iterator loop"): count = 0 + + + with Bench("Create list of inputs"): + + inputs = list(plink_input) + + with Bench("Divide into chunks"): + divide_into_chunks(inputs, 63) + for snp, this_id in plink_input: with Bench("part before association"): if count > 10000: @@ -92,6 +103,48 @@ def run_human(pheno_vector, return p_values, t_stats +def divide_into_chunks(the_list, number_chunks): + + length = len(the_list) + print("length the_list:", length) + + remainder = length % number_chunks + + print("remainder is:", remainder) + + #if remainder: + # number_chunks -= 1 + + chunksize = int(length / number_chunks) + print("chunksize:", chunksize) + + #remainder = length % number_chunks + #assert (chunksize * number_chunks) + remainder == length, "Best check yourself!" + + chunks = [] + for counter in range(0, length-1, chunksize): + print("counter is:", counter) + chunks.append(the_list[counter:counter+chunksize]) + + # Deal with remainder + #if remainder: + # chunks.append(the_list[(counter + chunksize):]) + + # Sanity check + all_chunked = [] + for chunk in chunks: + all_chunked.extend(chunk) + print("length of all chunked:", len(all_chunked)) + assert the_list == all_chunked, "You didn't chunk right" + + return chunks + + +def chunk_test(): + the_list = list(range(1, 53)) + results = divide_into_chunks(the_list, 5) + print("results are:", results) + def human_association(snp, n, keep, @@ -593,4 +646,8 @@ class LMM: pl.plot(self.H,p,color) pl.xlabel("Heritability") pl.ylabel("Probability of data") - pl.title(title)
\ No newline at end of file + pl.title(title) + + +if __name__ == '__main__': + chunk_test()
\ No newline at end of file |