diff options
author | Zachary Sloan | 2013-04-18 22:13:25 +0000 |
---|---|---|
committer | Zachary Sloan | 2013-04-18 22:13:25 +0000 |
commit | f36de42faa6565a04c344071a3a4befa60879509 (patch) | |
tree | 9519de36b297755bf20d5fe0a3b998ae5450807a | |
parent | a1c44dd7c11013da06dbd782dd0a0ebbde5cc995 (diff) | |
download | genenetwork2-f36de42faa6565a04c344071a3a4befa60879509.tar.gz |
LMM code now reads in a gzipped, pickled PLINK SNP iterator object.
-rw-r--r-- | misc/notes.txt | 4 |
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 19 |
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 35 |
3 files changed, 38 insertions, 20 deletions
diff --git a/misc/notes.txt b/misc/notes.txt index 10a5729a..a48ee5bf 100644 --- a/misc/notes.txt +++ b/misc/notes.txt @@ -161,6 +161,10 @@ du -hms * | sort -n : Gives size used by different directories =========================================== +rm -rfv + +=========================================== + cp -a (archive; copies recursively and doesn't follow symbol links) -i (interactive, prompts before overwrite) -v (verbose) diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c3e9a934..6ae1318e 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -98,7 +98,7 @@ class MarkerRegression(object): file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) plink_input = input.plink(file_base, type='b') - input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps") + input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz") pheno_vector = pheno_vector.reshape((len(pheno_vector), 1)) covariate_matrix = np.ones((pheno_vector.shape[0],1)) @@ -142,13 +142,22 @@ class MarkerRegression(object): def create_snp_iterator_file(group): plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group) plink_input = input.plink(plink_file_base, type='b') - inputs = list(plink_input) - snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps") + data = dict(plink_input = list(plink_input), + numSNPs = plink_input.numSNPs) - with open(snp_file_base, "wb") as fh: - pickle.dump(inputs, fh) + #input_dict = {} + # + #input_dict['plink_input'] = list(plink_input) + #input_dict['numSNPs'] = plink_input.numSNPs + # + + snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz") + + with gzip.open(snp_file_base, "wb") as fh: + pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL) if __name__ == '__main__': import cPickle as pickle + import gzip 
create_snp_iterator_file("HLC") diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index ab87e4f0..8c0e0282 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -27,6 +27,7 @@ from scipy import optimize from scipy import stats import pdb +import gzip import cPickle as pickle import simplejson as json @@ -70,28 +71,32 @@ def run_human(pheno_vector, print("input_file: ", plink_input_file) - with open(plink_input_file, "rb") as input_file: - plink_input = pickle.load(input_file) + with Bench("Opening and loading pickle file"): + with gzip.open(plink_input_file, "rb") as input_file: + data = pickle.load(input_file) + + plink_input = data['plink_input'] #plink_input.getSNPIterator() - #total_snps = plink_input.numSNPs + with Bench("Calculating numSNPs"): + total_snps = data['numSNPs'] with Bench("snp iterator loop"): count = 0 - #with Bench("Create list of inputs"): - # inputs = list(plink_input) + with Bench("Create list of inputs"): + inputs = list(plink_input) + + with Bench("Divide into chunks"): + results = chunks.divide_into_chunks(inputs, 64) + + result_store = [] + identifier = uuid.uuid4() + for part, result in enumerate(results): + data_store = temp_data.TempData(identifier, part) - #with Bench("Divide into chunks"): - # results = chunks.divide_into_chunks(inputs, 64) - # - #result_store = [] - #identifier = uuid.uuid4() - #for part, result in enumerate(results): - # data_store = temp_data.TempData(identifier, part) - # - # data_store.store(data=pickle.dumps(result)) - # result_store.append(data_store) + data_store.store(data=pickle.dumps(result)) + result_store.append(data_store) for snp, this_id in plink_input: with Bench("part before association"): |