diff options
author | Zachary Sloan | 2013-04-18 21:04:09 +0000 |
---|---|---|
committer | Zachary Sloan | 2013-04-18 21:04:09 +0000 |
commit | a1c44dd7c11013da06dbd782dd0a0ebbde5cc995 (patch) | |
tree | 58c83ba12167c2a6f02751b3c87f054ff20e2421 | |
parent | ea53a2f20d13130f3555967d57282b3c9562da5a (diff) | |
download | genenetwork2-a1c44dd7c11013da06dbd782dd0a0ebbde5cc995.tar.gz |
input file is now loaded by pickle
-rw-r--r-- | misc/notes.txt | 7 | ||||
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 6 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 37 |
3 files changed, 31 insertions, 19 deletions
diff --git a/misc/notes.txt b/misc/notes.txt
index e40dad8f..10a5729a 100644
--- a/misc/notes.txt
+++ b/misc/notes.txt
@@ -154,6 +154,13 @@ ll h*: Finds items in the directory starting with h
 
 ===========================================
 
+du -hms * | sort -n : Gives size used by different directories
+-h: human readable
+-m: in megabytes (default)
+-s: summarize
+
+===========================================
+
 cp -a (archive; copies recursively and doesn't follow symbol links)
 -i (interactive, prompts before overwrite)
 -v (verbose)
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 2ede5660..c3e9a934 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -98,6 +98,7 @@ class MarkerRegression(object):
         file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
 
         plink_input = input.plink(file_base, type='b')
+        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps")
 
         pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
         covariate_matrix = np.ones((pheno_vector.shape[0],1))
@@ -107,7 +108,7 @@ class MarkerRegression(object):
         p_values, t_stats = lmm.run_human(
                 pheno_vector,
                 covariate_matrix,
-                plink_input,
+                input_file_name,
                 kinship_matrix,
                 loading_progress=tempdata
             )
@@ -145,9 +146,8 @@ def create_snp_iterator_file(group):
 
     snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps")
 
-    with open(snp_file_base, "w") as fh:
+    with open(snp_file_base, "wb") as fh:
         pickle.dump(inputs, fh)
 
-
 if __name__ == '__main__':
     import cPickle as pickle
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 918f8200..ab87e4f0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -27,7 +27,7 @@ from scipy import optimize
 from scipy import stats
 import pdb
 
-#import cPickle as pickle
+import cPickle as pickle
 import simplejson as json
 
 from pprint import pformat as pf
@@ -41,7 +41,7 @@ from wqflask.my_pylmm.pyLMM import chunks
 
 def run_human(pheno_vector,
             covariate_matrix,
-            plink_input,
+            plink_input_file,
             kinship_matrix,
             refit=False,
             loading_progress=None):
@@ -68,25 +68,30 @@ def run_human(pheno_vector,
     p_values = []
     t_stats = []
 
-    plink_input.getSNPIterator()
-    total_snps = plink_input.numSNPs
+    print("input_file: ", plink_input_file)
+
+    with open(plink_input_file, "rb") as input_file:
+        plink_input = pickle.load(input_file)
+
+    #plink_input.getSNPIterator()
+    #total_snps = plink_input.numSNPs
 
     with Bench("snp iterator loop"):
         count = 0
 
-        with Bench("Create list of inputs"):
-            inputs = list(plink_input)
-
-        with Bench("Divide into chunks"):
-            results = chunks.divide_into_chunks(inputs, 64)
-
-        result_store = []
-        identifier = uuid.uuid4()
-        for part, result in enumerate(results):
-            data_store = temp_data.TempData(identifier, part)
+        #with Bench("Create list of inputs"):
+        #    inputs = list(plink_input)
 
-            data_store.store(data=json.dumps(result.tolist()))
-            result_store.append(data_store)
+        #with Bench("Divide into chunks"):
+        #    results = chunks.divide_into_chunks(inputs, 64)
+        #
+        #result_store = []
+        #identifier = uuid.uuid4()
+        #for part, result in enumerate(results):
+        #    data_store = temp_data.TempData(identifier, part)
+        #
+        #    data_store.store(data=pickle.dumps(result))
+        #    result_store.append(data_store)
 
         for snp, this_id in plink_input:
             with Bench("part before association"):