author	Zachary Sloan	2013-04-18 22:13:25 +0000
committer	Zachary Sloan	2013-04-18 22:13:25 +0000
commit	f36de42faa6565a04c344071a3a4befa60879509 (patch)
tree	9519de36b297755bf20d5fe0a3b998ae5450807a /wqflask
parent	a1c44dd7c11013da06dbd782dd0a0ebbde5cc995 (diff)
download	genenetwork2-f36de42faa6565a04c344071a3a4befa60879509.tar.gz
LMM code now reads in gzipped pickled plink snp iterator object
Diffstat (limited to 'wqflask')
-rwxr-xr-x  wqflask/wqflask/marker_regression/marker_regression.py  19
-rw-r--r--  wqflask/wqflask/my_pylmm/pyLMM/lmm.py                    35
2 files changed, 34 insertions(+), 20 deletions(-)
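For context, the changes below replace a plain pickle of the SNP list with a gzipped pickle of a small dict holding the SNP inputs and their count. A minimal, self-contained sketch of that round trip, using the same file naming and dict keys as the diff (the write_snps/read_snps helper names are illustrative only and not part of the codebase; numSNPs is taken from len() here rather than from the plink iterator):

import gzip
import cPickle as pickle

def write_snps(path, snp_inputs):
    # Bundle the SNP inputs and their count, then pickle into a gzip stream.
    data = dict(plink_input=snp_inputs, numSNPs=len(snp_inputs))
    with gzip.open(path, "wb") as fh:
        pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)

def read_snps(path):
    # Load the gzipped pickle back and unpack the two fields the LMM code expects.
    with gzip.open(path, "rb") as fh:
        data = pickle.load(fh)
    return data['plink_input'], data['numSNPs']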
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index c3e9a934..6ae1318e 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -98,7 +98,7 @@ class MarkerRegression(object):
        file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
        plink_input = input.plink(file_base, type='b')
-        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps")
+        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0],1))
@@ -142,13 +142,22 @@ class MarkerRegression(object):
def create_snp_iterator_file(group):
    plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
    plink_input = input.plink(plink_file_base, type='b')
-    inputs = list(plink_input)
-    snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps")
+    data = dict(plink_input = list(plink_input),
+                numSNPs = plink_input.numSNPs)
-    with open(snp_file_base, "wb") as fh:
-        pickle.dump(inputs, fh)
+    #input_dict = {}
+    #
+    #input_dict['plink_input'] = list(plink_input)
+    #input_dict['numSNPs'] = plink_input.numSNPs
+    #
+
+    snp_file_base = os.path.join(webqtlConfig.SNP_PATH, group + ".snps.gz")
+
+    with gzip.open(snp_file_base, "wb") as fh:
+        pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
if __name__ == '__main__':
    import cPickle as pickle
+    import gzip
    create_snp_iterator_file("HLC")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index ab87e4f0..8c0e0282 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -27,6 +27,7 @@ from scipy import optimize
from scipy import stats
import pdb
+import gzip
import cPickle as pickle
import simplejson as json
@@ -70,28 +71,32 @@ def run_human(pheno_vector,
print("input_file: ", plink_input_file)
- with open(plink_input_file, "rb") as input_file:
- plink_input = pickle.load(input_file)
+ with Bench("Opening and loading pickle file"):
+ with gzip.open(plink_input_file, "rb") as input_file:
+ data = pickle.load(input_file)
+
+ plink_input = data['plink_input']
#plink_input.getSNPIterator()
- #total_snps = plink_input.numSNPs
+ with Bench("Calculating numSNPs"):
+ total_snps = data['numSNPs']
with Bench("snp iterator loop"):
count = 0
- #with Bench("Create list of inputs"):
- # inputs = list(plink_input)
+ with Bench("Create list of inputs"):
+ inputs = list(plink_input)
+
+ with Bench("Divide into chunks"):
+ results = chunks.divide_into_chunks(inputs, 64)
+
+ result_store = []
+ identifier = uuid.uuid4()
+ for part, result in enumerate(results):
+ data_store = temp_data.TempData(identifier, part)
- #with Bench("Divide into chunks"):
- # results = chunks.divide_into_chunks(inputs, 64)
- #
- #result_store = []
- #identifier = uuid.uuid4()
- #for part, result in enumerate(results):
- # data_store = temp_data.TempData(identifier, part)
- #
- # data_store.store(data=pickle.dumps(result))
- # result_store.append(data_store)
+ data_store.store(data=pickle.dumps(result))
+ result_store.append(data_store)
for snp, this_id in plink_input:
with Bench("part before association"):