aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2015-02-28 12:31:46 +0300
committerPjotr Prins2015-02-28 12:31:46 +0300
commitd73ef7975657ed937d4a603ca32d6ca547292ec5 (patch)
tree96ab40bb75af7e3fcde74d83638b439b9db213a6
parent7fee695b79984912dd610a55e84db602780becac (diff)
downloadgenenetwork2-d73ef7975657ed937d4a603ca32d6ca547292ec5.tar.gz
Started adaptations for introducing multi-core version
-rwxr-xr-xwqflask/wqflask/my_pylmm/pyLMM/input.py2
-rwxr-xr-xwqflask/wqflask/my_pylmm/pyLMM/lmm.py46
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py55
3 files changed, 77 insertions, 26 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index dfe28a4e..fdf02e42 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -260,4 +260,4 @@ class plink:
L.append(D[self.indivs[i]])
P = P[L,:]
- return P \ No newline at end of file
+ return P
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d7d60e6e..54a40e1f 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -47,12 +47,13 @@ print("sys.path2:", sys.path)
has_gn2=True
+from utility.benchmark import Bench
+from utility import temp_data
+
try:
- from utility.benchmark import Bench
- from utility import temp_data
from wqflask.my_pylmm.pyLMM import chunks
except ImportError:
- print("WARNING: Standalone version\n")
+ print("WARNING: Standalone version missing the Genenetwork2 environment\n")
has_gn2=False
pass
@@ -253,20 +254,21 @@ def run_other(pheno_vector,
genotype_matrix,
restricted_max_likelihood=True,
refit=False,
- tempdata=None):
+ tempdata=None, # <---- can not be None
+ is_testing=False):
+
"""Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
restricted_max_likelihood -- whether to use restricted max likelihood; True or False
refit -- whether to refit the variance component for each marker
temp_data -- TempData object that stores the progress for each major step of the
- calculations ("calculate_kinship" and "GWAS" take the majority of time)
+ calculations ("calculate_kinship" and "GWAS" take the majority of time)
"""
-
print("In run_other")
with Bench("Calculate Kinship"):
- kinship_matrix = calculate_kinship(genotype_matrix, tempdata)
+ kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing)
print("kinship_matrix: ", pf(kinship_matrix))
print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -321,7 +323,7 @@ def matrixMult(A,B):
return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-def calculate_kinship(genotype_matrix, temp_data):
+def calculate_kinship(genotype_matrix, temp_data, is_testing=False):
"""
genotype_matrix is an n x m matrix encoding SNP minor alleles.
@@ -331,10 +333,12 @@ def calculate_kinship(genotype_matrix, temp_data):
"""
n = genotype_matrix.shape[0]
m = genotype_matrix.shape[1]
- print("n is:", n)
- print("m is:", m)
+ print("genotype matrix n is:", n)
+ print("genotype matrix m is:", m)
keep = []
for counter in range(m):
+ if is_testing and counter>8:
+ break
#print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
#Checks if any values in column are not numbers
not_number = np.isnan(genotype_matrix[:,counter])
@@ -478,7 +482,7 @@ class LMM:
is not done consistently.
"""
- def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True):
+ def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True,is_testing=False):
"""
The constructor takes a phenotype vector or array of size n.
@@ -487,7 +491,8 @@ class LMM:
X0 is an optional covariate matrix of size n x q, where there are q covariates.
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
"""
-
+
+ self.is_testing = True
if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
self.verbose = verbose
@@ -507,14 +512,14 @@ class LMM:
self.nonmissing = x
print("this K is:", pf(K))
-
+
if len(Kva) == 0 or len(Kve) == 0:
if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
begin = time.time()
Kva,Kve = linalg.eigh(K)
end = time.time()
if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
-
+
self.K = K
self.Kva = Kva
self.Kve = Kve
@@ -745,7 +750,7 @@ def gn2_main():
genotype_matrix = np.array(params['genotype_matrix']),
restricted_max_likelihood = params['restricted_max_likelihood'],
refit = params['refit'],
- tempdata = tempdata)
+ tempdata = tempdata, is_testing=False)
results_key = "pylmm:results:" + params['temp_uuid']
@@ -755,21 +760,12 @@ def gn2_main():
#Pushing json_results into a list where it is the only item because blpop needs a list
Redis.rpush(results_key, json_results)
Redis.expire(results_key, 60*60)
-
-# This is the main version used without Genenetwork2's environment
-def cli_main():
- ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
- covariate_matrix = np.array(params['covariate_matrix']),
- plink_input_file = params['input_file_name'],
- kinship_matrix = np.array(params['kinship_matrix']),
- refit = params['refit'],
- tempdata = tempdata)
if __name__ == '__main__':
if has_gn2:
gn2_main()
else:
- cli_main()
+ print("Run from pylmmGWAS.py instead")
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
new file mode 100644
index 00000000..abb72081
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
@@ -0,0 +1,55 @@
+import sys
+import time
+import numpy as np
+from numpy.distutils.system_info import get_info;
+from scipy import linalg
+from scipy import optimize
+from scipy import stats
+
+useNumpy = None
+hasBLAS = None
+
+def matrix_initialize(useBLAS):
+ global useNumpy # module based variable
+ if useBLAS and useNumpy == None:
+ print get_info('blas_opt')
+ try:
+ linalg.fblas
+ sys.stderr.write("INFO: using linalg.fblas\n")
+ useNumpy = False
+ hasBLAS = True
+ except AttributeError:
+ sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n")
+ useNumpy = True
+ else:
+ sys.stderr.write("INFO: using numpy.dot\n")
+ useNumpy=True
+
+def matrixMult(A,B):
+ global useNumpy # module based variable
+
+ if useNumpy:
+ return np.dot(A,B)
+
+ # If the matrices are in Fortran order then the computations will be faster
+ # when using dgemm. Otherwise, the function will copy the matrix and that takes time.
+ if not A.flags['F_CONTIGUOUS']:
+ AA = A.T
+ transA = True
+ else:
+ AA = A
+ transA = False
+
+ if not B.flags['F_CONTIGUOUS']:
+ BB = B.T
+ transB = True
+ else:
+ BB = B
+ transB = False
+
+ return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
+
+def matrixMultT(M):
+ # res = np.dot(W,W.T)
+ # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False)
+ return matrixMult(M,M.T)