about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPjotr Prins2015-02-28 12:31:46 +0300
committerPjotr Prins2015-02-28 12:31:46 +0300
commitd73ef7975657ed937d4a603ca32d6ca547292ec5 (patch)
tree96ab40bb75af7e3fcde74d83638b439b9db213a6
parent7fee695b79984912dd610a55e84db602780becac (diff)
downloadgenenetwork2-d73ef7975657ed937d4a603ca32d6ca547292ec5.tar.gz
Started adaptations for introducing multi-core version
-rwxr-xr-xwqflask/wqflask/my_pylmm/pyLMM/input.py2
-rwxr-xr-xwqflask/wqflask/my_pylmm/pyLMM/lmm.py46
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py55
3 files changed, 77 insertions, 26 deletions
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/input.py b/wqflask/wqflask/my_pylmm/pyLMM/input.py
index dfe28a4e..fdf02e42 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/input.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/input.py
@@ -260,4 +260,4 @@ class plink:
             L.append(D[self.indivs[i]])
         P = P[L,:]
      
-        return P
\ No newline at end of file
+        return P
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index d7d60e6e..54a40e1f 100755
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -47,12 +47,13 @@ print("sys.path2:", sys.path)
 
 has_gn2=True
 
+from utility.benchmark import Bench
+from utility import temp_data
+
 try:
-    from utility.benchmark import Bench
-    from utility import temp_data
     from wqflask.my_pylmm.pyLMM import chunks
 except ImportError:
-    print("WARNING: Standalone version\n")
+    print("WARNING: Standalone version missing the Genenetwork2 environment\n")
     has_gn2=False
     pass
 
@@ -253,20 +254,21 @@ def run_other(pheno_vector,
         genotype_matrix,
         restricted_max_likelihood=True,
         refit=False,
-        tempdata=None):
+        tempdata=None,      # <---- can not be None
+        is_testing=False):
+    
     """Takes the phenotype vector and genotype matrix and returns a set of p-values and t-statistics
     
     restricted_max_likelihood -- whether to use restricted max likelihood; True or False
     refit -- whether to refit the variance component for each marker
     temp_data -- TempData object that stores the progress for each major step of the
-    calculations ("calculate_kinship" and "GWAS" take the majority of time)
+    calculations ("calculate_kinship" and "GWAS" take the majority of time) 
     
     """
     
-    
     print("In run_other")
     with Bench("Calculate Kinship"):
-        kinship_matrix = calculate_kinship(genotype_matrix, tempdata)
+        kinship_matrix = calculate_kinship(genotype_matrix, tempdata, is_testing)
     
     print("kinship_matrix: ", pf(kinship_matrix))
     print("kinship_matrix.shape: ", pf(kinship_matrix.shape))
@@ -321,7 +323,7 @@ def matrixMult(A,B):
     return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
 
 
-def calculate_kinship(genotype_matrix, temp_data):
+def calculate_kinship(genotype_matrix, temp_data, is_testing=False):
     """
     genotype_matrix is an n x m matrix encoding SNP minor alleles.
     
@@ -331,10 +333,12 @@ def calculate_kinship(genotype_matrix, temp_data):
     """
     n = genotype_matrix.shape[0]
     m = genotype_matrix.shape[1]
-    print("n is:", n)
-    print("m is:", m)
+    print("genotype matrix n is:", n)
+    print("genotype matrix m is:", m)
     keep = []
     for counter in range(m):
+        if is_testing and counter>8:
+            break
         #print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
         #Checks if any values in column are not numbers
         not_number = np.isnan(genotype_matrix[:,counter])
@@ -478,7 +482,7 @@ class LMM:
           is not done consistently.
  
     """
-    def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True):
+    def __init__(self,Y,K,Kva=[],Kve=[],X0=None,verbose=True,is_testing=False):
  
        """
        The constructor takes a phenotype vector or array of size n.
@@ -487,7 +491,8 @@ class LMM:
        X0 is an optional covariate matrix of size n x q, where there are q covariates.
        When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
        """
- 
+
+       self.is_testing = True
        if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
        self.verbose = verbose
  
@@ -507,14 +512,14 @@ class LMM:
        self.nonmissing = x
  
        print("this K is:", pf(K))
- 
+       
        if len(Kva) == 0 or len(Kve) == 0:
           if self.verbose: sys.stderr.write("Obtaining eigendecomposition for %dx%d matrix\n" % (K.shape[0],K.shape[1]) )
           begin = time.time()
           Kva,Kve = linalg.eigh(K)
           end = time.time()
           if self.verbose: sys.stderr.write("Total time: %0.3f\n" % (end - begin))
-       
+
        self.K = K
        self.Kva = Kva
        self.Kve = Kve
@@ -745,7 +750,7 @@ def gn2_main():
                   genotype_matrix = np.array(params['genotype_matrix']),
                   restricted_max_likelihood = params['restricted_max_likelihood'],
                   refit = params['refit'],
-                  tempdata = tempdata)
+                  tempdata = tempdata, is_testing=False)
         
     results_key = "pylmm:results:" + params['temp_uuid']
 
@@ -755,21 +760,12 @@ def gn2_main():
     #Pushing json_results into a list where it is the only item because blpop needs a list
     Redis.rpush(results_key, json_results)
     Redis.expire(results_key, 60*60)
-
-# This is the main version used without Genenetwork2's environment
-def cli_main():
-    ps, ts = run_human(pheno_vector = np.array(params['pheno_vector']),
-                       covariate_matrix = np.array(params['covariate_matrix']),
-                       plink_input_file = params['input_file_name'],
-                       kinship_matrix = np.array(params['kinship_matrix']),
-                       refit = params['refit'],
-                       tempdata = tempdata)
         
 if __name__ == '__main__':
     if has_gn2:
         gn2_main()
     else:
-        cli_main()
+        print("Run from pylmmGWAS.py instead")
 
 
 
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
new file mode 100644
index 00000000..abb72081
--- /dev/null
+++ b/wqflask/wqflask/my_pylmm/pyLMM/optmatrix.py
@@ -0,0 +1,55 @@
+import sys
+import time
+import numpy as np
+from numpy.distutils.system_info import get_info;
+from scipy import linalg
+from scipy import optimize
+from scipy import stats
+
+useNumpy = None
+hasBLAS = None
+
+def matrix_initialize(useBLAS):
+    global useNumpy  # module based variable
+    if useBLAS and useNumpy == None:
+        print get_info('blas_opt')
+        try:
+            linalg.fblas
+            sys.stderr.write("INFO: using linalg.fblas\n")
+            useNumpy = False
+            hasBLAS  = True
+        except AttributeError:
+            sys.stderr.write("WARNING: linalg.fblas not found, using numpy.dot instead!\n")
+            useNumpy = True
+    else:
+        sys.stderr.write("INFO: using numpy.dot\n")
+        useNumpy=True
+        
+def matrixMult(A,B):
+   global useNumpy  # module based variable
+
+   if useNumpy:
+       return np.dot(A,B)
+
+   # If the matrices are in Fortran order then the computations will be faster
+   # when using dgemm.  Otherwise, the function will copy the matrix and that takes time.
+   if not A.flags['F_CONTIGUOUS']:
+      AA = A.T
+      transA = True
+   else:
+      AA = A
+      transA = False
+
+   if not B.flags['F_CONTIGUOUS']:
+      BB = B.T
+      transB = True
+   else:
+      BB = B
+      transB = False
+
+   return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
+
+def matrixMultT(M):
+    # res = np.dot(W,W.T)
+    # return linalg.fblas.dgemm(alpha=1.,a=M.T,b=M.T,trans_a=True,trans_b=False)
+    return matrixMult(M,M.T)