aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorZachary Sloan2013-03-06 21:47:55 +0000
committerZachary Sloan2013-03-06 21:47:55 +0000
commit4bce386b619c55e66d6cdd81b11b54baadc5ae87 (patch)
treeb0e8c4213a69ed3e719d91b60e3362e6819e07cc /wqflask
parent6ca1d86ff86b887fd83a8b4c3b9ad6b5dfd26182 (diff)
downloadgenenetwork2-4bce386b619c55e66d6cdd81b11b54baadc5ae87.tar.gz
Stored percent of calculate_kinship function completion in
variable
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py18
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py51
2 files changed, 35 insertions, 34 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 23cec6d0..40ebc546 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -465,7 +465,7 @@ class MarkerRegression(object):
def gen_data(self):
"""Todo: Fill this in here"""
- print("something")
+ print("Session UUID: ", self.start_vars[session_uuid])
genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers]
@@ -473,13 +473,10 @@ class MarkerRegression(object):
trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
- genotypes = np.array(trimmed_genotype_data).T
+ genotype_matrix = np.array(trimmed_genotype_data).T
- #times = collections.OrderedDict()
- #times['start'] = time.time()
-
with Bench("Calculate Kinship"):
- kinship_matrix = lmm.calculateKinship(genotypes)
+ kinship_matrix = lmm.calculate_kinship(genotype_matrix)
with Bench("Create LMM object"):
lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
@@ -490,19 +487,12 @@ class MarkerRegression(object):
with Bench("Doing gwas"):
t_stats, p_values = lmm.GWAS(pheno_vector,
- genotypes,
+ genotype_matrix,
kinship_matrix,
REML=True,
refit=False)
Bench().report()
-
- #previous_time = None
- #for operation, this_time in times.iteritems():
- # if previous_time:
- # print("{} run time: {}".format(operation, this_time-previous_time))
- # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime))
- # previous_time = this_time
self.dataset.group.markers.add_pvalues(p_values)
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 33f6573e..374452f0 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -53,34 +53,45 @@ def matrixMult(A,B):
return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB)
-def calculateKinship(W):
+
+def calculate_kinship(genotype_matrix):
"""
- W is an n x m matrix encoding SNP minor alleles.
+ genotype_matrix is an n x m matrix encoding SNP minor alleles.
+
+ This function takes a matrix oF SNPs, imputes missing values with the maf,
+ normalizes the resulting vectors and returns the RRM matrix.
- This function takes a matrix oF SNPs, imputes missing values with the maf,
- normalizes the resulting vectors and returns the RRM matrix.
"""
- n = W.shape[0]
- m = W.shape[1]
+ n = genotype_matrix.shape[0]
+ m = genotype_matrix.shape[1]
print("n is:", n)
print("m is:", m)
keep = []
- for i in range(m):
- print("type of W[:,i]:", pf(W[:,i]))
- foo = np.isnan(W[:,i])
- print("type of foo:", type(foo))
- mn = W[True - foo,i]
- print("type of mn is:", type(mn))
- mn = mn.mean()
- W[np.isnan(W[:,i]),i] = mn
- vr = W[:,i].var()
+ for counter in range(m):
+ print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter]))
+ #Checks if any values in column are not numbers
+ not_number = np.isnan(genotype_matrix[:,counter])
+ print("type of not_number:", type(not_number))
+
+ #Gets vector of values for column (no values in vector if not all values in col are numbers)
+ marker_values = genotype_matrix[True - not_number, counter]
+ print("type of marker_values is:", type(marker_values))
+
+ #Gets mean of values in vector
+ values_mean = marker_values.mean()
+
+ genotype_matrix[not_number,counter] = values_mean
+ vr = genotype_matrix[:,counter].var()
if vr == 0:
continue
- keep.append(i)
- W[:,i] = (W[:,i] - mn) / np.sqrt(vr)
- W = W[:,keep]
- K = np.dot(W,W.T) * 1.0/float(m)
- return K
+ keep.append(counter)
+ genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr)
+
+ stage = round((counter/m)*45)
+ print("Percent complete: ", stage)
+ genotype_matrix = genotype_matrix[:,keep]
+ kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m)
+ return kinship_matrix
def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False):
"""