diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 18 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 51 |
2 files changed, 35 insertions, 34 deletions
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 23cec6d0..40ebc546 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -465,7 +465,7 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" - print("something") + print("Session UUID: ", self.start_vars[session_uuid]) genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers.markers] @@ -473,13 +473,10 @@ class MarkerRegression(object): trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples) pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) - genotypes = np.array(trimmed_genotype_data).T + genotype_matrix = np.array(trimmed_genotype_data).T - #times = collections.OrderedDict() - #times['start'] = time.time() - with Bench("Calculate Kinship"): - kinship_matrix = lmm.calculateKinship(genotypes) + kinship_matrix = lmm.calculate_kinship(genotype_matrix) with Bench("Create LMM object"): lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) @@ -490,19 +487,12 @@ class MarkerRegression(object): with Bench("Doing gwas"): t_stats, p_values = lmm.GWAS(pheno_vector, - genotypes, + genotype_matrix, kinship_matrix, REML=True, refit=False) Bench().report() - - #previous_time = None - #for operation, this_time in times.iteritems(): - # if previous_time: - # print("{} run time: {}".format(operation, this_time-previous_time)) - # #print("time[{}]:{}\t{}".format(key, thistime, thistime-lasttime)) - # previous_time = this_time self.dataset.group.markers.add_pvalues(p_values) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 33f6573e..374452f0 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -53,34 +53,45 @@ def matrixMult(A,B): return linalg.fblas.dgemm(alpha=1.,a=AA,b=BB,trans_a=transA,trans_b=transB) -def calculateKinship(W): + +def calculate_kinship(genotype_matrix): """ - W is an n x m matrix encoding SNP minor alleles. + genotype_matrix is an n x m matrix encoding SNP minor alleles. + + This function takes a matrix oF SNPs, imputes missing values with the maf, + normalizes the resulting vectors and returns the RRM matrix. - This function takes a matrix oF SNPs, imputes missing values with the maf, - normalizes the resulting vectors and returns the RRM matrix. """ - n = W.shape[0] - m = W.shape[1] + n = genotype_matrix.shape[0] + m = genotype_matrix.shape[1] print("n is:", n) print("m is:", m) keep = [] - for i in range(m): - print("type of W[:,i]:", pf(W[:,i])) - foo = np.isnan(W[:,i]) - print("type of foo:", type(foo)) - mn = W[True - foo,i] - print("type of mn is:", type(mn)) - mn = mn.mean() - W[np.isnan(W[:,i]),i] = mn - vr = W[:,i].var() + for counter in range(m): + print("type of genotype_matrix[:,counter]:", pf(genotype_matrix[:,counter])) + #Checks if any values in column are not numbers + not_number = np.isnan(genotype_matrix[:,counter]) + print("type of not_number:", type(not_number)) + + #Gets vector of values for column (no values in vector if not all values in col are numbers) + marker_values = genotype_matrix[True - not_number, counter] + print("type of marker_values is:", type(marker_values)) + + #Gets mean of values in vector + values_mean = marker_values.mean() + + genotype_matrix[not_number,counter] = values_mean + vr = genotype_matrix[:,counter].var() if vr == 0: continue - keep.append(i) - W[:,i] = (W[:,i] - mn) / np.sqrt(vr) - W = W[:,keep] - K = np.dot(W,W.T) * 1.0/float(m) - return K + keep.append(counter) + genotype_matrix[:,counter] = (genotype_matrix[:,counter] - values_mean) / np.sqrt(vr) + + stage = round((counter/m)*45) + print("Percent complete: ", stage) + genotype_matrix = genotype_matrix[:,keep] + kinship_matrix = np.dot(genotype_matrix,genotype_matrix.T) * 1.0/float(m) + return kinship_matrix def GWAS(Y, X, K, Kva=[], Kve=[], X0=None, REML=True, refit=False): """ |