diff options
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/webqtlConfig.py | 3 | ||||
-rwxr-xr-x | wqflask/base/webqtlConfigLocal.py | 2 | ||||
-rwxr-xr-x | wqflask/wqflask/marker_regression/marker_regression.py | 42 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/data/prep_data.py | 74 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/example.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/my_pylmm/pyLMM/lmm.py | 21 | ||||
-rwxr-xr-x | wqflask/wqflask/show_trait/show_trait.py | 2 | ||||
-rw-r--r-- | wqflask/wqflask/views.py | 6 |
8 files changed, 103 insertions, 49 deletions
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py index d5f09b64..d05fa6e0 100755 --- a/wqflask/base/webqtlConfig.py +++ b/wqflask/base/webqtlConfig.py @@ -55,8 +55,9 @@ HTMLPATH = GNROOT + 'web/' IMGDIR = HTMLPATH +'image/' IMAGESPATH = HTMLPATH + 'images/' UPLOADPATH = IMAGESPATH + 'upload/' -TMPDIR = '/tmp/' +TMPDIR = HTMLPATH + 'tmp/' GENODIR = HTMLPATH + 'genotypes/' +NEWGENODIR = HTMLPATH + 'new_genotypes/' GENO_ARCHIVE_DIR = GENODIR + 'archive/' TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/' CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/' diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py index 84686234..8e3e0bbe 100755 --- a/wqflask/base/webqtlConfigLocal.py +++ b/wqflask/base/webqtlConfigLocal.py @@ -12,7 +12,7 @@ DB_UPDNAME = 'db_webqtl_zas1024' DB_UPDUSER = 'webqtl' DB_UPDPASSWD = 'webqtl' -GNROOT = '/home/zas1024/gn/' +GNROOT = '/home/zas1024/gene/' ROOT_URL = 'http://alexandria.uthsc.edu:91/' PythonPath = '/usr/bin/python' PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/' diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index 7cdc350f..92270eb2 100755 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -15,6 +15,8 @@ import os import httplib import urllib +import numpy as np + from htmlgen import HTMLgen2 as HT from utility import Plot, Bunch from wqflask.interval_analyst import GeneUtil @@ -25,6 +27,8 @@ from utility import webqtlUtil, helper_functions from base import webqtlConfig from dbFunction import webqtlDatabaseFunction from base.GeneralObject import GeneralObject +from wqflask.my_pylmm.data import prep_data +from wqflask.my_pylmm.pyLMM import lmm import reaper import cPickle @@ -63,22 +67,24 @@ class MarkerRegression(object): self.samples = [] # Want only ones with values self.vals = [] - self.variances = [] + #self.variances = [] assert start_vars['display_all_lrs'] in ('True', 'False') self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False for sample in self.dataset.group.samplelist: value = start_vars['value:' + sample] - variance = start_vars['variance:' + sample] - if variance.strip().lower() == 'x': - variance = 0 - else: - variance = float(variance) - if value.strip().lower() != 'x': - self.samples.append(str(sample)) - self.vals.append(float(value)) - self.variances.append(variance) + #variance = start_vars['variance:' + sample] + #if variance.strip().lower() == 'x': + # variance = 0 + #else: + # variance = float(variance) + #if value.strip().lower() != 'x': + self.samples.append(str(sample)) + self.vals.append(value) + #self.variances.append(variance) + + #self.initializeParameters(start_vars) @@ -447,6 +453,22 @@ class MarkerRegression(object): def gen_data(self): """Todo: Fill this in here""" + prep_data.PrepData(self.vals, self.dataset.group.name) + + pheno_vector = np.array([float(val) for val in self.vals if val!="x"]) + genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR, + self.dataset.group.name + '.snps.new')) + + print("genotypes is:", pf(genotypes)) + + kinship_matrix = lmm.calculateKinship(genotypes) + print("kinship_matrix is:", pf(kinship_matrix)) + print("pheno_vector is:", pf(pheno_vector)) + + lmm_ob = lmm.LMM(pheno_vector, kinship_matrix) + lmm_ob.fit() + + #calculate QTL for each trait self.qtl_results = self.genotype.regression(strains = self.samples, trait = self.vals) diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py index b7a133c2..ef42a297 100644 --- a/wqflask/wqflask/my_pylmm/data/prep_data.py +++ b/wqflask/wqflask/my_pylmm/data/prep_data.py @@ -1,27 +1,29 @@ #!/usr/bin/python from __future__ import absolute_import, print_function, division +import os + import numpy - +from base import webqtlConfig + + class PrepData(object): - def __init__(self, exprs_file, snps_file): - self.exprs_file = exprs_file - self.snps_file = snps_file - self.empty_columns = set() + def __init__(self, pheno_vector, group_name): + self.pheno_vector = pheno_vector + self.group_name = group_name + self.no_val_samples = set() #self.identify_no_genotype_samples() self.identify_empty_samples() self.trim_files() def identify_empty_samples(self): - with open(self.exprs_file) as fh: - for line in fh: - for pos, item in enumerate(line.split()): - if item == "NA": - self.empty_columns.add(pos) - #print("self.empty_columns:", self.empty_columns) - nums = set(range(0, 176)) - print("not included:", nums-self.empty_columns) + for sample_count, val in enumerate(self.pheno_vector): + if val == "x": + self.no_val_samples.add(sample_count) + print("self.no_val_samples:", self.no_val_samples) + #nums = set(range(0, 176)) + #print("not included:", nums-self.empty_columns) #def identify_no_genotype_samples(self): # #for this_file in (self.exprs_file, self.snps_file): @@ -43,22 +45,36 @@ class PrepData(object): # print(no_geno_samples) def trim_files(self): - for this_file in (self.exprs_file, self.snps_file): - input_file = open(this_file) - this_file_name_output = this_file + ".new" - with open(this_file_name_output, "w") as output: - for line in input_file: - data_wanted = [] - for pos, item in enumerate(line.split()): - if pos in self.empty_columns: - continue - else: - data_wanted.append("%2s" % (item)) - #print("data_wanted is", data_wanted) - output.write(" ".join(data_wanted) + "\n") - print("Done writing file:", this_file_name_output) + input_file = open(os.path.join(webqtlConfig.NEWGENODIR, self.group_name+'.snps')) + output_file = os.path.join(webqtlConfig.TMPDIR, self.group_name + '.snps.new') + with open(output_file, "w") as output_file: + for line in input_file: + data_to_write = [] + for pos, item in enumerate(line.split()): + if pos in self.no_val_samples: + continue + else: + data_to_write.append("%s" % (item)) + output_file.write(" ".join(data_to_write) + "\n") + + print("Done writing:", output_file) + + #for this_file in (self.exprs_file, self.genotype_file): + # input_file = open(this_file) + # this_file_name_output = this_file + ".new" + # with open(this_file_name_output, "w") as output_file: + # for line in input_file: + # data_wanted = [] + # for pos, item in enumerate(line.split()): + # if pos in self.empty_columns: + # continue + # else: + # data_wanted.append("%2s" % (item)) + # #print("data_wanted is", data_wanted) + # output_file.write(" ".join(data_wanted) + "\n") + # print("Done writing file:", this_file_name_output) if __name__=="__main__": exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1""" - snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" - PrepData(exprs_file, snps_file)
\ No newline at end of file + genotype_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000""" + PrepData(pheno_vector, genotype_file)
\ No newline at end of file diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py index 0348d67b..8b30debd 100644 --- a/wqflask/wqflask/my_pylmm/example.py +++ b/wqflask/wqflask/my_pylmm/example.py @@ -20,7 +20,7 @@ print("exprs is:", pf(Y.shape)) # These three lines will load all SNPs (from npdump or from txt) and # calculate the kinship -snps = np.genfromtxt('data/mdp.snps.1000.new').T +snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T print("snps is:", pf(snps.shape)) #snps = snps[~np.isnan(snps).all(axis=1)] #print ("snps is now:", pf(snps)) diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py index 7fe599c4..1ae663d4 100644 --- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py +++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py @@ -142,20 +142,35 @@ class LMM: is not done consistently. """ - def __init__(self,Y,K,Kva=[],Kve=[],X0=None): - + def __init__(self, Y, K, Kva=None, Kve=None, X0=None): """ The constructor takes a phenotype vector or array of size n. It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached. If they are not provided, the constructor will calculate them. X0 is an optional covariate matrix of size n x q, where there are q covariates. When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect. + """ - if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1) + if Kva is None: + Kva = [] + if Kve is None: + Kve = [] + + + if X0 == None: + X0 = np.ones(len(Y)).reshape(len(Y),1) + print("Y is:", pf(Y)) + + for key, value in locals().iteritems(): + print(" %s - %s" % (key, type(value))) + x = Y != -9 + print("x is:", pf(x)) if not x.sum() == len(Y): + print("x.sum is:", pf(x.sum())) + print("len(Y) is:", pf(len(Y))) sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum())) Y = Y[x] K = K[x,:][:,x] diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 603c40f5..33ea6e86 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -130,7 +130,7 @@ class ShowTrait(object): js_data = dict(sample_group_types = self.sample_group_types, sample_lists = sample_lists, attribute_names = self.sample_groups[0].attributes) - print("js_data:", pf(js_data)) + #print("js_data:", pf(js_data)) self.js_data = js_data diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 472548f0..81777742 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -136,15 +136,15 @@ def show_trait_page(): #fd = webqtlFormData.webqtlFormData(request.args) #print("stp y1:", pf(vars(fd))) template_vars = show_trait.ShowTrait(request.args) - print("js_data before dump:", template_vars.js_data) + #print("js_data before dump:", template_vars.js_data) template_vars.js_data = json.dumps(template_vars.js_data, default=json_default_handler, indent=" ") # Sorting the keys messes up the ordered dictionary, so don't do that #sort_keys=True) - print("js_data after dump:", template_vars.js_data) - print("show_trait template_vars:", pf(template_vars.__dict__)) + #print("js_data after dump:", template_vars.js_data) + #print("show_trait template_vars:", pf(template_vars.__dict__)) return render_template("show_trait.html", **template_vars.__dict__) @app.route("/marker_regression", methods=('POST',)) |