aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZachary Sloan2013-02-07 17:58:34 -0600
committerZachary Sloan2013-02-07 17:58:34 -0600
commit9b0264bf13e994298de95a4e08198336b6c97a38 (patch)
tree9d4e4773699555a93e9509fa484d6264ff4c6bfd
parentd75fc63891f617fbe8b2b030fdce80b1628c6a41 (diff)
downloadgenenetwork2-9b0264bf13e994298de95a4e08198336b6c97a38.tar.gz
Added code to marker_regression.py that creates the numpy arrays to
pass to Nick's code and changed the prep_data.py code to operate on a list of phenotype values instead of a textfile with the values delimited
-rwxr-xr-xwqflask/base/webqtlConfig.py3
-rwxr-xr-xwqflask/base/webqtlConfigLocal.py2
-rwxr-xr-xwqflask/wqflask/marker_regression/marker_regression.py42
-rw-r--r--wqflask/wqflask/my_pylmm/data/prep_data.py74
-rw-r--r--wqflask/wqflask/my_pylmm/example.py2
-rw-r--r--wqflask/wqflask/my_pylmm/pyLMM/lmm.py21
-rwxr-xr-xwqflask/wqflask/show_trait/show_trait.py2
-rw-r--r--wqflask/wqflask/views.py6
8 files changed, 103 insertions, 49 deletions
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index d5f09b64..d05fa6e0 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -55,8 +55,9 @@ HTMLPATH = GNROOT + 'web/'
IMGDIR = HTMLPATH +'image/'
IMAGESPATH = HTMLPATH + 'images/'
UPLOADPATH = IMAGESPATH + 'upload/'
-TMPDIR = '/tmp/'
+TMPDIR = HTMLPATH + 'tmp/'
GENODIR = HTMLPATH + 'genotypes/'
+NEWGENODIR = HTMLPATH + 'new_genotypes/'
GENO_ARCHIVE_DIR = GENODIR + 'archive/'
TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/'
CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/'
diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py
index 84686234..8e3e0bbe 100755
--- a/wqflask/base/webqtlConfigLocal.py
+++ b/wqflask/base/webqtlConfigLocal.py
@@ -12,7 +12,7 @@ DB_UPDNAME = 'db_webqtl_zas1024'
DB_UPDUSER = 'webqtl'
DB_UPDPASSWD = 'webqtl'
-GNROOT = '/home/zas1024/gn/'
+GNROOT = '/home/zas1024/gene/'
ROOT_URL = 'http://alexandria.uthsc.edu:91/'
PythonPath = '/usr/bin/python'
PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/'
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7cdc350f..92270eb2 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -15,6 +15,8 @@ import os
import httplib
import urllib
+import numpy as np
+
from htmlgen import HTMLgen2 as HT
from utility import Plot, Bunch
from wqflask.interval_analyst import GeneUtil
@@ -25,6 +27,8 @@ from utility import webqtlUtil, helper_functions
from base import webqtlConfig
from dbFunction import webqtlDatabaseFunction
from base.GeneralObject import GeneralObject
+from wqflask.my_pylmm.data import prep_data
+from wqflask.my_pylmm.pyLMM import lmm
import reaper
import cPickle
@@ -63,22 +67,24 @@ class MarkerRegression(object):
self.samples = [] # Want only ones with values
self.vals = []
- self.variances = []
+ #self.variances = []
assert start_vars['display_all_lrs'] in ('True', 'False')
self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False
for sample in self.dataset.group.samplelist:
value = start_vars['value:' + sample]
- variance = start_vars['variance:' + sample]
- if variance.strip().lower() == 'x':
- variance = 0
- else:
- variance = float(variance)
- if value.strip().lower() != 'x':
- self.samples.append(str(sample))
- self.vals.append(float(value))
- self.variances.append(variance)
+ #variance = start_vars['variance:' + sample]
+ #if variance.strip().lower() == 'x':
+ # variance = 0
+ #else:
+ # variance = float(variance)
+ #if value.strip().lower() != 'x':
+ self.samples.append(str(sample))
+ self.vals.append(value)
+ #self.variances.append(variance)
+
+
#self.initializeParameters(start_vars)
@@ -447,6 +453,22 @@ class MarkerRegression(object):
def gen_data(self):
"""Todo: Fill this in here"""
+ prep_data.PrepData(self.vals, self.dataset.group.name)
+
+ pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
+ genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
+ self.dataset.group.name + '.snps.new'))
+
+ print("genotypes is:", pf(genotypes))
+
+ kinship_matrix = lmm.calculateKinship(genotypes)
+ print("kinship_matrix is:", pf(kinship_matrix))
+ print("pheno_vector is:", pf(pheno_vector))
+
+ lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
+ lmm_ob.fit()
+
+
#calculate QTL for each trait
self.qtl_results = self.genotype.regression(strains = self.samples,
trait = self.vals)
diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py
index b7a133c2..ef42a297 100644
--- a/wqflask/wqflask/my_pylmm/data/prep_data.py
+++ b/wqflask/wqflask/my_pylmm/data/prep_data.py
@@ -1,27 +1,29 @@
#!/usr/bin/python
from __future__ import absolute_import, print_function, division
+import os
+
import numpy
-
+from base import webqtlConfig
+
+
class PrepData(object):
- def __init__(self, exprs_file, snps_file):
- self.exprs_file = exprs_file
- self.snps_file = snps_file
- self.empty_columns = set()
+ def __init__(self, pheno_vector, group_name):
+ self.pheno_vector = pheno_vector
+ self.group_name = group_name
+ self.no_val_samples = set()
#self.identify_no_genotype_samples()
self.identify_empty_samples()
self.trim_files()
def identify_empty_samples(self):
- with open(self.exprs_file) as fh:
- for line in fh:
- for pos, item in enumerate(line.split()):
- if item == "NA":
- self.empty_columns.add(pos)
- #print("self.empty_columns:", self.empty_columns)
- nums = set(range(0, 176))
- print("not included:", nums-self.empty_columns)
+ for sample_count, val in enumerate(self.pheno_vector):
+ if val == "x":
+ self.no_val_samples.add(sample_count)
+ print("self.no_val_samples:", self.no_val_samples)
+ #nums = set(range(0, 176))
+ #print("not included:", nums-self.empty_columns)
#def identify_no_genotype_samples(self):
# #for this_file in (self.exprs_file, self.snps_file):
@@ -43,22 +45,36 @@ class PrepData(object):
# print(no_geno_samples)
def trim_files(self):
- for this_file in (self.exprs_file, self.snps_file):
- input_file = open(this_file)
- this_file_name_output = this_file + ".new"
- with open(this_file_name_output, "w") as output:
- for line in input_file:
- data_wanted = []
- for pos, item in enumerate(line.split()):
- if pos in self.empty_columns:
- continue
- else:
- data_wanted.append("%2s" % (item))
- #print("data_wanted is", data_wanted)
- output.write(" ".join(data_wanted) + "\n")
- print("Done writing file:", this_file_name_output)
+ input_file = open(os.path.join(webqtlConfig.NEWGENODIR, self.group_name+'.snps'))
+ output_file = os.path.join(webqtlConfig.TMPDIR, self.group_name + '.snps.new')
+ with open(output_file, "w") as output_file:
+ for line in input_file:
+ data_to_write = []
+ for pos, item in enumerate(line.split()):
+ if pos in self.no_val_samples:
+ continue
+ else:
+ data_to_write.append("%s" % (item))
+ output_file.write(" ".join(data_to_write) + "\n")
+
+ print("Done writing:", output_file)
+
+ #for this_file in (self.exprs_file, self.genotype_file):
+ # input_file = open(this_file)
+ # this_file_name_output = this_file + ".new"
+ # with open(this_file_name_output, "w") as output_file:
+ # for line in input_file:
+ # data_wanted = []
+ # for pos, item in enumerate(line.split()):
+ # if pos in self.empty_columns:
+ # continue
+ # else:
+ # data_wanted.append("%2s" % (item))
+ # #print("data_wanted is", data_wanted)
+ # output_file.write(" ".join(data_wanted) + "\n")
+ # print("Done writing file:", this_file_name_output)
if __name__=="__main__":
exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1"""
- snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000"""
- PrepData(exprs_file, snps_file) \ No newline at end of file
+ genotype_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000"""
+ PrepData(pheno_vector, genotype_file) \ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py
index 0348d67b..8b30debd 100644
--- a/wqflask/wqflask/my_pylmm/example.py
+++ b/wqflask/wqflask/my_pylmm/example.py
@@ -20,7 +20,7 @@ print("exprs is:", pf(Y.shape))
# These three lines will load all SNPs (from npdump or from txt) and
# calculate the kinship
-snps = np.genfromtxt('data/mdp.snps.1000.new').T
+snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
print("snps is:", pf(snps.shape))
#snps = snps[~np.isnan(snps).all(axis=1)]
#print ("snps is now:", pf(snps))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 7fe599c4..1ae663d4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -142,20 +142,35 @@ class LMM:
is not done consistently.
"""
- def __init__(self,Y,K,Kva=[],Kve=[],X0=None):
-
+ def __init__(self, Y, K, Kva=None, Kve=None, X0=None):
"""
The constructor takes a phenotype vector or array of size n.
It takes a kinship matrix of size n x n. Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
If they are not provided, the constructor will calculate them.
X0 is an optional covariate matrix of size n x q, where there are q covariates.
When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+
"""
- if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+ if Kva is None:
+ Kva = []
+ if Kve is None:
+ Kve = []
+
+
+ if X0 == None:
+ X0 = np.ones(len(Y)).reshape(len(Y),1)
+ print("Y is:", pf(Y))
+
+ for key, value in locals().iteritems():
+ print(" %s - %s" % (key, type(value)))
+
x = Y != -9
+ print("x is:", pf(x))
if not x.sum() == len(Y):
+ print("x.sum is:", pf(x.sum()))
+ print("len(Y) is:", pf(len(Y)))
sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum()))
Y = Y[x]
K = K[x,:][:,x]
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 603c40f5..33ea6e86 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -130,7 +130,7 @@ class ShowTrait(object):
js_data = dict(sample_group_types = self.sample_group_types,
sample_lists = sample_lists,
attribute_names = self.sample_groups[0].attributes)
- print("js_data:", pf(js_data))
+ #print("js_data:", pf(js_data))
self.js_data = js_data
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 472548f0..81777742 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -136,15 +136,15 @@ def show_trait_page():
#fd = webqtlFormData.webqtlFormData(request.args)
#print("stp y1:", pf(vars(fd)))
template_vars = show_trait.ShowTrait(request.args)
- print("js_data before dump:", template_vars.js_data)
+ #print("js_data before dump:", template_vars.js_data)
template_vars.js_data = json.dumps(template_vars.js_data,
default=json_default_handler,
indent=" ")
# Sorting the keys messes up the ordered dictionary, so don't do that
#sort_keys=True)
- print("js_data after dump:", template_vars.js_data)
- print("show_trait template_vars:", pf(template_vars.__dict__))
+ #print("js_data after dump:", template_vars.js_data)
+ #print("show_trait template_vars:", pf(template_vars.__dict__))
return render_template("show_trait.html", **template_vars.__dict__)
@app.route("/marker_regression", methods=('POST',))