Added code to marker_regression.py that creates the numpy arrays to

pass to Nick's code and changed the prep_data.py code to operate on a list of phenotype values instead of a textfile with the values delimited
author: Zachary Sloan 2013-02-07 17:58:34 -0600
committer: Zachary Sloan 2013-02-07 17:58:34 -0600
commit: 9b0264bf13e994298de95a4e08198336b6c97a38 (patch)
tree: 9d4e4773699555a93e9509fa484d6264ff4c6bfd
parent: d75fc63891f617fbe8b2b030fdce80b1628c6a41 (diff)
download: genenetwork2-9b0264bf13e994298de95a4e08198336b6c97a38.tar.gz
8 files changed, 103 insertions, 49 deletions
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index d5f09b64..d05fa6e0 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -55,8 +55,9 @@ HTMLPATH = GNROOT + 'web/'
 IMGDIR = HTMLPATH +'image/'
 IMAGESPATH = HTMLPATH + 'images/'
 UPLOADPATH = IMAGESPATH + 'upload/'
-TMPDIR = '/tmp/'
+TMPDIR = HTMLPATH + 'tmp/'
 GENODIR = HTMLPATH + 'genotypes/'
+NEWGENODIR = HTMLPATH + 'new_genotypes/'
 GENO_ARCHIVE_DIR = GENODIR + 'archive/'
 TEXTDIR = HTMLPATH + 'ProbeSetFreeze_DataMatrix/'
 CMDLINEDIR = HTMLPATH + 'webqtl/cmdLine/'
diff --git a/wqflask/base/webqtlConfigLocal.py b/wqflask/base/webqtlConfigLocal.py
index 84686234..8e3e0bbe 100755
--- a/wqflask/base/webqtlConfigLocal.py
+++ b/wqflask/base/webqtlConfigLocal.py
@@ -12,7 +12,7 @@ DB_UPDNAME = 'db_webqtl_zas1024'
 DB_UPDUSER = 'webqtl'
 DB_UPDPASSWD = 'webqtl'
 
-GNROOT = '/home/zas1024/gn/'
+GNROOT = '/home/zas1024/gene/'
 ROOT_URL = 'http://alexandria.uthsc.edu:91/'
 PythonPath = '/usr/bin/python'
 PIDDLE_FONT_PATH = '/usr/lib/python2.4/site-packages/piddle/truetypefonts/'
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 7cdc350f..92270eb2 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -15,6 +15,8 @@ import os
 import httplib
 import urllib
 
+import numpy as np
+
 from htmlgen import HTMLgen2 as HT
 from utility import Plot, Bunch
 from wqflask.interval_analyst import GeneUtil
@@ -25,6 +27,8 @@ from utility import webqtlUtil, helper_functions
 from base import webqtlConfig
 from dbFunction import webqtlDatabaseFunction
 from base.GeneralObject import GeneralObject
+from wqflask.my_pylmm.data import prep_data
+from wqflask.my_pylmm.pyLMM import lmm
 
 import reaper
 import cPickle
@@ -63,22 +67,24 @@ class MarkerRegression(object):
         
         self.samples = []   # Want only ones with values
         self.vals = []
-        self.variances = []
+        #self.variances = []
         
         assert start_vars['display_all_lrs'] in ('True', 'False')
         self.display_all_lrs = True if start_vars['display_all_lrs'] == 'True' else False
         
         for sample in self.dataset.group.samplelist:
             value = start_vars['value:' + sample]
-            variance = start_vars['variance:' + sample]
-            if variance.strip().lower() == 'x':
-                variance = 0
-            else:
-                variance = float(variance)
-            if value.strip().lower() != 'x':
-                self.samples.append(str(sample))
-                self.vals.append(float(value))
-                self.variances.append(variance)
+            #variance = start_vars['variance:' + sample]
+            #if variance.strip().lower() == 'x':
+            #    variance = 0
+            #else:
+            #    variance = float(variance)
+            #if value.strip().lower() != 'x':
+            self.samples.append(str(sample))
+            self.vals.append(value)
+            #self.variances.append(variance)
+        
+        
 
         #self.initializeParameters(start_vars)
 
@@ -447,6 +453,22 @@ class MarkerRegression(object):
     def gen_data(self):
         """Todo: Fill this in here"""
 
+        prep_data.PrepData(self.vals, self.dataset.group.name)
+        
+        pheno_vector = np.array([float(val) for val in self.vals if val!="x"])
+        genotypes = np.genfromtxt(os.path.join(webqtlConfig.TMPDIR,
+                                               self.dataset.group.name + '.snps.new'))
+        
+        print("genotypes is:", pf(genotypes))
+        
+        kinship_matrix = lmm.calculateKinship(genotypes)
+        print("kinship_matrix is:", pf(kinship_matrix))
+        print("pheno_vector is:", pf(pheno_vector))
+        
+        lmm_ob = lmm.LMM(pheno_vector, kinship_matrix)
+        lmm_ob.fit()
+        
+
         #calculate QTL for each trait
         self.qtl_results = self.genotype.regression(strains = self.samples,
                                                 trait = self.vals)
diff --git a/wqflask/wqflask/my_pylmm/data/prep_data.py b/wqflask/wqflask/my_pylmm/data/prep_data.py
index b7a133c2..ef42a297 100644
--- a/wqflask/wqflask/my_pylmm/data/prep_data.py
+++ b/wqflask/wqflask/my_pylmm/data/prep_data.py
@@ -1,27 +1,29 @@
 #!/usr/bin/python
 
 from __future__ import absolute_import, print_function, division
+import os
+
 import numpy
 
-    
+from base import webqtlConfig
+
+
 class PrepData(object):
-    def __init__(self, exprs_file, snps_file):
-        self.exprs_file = exprs_file
-        self.snps_file = snps_file
-        self.empty_columns = set()
+    def __init__(self, pheno_vector, group_name):
+        self.pheno_vector = pheno_vector
+        self.group_name = group_name
+        self.no_val_samples = set()
         #self.identify_no_genotype_samples()
         self.identify_empty_samples()
         self.trim_files()
 
     def identify_empty_samples(self):
-        with open(self.exprs_file) as fh:
-            for line in fh:
-                for pos, item in enumerate(line.split()):
-                    if item == "NA":
-                        self.empty_columns.add(pos)
-        #print("self.empty_columns:", self.empty_columns)
-        nums = set(range(0, 176))
-        print("not included:", nums-self.empty_columns)
+        for sample_count, val in enumerate(self.pheno_vector):
+            if val == "x":
+                self.no_val_samples.add(sample_count)
+        print("self.no_val_samples:", self.no_val_samples)
+        #nums = set(range(0, 176))
+        #print("not included:", nums-self.empty_columns)
             
     #def identify_no_genotype_samples(self):
     #    #for this_file in (self.exprs_file, self.snps_file):
@@ -43,22 +45,36 @@ class PrepData(object):
     #        print(no_geno_samples)
 
     def trim_files(self):
-        for this_file in (self.exprs_file, self.snps_file):
-            input_file = open(this_file)
-            this_file_name_output = this_file + ".new"
-            with open(this_file_name_output, "w") as output:
-                for line in input_file:
-                    data_wanted = []
-                    for pos, item in enumerate(line.split()):
-                        if pos in self.empty_columns:
-                            continue
-                        else:
-                            data_wanted.append("%2s" % (item))
-                    #print("data_wanted is", data_wanted)
-                    output.write(" ".join(data_wanted) + "\n")
-            print("Done writing file:", this_file_name_output)
+        input_file = open(os.path.join(webqtlConfig.NEWGENODIR, self.group_name+'.snps'))
+        output_file = os.path.join(webqtlConfig.TMPDIR, self.group_name + '.snps.new')
+        with open(output_file, "w") as output_file:
+            for line in input_file:
+                data_to_write = []
+                for pos, item in enumerate(line.split()):
+                    if pos in self.no_val_samples:
+                        continue
+                    else:
+                        data_to_write.append("%s" % (item))
+                output_file.write(" ".join(data_to_write) + "\n")
+        
+        print("Done writing:", output_file)
+        
+        #for this_file in (self.exprs_file, self.genotype_file):
+        #    input_file = open(this_file)
+        #    this_file_name_output = this_file + ".new"
+        #    with open(this_file_name_output, "w") as output_file:
+        #        for line in input_file:
+        #            data_wanted = []
+        #            for pos, item in enumerate(line.split()):
+        #                if pos in self.empty_columns:
+        #                    continue
+        #                else:
+        #                    data_wanted.append("%2s" % (item))
+        #            #print("data_wanted is", data_wanted)
+        #            output_file.write(" ".join(data_wanted) + "\n")
+        #    print("Done writing file:", this_file_name_output)
 
 if __name__=="__main__":
     exprs_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.exprs.1"""
-    snps_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000"""
-    PrepData(exprs_file, snps_file)
\ No newline at end of file
+    genotype_file = """/home/zas1024/gene/wqflask/wqflask/pylmm/data/mdp.snps.1000"""
+    PrepData(pheno_vector, genotype_file)
\ No newline at end of file
diff --git a/wqflask/wqflask/my_pylmm/example.py b/wqflask/wqflask/my_pylmm/example.py
index 0348d67b..8b30debd 100644
--- a/wqflask/wqflask/my_pylmm/example.py
+++ b/wqflask/wqflask/my_pylmm/example.py
@@ -20,7 +20,7 @@ print("exprs is:", pf(Y.shape))
 
 # These three lines will load all SNPs (from npdump or from txt) and 
 # calculate the kinship
-snps = np.genfromtxt('data/mdp.snps.1000.new').T
+snps = np.genfromtxt('/home/zas1024/gene/web/new_genotypers/mdp.snps.1000.new').T
 print("snps is:", pf(snps.shape))
 #snps = snps[~np.isnan(snps).all(axis=1)]
 #print ("snps is now:", pf(snps))
diff --git a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
index 7fe599c4..1ae663d4 100644
--- a/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
+++ b/wqflask/wqflask/my_pylmm/pyLMM/lmm.py
@@ -142,20 +142,35 @@ class LMM:
           is not done consistently.
 
     """
-    def __init__(self,Y,K,Kva=[],Kve=[],X0=None):
-
+    def __init__(self, Y, K, Kva=None, Kve=None, X0=None):
         """
         The constructor takes a phenotype vector or array of size n.
         It takes a kinship matrix of size n x n.  Kva and Kve can be computed as Kva,Kve = linalg.eigh(K) and cached.
         If they are not provided, the constructor will calculate them.
         X0 is an optional covariate matrix of size n x q, where there are q covariates.
         When this parameter is not provided, the constructor will set X0 to an n x 1 matrix of all ones to represent a mean effect.
+       
         """
 
-        if X0 == None: X0 = np.ones(len(Y)).reshape(len(Y),1)
+        if Kva is None:
+            Kva = []
+        if Kve is None:
+            Kve = []
+            
+
+        if X0 == None:
+            X0 = np.ones(len(Y)).reshape(len(Y),1)
 
+        print("Y is:", pf(Y))
+        
+        for key, value in locals().iteritems():
+            print("  %s - %s" % (key, type(value)))
+        
         x = Y != -9
+        print("x is:", pf(x))
         if not x.sum() == len(Y):
+            print("x.sum is:", pf(x.sum()))
+            print("len(Y) is:", pf(len(Y)))
             sys.stderr.write("Removing %d missing values from Y\n" % ((True - x).sum()))
             Y = Y[x]
             K = K[x,:][:,x]
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 603c40f5..33ea6e86 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -130,7 +130,7 @@ class ShowTrait(object):
         js_data = dict(sample_group_types = self.sample_group_types,
                         sample_lists = sample_lists,
                         attribute_names = self.sample_groups[0].attributes)
-        print("js_data:", pf(js_data))
+        #print("js_data:", pf(js_data))
         self.js_data = js_data
 
 
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 472548f0..81777742 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -136,15 +136,15 @@ def show_trait_page():
     #fd = webqtlFormData.webqtlFormData(request.args)
     #print("stp y1:", pf(vars(fd)))
     template_vars = show_trait.ShowTrait(request.args)
-    print("js_data before dump:", template_vars.js_data)
+    #print("js_data before dump:", template_vars.js_data)
     template_vars.js_data = json.dumps(template_vars.js_data,
                                        default=json_default_handler,
                                        indent="   ")
     # Sorting the keys messes up the ordered dictionary, so don't do that
                                        #sort_keys=True)
 
-    print("js_data after dump:", template_vars.js_data)
-    print("show_trait template_vars:", pf(template_vars.__dict__))
+    #print("js_data after dump:", template_vars.js_data)
+    #print("show_trait template_vars:", pf(template_vars.__dict__))
     return render_template("show_trait.html", **template_vars.__dict__)
 
 @app.route("/marker_regression", methods=('POST',))
author	Zachary Sloan	2013-02-07 17:58:34 -0600
committer	Zachary Sloan	2013-02-07 17:58:34 -0600
commit	9b0264bf13e994298de95a4e08198336b6c97a38 (patch)
tree	9d4e4773699555a93e9509fa484d6264ff4c6bfd
parent	d75fc63891f617fbe8b2b030fdce80b1628c6a41 (diff)
download	genenetwork2-9b0264bf13e994298de95a4e08198336b6c97a38.tar.gz