From b3853925653cf6145d7fb56b71edfc824a2d051a Mon Sep 17 00:00:00 2001
From: Zachary Sloan
Date: Tue, 12 Feb 2013 16:20:56 -0600
Subject: Edited marker_regression.py and data_set.py to store the p-values and
 their corresponding markers to be used in the table of qtl results and other
 figures

---
 wqflask/base/data_set.py                           | 29 +++++++-
 .../wqflask/marker_regression/marker_regression.py | 80 +++++++++++++---------
 wqflask/wqflask/my_pylmm/data/genofile_parser.py   | 15 +++-
 3 files changed, 89 insertions(+), 35 deletions(-)

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 8ced1528..182e15e6 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -23,6 +23,8 @@
 from __future__ import absolute_import, print_function, division
 import os
 
+import json
+
 from flask import Flask, g
 
 from htmlgen import HTMLgen2 as HT
@@ -64,6 +66,21 @@ def create_dataset(dataset_name):
     return dataset_class(dataset_name)
 
 
+class Markers(object):
+    """Todo: Build in cacheing so it saves us reading the same file more than once"""
+    def __init__(self, name):
+        json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
+        self.markers = json.load(json_data)
+    
+    def add_pvalues(p_values):
+        #for count, marker in enumerate(self.markers):
+        #    marker['p_value'] = p_values[count]
+            
+        for marker, p_value in itertools.izip(self.markers, p_values):
+            marker['p_value'] = p_value
+            #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+            marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+
 class DatasetGroup(object):
     """
     Each group has multiple datasets; each species has multiple groups.
@@ -84,6 +101,7 @@ class DatasetGroup(object):
         self.f1list = None
         self.parlist = None
         self.allsamples = None
+        self.markers = Markers(self.name)
 
 
     #def read_genotype(self):
@@ -91,9 +109,16 @@ class DatasetGroup(object):
     #
     #    if not self.genotype:   # Didn'd succeed, so we try method 2
     #        self.read_genotype_data()
-            
+
+    #def read_genotype_json(self):
+    #    '''Read genotype from json file'''
+    #    
+    #    json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.name + '.json'))
+    #    markers = json.load(json_data)
+    #    
+
     def read_genotype_file(self):
-        '''read genotype from .geno file instead of database'''
+        '''Read genotype from .geno file instead of database'''
         #if self.group == 'BXD300':
         #    self.group = 'BXD'
         #
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 13ec4280..1d005df4 100755
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -454,10 +454,11 @@ class MarkerRegression(object):
 
     def gen_data(self):
         """Todo: Fill this in here"""
-
-        json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
-        markers = json.load(json_data)
-        genotype_data = [marker['genotypes'] for marker in markers]
+        
+        #json_data = open(os.path.join(webqtlConfig.NEWGENODIR + self.dataset.group.name + '.json'))
+        #markers = json.load(json_data)
+        
+        genotype_data = [marker['genotypes'] for marker in self.dataset.group.markers]
         
         no_val_samples = self.identify_empty_samples()
         trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)
@@ -466,7 +467,6 @@ class MarkerRegression(object):
         
         #for marker_object in genotype_data:
         #    print("marker_object:", pf(marker_object))
-        
 
         #prep_data.PrepData(self.vals, genotype_data)
         
@@ -492,40 +492,60 @@ class MarkerRegression(object):
                                      refit=False)
 
         print("p_values is:", pf(len(p_values)))
+        
+        self.dataset.group.markers.add_pvalues(p_values)
 
         #calculate QTL for each trait
-        self.qtl_results = self.genotype.regression(strains = self.samples,
-                                                trait = self.vals)
-        self.lrs_array = self.genotype.permutation(strains = self.samples,
-                                               trait = self.vals,
-                                               nperm=self.num_perm)
+        #self.qtl_results = self.genotype.regression(strains = self.samples,
+        #                                        trait = self.vals)
+        #self.lrs_array = self.genotype.permutation(strains = self.samples,
+        #                                       trait = self.vals,
+        #                                       nperm=self.num_perm)
+
+        self.lrs_values = [marker['lrs_value'] for marker in self.dataset.group.markers]
 
         self.lrs_thresholds = Bunch(
-                                suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
-                                significant = self.lrs_array[int(self.num_perm*0.95-1)],
-                                highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
+                                suggestive = self.lrs_values[int(self.num_perm*0.37-1)],
+                                significant = self.lrs_values[int(self.num_perm*0.95-1)],
+                                highly_significant = self.lrs_values[int(self.num_perm*0.99-1)]
                                 )
 
+        #self.lrs_thresholds = Bunch(
+        #                        suggestive = self.lrs_array[int(self.num_perm*0.37-1)],
+        #                        significant = self.lrs_array[int(self.num_perm*0.95-1)],
+        #                        highly_significant = self.lrs_array[int(self.num_perm*0.99-1)]
+        #                        )
+
         if self.display_all_lrs:
-            filtered_results = self.qtl_results
+            self.filtered_results = self.dataset.group.markers.markers
         else:
-            suggestive_results = []
+            self.filtered_results = []
             self.pure_qtl_results = []
-            for result in self.qtl_results:
-                self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
-                                                             mb=result.locus.Mb,
-                                                             chromosome=result.locus.chr),
-                                                  lrs=result.lrs,
-                                                  additive=result.additive))
-                if result.lrs > self.lrs_thresholds.suggestive:
-                    suggestive_results.append(result)
-            filtered_results = suggestive_results 
+            for marker in self.dataset.group.markers.markers:
+                self.pure_qtl_results.append(marker)
+                if marker['lrs_value'] > self.lrs_thresholds.suggestive:
+                    self.filtered_results.append(marker)
+
+        #if self.display_all_lrs:
+        #    filtered_results = self.qtl_results
+        #else:
+        #    suggestive_results = []
+        #    self.pure_qtl_results = []
+        #    for result in self.qtl_results:
+        #        self.pure_qtl_results.append(dict(locus=dict(name=result.locus.name,
+        #                                                     mb=result.locus.Mb,
+        #                                                     chromosome=result.locus.chr),
+        #                                          lrs=result.lrs,
+        #                                          additive=result.additive))
+        #        if result.lrs > self.lrs_thresholds.suggestive:
+        #            suggestive_results.append(result)
+        #    filtered_results = suggestive_results 
 
 
         # Todo (2013): Use top_10 variable to generate page message about whether top 10 was used
-        if not filtered_results:
+        if not self.filtered_results:
             # We use the 10 results with the highest LRS values
-            filtered_results = sorted(self.qtl_results)[-10:]
+            self.filtered_results = sorted(self.qtl_results)[-10:]
             self.top_10 = True
         else:
             self.top_10 = False
@@ -567,11 +587,9 @@ class MarkerRegression(object):
         #permutation = HT.TableLite()
         #permutation.append(HT.TR(HT.TD(img)))
 
-        for marker in filtered_results:
-            if marker.lrs > webqtlConfig.MAXLRS:
-                marker.lrs = webqtlConfig.MAXLRS
-        
-        self.filtered_results = filtered_results
+        for marker in self.filtered_results:
+            if marker['lrs_value'] > webqtlConfig.MAXLRS:
+                marker['lrs_value'] = webqtlConfig.MAXLRS
 
         #if fd.genotype.type == 'intercross':
         #    ncol =len(headerList)
diff --git a/wqflask/wqflask/my_pylmm/data/genofile_parser.py b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
index ec8c521c..8c74fe74 100644
--- a/wqflask/wqflask/my_pylmm/data/genofile_parser.py
+++ b/wqflask/wqflask/my_pylmm/data/genofile_parser.py
@@ -28,10 +28,11 @@ class Marker(object):
 
 class ConvertGenoFile(object):
 
-    def __init__(self, input_file, output_file):
+    def __init__(self, input_file, output_file, file_type):
         
         self.input_file = input_file
         self.output_file = output_file
+        self.file_type = file_type
         
         self.mb_exists = False
         self.markers = []
@@ -57,7 +58,10 @@ class ConvertGenoFile(object):
         self.input_fh = open(self.input_file)
         
         with open(self.output_file, "w") as self.output_fh:
-            self.process_csv()
+            if self.file_type == "geno":
+                self.process_csv()
+            elif self.file_type == "snps":
+                self.process_snps_file()
 
 
     #def process_row(self, row):
@@ -66,6 +70,7 @@ class ConvertGenoFile(object):
     #        if char 
     #        counter += 1
 
+
     def process_csv(self):
         for row_count, row in enumerate(self.process_rows()):
             #self.latest_row_pos = row_count
@@ -146,6 +151,12 @@ class ConvertGenoFile(object):
                 print("    Column is:", convertob.latest_col_value)
                 print("    Row is:", convertob.latest_row_value)
                 break
+            
+    def process_snps_file(cls, snps_file, new_directory):
+        output_file = os.path.join(new_directory, "mouse_families.json")
+        print("%s -> %s" % (snps_file, output_file))
+        convertob = ConvertGenoFile(input_file, output_file)
+        
 
 
 if __name__=="__main__":
-- 
cgit v1.2.3