From 29b50856ab9b9ef87b81228b782dd6bebe28b7c1 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Mon, 23 May 2016 22:23:51 +0000
Subject: Reaper mapping results are now correct and account for samples with
 two names (like BXD65a)

R/qtl and PYLMM still not working with the change and bootstrap results are also still wrong, so need to fix those issues.
---
 wqflask/base/data_set.py                           |  6 +--
 wqflask/base/trait.py                              | 46 ++--------------------
 wqflask/base/webqtlCaseData.py                     |  3 +-
 wqflask/base/webqtlConfig.py                       |  0
 wqflask/wqflask/heatmap/heatmap.py                 |  2 +-
 .../wqflask/marker_regression/marker_regression.py | 28 +++++++++----
 wqflask/wqflask/show_trait/show_trait.py           |  9 ++---
 7 files changed, 34 insertions(+), 60 deletions(-)
 mode change 100755 => 100644 wqflask/base/webqtlConfig.py
 mode change 100755 => 100644 wqflask/wqflask/show_trait/show_trait.py

(limited to 'wqflask')

diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 053b45fc..4953e728 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -711,7 +711,7 @@ class PhenotypeDataSet(DataSet):
     def retrieve_sample_data(self, trait):
         query = """
                     SELECT
-                            Strain.Name, PublishData.value, PublishSE.error, NStrain.count
+                            Strain.Name, PublishData.value, PublishSE.error, NStrain.count, Strain.Name2
                     FROM
                             (PublishData, Strain, PublishXRef, PublishFreeze)
                     left join PublishSE on
@@ -803,7 +803,7 @@ class GenotypeDataSet(DataSet):
     def retrieve_sample_data(self, trait):
         query = """
                     SELECT
-                            Strain.Name, GenoData.value, GenoSE.error, GenoData.Id
+                            Strain.Name, GenoData.value, GenoSE.error, GenoData.Id, Sample.Name2
                     FROM
                             (GenoData, GenoFreeze, Strain, Geno, GenoXRef)
                     left join GenoSE on
@@ -1031,7 +1031,7 @@ class MrnaAssayDataSet(DataSet):
     def retrieve_sample_data(self, trait):
         query = """
                     SELECT
-                            Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+                            Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id, Strain.Name2
                     FROM
                             (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
                     left join ProbeSetSE on
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index af22b5a1..05ee3d96 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -180,13 +180,15 @@ class GeneralTrait(object):
         samples = []
         vals = []
         the_vars = []
+        sample_aliases = []
         for sample_name, sample_data in self.data.items():
             if sample_data.value != None:
                 if not include_variance or sample_data.variance != None:
                     samples.append(sample_name)
                     vals.append(sample_data.value)
                     the_vars.append(sample_data.variance)
-        return  samples, vals, the_vars
+                    sample_aliases.append(sample_data.name2)
+        return  samples, vals, the_vars, sample_aliases
 
 
     #
@@ -230,7 +232,7 @@ class GeneralTrait(object):
 
         if results:
             for item in results:
-                name, value, variance, num_cases = item
+                name, value, variance, num_cases, name2 = item
                 if not samplelist or (samplelist and name in samplelist):
                     self.data[name] = webqtlCaseData(*item)   #name, value, variance, num_cases)
 
@@ -308,46 +310,6 @@ class GeneralTrait(object):
                 if isinstance(trait_info[i], basestring):
                     holder = unicode(trait_info[i], "utf8", "ignore")
                 setattr(self, field, holder)
-<<<<<<< HEAD
-
-            description_string = unicode(str(self.description).strip(codecs.BOM_UTF8), 'utf-8')
-            target_string = unicode(str(self.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
-
-            if len(description_string) > 1 and description_string != 'None':
-                description_display = description_string
-            else:
-                description_display = self.symbol
-
-            if (len(description_display) > 1 and description_display != 'N/A' and
-                    len(target_string) > 1 and target_string != 'None'):
-                description_display = description_display + '; ' + target_string.strip()
-
-            # Save it for the jinja2 template
-            self.description_display = description_display
-
-            #XZ: trait_location_value is used for sorting
-            trait_location_repr = 'N/A'
-            trait_location_value = 1000000
-
-            if self.chr and self.mb:
-                #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
-                #This is so we can convert the location to a number used for sorting
-                trait_location_value = convert_location_to_value(self.chr, self.mb)
-                #try:
-                #    trait_location_value = int(self.chr)*1000 + self.mb
-                #except ValueError:
-                #    if self.chr.upper() == 'X':
-                #        trait_location_value = 20*1000 + self.mb
-                #    else:
-                #        trait_location_value = (ord(str(self.chr).upper()[0])*1000 +
-                #                               self.mb)
-
-                #ZS: Put this in function currently called "convert_location_to_value"
-                self.location_repr = 'Chr%s: %.6f' % (self.chr, float(self.mb))
-                self.location_value = trait_location_value
-                
-=======
->>>>>>> e0c5c1aae3aaaa1d81bcec36835a97e169dcc2e2
                 
             if self.dataset.type == 'Publish':
                 self.confidential = 0
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 42763aed..99a34866 100755
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -29,8 +29,9 @@ print("Mr. Mojo Risin 2")
 class webqtlCaseData(object):
     """one case data in one trait"""
 
-    def __init__(self, name, value=None, variance=None, num_cases=None):
+    def __init__(self, name, value=None, variance=None, num_cases=None, name2=None):
         self.name = name
+        self.name2 = name2                  # Other name (for traits like BXD65a)
         self.value = value                  # Trait Value
         self.variance = variance            # Trait Variance
         self.num_cases = num_cases          # Number of individuals/cases
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
old mode 100755
new mode 100644
diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py
index 2445b37f..19c330eb 100644
--- a/wqflask/wqflask/heatmap/heatmap.py
+++ b/wqflask/wqflask/heatmap/heatmap.py
@@ -136,7 +136,7 @@ class Heatmap(object):
             this_trait = trait_db[0]
             #this_db = trait_db[1]
             genotype = self.dataset.group.read_genotype_file()
-            samples, values, variances = this_trait.export_informative()
+            samples, values, variances, sample_aliases = this_trait.export_informative()
 
             trimmed_samples = []
             trimmed_values = []
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index 08f422f0..9cd02bd1 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -56,11 +56,21 @@ class MarkerRegression(object):
         
         self.samples = [] # Want only ones with values
         self.vals = []
-
+        
+        #for sample in self.this_trait.data.keys():
         for sample in self.dataset.group.samplelist:
-            value = start_vars['value:' + sample]
-            self.samples.append(str(sample))
-            self.vals.append(value)
+            in_trait_data = False        
+            for item in self.this_trait.data:
+                if self.this_trait.data[item].name2 == sample:
+                    value = start_vars['value:' + self.this_trait.data[item].name]
+                    self.samples.append(self.this_trait.data[item].name)
+                    self.vals.append(value)
+                    in_trait_data = True
+                    break
+            if not in_trait_data:
+                value = start_vars['value:' + sample]
+                self.samples.append(sample)
+                self.vals.append(value)
  
         self.mapping_method = start_vars['method']
         if start_vars['manhattan_plot'] == "True":
@@ -641,15 +651,17 @@ class MarkerRegression(object):
         if self.manhattan_plot != True:
             genotype = genotype.addinterval()
         
-        samples, values, variances = self.this_trait.export_informative()
-
+        samples, values, variances, sample_aliases = self.this_trait.export_informative()
+        
         trimmed_samples = []
         trimmed_values = []
         for i in range(0, len(samples)):
-            if samples[i] in self.dataset.group.samplelist:
-                trimmed_samples.append(samples[i])
+            if self.this_trait.data[samples[i]].name2 in self.dataset.group.samplelist:
+                trimmed_samples.append(sample_aliases[i])
                 trimmed_values.append(values[i])
 
+        #print("THE SAMPLES:", trimmed_samples)
+                
         if self.num_perm < 100:
             self.suggestive = 0
             self.significant = 0
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
old mode 100755
new mode 100644
index 074c78bf..76651cbf
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -1184,17 +1184,16 @@ class ShowTrait(object):
         all_samples_ordered = self.dataset.group.all_samples_ordered()
 
         primary_sample_names = list(all_samples_ordered)
-
-        print("self.dataset.group", pf(self.dataset.group.__dict__))
-        print("-*- primary_samplelist is:", pf(primary_sample_names))
-
+        
         other_sample_names = []
         for sample in this_trait.data.keys():
+            if (this_trait.data[sample].name2 in primary_sample_names) and (this_trait.data[sample].name not in primary_sample_names):
+                primary_sample_names.append(this_trait.data[sample].name)
+                primary_sample_names.remove(this_trait.data[sample].name2)
             if sample not in all_samples_ordered:
                 all_samples_ordered.append(sample)
                 other_sample_names.append(sample)
 
-        print("species:", self.dataset.group.species)
         if self.dataset.group.species == "human":
             primary_sample_names += other_sample_names
 
-- 
cgit 1.4.1