about summary refs log tree commit diff
path: root/gn2/wqflask/show_trait/SampleList.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/wqflask/show_trait/SampleList.py')
-rw-r--r--gn2/wqflask/show_trait/SampleList.py223
1 files changed, 223 insertions, 0 deletions
diff --git a/gn2/wqflask/show_trait/SampleList.py b/gn2/wqflask/show_trait/SampleList.py
new file mode 100644
index 00000000..64fc8fe6
--- /dev/null
+++ b/gn2/wqflask/show_trait/SampleList.py
@@ -0,0 +1,223 @@
+import re
+import itertools
+
+from gn2.wqflask.database import database_connection
+from gn2.base import webqtlCaseData, webqtlConfig
+from pprint import pformat as pf
+
+from gn2.utility import Plot
+from gn2.utility import Bunch
+from gn2.utility.tools import get_setting
+
+class SampleList:
+    """Table of samples, with their values and case attributes, for a trait.
+
+    Constructing an instance queries the database for the case attributes
+    of the dataset's group, builds one sample object per entry of
+    ``sample_names`` (stored in ``self.sample_list``) and flags outliers
+    among the sample values.
+    """
+
+    # Key under which the RRID case attribute is stored in a sample's
+    # ``extra_attributes`` (keys there are stringified CaseAttribute ids).
+    _RRID_ATTRIBUTE_ID = '36'
+
+    def __init__(self,
+                 dataset,
+                 sample_names,
+                 this_trait,
+                 sample_group_type="primary",
+                 header="Samples"):
+        """Build the sample list.
+
+        Args:
+            dataset: Object whose ``group`` exposes ``id``, ``name`` and
+                ``species``.
+            sample_names: Iterable of sample names; any ``"_2nd_"``
+                substring is stripped from each name.
+            this_trait: Either a trait object whose ``data`` maps sample
+                names to sample objects, or (for a Temp trait) a list of
+                raw values positionally aligned with ``sample_names``,
+                where ``'x'`` marks a missing value.
+            sample_group_type: "primary" or "other".
+            header: Heading stored on the instance as ``self.header``.
+        """
+
+        self.dataset = dataset
+        self.this_trait = this_trait
+        self.sample_group_type = sample_group_type    # primary or other
+        self.header = header
+
+        self.sample_list = []  # The actual list
+        self.sample_attribute_values = {}
+
+        self.get_attributes()
+
+        if self.this_trait and self.dataset:
+            self.get_extra_attribute_values()
+
+        for counter, sample_name in enumerate(sample_names, 1):
+            sample_name = sample_name.replace("_2nd_", "")
+
+            # self.this_trait will be a list if it is a Temp trait
+            if isinstance(self.this_trait, list):
+                sample = self._temp_trait_sample(sample_name, counter)
+            else:
+                # If there's no value for the sample/strain,
+                # create the sample object (so samples with no value
+                # are still displayed in the table)
+                try:
+                    sample = self.this_trait.data[sample_name]
+                except KeyError:
+                    sample = webqtlCaseData.webqtlCaseData(name=sample_name)
+
+            sample.extra_info = {}
+            if (self.dataset.group.name == 'AXBXA'
+                    and sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')):
+                sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
+                sample.extra_info['css_class'] = "fs12"
+
+            sample.this_id = str(counter)
+
+            # For extra attribute columns; currently only used by
+            # several datasets
+            if self.sample_attribute_values:
+                sample.extra_attributes = self.sample_attribute_values.get(
+                    sample_name, {})
+                self._link_rrid(sample)
+
+            self.sample_list.append(sample)
+
+        self.se_exists = any(sample.variance for sample in self.sample_list)
+        # True when at least one sample has a case count other than "1"
+        self.num_cases_exists = any(
+            sample.num_cases and sample.num_cases != "1"
+            for sample in self.sample_list)
+
+        first_attr_col = self.get_first_attr_col()
+        for sample in self.sample_list:
+            sample.first_attr_col = first_attr_col
+
+        self.do_outliers()
+
+    def __repr__(self):
+        return "<SampleList> --> %s" % (pf(self.__dict__))
+
+    def _temp_trait_sample(self, sample_name, counter):
+        """Create the sample for a Temp trait from its positional raw value.
+
+        ``counter`` is the 1-based position of the sample. A sample without
+        a value is returned when the position is past the end of the value
+        list or the raw value is ``'x'`` (in any letter case).
+        """
+        if counter <= len(self.this_trait):
+            raw_value = self.this_trait[counter - 1]
+            if isinstance(raw_value, (bytes, bytearray)):
+                raw_value = raw_value.decode("utf-8")
+            if raw_value.lower() != 'x':
+                return webqtlCaseData.webqtlCaseData(
+                    name=sample_name, value=float(raw_value))
+        return webqtlCaseData.webqtlCaseData(name=sample_name)
+
+    def _link_rrid(self, sample):
+        """Turn the sample's RRID attribute into ``[rrid, url]`` for linking.
+
+        Only mouse and rat groups are handled. The identifier is the second
+        piece of the RRID string, split on ":" for mouse and on "_" for rat;
+        values without that separator are left untouched.
+        """
+        attributes = sample.extra_attributes
+        if self._RRID_ATTRIBUTE_ID not in attributes:
+            return
+
+        rrid_value = attributes[self._RRID_ATTRIBUTE_ID]
+        # The attribute dict is shared by every sample that resolves to the
+        # same name, so the value may already have been converted.
+        if isinstance(rrid_value, list):
+            return
+
+        rrid_string = str(rrid_value)
+        species = self.dataset.group.species
+        if species == "mouse":
+            separator = ":"
+        elif species == "rat":
+            separator = "_"
+        else:
+            return
+
+        parts = rrid_string.split(separator)
+        if len(parts) < 2:
+            return
+
+        if species == "mouse":
+            url = webqtlConfig.RRID_MOUSE_URL % parts[1]
+        else:
+            url = webqtlConfig.RRID_RAT_URL % parts[1]
+        attributes[self._RRID_ATTRIBUTE_ID] = [rrid_string, url]
+
+    def do_outliers(self):
+        """Set ``outlier`` on every sample that has a value.
+
+        Samples whose value is None are left untouched. A value of 0 is a
+        real value and is classified like any other.
+        """
+        values = [sample.value for sample in self.sample_list
+                  if sample.value is not None]
+        upper_bound, lower_bound = Plot.find_outliers(values)
+
+        # NOTE(review): assumes find_outliers signals "no bound" with None,
+        # so a bound of 0 is honoured -- confirm against Plot.find_outliers.
+        for sample in self.sample_list:
+            if sample.value is None:
+                continue
+            too_high = upper_bound is not None and sample.value > upper_bound
+            too_low = lower_bound is not None and sample.value < lower_bound
+            sample.outlier = too_high or too_low
+
+    @staticmethod
+    def _is_number(value):
+        """Return True if ``float(value)`` succeeds."""
+        try:
+            float(value)
+        except (TypeError, ValueError, OverflowError):
+            return False
+        return True
+
+    def get_attributes(self):
+        """Finds which extra attributes apply to this dataset
+
+        Fills ``self.attributes`` with one entry per case attribute id,
+        each carrying ``id``, ``name``, ``description``, the naturally
+        sorted ``distinct_values`` and an ``alignment`` ("right" when every
+        distinct value parses as a number, otherwise "left").
+        """
+
+        # Get attribute names and distinct values for each attribute
+        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
+            cursor.execute(
+                "SELECT DISTINCT CaseAttribute.CaseAttributeId, "
+                "CaseAttribute.Name, CaseAttribute.Description, "
+                "CaseAttributeXRefNew.Value FROM "
+                "CaseAttribute, CaseAttributeXRefNew WHERE "
+                "CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.CaseAttributeId "
+                "AND CaseAttributeXRefNew.InbredSetId = %s "
+                "ORDER BY CaseAttribute.CaseAttributeId", (str(self.dataset.group.id),)
+            )
+
+            self.attributes = {}
+            # Rows arrive ordered by attribute id, which groupby relies on
+            for attr, rows in itertools.groupby(
+                    cursor.fetchall(), lambda row: (row[0], row[1], row[2])
+            ):
+                key, name, description = attr
+                attribute = Bunch()
+                attribute.id = key
+                attribute.name = name
+                attribute.description = description
+                attribute.distinct_values = natural_sort(
+                    [row[3] for row in rows])
+                if all(self._is_number(value)
+                       for value in attribute.distinct_values):
+                    attribute.alignment = "right"
+                else:
+                    attribute.alignment = "left"
+                self.attributes[key] = attribute
+
+    def get_extra_attribute_values(self):
+        """Load the case attribute values of every sample in the group.
+
+        Fills ``self.sample_attribute_values`` with, per sample name, a dict
+        from stringified attribute id to value. Attributes known for the
+        group but missing for a sample map to the empty string.
+        """
+        if not self.attributes:
+            return
+
+        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
+            cursor.execute(
+                "SELECT Strain.Name AS SampleName, "
+                "CaseAttributeId AS Id, "
+                "CaseAttributeXRefNew.Value FROM Strain, "
+                "StrainXRef, InbredSet, CaseAttributeXRefNew "
+                "WHERE StrainXRef.StrainId = Strain.Id "
+                "AND InbredSet.Id = StrainXRef.InbredSetId "
+                "AND CaseAttributeXRefNew.StrainId = Strain.Id "
+                "AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId "
+                "AND CaseAttributeXRefNew.InbredSetId = %s "
+                "ORDER BY SampleName", (self.dataset.group.id,)
+            )
+
+            for sample_name, rows in itertools.groupby(
+                    cursor.fetchall(), lambda row: row[0]
+            ):
+                attribute_values = {}
+                for _name, _id, value in rows:
+                    # If it's an int, turn it into one for sorting
+                    # (for example, 101 would be lower than 80 if
+                    # they're strings instead of ints). Anything that is
+                    # not an integer, including a missing value, is kept
+                    # as it is.
+                    try:
+                        value = int(value)
+                    except (TypeError, ValueError):
+                        pass
+
+                    attribute_values[str(_id)] = value
+
+                # Attributes without a value for this sample (that have
+                # values for other samples) are shown as empty
+                for key in self.attributes:
+                    attribute_values.setdefault(
+                        str(self.attributes[key].id), "")
+
+                self.sample_attribute_values[sample_name] = attribute_values
+
+    def get_first_attr_col(self):
+        """Return the index of the first attribute column.
+
+        Starts at 4, shifted by 2 when any sample has a variance
+        (``se_exists``) and by 1 when ``num_cases_exists`` is set.
+        """
+        first_attr_col = 4
+        if self.se_exists:
+            first_attr_col += 2
+        if self.num_cases_exists:
+            first_attr_col += 1
+
+        return first_attr_col
+
+
+def natural_sort(a_list, key=lambda s: s):
+    """
+    Sort the list into natural alphanumeric order.
+
+    Runs of ASCII digits compare as integers, so "a2" sorts before "a10".
+    ``key`` maps each item to the string that is compared. A new list is
+    returned; ``a_list`` is not modified.
+    """
+    def alphanum_key(item):
+        # Splitting on a capturing group alternates text, digits, text, ...
+        # so the odd positions are exactly the [0-9]+ runs. Converting by
+        # position (rather than with str.isdigit) keeps non-ASCII digit
+        # characters such as "²" as text, which int() may reject and which
+        # would otherwise place an int where other keys hold a str.
+        pieces = re.split('([0-9]+)', key(item))
+        return [int(piece) if position % 2 else piece
+                for position, piece in enumerate(pieces)]
+
+    return sorted(a_list, key=alphanum_key)