diff options
Diffstat (limited to 'gn2/wqflask/show_trait/SampleList.py')
-rw-r--r-- | gn2/wqflask/show_trait/SampleList.py | 223 |
1 files changed, 223 insertions, 0 deletions
import re
import itertools
from pprint import pformat as pf

from gn2.wqflask.database import database_connection
from gn2.base import webqtlCaseData, webqtlConfig

from gn2.utility import Plot
from gn2.utility import Bunch
from gn2.utility.tools import get_setting

# Key (stringified case-attribute ID) under which a sample's RRID is stored
# in ``sample.extra_attributes``.
_RRID_ATTRIBUTE_ID = '36'

# Splits a string into alternating non-digit / digit runs (see natural_sort).
_DIGIT_RUN = re.compile(r'([0-9]+)')


class SampleList:
    """List of sample objects for one trait of a dataset.

    On construction the instance:

    * loads the case attributes defined for the dataset's group
      (``self.attributes``) and, when a trait and dataset are given, the
      per-sample attribute values (``self.sample_attribute_values``);
    * builds one sample object per entry of ``sample_names`` and stores them
      in ``self.sample_list``;
    * records whether any sample has a variance (``self.se_exists``) or a
      number of cases other than "1" (``self.num_cases_exists``);
    * marks outliers on the samples (see ``do_outliers``).
    """

    def __init__(self,
                 dataset,
                 sample_names,
                 this_trait,
                 sample_group_type="primary",
                 header="Samples"):
        """Build the sample list.

        Args:
            dataset: Dataset object; ``dataset.group.id``, ``.name`` and
                ``.species`` are read.
            sample_names: Iterable of sample names, in display order.
            this_trait: Either a trait object exposing a ``data`` mapping
                keyed by sample name, or (for a Temp trait) a list of raw
                values positionally aligned with ``sample_names``.
            sample_group_type: "primary" or "other".
            header: Heading stored on the instance as ``self.header``.
        """
        self.dataset = dataset
        self.this_trait = this_trait
        self.sample_group_type = sample_group_type  # primary or other
        self.header = header

        self.sample_list = []  # The actual list
        self.sample_attribute_values = {}

        self.get_attributes()

        if self.this_trait and self.dataset:
            self.get_extra_attribute_values()

        for counter, sample_name in enumerate(sample_names, 1):
            sample_name = sample_name.replace("_2nd_", "")
            sample = self._make_sample(counter, sample_name)

            sample.extra_info = {}
            if (self.dataset.group.name == 'AXBXA'
                    and sample_name in ('AXB18/19/20', 'AXB13/14', 'BXA8/17')):
                sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
                sample.extra_info['css_class'] = "fs12"

            sample.this_id = str(counter)

            # For extra attribute columns; currently only used by
            # several datasets
            if self.sample_attribute_values:
                sample.extra_attributes = self.sample_attribute_values.get(
                    sample_name, {})
                self._add_rrid_link(sample)

            self.sample_list.append(sample)

        self.se_exists = any(sample.variance for sample in self.sample_list)
        # A count column is only worth showing when some sample has a
        # number of cases that is set and is not "1".
        self.num_cases_exists = any(
            sample.num_cases and sample.num_cases != "1"
            for sample in self.sample_list)

        first_attr_col = self.get_first_attr_col()
        for sample in self.sample_list:
            sample.first_attr_col = first_attr_col

        self.do_outliers()

    def __repr__(self):
        """Return a debugging dump of every instance attribute."""
        return "<SampleList> --> %s" % (pf(self.__dict__))

    def _make_sample(self, counter, sample_name):
        """Return the sample object for the ``counter``-th (1-based) sample.

        Samples without a value still get an object so that they are
        displayed in the table.
        """
        # self.this_trait will be a list if it is a Temp trait
        if isinstance(self.this_trait, list):
            if counter <= len(self.this_trait):
                raw_value = self.this_trait[counter - 1]
                if isinstance(raw_value, (bytes, bytearray)):
                    text = raw_value.decode("utf-8")
                else:
                    text = raw_value
                # 'x' (any case) marks a missing value
                if text.lower() != 'x':
                    return webqtlCaseData.webqtlCaseData(
                        name=sample_name,
                        value=float(raw_value))
            return webqtlCaseData.webqtlCaseData(name=sample_name)

        try:
            return self.this_trait.data[sample_name]
        except KeyError:
            return webqtlCaseData.webqtlCaseData(name=sample_name)

    def _add_rrid_link(self, sample):
        """Turn the sample's RRID attribute into ``[rrid, url]``.

        The attribute is left unchanged when it is absent, was already
        converted, the species is neither "mouse" nor "rat", or the RRID
        string lacks the species-specific separator.
        """
        attributes = sample.extra_attributes
        if _RRID_ATTRIBUTE_ID not in attributes:
            return

        current = attributes[_RRID_ATTRIBUTE_ID]
        if isinstance(current, list):
            # The attribute dict is shared with self.sample_attribute_values,
            # so a repeated sample name would otherwise be converted twice.
            return

        species = self.dataset.group.species
        if species == "mouse":
            separator = ":"
        elif species == "rat":
            separator = "_"
        else:
            return

        rrid_string = str(current)
        parts = rrid_string.split(separator)
        if len(parts) < 2:
            # No identifier after the separator: nothing to link to.
            return

        if species == "mouse":
            url_template = webqtlConfig.RRID_MOUSE_URL
        else:
            url_template = webqtlConfig.RRID_RAT_URL
        attributes[_RRID_ATTRIBUTE_ID] = [rrid_string, url_template % parts[1]]

    def do_outliers(self):
        """Set ``sample.outlier`` on every sample that has a value.

        The bounds come from ``Plot.find_outliers`` applied to all non-None
        sample values. Samples whose value is None are left untouched.
        """
        values = [sample.value for sample in self.sample_list
                  if sample.value is not None]
        upper_bound, lower_bound = Plot.find_outliers(values)

        # NOTE(review): assumes Plot.find_outliers signals "no bound" with
        # None, so 0 is treated as a legitimate bound and a legitimate
        # sample value -- confirm against Plot.find_outliers.
        for sample in self.sample_list:
            if sample.value is None:
                continue
            above = upper_bound is not None and sample.value > upper_bound
            below = lower_bound is not None and sample.value < lower_bound
            sample.outlier = above or below

    def get_attributes(self):
        """Finds which extra attributes apply to this dataset

        Populates ``self.attributes``: a dict keyed by case-attribute ID
        whose values carry ``id``, ``name``, ``description``,
        ``distinct_values`` (naturally sorted) and ``alignment`` ("right"
        when every distinct value parses as a float, otherwise "left").
        """
        self.attributes = {}

        # Get attribute names and distinct values for each attribute
        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
            cursor.execute(
                "SELECT DISTINCT CaseAttribute.CaseAttributeId, "
                "CaseAttribute.Name, CaseAttribute.Description, "
                "CaseAttributeXRefNew.Value FROM "
                "CaseAttribute, CaseAttributeXRefNew WHERE "
                "CaseAttributeXRefNew.CaseAttributeId = CaseAttribute.CaseAttributeId "
                "AND CaseAttributeXRefNew.InbredSetId = %s "
                "ORDER BY CaseAttribute.CaseAttributeId", (str(self.dataset.group.id),)
            )

            # Rows arrive ordered by attribute ID, which is what groupby
            # needs to produce exactly one group per attribute.
            for (key, name, description), rows in itertools.groupby(
                    cursor.fetchall(), lambda row: (row[0], row[1], row[2])
            ):
                attribute = Bunch()
                attribute.id = key
                attribute.name = name
                attribute.description = description
                attribute.distinct_values = natural_sort(
                    [row[3] for row in rows])
                if all(_is_number(value)
                       for value in attribute.distinct_values):
                    attribute.alignment = "right"
                else:
                    attribute.alignment = "left"
                self.attributes[key] = attribute

    def get_extra_attribute_values(self):
        """Load each sample's case-attribute values.

        Fills ``self.sample_attribute_values`` with
        ``{sample_name: {str(attribute_id): value}}``. Values that parse as
        integers are stored as ints; attributes known for the group but
        missing for a sample are stored as "". Does nothing when
        ``self.attributes`` is empty.
        """
        if not self.attributes:
            return

        known_attr_ids = [
            attribute.id for attribute in self.attributes.values()]

        with database_connection(get_setting("SQL_URI")) as conn, conn.cursor() as cursor:
            cursor.execute(
                "SELECT Strain.Name AS SampleName, "
                "CaseAttributeId AS Id, "
                "CaseAttributeXRefNew.Value FROM Strain, "
                "StrainXRef, InbredSet, CaseAttributeXRefNew "
                "WHERE StrainXRef.StrainId = Strain.Id "
                "AND InbredSet.Id = StrainXRef.InbredSetId "
                "AND CaseAttributeXRefNew.StrainId = Strain.Id "
                "AND InbredSet.Id = CaseAttributeXRefNew.InbredSetId "
                "AND CaseAttributeXRefNew.InbredSetId = %s "
                "ORDER BY SampleName", (self.dataset.group.id,)
            )

            for sample_name, rows in itertools.groupby(
                    cursor.fetchall(), lambda row: row[0]
            ):
                attribute_values = {}
                for _name, attr_id, value in rows:
                    # If it's an int, turn it into one for sorting
                    # (for example, 101 would be lower than 80 if
                    # they're strings instead of ints)
                    try:
                        value = int(value)
                    except (TypeError, ValueError):
                        pass
                    attribute_values[str(attr_id)] = value

                # Attributes that only have values for other samples get an
                # empty placeholder.
                for attr_id in known_attr_ids:
                    attribute_values.setdefault(str(attr_id), "")

                self.sample_attribute_values[sample_name] = attribute_values

    def get_first_attr_col(self):
        """Return the column number assigned to ``sample.first_attr_col``.

        Starts at 4, plus 2 when ``self.se_exists`` and plus 1 when
        ``self.num_cases_exists``.
        """
        first_attr_col = 4
        if self.se_exists:
            first_attr_col += 2
        if self.num_cases_exists:
            first_attr_col += 1

        return first_attr_col


def _is_number(value):
    """Return True when ``float(value)`` succeeds."""
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True


def natural_sort(a_list, key=lambda s: s):
    """
    Sort the list into natural alphanumeric order.

    Runs of ASCII digits compare as integers, so "a2" sorts before "a10".
    ``key`` maps each item to the string that is compared. Returns a new
    list; ``a_list`` is not modified.
    """
    def sort_key(item):
        tokens = _DIGIT_RUN.split(key(item))
        # Splitting on a capturing group alternates text and digit runs, so
        # the odd positions are exactly the ASCII digit runs. Converting by
        # position keeps text tokens as text even when they contain
        # characters that str.isdigit() accepts but int() does not.
        return [int(token) if index % 2 else token
                for index, token in enumerate(tokens)]

    return sorted(a_list, key=sort_key)