From e5896aebed0c7a433ecd79553b85920752f518d8 Mon Sep 17 00:00:00 2001 From: Artem Tarasov Date: Fri, 15 May 2015 16:31:25 +0300 Subject: reduce number of queries --- wqflask/wqflask/show_trait/SampleList.py | 114 +++++++++++++------------------ 1 file changed, 47 insertions(+), 67 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py index 060cb519..46f2ae43 100755 --- a/wqflask/wqflask/show_trait/SampleList.py +++ b/wqflask/wqflask/show_trait/SampleList.py @@ -10,6 +10,8 @@ import numpy as np from scipy import stats from pprint import pformat as pf +import itertools + class SampleList(object): def __init__(self, dataset, @@ -25,14 +27,12 @@ class SampleList(object): self.sample_list = [] # The actual list - try: - self.get_attributes() - except Exception: - print("failed to get attributes") - self.attributes = {} - + self.get_attributes() print("camera: attributes are:", pf(self.attributes)) + if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet': + self.get_extra_attribute_values() + for counter, sample_name in enumerate(sample_names, 1): sample_name = sample_name.replace("_2nd_", "") @@ -59,8 +59,8 @@ class SampleList(object): sample.this_id = "Other_" + str(counter) #### For extra attribute columns; currently only used by several datasets - Zach - if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet': - sample.extra_attributes = self.get_extra_attribute_values(sample_name) + if self.sample_attribute_values: + sample.extra_attributes = self.sample_attribute_values.get(sample_name, {}) print("sample.extra_attributes is", pf(sample.extra_attributes)) self.sample_list.append(sample) @@ -135,75 +135,55 @@ class SampleList(object): else: sample.outlier = False - def get_attributes(self): """Finds which extra attributes apply to this dataset""" - - #ZS: Id and name values for this trait's extra attributes - case_attributes = g.db.execute('''SELECT CaseAttribute.Id, CaseAttribute.Name - FROM CaseAttribute, CaseAttributeXRef - WHERE CaseAttributeXRef.ProbeSetFreezeId = %s AND - CaseAttribute.Id = CaseAttributeXRef.CaseAttributeId - group by CaseAttributeXRef.CaseAttributeId''', - (str(self.dataset.id),)) + # Get attribute names and distinct values for each attribute + results = g.db.execute(''' + SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttributeXRef.Value + FROM CaseAttribute, CaseAttributeXRef + WHERE CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id + AND CaseAttributeXRef.ProbeSetFreezeId = %s + ORDER BY CaseAttribute.Name''', (str(self.dataset.id),)) self.attributes = {} - for key, value in case_attributes.fetchall(): - print("radish: %s - %s" % (key, value)) + for attr, values in itertools.groupby(results.fetchall(), lambda row: (row.Id, row.Name)): + key, name = attr + print("radish: %s - %s" % (key, name)) self.attributes[key] = Bunch() - self.attributes[key].name = value - - attribute_values = g.db.execute('''SELECT DISTINCT CaseAttributeXRef.Value - FROM CaseAttribute, CaseAttributeXRef - WHERE CaseAttribute.Name = %s AND - CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id''', (value,)) - - self.attributes[key].distinct_values = [item[0] for item in attribute_values.fetchall()] + self.attributes[key].name = name + self.attributes[key].distinct_values = [item.Value for item in values] self.attributes[key].distinct_values.sort(key=natural_sort_key) - - def get_extra_attribute_values(self, sample_name): - - attribute_values = {} - + def get_extra_attribute_values(self): + self.sample_attribute_values = {} if self.attributes: + results = g.db.execute(''' + SELECT Strain.Name AS SampleName, CaseAttributeId AS Id, CaseAttributeXRef.Value + FROM Strain, StrainXRef, InbredSet, CaseAttributeXRef + WHERE StrainXRef.StrainId = Strain.Id + AND InbredSet.Id = StrainXRef.InbredSetId + AND CaseAttributeXRef.StrainId = Strain.Id + AND InbredSet.Name = %s + AND CaseAttributeXRef.ProbeSetFreezeId = %s + ORDER BY SampleName''', + (self.dataset.group.name, self.this_trait.dataset.id)) + + for sample_name, items in itertools.groupby(results.fetchall(), lambda row: row.SampleName): + attribute_values = {} + for item in items: + attribute_value = item.Value + + #ZS: If it's an int, turn it into one for sorting + #(for example, 101 would be lower than 80 if they're strings instead of ints) + try: + attribute_value = int(attribute_value) + except ValueError: + pass + + attribute_values[self.attributes[item.Id].name] = attribute_value + self.sample_attribute_values[sample_name] = attribute_values - #ZS: Get StrainId value for the next query - result = g.db.execute("""SELECT Strain.Id - FROM Strain, StrainXRef, InbredSet - WHERE Strain.Name = %s and - StrainXRef.StrainId = Strain.Id and - InbredSet.Id = StrainXRef.InbredSetId and - InbredSet.Name = %s""", (sample_name, - self.dataset.group.name)) - - sample_id = result.fetchone().Id - - for attribute in self.attributes: - - #ZS: Add extra case attribute values (if any) - result = g.db.execute("""SELECT Value - FROM CaseAttributeXRef - WHERE ProbeSetFreezeId = %s AND - StrainId = %s AND - CaseAttributeId = %s - group by CaseAttributeXRef.CaseAttributeId""", ( - self.this_trait.dataset.id, sample_id, str(attribute))) - - attribute_value = result.fetchone().Value #Trait-specific attributes, if any - - #ZS: If it's an int, turn it into one for sorting - #(for example, 101 would be lower than 80 if they're strings instead of ints) - try: - attribute_value = int(attribute_value) - except ValueError: - pass - - attribute_values[self.attributes[attribute].name] = attribute_value - - return attribute_values - def se_exists(self): """Returns true if SE values exist for any samples, otherwise false""" -- cgit v1.2.3