diff options
Diffstat (limited to 'wqflask/base/mrna_assay_tissue_data.py')
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 102 |
1 files changed, 0 insertions, 102 deletions
# Reconstructed from the deleted-file diff of
# wqflask/base/mrna_assay_tissue_data.py (index a78182e3..00000000).
import collections


class MrnaAssayTissueData:
    """Tissue mRNA assay records keyed by lower-cased gene symbol.

    On construction, ``TissueProbeSetXRef`` is queried (restricted to
    ``TissueProbeSetFreezeId=1``) for the row(s) whose ``Mean`` equals the
    maximum ``Mean`` of their symbol group.  Rows whose symbol matches one
    of the requested ``gene_symbols`` (case-insensitively) are stored in
    ``data``.  If several rows tie on the maximum mean for one symbol, the
    last row returned by the database overwrites the earlier ones.

    Attributes:
        gene_symbols: The symbols passed in (``[]`` when none were given).
        conn: The database connection; it is reused by
            ``get_symbol_values_pairs``.
        data: ``defaultdict(Bunch)`` mapping lower-cased symbol to a record
            with ``gene_id``, ``data_id``, ``chr``, ``mb``, ``description``
            and ``probe_target_description`` attributes.
    """

    # The inner join against the grouped sub-select is what picks, for each
    # symbol group, the record with the highest mean value.  The original
    # author noted that the limited size of the tissue probe-set table keeps
    # the performance of this join acceptable.
    # Only the symbol filter between head and tail differs between the
    # "all symbols" and the "selected symbols" variants of the query.
    _XREF_QUERY_HEAD = (
        "SELECT t.Symbol, t.GeneId, t.DataId, "
        "t.Chr, t.Mb, t.description, "
        "t.Probe_Target_Description FROM (SELECT Symbol, "
        "max(Mean) AS maxmean "
        "FROM TissueProbeSetXRef WHERE "
        "TissueProbeSetFreezeId=1 AND ")
    _XREF_QUERY_TAIL = (
        "GROUP BY Symbol) AS x INNER JOIN "
        "TissueProbeSetXRef AS t ON t.Symbol = x.Symbol "
        "AND t.Mean = x.maxmean")

    def __init__(self, conn, gene_symbols=None):
        """Load the highest-mean tissue record for each requested symbol.

        Args:
            conn: Database connection whose ``cursor()`` is usable as a
                context manager and accepts ``%s`` placeholders.
            gene_symbols: Sized collection of gene symbols (``None``
                entries are tolerated).  ``None`` is treated as ``[]``.
        """
        # Project-local helper, imported lazily so that importing this
        # module does not itself require ``utility``.  Used only as the
        # zero-argument factory for the per-symbol records below.
        from utility import Bunch

        self.gene_symbols = [] if gene_symbols is None else gene_symbols
        self.conn = conn
        self.data = collections.defaultdict(Bunch)

        symbol_count = len(self.gene_symbols)
        query = self._build_xref_query(symbol_count)
        with conn.cursor() as cursor:
            if symbol_count == 0:
                cursor.execute(query)
            else:
                cursor.execute(query, tuple(self.gene_symbols))
            rows = cursor.fetchall()

        # Matching is case-insensitive; None entries cannot match anything.
        wanted = {
            gene_symbol.lower()
            for gene_symbol in self.gene_symbols
            if gene_symbol is not None
        }

        # NOTE(review): when no gene symbols were given, the unfiltered
        # query above still runs, but ``wanted`` is empty, so every row is
        # dropped and ``data`` stays empty.  This matches the previous
        # behaviour -- confirm whether loading all symbols was intended.
        for (symbol, gene_id, data_id, chromosome, megabase,
             description, probe_target_description) in rows:
            if symbol is None or symbol.lower() not in wanted:
                continue
            record = self.data[symbol.lower()]
            record.gene_id = gene_id
            record.data_id = data_id
            record.chr = chromosome
            record.mb = megabase
            record.description = description
            record.probe_target_description = probe_target_description

    @classmethod
    def _build_xref_query(cls, symbol_count):
        """Return the XRef lookup SQL with ``symbol_count`` placeholders."""
        if symbol_count == 0:
            symbol_filter = "Symbol != '' AND Symbol IS NOT Null "
        else:
            placeholders = ", ".join(["%s"] * symbol_count)
            symbol_filter = f"Symbol IN ({placeholders}) "
        return cls._XREF_QUERY_HEAD + symbol_filter + cls._XREF_QUERY_TAIL

    def get_symbol_values_pairs(self):
        """Return tissue expression values grouped by gene symbol.

        Looks up ``TissueProbeSetData.value`` for the ``data_id`` of every
        record held in ``self.data``.

        Returns:
            dict: lower-cased symbol -> list of expression values, in the
            order the database returned them.  Empty when ``self.data``
            holds no records (no query is issued in that case).  Rows whose
            symbol is NULL are skipped, mirroring the constructor.
        """
        data_ids = [record.data_id for record in self.data.values()]
        if not data_ids:
            return {}

        placeholders = ", ".join(["%s"] * len(data_ids))
        with self.conn.cursor() as cursor:
            cursor.execute(
                "SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value "
                "FROM TissueProbeSetXRef, TissueProbeSetData"
                f" WHERE TissueProbeSetData.Id IN ({placeholders})"
                " AND TissueProbeSetXRef.DataId = TissueProbeSetData.Id",
                tuple(data_ids))
            rows = cursor.fetchall()

        symbol_values = {}
        for symbol, value in rows:
            if symbol is None:
                # A NULL symbol has no usable key; previously this crashed
                # on ``None.lower()``.
                continue
            symbol_values.setdefault(symbol.lower(), []).append(value)
        return symbol_values