about summary refs log tree commit diff
path: root/gn2/base/mrna_assay_tissue_data.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn2/base/mrna_assay_tissue_data.py')
-rw-r--r--gn2/base/mrna_assay_tissue_data.py102
1 files changed, 102 insertions, 0 deletions
diff --git a/gn2/base/mrna_assay_tissue_data.py b/gn2/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..7b7914aa
--- /dev/null
+++ b/gn2/base/mrna_assay_tissue_data.py
@@ -0,0 +1,102 @@
+import collections
+
+from gn2.utility import Bunch
+
+
class MrnaAssayTissueData:
    """Tissue mRNA assay records keyed by lower-cased gene symbol.

    On construction one query is run against ``TissueProbeSetXRef`` and
    the selected rows are stored in ``self.data``, a ``defaultdict`` of
    ``Bunch`` objects keyed by lower-cased symbol.  Each entry carries
    the attributes ``gene_id``, ``data_id``, ``chr``, ``mb``,
    ``description`` and ``probe_target_description``.
    """

    def __init__(self, conn, gene_symbols=None):
        """Load the tissue record for each requested gene symbol.

        Args:
            conn: Database connection.  ``conn.cursor()`` is used as a
                context manager and must yield a cursor offering
                ``execute`` and ``fetchall``; the connection is kept on
                ``self.conn`` for later queries.
            gene_symbols: Symbols to look up.  ``None`` or an empty
                sequence selects every non-empty symbol instead.
        """
        self.conn = conn
        self.gene_symbols = [] if gene_symbols is None else gene_symbols
        self.data = collections.defaultdict(Bunch)

        restrict_to_requested = bool(self.gene_symbols)
        if restrict_to_requested:
            placeholders = ", ".join(["%s"] * len(self.gene_symbols))
            symbol_filter = f"Symbol IN ({placeholders})"
        else:
            symbol_filter = "Symbol != '' AND Symbol IS NOT Null"

        # The sub-select computes the highest Mean per symbol; the inner
        # join then picks the row(s) whose Mean equals that maximum, so
        # that each symbol group yields its highest-mean record.
        # Original author's note: the table is small enough for the
        # performance of this inner join to be acceptable.
        query = (
            "SELECT t.Symbol, t.GeneId, t.DataId, "
            "t.Chr, t.Mb, t.description, "
            "t.Probe_Target_Description FROM (SELECT Symbol, "
            "max(Mean) AS maxmean "
            "FROM TissueProbeSetXRef WHERE "
            "TissueProbeSetFreezeId=1 AND "
            f"{symbol_filter} "
            "GROUP BY Symbol) AS x INNER JOIN "
            "TissueProbeSetXRef AS t ON t.Symbol = x.Symbol "
            "AND t.Mean = x.maxmean")

        with conn.cursor() as cursor:
            if restrict_to_requested:
                cursor.execute(query, tuple(self.gene_symbols))
            else:
                cursor.execute(query)
            rows = cursor.fetchall()

        requested = {
            gene_symbol.lower()
            for gene_symbol in self.gene_symbols
            if gene_symbol is not None
        }

        for (symbol, gene_id, data_id, chromosome, mb,
             descr, probeset_target_descr) in rows:
            if symbol is None:
                continue
            key = symbol.lower()
            # Only filter when symbols were actually requested: checking
            # against an empty set would throw away the entire result of
            # the "all symbols" query.
            if restrict_to_requested and key not in requested:
                continue
            # If several rows share a symbol, the last one wins.
            record = self.data[key]
            record.gene_id = gene_id
            record.data_id = data_id
            record.chr = chromosome
            record.mb = mb
            record.description = descr
            record.probe_target_description = probeset_target_descr

    def get_symbol_values_pairs(self):
        """Return the tissue expression values for every loaded symbol.

        Looks up ``TissueProbeSetData`` rows whose ``Id`` is one of the
        ``data_id`` values held in ``self.data``.

        Returns:
            A dictionary whose keys are lower-cased gene symbols and
            whose values are lists of expression values, in the order
            the rows are returned by the database.  Rows with a NULL
            symbol are skipped.  The dictionary is empty, and no query
            is issued, when ``self.data`` holds no records.
        """
        id_list = [record.data_id for record in self.data.values()]
        symbol_values_dict = {}
        if not id_list:
            return symbol_values_dict

        placeholders = ", ".join(["%s"] * len(id_list))
        with self.conn.cursor() as cursor:
            cursor.execute(
                "SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value "
                "FROM TissueProbeSetXRef, TissueProbeSetData"
                f" WHERE TissueProbeSetData.Id IN ({placeholders})"
                " AND TissueProbeSetXRef.DataId = TissueProbeSetData.Id",
                tuple(id_list))
            for symbol, value in cursor.fetchall():
                if symbol is None:
                    # A NULL symbol cannot be used as a key.
                    continue
                symbol_values_dict.setdefault(symbol.lower(), []).append(value)
        return symbol_values_dict