aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base')
-rwxr-xr-xwqflask/base/data_set.py51
-rw-r--r--wqflask/base/mrna_assay_tissue_data.py146
-rwxr-xr-xwqflask/base/trait.py5
3 files changed, 195 insertions, 7 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 96e04df0..beb62bd7 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -436,8 +436,13 @@ class DataSet(object):
print("Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
- def get_trait_data(self):
- self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
+ def get_trait_data(self, sample_list=None):
+ if sample_list:
+ self.samplelist = sample_list + self.group.parlist + self.group.f1list
+ else:
+ self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
+
+
query = """
SELECT Strain.Name, Strain.Id FROM Strain, Species
WHERE Strain.Name IN {}
@@ -1055,7 +1060,6 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
return results[0]
-
def retrieve_sample_data(self, trait):
query = """
@@ -1076,6 +1080,47 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(trait), escape(self.name))
results = g.db.execute(query).fetchall()
return results
+
+
def retrieve_genes(self, column_name):
    """Map every ProbeSet name in this dataset to one ProbeSet column value.

    Args:
        column_name: Name of the ``ProbeSet`` column to fetch alongside
            ``ProbeSet.Name``. It is spliced into the SQL text as an
            identifier, so it must be a plain identifier (letters, digits
            and underscores, not starting with a digit).

    Returns:
        A dict built from the (ProbeSet.Name, ProbeSet.<column_name>) rows
        linked to this dataset's id through ProbeSetXRef.

    Raises:
        ValueError: If ``column_name`` is not a plain SQL identifier.
    """
    # Local import so this method does not depend on a module-level import.
    import re

    # An identifier cannot be quoted/escaped like a value, so reject anything
    # that could smuggle extra SQL into the statement. \Z (not $) is used so
    # that a trailing newline is rejected as well.
    if not re.match(r"[A-Za-z_][A-Za-z0-9_]*\Z", column_name):
        raise ValueError(
            "Invalid ProbeSet column name: {!r}".format(column_name))

    query = """
            select ProbeSet.Name, ProbeSet.%s
            from ProbeSet,ProbeSetXRef
            where ProbeSetXRef.ProbeSetFreezeId = %s and
                  ProbeSetXRef.ProbeSetId=ProbeSet.Id;
            """ % (column_name, escape(str(self.id)))
    results = g.db.execute(query).fetchall()

    return dict(results)
+
+ #def retrieve_gene_symbols(self):
+ # query = """
+ # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId
+ # from ProbeSet,ProbeSetXRef
+ # where ProbeSetXRef.ProbeSetFreezeId = %s and
+ # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
+ # """ % (self.id)
+ # results = g.db.execute(query).fetchall()
+ # symbol_dict = {}
+ # for item in results:
+ # symbol_dict[item[0]] = item[1]
+ # return symbol_dict
+ #
+ #def retrieve_gene_ids(self):
+ # query = """
+ # select ProbeSet.Name, ProbeSet.GeneId
+ # from ProbeSet,ProbeSetXRef
+ # where ProbeSetXRef.ProbeSetFreezeId = %s and
+ # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
+ # """ % (self.id)
+ # return process_and_run_query(query)
+ # results = g.db.execute(query).fetchall()
+ # symbol_dict = {}
+ # for item in results:
+ # symbol_dict[item[0]] = item[1]
+ # return symbol_dict
+
+
class TempDataSet(DataSet):
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
new file mode 100644
index 00000000..be5df657
--- /dev/null
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -0,0 +1,146 @@
+from __future__ import absolute_import, print_function, division
+
+import collections
+
+from flask import g
+
+from utility import db_tools
+from utility import Bunch
+
+from MySQLdb import escape_string as escape
+
+from pprint import pformat as pf
+
class MrnaAssayTissueData(object):
    """Tissue probe-set annotation looked up by gene symbol.

    On construction, TissueProbeSetXRef is queried (restricted to
    TissueProbeSetFreezeId=1) for the row with the highest Mean per symbol.
    The selected rows are stored in ``self.data``, keyed by the LOWER-CASED
    symbol, each value being a Bunch with ``gene_id``, ``data_id``, ``chr``,
    ``mb``, ``description`` and ``probe_target_description`` attributes.
    """

    def __init__(self, gene_symbols=None):
        """Load annotation rows for ``gene_symbols``.

        Args:
            gene_symbols: Iterable of gene symbols to look up. ``None`` is
                treated as an empty list.
        """
        # Normalise once; everything below uses self.gene_symbols so the
        # default of None no longer crashes on len() / membership tests.
        self.gene_symbols = [] if gene_symbols is None else gene_symbols
        self.have_data = False

        self.data = collections.defaultdict(Bunch)

        query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
                   from (
                   select Symbol, max(Mean) as maxmean
                   from TissueProbeSetXRef
                   where TissueProbeSetFreezeId=1 and '''

        # The inner join is needed to pick, within each symbol group, the
        # single record with the highest mean value. The table involved is
        # small, so the join's performance is acceptable.
        if not self.gene_symbols:
            query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
               as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
               and t.Mean = x.maxmean;
                    '''
        else:
            in_clause = db_tools.create_in_clause(self.gene_symbols)

            query += ''' Symbol in {} group by Symbol)
                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                and t.Mean = x.maxmean;
                    '''.format(in_clause)

        # Build the lookup set once instead of scanning the list per row.
        requested = set(self.gene_symbols)

        # NOTE(review): rows are kept only when their Symbol is literally
        # (case-sensitively) one of the requested symbols. As a consequence
        # nothing is stored when no symbols were requested, even though the
        # query above fetches every symbol -- confirm this is intended.
        for result in g.db.execute(query).fetchall():
            symbol = result[0]
            if symbol not in requested:
                continue

            entry = self.data[symbol.lower()]
            entry.gene_id = result.GeneId
            entry.data_id = result.DataId
            entry.chr = result.Chr
            entry.mb = result.Mb
            entry.description = result.description
            entry.probe_target_description = result.Probe_Target_Description

    def get_symbol_values_pairs(self):
        """Return the tissue expression values for every loaded symbol.

        Returns:
            A dict whose keys are lower-cased gene symbols and whose values
            are lists of the TissueProbeSetData values found for the data
            ids held in ``self.data``. Empty when ``self.data`` is empty.
        """
        id_list = [entry.data_id for entry in self.data.values()]

        # Nothing loaded: skip the query rather than building an IN clause
        # from an empty list.
        if not id_list:
            return {}

        query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
                   FROM TissueProbeSetXRef, TissueProbeSetData
                   WHERE TissueProbeSetData.Id IN {} and
                         TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))

        symbol_values_dict = {}
        for result in g.db.execute(query).fetchall():
            # Group values under the lower-cased symbol, in row order.
            symbol_values_dict.setdefault(result.Symbol.lower(), []).append(result.value)

        return symbol_values_dict
+
+
+ #def get_tissue_values(self, data_id):
+ # """Gets the tissue values for a particular gene"""
+ #
+ # tissue_values=[]
+ #
+ # query = """SELECT value, id
+ # FROM TissueProbeSetData
+ # WHERE Id IN {}""".format(db_tools.create_in_clause(data_id))
+ #
+ # #try :
+ # results = g.db.execute(query).fetchall()
+ # for result in results:
+ # tissue_values.append(result.value)
+ # #symbol_values_dict[symbol] = value_list
+ # #except:
+ # # symbol_values_pairs[symbol] = None
+ #
+ # return tissue_values
+
+########################################################################################################
+#input: cursor, symbolList (list), dataIdDict (dict keyed by symbol)
+#output: SymbolValuePairDict (dict): maps each symbol to the list of
+# expression values of one probeSet.
+#function: wrapper around the getSymbolValuePairDict function:
+# builds the gene symbol list if necessary, splits it into smaller lists if necessary,
+# then calls getSymbolValuePairDict on each and merges the results.
+########################################################################################################
+
+#def get_trait_symbol_and_tissue_values(symbol_list=None):
+# tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
+#
+# #symbolList,
+# #geneIdDict,
+# #dataIdDict,
+# #ChrDict,
+# #MbDict,
+# #descDict,
+# #pTargetDescDict = getTissueProbeSetXRefInfo(
+# # GeneNameLst=GeneNameLst,TissueProbeSetFreezeId=TissueProbeSetFreezeId)
+#
+# if len(tissue_data.gene_symbols):
+# return get_symbol_values_pairs(tissue_data)
+
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index c893c887..6a64eeaf 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -286,7 +286,6 @@ class GeneralTrait(object):
escape(self.dataset.name),
escape(self.name))
trait_info = g.db.execute(query).fetchone()
- #print("trait_info is: ", pf(trait_info))
#XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
# to avoid the problem of same marker name from different species.
elif self.dataset.type == 'Geno':
@@ -359,7 +358,6 @@ class GeneralTrait(object):
InbredSet.SpeciesId = Species.Id AND
Species.TaxonomyId = Homologene.TaxonomyId
""" % (escape(str(self.geneid)), escape(self.dataset.group.name))
- print("-> query is:", query)
result = g.db.execute(query).fetchone()
#else:
# result = None
@@ -391,7 +389,6 @@ class GeneralTrait(object):
Geno.Name = '{}' and
Geno.SpeciesId = Species.Id
""".format(self.dataset.group.species, self.locus)
- print("query is:", query)
result = g.db.execute(query).fetchone()
self.locus_chr = result[0]
self.locus_mb = result[1]
@@ -603,4 +600,4 @@ class GeneralTrait(object):
else:
ZValue = 0.5*log((1.0+self.correlation)/(1.0-self.correlation))
ZValue = ZValue*sqrt(self.overlap-3)
- self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue))) \ No newline at end of file
+ self.p_value = 2.0*(1.0 - reaper.normp(abs(ZValue)))