From 0bdeca3490c1ddbb7fa29165893a97f90eeefba7 Mon Sep 17 00:00:00 2001 From: Zachary Sloan Date: Fri, 6 Jun 2014 22:00:30 +0000 Subject: Implimented Karl Broman's lodchart code for the interval mapping function. Suggestive/significant bars and additive effect curve added --- wqflask/base/data_set.py | 85 +++++++++++++++++++++++----------- wqflask/base/mrna_assay_tissue_data.py | 11 ++++- wqflask/base/trait.py | 4 ++ 3 files changed, 72 insertions(+), 28 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index f4ca3ae0..a2b13748 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -167,28 +167,56 @@ class Markers(object): marker['Mb'] = float(marker['Mb']) self.markers = markers - print("self.markers:", self.markers) + #print("self.markers:", self.markers) def add_pvalues(self, p_values): print("length of self.markers:", len(self.markers)) print("length of p_values:", len(p_values)) - # THIS IS only needed for the case when we are limiting the number of p-values calculated - if len(self.markers) < len(p_values): - self.markers = self.markers[:len(p_values)] - - for marker, p_value in itertools.izip(self.markers, p_values): - if not p_value: - continue - marker['p_value'] = float(p_value) - if math.isnan(marker['p_value']) or marker['p_value'] <= 0: - marker['lod_score'] = 0 - marker['lrs_value'] = 0 - else: - marker['lod_score'] = -math.log10(marker['p_value']) - #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + if type(p_values) is list: + # THIS IS only needed for the case when we are limiting the number of p-values calculated + #if len(self.markers) > len(p_values): + # self.markers = self.markers[:len(p_values)] + + for marker, p_value in itertools.izip(self.markers, p_values): + if not p_value: + continue + marker['p_value'] = float(p_value) + if math.isnan(marker['p_value']) or marker['p_value'] <= 0: + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + elif type(p_values) is dict: + filtered_markers = [] + for marker in self.markers: + #print("marker[name]", marker['name']) + #print("p_values:", p_values) + if marker['name'] in p_values: + #print("marker {} IS in p_values".format(i)) + marker['p_value'] = p_values[marker['name']] + if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + filtered_markers.append(marker) + #else: + #print("marker {} NOT in p_values".format(i)) + #self.markers.remove(marker) + #del self.markers[i] + self.markers = filtered_markers + + #for i, marker in enumerate(self.markers): + # if not 'p_value' in marker: + # #print("self.markers[i]", self.markers[i]) + # del self.markers[i] + # #self.markers.remove(self.markers[i]) class HumanMarkers(Markers): @@ -226,15 +254,15 @@ class HumanMarkers(Markers): # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - print("p_values2:", p_values) + #print("p_values2:", pf(p_values)) super(HumanMarkers, self).add_pvalues(p_values) - with Bench("deleting markers"): - markers = [] - for marker in self.markers: - if not marker['Mb'] <= 0 and not marker['chr'] == 0: - markers.append(marker) - self.markers = markers + #with Bench("deleting markers"): + # markers = [] + # for marker in self.markers: + # if not marker['Mb'] <= 0 and not marker['chr'] == 0: + # markers.append(marker) + # self.markers = markers @@ -254,7 +282,7 @@ class DatasetGroup(object): self.name = "BXD" self.f1list = None - self.parlist = None + self.parlist = None self.get_f1_parent_strains() #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) @@ -476,8 +504,9 @@ class DataSet(object): else: self.samplelist = self.group.samplelist - if (self.group.parlist + self.group.f1list) in self.samplelist: - self.samplelist += self.group.parlist + self.group.f1list + if self.group.parlist != None and self.group.f1list != None: + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species @@ -547,7 +576,11 @@ class DataSet(object): order by {}.Id """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) + + #print("trait data query: ", query) + results = g.db.execute(query).fetchall() + #print("query results:", results) trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index be5df657..1a05fce7 100755 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -19,6 +19,8 @@ class MrnaAssayTissueData(object): if self.gene_symbols == None: self.gene_symbols = [] + print("self.gene_symbols:", self.gene_symbols) + self.data = collections.defaultdict(Bunch) #self.gene_id_dict ={} @@ -28,7 +30,7 @@ class MrnaAssayTissueData(object): #self.desc_dict = {} #self.probe_target_desc_dict = {} - query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean from TissueProbeSetXRef @@ -53,6 +55,7 @@ class MrnaAssayTissueData(object): '''.format(in_clause) results = g.db.execute(query).fetchall() + for result in results: symbol = result[0] if symbol in gene_symbols: @@ -66,7 +69,7 @@ class MrnaAssayTissueData(object): self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description - #print("self.data: ", pf(self.data)) + print("self.data: ", pf(self.data)) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -79,6 +82,8 @@ class MrnaAssayTissueData(object): def get_symbol_values_pairs(self): id_list = [self.data[symbol].data_id for symbol in self.data] + print("id_list:", id_list) + symbol_values_dict = {} query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value @@ -86,6 +91,8 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + print("TISSUE QUERY:", query) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 712d9af5..3f80d4a4 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -40,6 +40,7 @@ class GeneralTrait(object): else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. + print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -295,6 +296,9 @@ class GeneralTrait(object): PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id = %s """ % (self.name, self.dataset.id) + + print("query is:", query) + trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. -- cgit v1.2.3