diff options
author | Lei Yan | 2014-07-14 16:23:12 +0000 |
---|---|---|
committer | Lei Yan | 2014-07-14 17:02:01 +0000 |
commit | 214bf0128b8ecdda718983c5563cf34160743758 (patch) | |
tree | 917e7a5ae78dc4ff65dc08d107a0e3ae3a47c67c /wqflask/base | |
parent | 8de6fec18cd98a10c58702c448a1e01e147dc5f7 (diff) | |
parent | fbdbf4b7410185e2a978ecc8e120ae56ff6da0ce (diff) | |
download | genenetwork2-214bf0128b8ecdda718983c5563cf34160743758.tar.gz |
Merge /home/zas1024/gene
Conflicts:
wqflask/wqflask/static/new/javascript/dataset_select_menu.js
wqflask/wqflask/templates/corr_scatter_plot_old.html
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x[-rw-r--r--] | wqflask/base/JinjaPage.py | 0 | ||||
-rwxr-xr-x | wqflask/base/anon_collection.py | 21 | ||||
-rwxr-xr-x | wqflask/base/data_set.py | 134 | ||||
-rwxr-xr-x[-rw-r--r--] | wqflask/base/generate_probesetfreeze_file.py | 0 | ||||
-rwxr-xr-x[-rw-r--r--] | wqflask/base/mrna_assay_tissue_data.py | 11 | ||||
-rwxr-xr-x[-rw-r--r--] | wqflask/base/species.py | 6 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 6 | ||||
-rwxr-xr-x | wqflask/base/trait_collection.py | 53 |
8 files changed, 192 insertions, 39 deletions
diff --git a/wqflask/base/JinjaPage.py b/wqflask/base/JinjaPage.py index 07e485b1..07e485b1 100644..100755 --- a/wqflask/base/JinjaPage.py +++ b/wqflask/base/JinjaPage.py diff --git a/wqflask/base/anon_collection.py b/wqflask/base/anon_collection.py new file mode 100755 index 00000000..8ee73296 --- /dev/null +++ b/wqflask/base/anon_collection.py @@ -0,0 +1,21 @@ +class AnonCollection(TraitCollection):
+
+ def __init__(self, anon_id)
+ self.anon_id = anon_id
+ self.collection_members = Redis.smembers(self.anon_id)
+ print("self.collection_members is:", self.collection_members)
+ self.num_members = len(self.collection_members)
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits(traits_to_remove):
+ print("traits_to_remove:", traits_to_remove)
+ for trait in traits_to_remove:
+ Redis.srem(self.anon_id, trait)
+ members_now = self.collection_members - traits_to_remove
+ print("members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index fbe78d5d..2a79dc9c 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -23,6 +23,7 @@ import os import math import string import collections +import codecs import json import gzip @@ -156,38 +157,90 @@ class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" def __init__(self, name): json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) - self.markers = json.load(json_data_fh) + markers = json.load(json_data_fh) + + for marker in markers: + if (marker['chr'] != "X") and (marker['chr'] != "Y"): + marker['chr'] = int(marker['chr']) + #else: + # marker['chr'] = 20 + print("Mb:", marker['Mb']) + marker['Mb'] = float(marker['Mb']) + + self.markers = markers + #print("self.markers:", self.markers) + def add_pvalues(self, p_values): - #print("length of self.markers:", len(self.markers)) - #print("length of p_values:", len(p_values)) - - # THIS IS only needed for the case when we are limiting the number of p-values calculated - if len(self.markers) < len(p_values): - self.markers = self.markers[:len(p_values)] - - for marker, p_value in itertools.izip(self.markers, p_values): - marker['p_value'] = p_value - if math.isnan(marker['p_value']): - print("p_value is:", marker['p_value']) - marker['lod_score'] = -math.log10(marker['p_value']) - #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - + print("length of self.markers:", len(self.markers)) + print("length of p_values:", len(p_values)) + if type(p_values) is list: + # THIS IS only needed for the case when we are limiting the number of p-values calculated + #if len(self.markers) > len(p_values): + # self.markers = self.markers[:len(p_values)] + + for marker, p_value in itertools.izip(self.markers, p_values): + if not p_value: + continue + marker['p_value'] = float(p_value) + if math.isnan(marker['p_value']) or marker['p_value'] <= 0: + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + elif type(p_values) is dict: + filtered_markers = [] + for marker in self.markers: + #print("marker[name]", marker['name']) + #print("p_values:", p_values) + if marker['name'] in p_values: + #print("marker {} IS in p_values".format(i)) + marker['p_value'] = p_values[marker['name']] + if math.isnan(marker['p_value']) or (marker['p_value'] <= 0): + marker['lod_score'] = 0 + marker['lrs_value'] = 0 + else: + marker['lod_score'] = -math.log10(marker['p_value']) + #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values + marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + filtered_markers.append(marker) + #else: + #print("marker {} NOT in p_values".format(i)) + #self.markers.remove(marker) + #del self.markers[i] + self.markers = filtered_markers + + #for i, marker in enumerate(self.markers): + # if not 'p_value' in marker: + # #print("self.markers[i]", self.markers[i]) + # del self.markers[i] + # #self.markers.remove(self.markers[i]) class HumanMarkers(Markers): - def __init__(self, name): + def __init__(self, name, specified_markers = []): marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim')) self.markers = [] for line in marker_data_fh: splat = line.strip().split() - marker = {} - marker['chr'] = int(splat[0]) - marker['name'] = splat[1] - marker['Mb'] = float(splat[3]) / 1000000 + #print("splat:", splat) + if len(specified_markers) > 0: + if splat[1] in specified_markers: + marker = {} + marker['chr'] = int(splat[0]) + marker['name'] = splat[1] + marker['Mb'] = float(splat[3]) / 1000000 + else: + continue + else: + marker = {} + marker['chr'] = int(splat[0]) + marker['name'] = splat[1] + marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) #print("markers is: ", pf(self.markers)) @@ -203,14 +256,15 @@ class HumanMarkers(Markers): # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 + #print("p_values2:", pf(p_values)) super(HumanMarkers, self).add_pvalues(p_values) - with Bench("deleting markers"): - markers = [] - for marker in self.markers: - if not marker['Mb'] <= 0 and not marker['chr'] == 0: - markers.append(marker) - self.markers = markers + #with Bench("deleting markers"): + # markers = [] + # for marker in self.markers: + # if not marker['Mb'] <= 0 and not marker['chr'] == 0: + # markers.append(marker) + # self.markers = markers @@ -230,7 +284,7 @@ class DatasetGroup(object): self.name = "BXD" self.f1list = None - self.parlist = None + self.parlist = None self.get_f1_parent_strains() #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) @@ -239,6 +293,8 @@ class DatasetGroup(object): self.incparentsf1 = False self.allsamples = None + def get_specified_markers(self, markers = []): + self.markers = HumanMarkers(self.name, markers) def get_markers(self): #print("self.species is:", self.species) @@ -450,8 +506,9 @@ class DataSet(object): else: self.samplelist = self.group.samplelist - if (self.group.parlist + self.group.f1list) in self.samplelist: - self.samplelist += self.group.parlist + self.group.f1list + if self.group.parlist != None and self.group.f1list != None: + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species @@ -521,7 +578,11 @@ class DataSet(object): order by {}.Id """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) + + #print("trait data query: ", query) + results = g.db.execute(query).fetchall() + #print("query results:", results) trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) @@ -611,6 +672,7 @@ class PhenotypeDataSet(DataSet): def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: + if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) @@ -620,6 +682,7 @@ class PhenotypeDataSet(DataSet): #phenotype traits, then display the pre-publication description instead #of the post-publication description if this_trait.confidential: + this_trait.description_display = "" continue # for now if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( @@ -629,7 +692,12 @@ class PhenotypeDataSet(DataSet): description = this_trait.pre_publication_description - this_trait.description_display = description.strip() + if len(description) > 0: + this_trait.description_display = description.strip() + else: + this_trait.description_display = "" + + print("this_trait.description_display is:", this_trait.description_display) if not this_trait.year.isdigit(): this_trait.pubmed_text = "N/A" @@ -952,8 +1020,8 @@ class MrnaAssayDataSet(DataSet): #XZ, 12/08/2008: description #XZ, 06/05/2009: Rob asked to add probe target description - description_string = str(this_trait.description).strip() - target_string = str(this_trait.probe_target_description).strip() + description_string = unicode(str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') + target_string = unicode(str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': description_display = description_string diff --git a/wqflask/base/generate_probesetfreeze_file.py b/wqflask/base/generate_probesetfreeze_file.py index a0ff804b..a0ff804b 100644..100755 --- a/wqflask/base/generate_probesetfreeze_file.py +++ b/wqflask/base/generate_probesetfreeze_file.py diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index be5df657..1a05fce7 100644..100755 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -19,6 +19,8 @@ class MrnaAssayTissueData(object): if self.gene_symbols == None: self.gene_symbols = [] + print("self.gene_symbols:", self.gene_symbols) + self.data = collections.defaultdict(Bunch) #self.gene_id_dict ={} @@ -28,7 +30,7 @@ class MrnaAssayTissueData(object): #self.desc_dict = {} #self.probe_target_desc_dict = {} - query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description + query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description from ( select Symbol, max(Mean) as maxmean from TissueProbeSetXRef @@ -53,6 +55,7 @@ class MrnaAssayTissueData(object): '''.format(in_clause) results = g.db.execute(query).fetchall() + for result in results: symbol = result[0] if symbol in gene_symbols: @@ -66,7 +69,7 @@ class MrnaAssayTissueData(object): self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description - #print("self.data: ", pf(self.data)) + print("self.data: ", pf(self.data)) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -79,6 +82,8 @@ class MrnaAssayTissueData(object): def get_symbol_values_pairs(self): id_list = [self.data[symbol].data_id for symbol in self.data] + print("id_list:", id_list) + symbol_values_dict = {} query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value @@ -86,6 +91,8 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + print("TISSUE QUERY:", query) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: diff --git a/wqflask/base/species.py b/wqflask/base/species.py index ebc2bfed..52bd8297 100644..100755 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -13,7 +13,7 @@ from pprint import pformat as pf class TheSpecies(object): def __init__(self, dataset): self.dataset = dataset - print("self.dataset is:", pf(self.dataset.__dict__)) + #print("self.dataset is:", pf(self.dataset.__dict__)) self.chromosomes = Chromosomes(self.dataset) self.genome_mb_length = self.chromosomes.get_genome_mb_length() @@ -56,8 +56,8 @@ class Chromosomes(object): InbredSet.Name = %s Order by OrderId """, self.dataset.group.name).fetchall() - print("group: ", self.dataset.group.name) - print("bike:", results) + #print("group: ", self.dataset.group.name) + #print("bike:", results) for item in results: self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 712d9af5..2bbd1f2a 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -40,6 +40,7 @@ class GeneralTrait(object): else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. + print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -295,6 +296,9 @@ class GeneralTrait(object): PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND PublishFreeze.Id = %s """ % (self.name, self.dataset.id) + + print("query is:", query) + trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name #XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms. @@ -641,7 +645,7 @@ def get_sample_data(): trait_ob = GeneralTrait(name=trait, dataset_name=dataset) - return json.dumps({key: value.value for key, value in trait_ob.data.iteritems() }) + return json.dumps([trait, {key: value.value for key, value in trait_ob.data.iteritems() }]) #jsonable_sample_data = {} #for sample in trait_ob.data.iteritems(): diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py new file mode 100755 index 00000000..d388a3af --- /dev/null +++ b/wqflask/base/trait_collection.py @@ -0,0 +1,53 @@ +class TraitCollection(object):
+
+ def __init__(self, is_anon=False):
+ self.is_anon = is_anon
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits():
+ if is_anon:
+ AnonCollection.remove_traits()
+ else:
+ UserCollection.remove_traits()
+
+ params = request.form
+ print("params are:", params)
+ uc_id = params['uc_id']
+ uc = model.UserCollection.query.get(uc_id)
+ traits_to_remove = params.getlist('traits[]')
+ print("traits_to_remove are:", traits_to_remove)
+ traits_to_remove = process_traits(traits_to_remove)
+ print("\n\n after processing, traits_to_remove:", traits_to_remove)
+ all_traits = uc.members_as_set()
+ print(" all_traits:", all_traits)
+ members_now = all_traits - traits_to_remove
+ print(" members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(all_traits), len(members_now)))
+ uc.members = json.dumps(list(members_now))
+ uc.changed_timestamp = datetime.datetime.utcnow()
+ db_session.commit()
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))
+
+ def __init__(self, anon_id)
+ self.anon_key = anon_key
+ self.collection_members = Redis.smembers(self.anon_id)
+ print("self.collection_members is:", self.collection_members)
+ self.num_members = len(self.collection_members)
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits(traits_to_remove):
+ print("traits_to_remove:", traits_to_remove)
+ for trait in traits_to_remove:
+ Redis.srem(self.anon_id, trait)
+ members_now = self.collection_members - traits_to_remove
+ print("members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))
|