about | summary | refs | log | tree | commit | diff
path: root/wqflask/base
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x [-rw-r--r--]  wqflask/base/JinjaPage.py                    |   0
-rwxr-xr-x               wqflask/base/anon_collection.py              |  21
-rwxr-xr-x               wqflask/base/data_set.py                     | 134
-rwxr-xr-x [-rw-r--r--]  wqflask/base/generate_probesetfreeze_file.py |   0
-rwxr-xr-x [-rw-r--r--]  wqflask/base/mrna_assay_tissue_data.py       |  11
-rwxr-xr-x [-rw-r--r--]  wqflask/base/species.py                      |   6
-rwxr-xr-x               wqflask/base/trait.py                        |   6
-rwxr-xr-x               wqflask/base/trait_collection.py             |  53
8 files changed, 192 insertions, 39 deletions
diff --git a/wqflask/base/JinjaPage.py b/wqflask/base/JinjaPage.py
index 07e485b1..07e485b1 100644..100755
--- a/wqflask/base/JinjaPage.py
+++ b/wqflask/base/JinjaPage.py
diff --git a/wqflask/base/anon_collection.py b/wqflask/base/anon_collection.py
new file mode 100755
index 00000000..8ee73296
--- /dev/null
+++ b/wqflask/base/anon_collection.py
@@ -0,0 +1,21 @@
+class AnonCollection(TraitCollection):
+
+ def __init__(self, anon_id)
+ self.anon_id = anon_id
+ self.collection_members = Redis.smembers(self.anon_id)
+ print("self.collection_members is:", self.collection_members)
+ self.num_members = len(self.collection_members)
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits(traits_to_remove):
+ print("traits_to_remove:", traits_to_remove)
+ for trait in traits_to_remove:
+ Redis.srem(self.anon_id, trait)
+ members_now = self.collection_members - traits_to_remove
+ print("members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index fbe78d5d..2a79dc9c 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -23,6 +23,7 @@ import os
import math
import string
import collections
+import codecs
import json
import gzip
@@ -156,38 +157,90 @@ class Markers(object):
"""Todo: Build in cacheing so it saves us reading the same file more than once"""
def __init__(self, name):
json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
- self.markers = json.load(json_data_fh)
+ markers = json.load(json_data_fh)
+
+ for marker in markers:
+ if (marker['chr'] != "X") and (marker['chr'] != "Y"):
+ marker['chr'] = int(marker['chr'])
+ #else:
+ # marker['chr'] = 20
+ print("Mb:", marker['Mb'])
+ marker['Mb'] = float(marker['Mb'])
+
+ self.markers = markers
+ #print("self.markers:", self.markers)
+
def add_pvalues(self, p_values):
- #print("length of self.markers:", len(self.markers))
- #print("length of p_values:", len(p_values))
-
- # THIS IS only needed for the case when we are limiting the number of p-values calculated
- if len(self.markers) < len(p_values):
- self.markers = self.markers[:len(p_values)]
-
- for marker, p_value in itertools.izip(self.markers, p_values):
- marker['p_value'] = p_value
- if math.isnan(marker['p_value']):
- print("p_value is:", marker['p_value'])
- marker['lod_score'] = -math.log10(marker['p_value'])
- #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
- marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
-
+ print("length of self.markers:", len(self.markers))
+ print("length of p_values:", len(p_values))
+ if type(p_values) is list:
+ # THIS IS only needed for the case when we are limiting the number of p-values calculated
+ #if len(self.markers) > len(p_values):
+ # self.markers = self.markers[:len(p_values)]
+
+ for marker, p_value in itertools.izip(self.markers, p_values):
+ if not p_value:
+ continue
+ marker['p_value'] = float(p_value)
+ if math.isnan(marker['p_value']) or marker['p_value'] <= 0:
+ marker['lod_score'] = 0
+ marker['lrs_value'] = 0
+ else:
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ elif type(p_values) is dict:
+ filtered_markers = []
+ for marker in self.markers:
+ #print("marker[name]", marker['name'])
+ #print("p_values:", p_values)
+ if marker['name'] in p_values:
+ #print("marker {} IS in p_values".format(i))
+ marker['p_value'] = p_values[marker['name']]
+ if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
+ marker['lod_score'] = 0
+ marker['lrs_value'] = 0
+ else:
+ marker['lod_score'] = -math.log10(marker['p_value'])
+ #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
+ marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ filtered_markers.append(marker)
+ #else:
+ #print("marker {} NOT in p_values".format(i))
+ #self.markers.remove(marker)
+ #del self.markers[i]
+ self.markers = filtered_markers
+
+ #for i, marker in enumerate(self.markers):
+ # if not 'p_value' in marker:
+ # #print("self.markers[i]", self.markers[i])
+ # del self.markers[i]
+ # #self.markers.remove(self.markers[i])
class HumanMarkers(Markers):
- def __init__(self, name):
+ def __init__(self, name, specified_markers = []):
marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim'))
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
- marker = {}
- marker['chr'] = int(splat[0])
- marker['name'] = splat[1]
- marker['Mb'] = float(splat[3]) / 1000000
+ #print("splat:", splat)
+ if len(specified_markers) > 0:
+ if splat[1] in specified_markers:
+ marker = {}
+ marker['chr'] = int(splat[0])
+ marker['name'] = splat[1]
+ marker['Mb'] = float(splat[3]) / 1000000
+ else:
+ continue
+ else:
+ marker = {}
+ marker['chr'] = int(splat[0])
+ marker['name'] = splat[1]
+ marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
#print("markers is: ", pf(self.markers))
@@ -203,14 +256,15 @@ class HumanMarkers(Markers):
# #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
# marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
+ #print("p_values2:", pf(p_values))
super(HumanMarkers, self).add_pvalues(p_values)
- with Bench("deleting markers"):
- markers = []
- for marker in self.markers:
- if not marker['Mb'] <= 0 and not marker['chr'] == 0:
- markers.append(marker)
- self.markers = markers
+ #with Bench("deleting markers"):
+ # markers = []
+ # for marker in self.markers:
+ # if not marker['Mb'] <= 0 and not marker['chr'] == 0:
+ # markers.append(marker)
+ # self.markers = markers
@@ -230,7 +284,7 @@ class DatasetGroup(object):
self.name = "BXD"
self.f1list = None
- self.parlist = None
+ self.parlist = None
self.get_f1_parent_strains()
#print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
@@ -239,6 +293,8 @@ class DatasetGroup(object):
self.incparentsf1 = False
self.allsamples = None
+ def get_specified_markers(self, markers = []):
+ self.markers = HumanMarkers(self.name, markers)
def get_markers(self):
#print("self.species is:", self.species)
@@ -450,8 +506,9 @@ class DataSet(object):
else:
self.samplelist = self.group.samplelist
- if (self.group.parlist + self.group.f1list) in self.samplelist:
- self.samplelist += self.group.parlist + self.group.f1list
+ if self.group.parlist != None and self.group.f1list != None:
+ if (self.group.parlist + self.group.f1list) in self.samplelist:
+ self.samplelist += self.group.parlist + self.group.f1list
query = """
SELECT Strain.Name, Strain.Id FROM Strain, Species
@@ -521,7 +578,11 @@ class DataSet(object):
order by {}.Id
""".format(*mescape(self.type, self.type, self.type, self.type,
self.name, dataset_type, self.type, self.type, dataset_type))
+
+ #print("trait data query: ", query)
+
results = g.db.execute(query).fetchall()
+ #print("query results:", results)
trait_sample_data.append(results)
trait_count = len(trait_sample_data[0])
@@ -611,6 +672,7 @@ class PhenotypeDataSet(DataSet):
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
+
if not this_trait.haveinfo:
this_trait.retrieve_info(get_qtl_info=True)
@@ -620,6 +682,7 @@ class PhenotypeDataSet(DataSet):
#phenotype traits, then display the pre-publication description instead
#of the post-publication description
if this_trait.confidential:
+ this_trait.description_display = ""
continue # for now
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
@@ -629,7 +692,12 @@ class PhenotypeDataSet(DataSet):
description = this_trait.pre_publication_description
- this_trait.description_display = description.strip()
+ if len(description) > 0:
+ this_trait.description_display = description.strip()
+ else:
+ this_trait.description_display = ""
+
+ print("this_trait.description_display is:", this_trait.description_display)
if not this_trait.year.isdigit():
this_trait.pubmed_text = "N/A"
@@ -952,8 +1020,8 @@ class MrnaAssayDataSet(DataSet):
#XZ, 12/08/2008: description
#XZ, 06/05/2009: Rob asked to add probe target description
- description_string = str(this_trait.description).strip()
- target_string = str(this_trait.probe_target_description).strip()
+ description_string = unicode(str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
+ target_string = unicode(str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
if len(description_string) > 1 and description_string != 'None':
description_display = description_string
diff --git a/wqflask/base/generate_probesetfreeze_file.py b/wqflask/base/generate_probesetfreeze_file.py
index a0ff804b..a0ff804b 100644..100755
--- a/wqflask/base/generate_probesetfreeze_file.py
+++ b/wqflask/base/generate_probesetfreeze_file.py
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index be5df657..1a05fce7 100644..100755
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -19,6 +19,8 @@ class MrnaAssayTissueData(object):
if self.gene_symbols == None:
self.gene_symbols = []
+ print("self.gene_symbols:", self.gene_symbols)
+
self.data = collections.defaultdict(Bunch)
#self.gene_id_dict ={}
@@ -28,7 +30,7 @@ class MrnaAssayTissueData(object):
#self.desc_dict = {}
#self.probe_target_desc_dict = {}
- query = '''select t.Symbol, t.GeneId, t.DataId,t.Chr, t.Mb, t.description, t.Probe_Target_Description
+ query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
from (
select Symbol, max(Mean) as maxmean
from TissueProbeSetXRef
@@ -53,6 +55,7 @@ class MrnaAssayTissueData(object):
'''.format(in_clause)
results = g.db.execute(query).fetchall()
+
for result in results:
symbol = result[0]
if symbol in gene_symbols:
@@ -66,7 +69,7 @@ class MrnaAssayTissueData(object):
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
- #print("self.data: ", pf(self.data))
+ print("self.data: ", pf(self.data))
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -79,6 +82,8 @@ class MrnaAssayTissueData(object):
def get_symbol_values_pairs(self):
id_list = [self.data[symbol].data_id for symbol in self.data]
+ print("id_list:", id_list)
+
symbol_values_dict = {}
query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
@@ -86,6 +91,8 @@ class MrnaAssayTissueData(object):
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+ print("TISSUE QUERY:", query)
+
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index ebc2bfed..52bd8297 100644..100755
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -13,7 +13,7 @@ from pprint import pformat as pf
class TheSpecies(object):
def __init__(self, dataset):
self.dataset = dataset
- print("self.dataset is:", pf(self.dataset.__dict__))
+ #print("self.dataset is:", pf(self.dataset.__dict__))
self.chromosomes = Chromosomes(self.dataset)
self.genome_mb_length = self.chromosomes.get_genome_mb_length()
@@ -56,8 +56,8 @@ class Chromosomes(object):
InbredSet.Name = %s
Order by OrderId
""", self.dataset.group.name).fetchall()
- print("group: ", self.dataset.group.name)
- print("bike:", results)
+ #print("group: ", self.dataset.group.name)
+ #print("bike:", results)
for item in results:
self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 712d9af5..2bbd1f2a 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -40,6 +40,7 @@ class GeneralTrait(object):
else:
self.dataset = kw.get('dataset')
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
+ print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -295,6 +296,9 @@ class GeneralTrait(object):
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishFreeze.Id = %s
""" % (self.name, self.dataset.id)
+
+ print("query is:", query)
+
trait_info = g.db.execute(query).fetchone()
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
#XZ, 05/08/2009: to avoid the problem of same probeset name from different platforms.
@@ -641,7 +645,7 @@ def get_sample_data():
trait_ob = GeneralTrait(name=trait, dataset_name=dataset)
- return json.dumps({key: value.value for key, value in trait_ob.data.iteritems() })
+ return json.dumps([trait, {key: value.value for key, value in trait_ob.data.iteritems() }])
#jsonable_sample_data = {}
#for sample in trait_ob.data.iteritems():
diff --git a/wqflask/base/trait_collection.py b/wqflask/base/trait_collection.py
new file mode 100755
index 00000000..d388a3af
--- /dev/null
+++ b/wqflask/base/trait_collection.py
@@ -0,0 +1,53 @@
+class TraitCollection(object):
+
+ def __init__(self, is_anon=False):
+ self.is_anon = is_anon
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits():
+ if is_anon:
+ AnonCollection.remove_traits()
+ else:
+ UserCollection.remove_traits()
+
+ params = request.form
+ print("params are:", params)
+ uc_id = params['uc_id']
+ uc = model.UserCollection.query.get(uc_id)
+ traits_to_remove = params.getlist('traits[]')
+ print("traits_to_remove are:", traits_to_remove)
+ traits_to_remove = process_traits(traits_to_remove)
+ print("\n\n after processing, traits_to_remove:", traits_to_remove)
+ all_traits = uc.members_as_set()
+ print(" all_traits:", all_traits)
+ members_now = all_traits - traits_to_remove
+ print(" members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(all_traits), len(members_now)))
+ uc.members = json.dumps(list(members_now))
+ uc.changed_timestamp = datetime.datetime.utcnow()
+ db_session.commit()
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))
+
+ def __init__(self, anon_id)
+ self.anon_key = anon_key
+ self.collection_members = Redis.smembers(self.anon_id)
+ print("self.collection_members is:", self.collection_members)
+ self.num_members = len(self.collection_members)
+
+
+ @app.route("/collections/remove", methods=('POST',))
+ def remove_traits(traits_to_remove):
+ print("traits_to_remove:", traits_to_remove)
+ for trait in traits_to_remove:
+ Redis.srem(self.anon_id, trait)
+ members_now = self.collection_members - traits_to_remove
+ print("members_now:", members_now)
+ print("Went from {} to {} members in set.".format(len(self.collection_members), len(members_now)))
+
+ # We need to return something so we'll return this...maybe in the future
+ # we can use it to check the results
+ return str(len(members_now))