aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xwqflask/base/data_set.py30
-rwxr-xr-xwqflask/base/trait.py27
-rw-r--r--wqflask/utility/helper_functions.py2
-rw-r--r--wqflask/wqflask/correlation/show_corr_results.py99
-rw-r--r--wqflask/wqflask/search_results.py2
5 files changed, 83 insertions, 77 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 0c7676c4..0903bf16 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -90,8 +90,8 @@ class Markers(object):
self.markers = json.load(json_data_fh)
def add_pvalues(self, p_values):
- print("length of self.markers:", len(self.markers))
- print("length of p_values:", len(p_values))
+ #print("length of self.markers:", len(self.markers))
+ #print("length of p_values:", len(p_values))
# THIS IS only needed for the case when we are limiting the number of p-values calculated
if len(self.markers) < len(p_values):
@@ -161,7 +161,7 @@ class DatasetGroup(object):
self.f1list = None
self.parlist = None
self.get_f1_parent_strains()
- print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
+ #print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
@@ -170,7 +170,7 @@ class DatasetGroup(object):
def get_markers(self):
- print("self.species is:", self.species)
+ #print("self.species is:", self.species)
if self.species == "human":
marker_class = HumanMarkers
else:
@@ -293,14 +293,14 @@ class DataSet(object):
self.name,
self.name,
self.name))
- print("query_args are:", query_args)
+ #print("query_args are:", query_args)
- print("""
- SELECT Id, Name, FullName, ShortName
- FROM %s
- WHERE public > %s AND
- (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
- """ % (query_args))
+ #print("""
+ # SELECT Id, Name, FullName, ShortName
+ # FROM %s
+ # WHERE public > %s AND
+ # (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
+ # """ % (query_args))
self.id, self.name, self.fullname, self.shortname = g.db.execute("""
SELECT Id, Name, FullName, ShortName
@@ -624,12 +624,12 @@ class MrnaAssayDataSet(DataSet):
and ProbeSetFreezeId = {}
""".format(escape(str(self.id)))
results = g.db.execute(query).fetchall()
- print("After get_trait_list query")
+ #print("After get_trait_list query")
trait_data = {}
for trait in results:
print("Retrieving sample_data for ", trait[0])
trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
- print("After retrieve_sample_data")
+ #print("After retrieve_sample_data")
return trait_data
def get_trait_data(self):
@@ -763,7 +763,7 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(str(this_trait.dataset.id)),
escape(this_trait.name)))
- print("query is:", pf(query))
+ #print("query is:", pf(query))
result = g.db.execute(query).fetchone()
@@ -926,7 +926,7 @@ class TempDataSet(DataSet):
def geno_mrna_confidentiality(ob):
dataset_table = ob.type + "Freeze"
- print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
+ #print("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
query = '''SELECT Id, Name, FullName, confidentiality,
AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table)
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 53f41779..f333d5a7 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -1,6 +1,8 @@
from __future__ import absolute_import, division, print_function
import string
+import resource
+
from htmlgen import HTMLgen2 as HT
@@ -15,6 +17,10 @@ from pprint import pformat as pf
from flask import Flask, g
+def print_mem(stage=""):
+ mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+ print("{}: {}".format(stage, mem/1024))
+
class GeneralTrait(object):
"""
Trait class defines a trait in webqtl, can be either Microarray,
@@ -23,8 +29,12 @@ class GeneralTrait(object):
"""
def __init__(self, **kw):
- #print("in GeneralTrait")
- self.dataset = kw.get('dataset') # database name
+ # xor assertion
+ assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. xor name";
+ if kw.get('dataset_name'):
+ self.dataset = create_dataset(kw.get('dataset_name'))
+ else:
+ self.dataset = kw.get('dataset')
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
@@ -39,8 +49,6 @@ class GeneralTrait(object):
# self.cellid is set to None above
elif len(name2) == 3:
self.dataset, self.name, self.cellid = name2
-
- self.dataset = create_dataset(self.dataset)
# Todo: These two lines are necessary most of the time, but perhaps not all of the time
# So we could add a simple if statement to short-circuit this if necessary
@@ -355,8 +363,17 @@ class GeneralTrait(object):
#traitQTL = self.cursor.fetchone()
if traitQTL:
self.locus, self.lrs, self.pvalue, self.mean = traitQTL
+ if self.locus:
+ result = g.db.execute("""
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = '%s' and
+ Geno.Name = '%s' and
+ Geno.SpeciesId = Species.Id
+ """, (species, self.locus)).fetchone()
+ self.locus_chr = result[0]
+ self.locus_mb = result[1]
else:
- self.locus = self.lrs = self.pvalue = self.mean = ""
+ self.locus = self.locus_chr = self.locus_mb = self.lrs = self.pvalue = self.mean = ""
if self.dataset.type == 'Publish':
traitQTL = g.db.execute("""
SELECT
diff --git a/wqflask/utility/helper_functions.py b/wqflask/utility/helper_functions.py
index 28242c27..d76a32ce 100644
--- a/wqflask/utility/helper_functions.py
+++ b/wqflask/utility/helper_functions.py
@@ -9,7 +9,7 @@ def get_species_dataset_trait(self, start_vars):
#assert type(read_genotype) == type(bool()), "Expecting boolean value for read_genotype"
self.dataset = data_set.create_dataset(start_vars['dataset'])
self.species = TheSpecies(dataset=self.dataset)
- self.this_trait = GeneralTrait(dataset=self.dataset.name,
+ self.this_trait = GeneralTrait(dataset=self.dataset,
name=start_vars['trait_id'],
cellid=None)
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 96c0155b..3b8b7ba2 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -92,11 +92,6 @@ class CorrelationResults(object):
#
#RANK_ORDERS = {"1": 0, "2": 1, "3": 0, "4": 0, "5": 1}
-
- #def error(self, message, *args, **kw):
- # heading = heading or self.PAGE_HEADING
- # return templatePage.error(heading = heading, detail = [message], error=error)
-
def __init__(self, start_vars):
# get trait list from db (database name)
# calculate correlation with Base vector and targets
@@ -104,10 +99,8 @@ class CorrelationResults(object):
#self.this_trait = GeneralTrait(dataset=self.dataset.name,
# name=start_vars['trait_id'],
# cellid=None)
-
#print("start_vars: ", pf(start_vars))
with Bench("Doing correlations"):
- print_mem("At beginning")
helper_functions.get_species_dataset_trait(self, start_vars)
self.dataset.group.read_genotype_file()
@@ -138,7 +131,6 @@ class CorrelationResults(object):
self.correlation_data = {}
- print_mem("Before calculating correlations")
for trait, values in self.target_dataset.trait_data.iteritems():
this_trait_values = []
target_values = []
@@ -150,63 +142,60 @@ class CorrelationResults(object):
target_values.append(target_sample_value)
this_trait_values, target_values = normalize_values(this_trait_values, target_values)
-
+
if self.corr_method == 'pearson':
sample_r, sample_p = scipy.stats.pearsonr(this_trait_values, target_values)
else:
sample_r, sample_p = scipy.stats.spearmanr(this_trait_values, target_values)
-
+
self.correlation_data[trait] = [sample_r, sample_p]
-
- print_mem("After calculating correlations")
-
+
self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
key=lambda t: -abs(t[1][0])))
-
+
self.correlation_data_slice = collections.OrderedDict()
-
- old_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
-
+
for trait_counter, trait in enumerate(self.correlation_data.keys()[:300]):
- print_mem("In trait info loop")
- print("\nTrait #:", trait_counter)
- print_mem("Before trait_object")
- trait_object = GeneralTrait(dataset=self.dataset.name, name=trait)
- print_mem("After trait object")
- trait_info = dict(
- correlation = float(self.correlation_data[trait][0]),
- p_value = float(self.correlation_data[trait][1]),
- symbol = trait_object.symbol,
- alias = trait_object.alias,
- description = trait_object.description,
- chromosome = trait_object.chr,
- mb = trait_object.mb
- )
- print_mem("Before deleting trait object")
- del trait_object
- print_mem("After deleting trait object")
- gc.collect()
- print_mem("After colleting garabage")
- print("** trait_info:", pf(trait_info))
- print("\n** Start trait_info")
- counter = 1
- for key, value in trait_info.iteritems():
- print(" <{}> [{}] {}: [{}] {}\n".format(
- counter, type(key), key, type(value), value))
- counter += 1
- print("** Done trait_info")
+ trait_object = GeneralTrait(dataset=self.dataset, name=trait)
+ if self.dataset.type == 'ProbeSet':
+ trait_info = collections.OrderedDict(
+ correlation = float(self.correlation_data[trait][0]),
+ p_value = float(self.correlation_data[trait][1]),
+ symbol = trait_object.symbol,
+ alias = trait_object.alias,
+ description = trait_object.description,
+ chromosome = trait_object.chr,
+ mb = trait_object.mb
+ )
+ if hasattr(trait_object, 'mean'):
+ trait_info[mean] = trait_object.mean
+ if hasattr(trait_object, 'lrs'):
+ trait_info[lrs] = trait_object.lrs
+ if hasattr(trait_object, 'locus_chr'):
+ trait_info[locus_chr] = trait_object.locus_chr
+ if hasattr(trait_object, 'locus_mb'):
+ trait_info[locus_mb] = trait_object.locus_mb
+ elif self.dataset.type == 'Geno':
+ trait_info = collections.OrderedDict(
+ correlation = float(self.correlation_data[trait][0]),
+ p_value = float(self.correlation_data[trait][1]),
+ symbol = trait_object.symbol,
+ alias = trait_object.alias,
+ description = trait_object.description,
+ chromosome = trait_object.chr,
+ mb = trait_object.mb
+ )
+ else: # 'Publish'
+ trait_info = collections.OrderedDict(
+ correlation = float(self.correlation_data[trait][0]),
+ p_value = float(self.correlation_data[trait][1]),
+ symbol = trait_object.symbol,
+ alias = trait_object.alias,
+ description = trait_object.description,
+ chromosome = trait_object.chr,
+ mb = trait_object.mb
+ )
self.correlation_data_slice[trait] = trait_info
- #self.correlation_data_slice[trait].append(trait_object)
-
- new_memory_usage = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
- print("Memory difference:", new_memory_usage-old_memory_usage)
- old_memory_usage = new_memory_usage
- print_mem("End of purple loop")
- print("*************************** End purple ******** ")
-
- print_mem("After getting trait info")
- print("Garbage colleting...")
- gc.collect()
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index dc872a8b..e171f1ab 100644
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -106,7 +106,7 @@ class SearchResultPage(object):
print("foo locals are:", locals())
trait_id = result[0]
- this_trait = GeneralTrait(dataset=self.dataset.name, name=trait_id)
+ this_trait = GeneralTrait(dataset=self.dataset, name=trait_id)
this_trait.retrieve_info(QTL=True)
self.trait_list.append(this_trait)