about | summary | refs | log | tree | commit | diff
path: root/wqflask
diff options
context:
space:
mode:
author: Lei Yan, 2013-05-23 20:53:11 +0000
committer: Lei Yan, 2013-05-23 20:53:11 +0000
commit: e31d163325d0d417bf266d1c3d9e52b6ff00f83b (patch)
tree: dac6d3d62bb6839449e388b6874f3b535a74bb94 /wqflask
parent: b4371ef0d96605187b7474e7e4844dbebab67d8b (diff)
download: genenetwork2-e31d163325d0d417bf266d1c3d9e52b6ff00f83b.tar.gz
Now calculates correlation values for traits, but not yet in template
Diffstat (limited to 'wqflask')
-rwxr-xr-x  wqflask/base/data_set.py  46
-rw-r--r--  wqflask/wqflask/correlation/show_corr_results.py  55
2 files changed, 67 insertions, 34 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 1520b180..89bbf03d 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -46,7 +46,7 @@ from pprint import pformat as pf
DS_NAME_MAP = {}
def create_dataset(dataset_name):
- print("dataset_name:", dataset_name)
+ #print("dataset_name:", dataset_name)
query = """
SELECT DBType.Name
@@ -71,7 +71,7 @@ def create_dataset(dataset_name):
def mescape(*items):
"""Multiple escape"""
escaped = [escape(item) for item in items]
- print("escaped is:", escaped)
+ #print("escaped is:", escaped)
return escaped
@@ -235,6 +235,7 @@ class DataSet(object):
self.retrieve_other_names()
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
+ self.group.read_genotype_file()
self.species = species.TheSpecies(self)
@@ -624,17 +625,34 @@ class MrnaAssayDataSet(DataSet):
return trait_data
def get_trait_data(self):
+ import pdb
+ pdb.set_trace()
+ #samplelist = []
+ #samplelist += self.group.samplelist
+ #samplelist += self.group.parlist
+ #samplelist += self.group.f1list
+ #self.samplelist = samplelist
+
+ self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
+
sample_ids = []
- for sample in self.group.samplelist:
- query = """
- SELECT Strain.Id FROM Strain, Species
- WHERE Strain.Name = '{}'
- and Strain.SpeciesId=Species.Id
- and Species.name = '{}'
- """.format(*mescape(sample, self.group.species))
- this_id = g.db.execute(query).fetchone()[0]
- sample_ids.append('%d' % this_id)
- print("sample_ids size: ", len(sample_ids))
+
+ where_clause = ""
+ for sample in self.samplelist:
+ if len(where_clause):
+ where_clause += " or "
+ where_clause += """'{}'""".format(*mescape(sample))
+
+ query = """
+ SELECT Strain.Id, Strain.Name FROM Strain, Species
+ WHERE Strain.Name = '{}'
+ and Strain.SpeciesId=Species.Id
+ and Species.name = '{}'
+ """.format(*mescape(where_clause, self.group.species))
+ result = g.db.execute(query).fetchall()
+
+ print("[blueberry] result is:", pf(result))
+ #sample_ids.append('%d' % this_id)
# MySQL limits the number of tables that can be used in a join to 61,
# so we break the sample ids into smaller chunks
@@ -642,7 +660,6 @@ class MrnaAssayDataSet(DataSet):
n = len(sample_ids) / chunk_count
if len(sample_ids) % chunk_count:
n += 1
- print("n: ", n)
#XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
#tempTable = None
#if GeneId and db.type == "ProbeSet":
@@ -681,10 +698,9 @@ class MrnaAssayDataSet(DataSet):
order by {}.Id
""".format(*mescape(self.type, self.type, self.type, self.type,
self.name, self.type, self.type, self.type, self.type))
- print("query: ", query)
results = g.db.execute(query).fetchall()
trait_sample_data.append(results)
-
+
trait_count = len(trait_sample_data[0])
self.trait_data = collections.defaultdict(list)
# put all of the separate data together into a dictionary where the keys are
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 1d0368cc..ee732050 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -37,6 +37,7 @@ import time
#import pyXLWriter as xl
import pp
import math
+import collections
from pprint import pformat as pf
@@ -285,16 +286,15 @@ class CorrelationResults(object):
# name=start_vars['trait_id'],
# cellid=None)
- print("start_vars: ", pf(start_vars))
+ #print("start_vars: ", pf(start_vars))
helper_functions.get_species_dataset_trait(self, start_vars)
self.dataset.group.read_genotype_file()
-
- self.samples = [] # Want only ones with values
- self.vals = []
corr_samples_group = start_vars['corr_samples_group']
+ self.sample_data = {}
+
#The two if statements below append samples to the sample list based upon whether the user
#selected Primary Samples Only, Other Samples Only, or All Samples
@@ -310,16 +310,24 @@ class CorrelationResults(object):
self.dataset.group.f1list +
self.dataset.group.samplelist)
self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
-
- #for i, sample in enumerate(self.samples):
- # print("{} : {}".format(sample, self.vals[i]))
-
self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
self.target_dataset.get_trait_data()
- print("trait_list: {}".format(pf(self.target_dataset.trait_data)))
# Lei Yan todo
+ import pdb
+ pdb.set_trace()
+ correlation_data = collections.defaultdict(list)
for trait, values in self.target_dataset.trait_data.iteritems():
- correlation = calCorrelation(values, )
+ values_1 = []
+ values_2 = []
+ for index,sample in enumerate(self.target_dataset.samplelist):
+ target_value = values[index]
+ if sample in self.sample_data.keys():
+ this_value = self.sample_data[sample]
+ values_1.append(this_value)
+ values_2.append(target_value)
+ correlation = calCorrelation(values_1, values_2)
+ correlation_data[trait] = correlation
+ print ('%s %s' % (trait, correlation))
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset
@@ -779,19 +787,28 @@ makeWebGestaltTree(thisForm, '%s', %d, 'edag_only.php');
"""
+ #def process_samples(self, start_vars, sample_names, excluded_samples):
+ # for sample in sample_names:
+ # if sample not in excluded_samples:
+ # value = start_vars['value:' + sample]
+ # variance = start_vars['variance:' + sample]
+ # if variance.strip().lower() == 'x':
+ # variance = 0
+ # else:
+ # variance = float(variance)
+ # if value.strip().lower() != 'x':
+ # self.samples.append(str(sample))
+ # self.vals.append(float(value))
+ # #self.variances.append(variance)
+
def process_samples(self, start_vars, sample_names, excluded_samples):
for sample in sample_names:
if sample not in excluded_samples:
value = start_vars['value:' + sample]
- variance = start_vars['variance:' + sample]
- if variance.strip().lower() == 'x':
- variance = 0
+ if value.strip().lower() == 'x':
+ self.sample_data[str(sample)] = None
else:
- variance = float(variance)
- if value.strip().lower() != 'x':
- self.samples.append(str(sample))
- self.vals.append(float(value))
- #self.variances.append(variance)
+ self.sample_data[str(sample)] = float(value)
def getSortByValue(self, calculationMethod):
@@ -2134,7 +2151,7 @@ Resorting this table <br>
def calCorrelation(values_1, values_2):
- N = Math.min(len(values_1), len(values_2))
+ N = min(len(values_1), len(values_2))
X = []
Y = []
for i in range(N):