aboutsummaryrefslogtreecommitdiff
path: root/wqflask
diff options
context:
space:
mode:
authorLei Yan2013-05-23 23:01:54 +0000
committerLei Yan2013-05-23 23:01:54 +0000
commit8d0c6166a297d2cc89394649b8f56d8c6bf5d0f7 (patch)
tree104da8e98f21dc4ac73179c58fd9c17047d0d7de /wqflask
parent953b41486b035fbe786c7d2675f7b6cf898c12da (diff)
downloadgenenetwork2-8d0c6166a297d2cc89394649b8f56d8c6bf5d0f7.tar.gz
Worked on rewriting the function in data_set.py that gets the sample
values for each trait
Diffstat (limited to 'wqflask')
-rwxr-xr-xwqflask/base/data_set.py72
-rw-r--r--wqflask/wqflask/correlation/show_corr_results.py5
2 files changed, 49 insertions, 28 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index b2836480..edee6685 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -38,6 +38,7 @@ from base import species
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
+from wqflask.my_pylmm.pyLMM import chunks
from MySQLdb import escape_string as escape
from pprint import pformat as pf
@@ -68,6 +69,13 @@ def create_dataset(dataset_name):
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name)
+def create_in_clause(items):
+ """Create an in clause for mysql"""
+ in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+ in_clause = '( {} )'.format(in_clause)
+ return in_clause
+
+
def mescape(*items):
"""Multiple escape"""
escaped = [escape(item) for item in items]
@@ -626,31 +634,45 @@ class MrnaAssayDataSet(DataSet):
def get_trait_data(self):
self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
- sample_ids = []
- where_clause = ""
- #for sample in self.samplelist:
- # if len(where_clause):
- # where_clause += " or "
- # where_clause += "Strain.Name = '{}'".format(*mescape(sample))
-
+ #query_samplelist = ', '.join("'{}'".format(x) for x in mescape(*samplelist))
+ #query_samplelist = '( ' + query_samplelist + ' )'
+ #query_samplelist = create_in(samplelist)
+
+ print("self.samplelist is:", self.samplelist)
+
query = """
- SELECT Strain.Id, Strain.Name FROM Strain, Species
- WHERE ({})
+ SELECT Strain.Name, Strain.Id FROM Strain, Species
+ WHERE Strain.Name IN {}
and Strain.SpeciesId=Species.Id
and Species.name = '{}'
- """.format(where_clause, *mescape(self.group.species))
- print("raspberry query: ", query)
- result = g.db.execute(query).fetchall()
+ """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+ results = dict(g.db.execute(query).fetchall())
+ print("results are:", results)
+ print("type results are:", type(results))
+
+ #sample_ids = []
+ #for item in self.samplelist:
+ # sample_ids.append(results[item])
+
+ sample_ids = [results[item] for item in self.samplelist]
+ print("sample_ids are:", sample_ids)
+
+ #for sample in self.samplelist:
+ # pass
- print("[blackberry] result is:", pf(result))
- #sample_ids.append('%d' % this_id)
+ #for index in range(len(results)):
+ # sample_ids.append(results[index][0])
# MySQL limits the number of tables that can be used in a join to 61,
# so we break the sample ids into smaller chunks
- chunk_count = 50
- n = len(sample_ids) / chunk_count
- if len(sample_ids) % chunk_count:
- n += 1
+ # Postgres doesn't have that limit, so we can get rid of this after we transition
+ chunk_size = 50
+
+ number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
+
+ trait_sample_data = []
+ for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
+
#XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
#tempTable = None
#if GeneId and db.type == "ProbeSet":
@@ -664,12 +686,14 @@ class MrnaAssayDataSet(DataSet):
# TissueProbeSetFreezeId=tissueProbeSetFreezeId,
# method=method,
# returnNumber=returnNumber)
- trait_sample_data = []
- for step in range(int(n)):
- temp = []
- sample_ids_step = sample_ids[step*chunk_count:min(len(sample_ids), (step+1)*chunk_count)]
- for item in sample_ids_step:
- temp.append('T%s.value' % item)
+
+ #for step in range(int(n)):
+ #temp = []
+ #sample_ids_step = sample_ids[step*chunk_size:min(len(sample_ids), (step+1)*chunk_size)]
+ #for item in sample_ids_step:
+ # temp.append('T%s.value' % item)
+
+ temp = ['T%s.value' % item for item in sample_ids_step]
query = "SELECT {}.Name,".format(escape(self.type))
data_start_pos = 1
query += string.join(temp, ', ')
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index ee732050..9b1843bd 100644
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -312,9 +312,6 @@ class CorrelationResults(object):
self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)
self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
self.target_dataset.get_trait_data()
- # Lei Yan todo
- import pdb
- pdb.set_trace()
correlation_data = collections.defaultdict(list)
for trait, values in self.target_dataset.trait_data.iteritems():
values_1 = []
@@ -327,7 +324,7 @@ class CorrelationResults(object):
values_2.append(target_value)
correlation = calCorrelation(values_1, values_2)
correlation_data[trait] = correlation
- print ('%s %s' % (trait, correlation))
+ print ('correlation result: %s %s' % (trait, correlation))
#XZ, 09/18/2008: get all information about the user selected database.
#target_db_name = fd.corr_dataset