about | summary | refs | log | tree | commit | diff
path: root/wqflask/base
diff options
context:
space:
mode:
author Lei Yan 2015-07-10 20:12:00 +0000
committer Lei Yan 2015-07-10 20:12:00 +0000
commit aa159a17785cc415e81346963aa76f05f5f9d4ad (patch)
tree ae0502dac39d8cf20115b58817df4931a03b727c /wqflask/base
parent 239c5ff97a88bd9ae3c439ca244daca2696fe68b (diff)
parent 840285e3533790760b763aaa43d3099f9b0a5d69 (diff)
download genenetwork2-aa159a17785cc415e81346963aa76f05f5f9d4ad.tar.gz
Merge https://github.com/genenetwork/genenetwork2
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x wqflask/base/data_set.py 105
-rwxr-xr-x wqflask/base/mrna_assay_tissue_data.py 6
-rwxr-xr-x wqflask/base/trait.py 21
-rwxr-xr-x wqflask/base/webqtlCaseData.py 2
-rwxr-xr-x wqflask/base/webqtlConfig.py 2
5 files changed, 99 insertions, 37 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 1cd57b4b..414cc71a 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -29,6 +29,7 @@ import json
import gzip
import cPickle as pickle
import itertools
+from operator import itemgetter
from redis import Redis
Redis = Redis()
@@ -292,6 +293,7 @@ class DatasetGroup(object):
self.incparentsf1 = False
self.allsamples = None
+ self._datasets = None
def get_specified_markers(self, markers = []):
self.markers = HumanMarkers(self.name, markers)
@@ -305,6 +307,56 @@ class DatasetGroup(object):
self.markers = marker_class(self.name)
+ def datasets(self):
+ key = "group_dataset_menu:v1:" + self.name
+ print("key is:", key)
+ with Bench("Loading cache"):
+ result = Redis.get(key)
+ if result:
+ self._datasets = pickle.loads(result)
+ return self._datasets
+
+ dataset_menu = []
+ print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH)
+ print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH))
+ results = g.db.execute('''
+ (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name
+ FROM PublishFreeze,InbredSet
+ WHERE PublishFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name = %s
+ and PublishFreeze.public > %s)
+ UNION
+ (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name
+ FROM GenoFreeze, InbredSet
+ WHERE GenoFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name = %s
+ and GenoFreeze.public > %s)
+ UNION
+ (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name
+ FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue
+ WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
+ and ProbeFreeze.TissueId = Tissue.Id
+ and ProbeFreeze.InbredSetId = InbredSet.Id
+ and InbredSet.Name like %s
+ and ProbeSetFreeze.public > %s
+ ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+ ''', (self.name, webqtlConfig.PUBLICTHRESH,
+ self.name, webqtlConfig.PUBLICTHRESH,
+ "%" + self.name + "%", webqtlConfig.PUBLICTHRESH))
+
+ for tissue_name, dataset in itertools.groupby(results.fetchall(), itemgetter(0)):
+ if tissue_name in ['#PublishFreeze', '#GenoFreeze']:
+ for item in dataset:
+ dataset_menu.append(dict(tissue=None, datasets=[item[1:]]))
+ else:
+ dataset_sub_menu = [item[1:] for item in dataset]
+ dataset_menu.append(dict(tissue=tissue_name,
+ datasets=dataset_sub_menu))
+
+ Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
+ Redis.expire(key, 60*5)
+ self._datasets = dataset_menu
+ return self._datasets
def get_f1_parent_strains(self):
try:
@@ -319,7 +371,7 @@ class DatasetGroup(object):
self.parlist = [maternal, paternal]
def get_samplelist(self):
- key = "samplelist:v4:" + self.name
+ key = "samplelist:v2:" + self.name
print("key is:", key)
with Bench("Loading cache"):
result = Redis.get(key)
@@ -332,14 +384,29 @@ class DatasetGroup(object):
print(" self.samplelist: ", self.samplelist)
else:
print("Cache not hit")
- try:
- self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno")
- except IOError:
+
+ from utility.tools import plink_command
+ PLINK_PATH,PLINK_COMMAND = plink_command()
+
+ geno_file_path = webqtlConfig.GENODIR+self.name+".geno"
+ plink_file_path = PLINK_PATH+"/"+self.name+".fam"
+
+ if os.path.isfile(plink_file_path):
+ self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path)
+ elif os.path.isfile(geno_file_path):
+ self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
+ else:
self.samplelist = None
print("after get_samplelist")
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
+ def all_samples_ordered(self):
+ result = []
+ lists = (self.parlist, self.f1list, self.samplelist)
+ [result.extend(l) for l in lists if l]
+ return result
+
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
#if self.group == 'BXD300':
@@ -633,7 +700,7 @@ class PhenotypeDataSet(DataSet):
'sequence', 'units', 'comments']
# Fields displayed in the search results table header
- self.header_fields = ['',
+ self.header_fields = ['Index',
'ID',
'Description',
'Authors',
@@ -737,7 +804,7 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb))
+ this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
def retrieve_sample_data(self, trait):
query = """
@@ -753,11 +820,11 @@ class PhenotypeDataSet(DataSet):
WHERE
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+ PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (trait, self.id)
- results = g.db.execute(query).fetchall()
+ """
+ results = g.db.execute(query, (trait, self.id)).fetchall()
return results
@@ -777,7 +844,7 @@ class GenotypeDataSet(DataSet):
'sequence']
# Fields displayed in the search results table header
- self.header_fields = ['',
+ self.header_fields = ['Index',
'ID',
'Location']
@@ -828,7 +895,7 @@ class GenotypeDataSet(DataSet):
else:
trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
- this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
+ this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
this_trait.location_value = trait_location_value
def retrieve_sample_data(self, trait):
@@ -840,15 +907,17 @@ class GenotypeDataSet(DataSet):
left join GenoSE on
(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
WHERE
- Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+ Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = '%s' AND
+ GenoFreeze.Name = %s AND
GenoXRef.DataId = GenoData.Id AND
GenoData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
- results = g.db.execute(query).fetchall()
+ """
+ results = g.db.execute(query,
+ (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
+ trait, self.name)).fetchall()
return results
@@ -893,7 +962,7 @@ class MrnaAssayDataSet(DataSet):
'flag']
# Fields displayed in the search results table header
- self.header_fields = ['',
+ self.header_fields = ['Index',
'ID',
'Symbol',
'Description',
@@ -1055,7 +1124,7 @@ class MrnaAssayDataSet(DataSet):
# this_trait.mb)
#ZS: Put this in function currently called "convert_location_to_value"
- this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr,
+ this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr,
float(this_trait.mb))
this_trait.location_value = trait_location_value
@@ -1111,7 +1180,7 @@ class MrnaAssayDataSet(DataSet):
this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
+ this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
def convert_location_to_value(self, chromosome, mb):
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 1a05fce7..b2c0448a 100755
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -51,15 +51,15 @@ class MrnaAssayTissueData(object):
query += ''' Symbol in {} group by Symbol)
as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
- and t.Mean = x.maxmean;
+ and t.Mean = x.maxmean;http://docs.python.org/2/library/string.html?highlight=lower#string.lower
'''.format(in_clause)
results = g.db.execute(query).fetchall()
for result in results:
symbol = result[0]
- if symbol in gene_symbols:
- #gene_symbols.append(symbol)
+ if symbol.lower() in [gene_symbol.lower() for gene_symbol in gene_symbols]:
+ #gene_symbols.append(symbol)
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 8930c917..ff80795c 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -251,14 +251,7 @@ class GeneralTrait(object):
# Todo: is this necessary? If not remove
self.data.clear()
- if self.dataset.group.parlist:
- all_samples_ordered = (self.dataset.group.parlist +
- self.dataset.group.f1list +
- self.dataset.group.samplelist)
- elif self.dataset.group.f1list:
- all_samples_ordered = self.dataset.group.f1list + self.dataset.group.samplelist
- else:
- all_samples_ordered = self.dataset.group.samplelist
+ all_samples_ordered = self.dataset.group.all_samples_ordered()
if results:
for item in results:
@@ -298,7 +291,7 @@ class GeneralTrait(object):
PublishFreeze.Id = %s
""" % (self.name, self.dataset.id)
- print("query is:", query)
+ print("query is:", query)
trait_info = g.db.execute(query).fetchone()
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
@@ -337,10 +330,10 @@ class GeneralTrait(object):
trait_info = g.db.execute(query).fetchone()
#print("trait_info is: ", pf(trait_info))
else: #Temp type
- query = """SELECT %s FROM %s WHERE Name = %s
- """ % (string.join(self.dataset.display_fields,','),
- self.dataset.type, self.name)
- trait_info = g.db.execute(query).fetchone()
+ query = """SELECT %s FROM %s WHERE Name = %s"""
+ trait_info = g.db.execute(query,
+ (string.join(self.dataset.display_fields,','),
+ self.dataset.type, self.name)).fetchone()
if trait_info:
self.haveinfo = True
@@ -423,6 +416,8 @@ class GeneralTrait(object):
if result:
self.locus_chr = result[0]
self.locus_mb = result[1]
+ else:
+ self.locus = self.locus_chr = self.locus_mb = ""
else:
self.locus = self.locus_chr = self.locus_mb = ""
else:
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 5927b0f4..42763aed 100755
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -34,8 +34,6 @@ class webqtlCaseData(object):
self.value = value # Trait Value
self.variance = variance # Trait Variance
self.num_cases = num_cases # Number of individuals/cases
- self.prob_plot_value = None # Ordered value for probability plot; this is sort of wrong but not sure how else to do this
- self.z_score = None
self.extra_attributes = None
self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word)
self.outlier = None # Not set to True/False until later
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index 48d8cd0a..330fec56 100755
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -53,7 +53,7 @@ GNROOT = "/home/zas1024/gene/" # Will remove this and dependent items later
SECUREDIR = GNROOT + 'secure/'
COMMON_LIB = GNROOT + 'support/admin'
HTMLPATH = GNROOT + 'genotype_files/'
-PYLMM_PATH = '/home/zas1024/plink/'
+PYLMM_PATH = '/home/zas1024/plink_gemma/'
SNP_PATH = '/home/zas1024/snps/'
IMGDIR = GNROOT + '/wqflask/wqflask/images/'
IMAGESPATH = HTMLPATH + 'images/'