aboutsummaryrefslogtreecommitdiff
path: root/wqflask/base/data_set.py
diff options
context:
space:
mode:
author pjotrp 2015-11-18 11:19:01 +0100
committer pjotrp 2015-11-18 11:19:01 +0100
commit cb0f10fc4850b6b06f2237b532317a5c6668584a (patch)
tree d143662ecceac5e05bd06afee4c87b2beb88859b /wqflask/base/data_set.py
parent 28ec342362ba068b3d0b5b9a302bc279d251f160 (diff)
parent 0310301b30c59eca45235cd1bd1ff8e15923950a (diff)
download genenetwork2-cb0f10fc4850b6b06f2237b532317a5c6668584a.tar.gz
Merge branch 'master' of https://github.com/zsloan/genenetwork2 into zsloan
Diffstat (limited to 'wqflask/base/data_set.py')
-rwxr-xr-x wqflask/base/data_set.py 202
1 files changed, 150 insertions, 52 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 489bd374..d6a46c2e 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -29,6 +29,7 @@ import json
import gzip
import cPickle as pickle
import itertools
+from operator import itemgetter
from redis import Redis
Redis = Redis()
@@ -42,7 +43,7 @@ from base import species
from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
-from wqflask.my_pylmm.pyLMM import chunks
+from utility import chunks
from maintenance import get_group_samplelists
@@ -88,7 +89,7 @@ class Dataset_Types(object):
for group in data['datasets'][species]:
for dataset_type in data['datasets'][species][group]:
for dataset in data['datasets'][species][group][dataset_type]:
- print("dataset is:", dataset)
+ #print("dataset is:", dataset)
short_dataset_name = dataset[0]
if dataset_type == "Phenotypes":
@@ -162,8 +163,6 @@ class Markers(object):
for marker in markers:
if (marker['chr'] != "X") and (marker['chr'] != "Y"):
marker['chr'] = int(marker['chr'])
- #else:
- # marker['chr'] = 20
print("Mb:", marker['Mb'])
marker['Mb'] = float(marker['Mb'])
@@ -278,7 +277,7 @@ class DatasetGroup(object):
"""
def __init__(self, dataset):
"""This sets self.group and self.group_id"""
- print("dataset name:", dataset.name)
+ print("DATASET NAME2:", dataset.name)
self.name, self.id = g.db.execute(dataset.query_for_group).fetchone()
if self.name == 'BXD300':
self.name = "BXD"
@@ -292,6 +291,7 @@ class DatasetGroup(object):
self.incparentsf1 = False
self.allsamples = None
+ self._datasets = None
def get_specified_markers(self, markers = []):
self.markers = HumanMarkers(self.name, markers)
@@ -305,6 +305,75 @@ class DatasetGroup(object):
self.markers = marker_class(self.name)
+    def datasets(self):
+        """Build the dataset menu for this group.
+
+        Selects the PublishFreeze, GenoFreeze and ProbeSetFreeze rows that
+        belong to the group named ``self.name`` and whose ``public`` column
+        is greater than ``webqtlConfig.PUBLICTHRESH``, then arranges them as
+        a list of dicts::
+
+            {'tissue': <tissue name or None>,
+             'datasets': [(full_name, name), ...]}
+
+        Every PublishFreeze/GenoFreeze row gets its own entry with
+        ``tissue=None``; ProbeSetFreeze rows are grouped under one entry per
+        tissue, in the order the tissues are first returned by the query.
+
+        The menu is stored on ``self._datasets``, pickled into Redis under
+        ``group_dataset_menu:v2:<group name>`` with a five minute expiry,
+        and returned.
+        """
+        key = "group_dataset_menu:v2:" + self.name
+        # NOTE(review): the menu is written to Redis below but this method
+        # never reads it back, so the query runs on every call. If the cache
+        # read is re-enabled, remember that it unpickles data from Redis.
+
+        results = g.db.execute('''
+            (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name
+            FROM PublishFreeze,InbredSet
+            WHERE PublishFreeze.InbredSetId = InbredSet.Id
+            and InbredSet.Name = %s
+            and PublishFreeze.public > %s)
+            UNION
+            (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name
+            FROM GenoFreeze, InbredSet
+            WHERE GenoFreeze.InbredSetId = InbredSet.Id
+            and InbredSet.Name = %s
+            and GenoFreeze.public > %s)
+            UNION
+            (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name
+            FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue
+            WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
+            and ProbeFreeze.TissueId = Tissue.Id
+            and ProbeFreeze.InbredSetId = InbredSet.Id
+            and InbredSet.Name like %s
+            and ProbeSetFreeze.public > %s
+            ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+            ''', (self.name, webqtlConfig.PUBLICTHRESH,
+                  self.name, webqtlConfig.PUBLICTHRESH,
+                  "%" + self.name + "%", webqtlConfig.PUBLICTHRESH))
+
+        dataset_menu = []
+        # tissue name -> its entry in dataset_menu, so each row is placed
+        # with one dict lookup instead of rescanning the whole menu.
+        tissue_entries = {}
+        for tissue_name, full_name, short_name in results.fetchall():
+            menu_item = (full_name, short_name)
+
+            # The first column is a marker string, not a tissue, for
+            # phenotype and genotype datasets.
+            if tissue_name in ('#PublishFreeze', '#GenoFreeze'):
+                dataset_menu.append(dict(tissue=None, datasets=[menu_item]))
+                continue
+
+            entry = tissue_entries.get(tissue_name)
+            if entry is None:
+                entry = dict(tissue=tissue_name, datasets=[])
+                tissue_entries[tissue_name] = entry
+                dataset_menu.append(entry)
+            entry['datasets'].append(menu_item)
+
+        Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
+        Redis.expire(key, 60*5)
+        self._datasets = dataset_menu
+
+        return self._datasets
+
def get_f1_parent_strains(self):
try:
@@ -319,7 +388,7 @@ class DatasetGroup(object):
self.parlist = [maternal, paternal]
def get_samplelist(self):
- key = "samplelist:v4:" + self.name
+ key = "samplelist:v2:" + self.name
print("key is:", key)
with Bench("Loading cache"):
result = Redis.get(key)
@@ -332,14 +401,29 @@ class DatasetGroup(object):
print(" self.samplelist: ", self.samplelist)
else:
print("Cache not hit")
- try:
- self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno")
- except IOError:
+
+ from utility.tools import plink_command
+ PLINK_PATH,PLINK_COMMAND = plink_command()
+
+ geno_file_path = webqtlConfig.GENODIR+self.name+".geno"
+ plink_file_path = PLINK_PATH+"/"+self.name+".fam"
+
+ if os.path.isfile(plink_file_path):
+ self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path)
+ elif os.path.isfile(geno_file_path):
+ self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
+ else:
self.samplelist = None
print("after get_samplelist")
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
+    def all_samples_ordered(self):
+        """Return parents, F1s and samples as one flat list, in that order.
+
+        Concatenates ``self.parlist``, ``self.f1list`` and
+        ``self.samplelist``; any of them that is ``None`` or empty is
+        skipped. A new list is returned, so the three source lists are left
+        untouched.
+        """
+        sample_groups = (self.parlist, self.f1list, self.samplelist)
+        return list(itertools.chain.from_iterable(
+            group for group in sample_groups if group))
+
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
#if self.group == 'BXD300':
@@ -434,6 +518,8 @@ class DataSet(object):
self.group.get_samplelist()
self.species = species.TheSpecies(self)
+ print("TESTING!!!")
+
def get_desc(self):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
@@ -473,29 +559,39 @@ class DataSet(object):
This is not meant to retrieve the data set info if no name at all is passed.
"""
-
- query_args = tuple(escape(x) for x in (
- (self.type + "Freeze"),
- str(webqtlConfig.PUBLICTHRESH),
- self.name,
- self.name,
- self.name))
- print("query_args are:", query_args)
-
- #print("""
- # SELECT Id, Name, FullName, ShortName
- # FROM %s
- # WHERE public > %s AND
- # (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
- # """ % (query_args))
try:
- self.id, self.name, self.fullname, self.shortname = g.db.execute("""
- SELECT Id, Name, FullName, ShortName
- FROM %s
- WHERE public > %s AND
- (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
- """ % (query_args)).fetchone()
+ if self.type == "ProbeSet":
+ query_args = tuple(escape(x) for x in (
+ str(webqtlConfig.PUBLICTHRESH),
+ self.name,
+ self.name,
+ self.name))
+
+ self.id, self.name, self.fullname, self.shortname, self.tissue = g.db.execute("""
+ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, Tissue.Name
+ FROM ProbeSetFreeze, ProbeFreeze, Tissue
+ WHERE ProbeSetFreeze.public > %s AND
+ ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+ ProbeFreeze.TissueId = Tissue.Id AND
+ (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s')
+ """ % (query_args)).fetchone()
+ else:
+ query_args = tuple(escape(x) for x in (
+ (self.type + "Freeze"),
+ str(webqtlConfig.PUBLICTHRESH),
+ self.name,
+ self.name,
+ self.name))
+
+ self.tissue = "N/A"
+ self.id, self.name, self.fullname, self.shortname = g.db.execute("""
+ SELECT Id, Name, FullName, ShortName
+ FROM %s
+ WHERE public > %s AND
+ (Name = '%s' OR FullName = '%s' OR ShortName = '%s')
+ """ % (query_args)).fetchone()
+
except TypeError:
print("Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
@@ -633,14 +729,14 @@ class PhenotypeDataSet(DataSet):
'sequence', 'units', 'comments']
# Fields displayed in the search results table header
- self.header_fields = ['',
- 'ID',
+ self.header_fields = ['Index',
+ 'Record',
'Description',
'Authors',
'Year',
'Max LRS',
'Max LRS Location',
- 'Add. Effect<a href="http://genenetwork.org//glossary.html#A" target="_blank"><sup style="color:#f00"> ?</sup></a>']
+ 'Additive Effect']
self.type = 'Publish'
@@ -719,7 +815,6 @@ class PhenotypeDataSet(DataSet):
Geno.Name = %s and
Geno.SpeciesId = Species.Id
""", (species, this_trait.locus)).fetchone()
- #result = self.cursor.fetchone()
if result:
if result[0] and result[1]:
@@ -737,7 +832,7 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb))
+ this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
def retrieve_sample_data(self, trait):
query = """
@@ -753,11 +848,11 @@ class PhenotypeDataSet(DataSet):
WHERE
PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
- PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+ PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (trait, self.id)
- results = g.db.execute(query).fetchall()
+ """
+ results = g.db.execute(query, (trait, self.id)).fetchall()
return results
@@ -777,7 +872,7 @@ class GenotypeDataSet(DataSet):
'sequence']
# Fields displayed in the search results table header
- self.header_fields = ['',
+ self.header_fields = ['Index',
'ID',
'Location']
@@ -828,7 +923,7 @@ class GenotypeDataSet(DataSet):
else:
trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
- this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
+ this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
this_trait.location_value = trait_location_value
def retrieve_sample_data(self, trait):
@@ -840,15 +935,17 @@ class GenotypeDataSet(DataSet):
left join GenoSE on
(GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
WHERE
- Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+ Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
GenoXRef.GenoFreezeId = GenoFreeze.Id AND
- GenoFreeze.Name = '%s' AND
+ GenoFreeze.Name = %s AND
GenoXRef.DataId = GenoData.Id AND
GenoData.StrainId = Strain.Id
Order BY
Strain.Name
- """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
- results = g.db.execute(query).fetchall()
+ """
+ results = g.db.execute(query,
+ (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
+ trait, self.name)).fetchall()
return results
@@ -893,15 +990,15 @@ class MrnaAssayDataSet(DataSet):
'flag']
# Fields displayed in the search results table header
- self.header_fields = ['',
- 'ID',
+ self.header_fields = ['Index',
+ 'Record',
'Symbol',
'Description',
'Location',
- 'Mean Expr',
+ 'Mean',
'Max LRS',
'Max LRS Location',
- 'Add. Effect<a href="http://genenetwork.org//glossary.html#A" target="_blank"><sup style="color:#f00"> ?</sup></a>']
+ 'Additive Effect']
# Todo: Obsolete or rename this field
self.type = 'ProbeSet'
@@ -1055,7 +1152,7 @@ class MrnaAssayDataSet(DataSet):
# this_trait.mb)
#ZS: Put this in function currently called "convert_location_to_value"
- this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr,
+ this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr,
float(this_trait.mb))
this_trait.location_value = trait_location_value
@@ -1074,7 +1171,8 @@ class MrnaAssayDataSet(DataSet):
mean = result[0] if result else 0
- this_trait.mean = "%2.3f" % mean
+ if mean:
+ this_trait.mean = "%2.3f" % mean
#LRS and its location
this_trait.LRS_score_repr = 'N/A'
@@ -1111,7 +1209,7 @@ class MrnaAssayDataSet(DataSet):
this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = this_trait.lrs
- this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
+ this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
def convert_location_to_value(self, chromosome, mb):
@@ -1159,7 +1257,7 @@ class MrnaAssayDataSet(DataSet):
Strain.Name
""" % (escape(trait), escape(self.name))
results = g.db.execute(query).fetchall()
- print("RETRIEVED RESULTS HERE:", results)
+ #print("RETRIEVED RESULTS HERE:", results)
return results