about summary refs log tree commit diff
path: root/wqflask/base/data_set.py
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask/base/data_set.py')
-rwxr-xr-xwqflask/base/data_set.py105
1 files changed, 87 insertions, 18 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 1cd57b4b..414cc71a 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -29,6 +29,7 @@ import json
 import gzip
 import cPickle as pickle
 import itertools
+from operator import itemgetter
 
 from redis import Redis
 Redis = Redis()
@@ -292,6 +293,7 @@ class DatasetGroup(object):
         
         self.incparentsf1 = False
         self.allsamples = None
+        self._datasets = None
 
     def get_specified_markers(self, markers = []):
         self.markers = HumanMarkers(self.name, markers)
@@ -305,6 +307,56 @@ class DatasetGroup(object):
 
         self.markers = marker_class(self.name)
 
+    def datasets(self):
+        key = "group_dataset_menu:v1:" + self.name
+        print("key is:", key)
+        with Bench("Loading cache"):
+            result = Redis.get(key)
+        if result:
+            self._datasets = pickle.loads(result)
+            return self._datasets
+
+        dataset_menu = []
+        print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH)
+        print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH))
+        results = g.db.execute('''
+             (SELECT '#PublishFreeze',PublishFreeze.FullName,PublishFreeze.Name
+              FROM PublishFreeze,InbredSet
+              WHERE PublishFreeze.InbredSetId = InbredSet.Id
+                and InbredSet.Name = %s
+                and PublishFreeze.public > %s)
+             UNION
+             (SELECT '#GenoFreeze',GenoFreeze.FullName,GenoFreeze.Name
+              FROM GenoFreeze, InbredSet
+              WHERE GenoFreeze.InbredSetId = InbredSet.Id
+                and InbredSet.Name = %s
+                and GenoFreeze.public > %s)
+             UNION
+             (SELECT Tissue.Name, ProbeSetFreeze.FullName,ProbeSetFreeze.Name
+              FROM ProbeSetFreeze, ProbeFreeze, InbredSet, Tissue
+              WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
+                and ProbeFreeze.TissueId = Tissue.Id
+                and ProbeFreeze.InbredSetId = InbredSet.Id
+                and InbredSet.Name like %s
+                and ProbeSetFreeze.public > %s
+              ORDER BY Tissue.Name, ProbeSetFreeze.CreateTime desc, ProbeSetFreeze.AvgId)
+            ''', (self.name, webqtlConfig.PUBLICTHRESH,
+                  self.name, webqtlConfig.PUBLICTHRESH,
+                  "%" + self.name + "%", webqtlConfig.PUBLICTHRESH))
+
+        for tissue_name, dataset in itertools.groupby(results.fetchall(), itemgetter(0)):
+            if tissue_name in ['#PublishFreeze', '#GenoFreeze']:
+                for item in dataset:
+                    dataset_menu.append(dict(tissue=None, datasets=[item[1:]]))
+            else:
+                dataset_sub_menu = [item[1:] for item in dataset]
+                dataset_menu.append(dict(tissue=tissue_name,
+                                    datasets=dataset_sub_menu))
+
+        Redis.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
+        Redis.expire(key, 60*5)
+        self._datasets = dataset_menu
+        return self._datasets
 
     def get_f1_parent_strains(self):
         try:
@@ -319,7 +371,7 @@ class DatasetGroup(object):
             self.parlist = [maternal, paternal]
 
     def get_samplelist(self):
-        key = "samplelist:v4:" + self.name
+        key = "samplelist:v2:" + self.name
         print("key is:", key)
         with Bench("Loading cache"):
             result = Redis.get(key)
@@ -332,14 +384,29 @@ class DatasetGroup(object):
             print("  self.samplelist: ", self.samplelist)
         else:
             print("Cache not hit")
-            try:
-                self.samplelist = get_group_samplelists.get_samplelist(self.name + ".geno")
-            except IOError:
+
+            from utility.tools import plink_command
+            PLINK_PATH,PLINK_COMMAND = plink_command()
+
+            geno_file_path = webqtlConfig.GENODIR+self.name+".geno"
+            plink_file_path = PLINK_PATH+"/"+self.name+".fam"
+
+            if os.path.isfile(plink_file_path):
+                self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path)
+            elif os.path.isfile(geno_file_path):
+                self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
+            else:
                 self.samplelist = None
             print("after get_samplelist")
             Redis.set(key, json.dumps(self.samplelist))
             Redis.expire(key, 60*5)
 
+    def all_samples_ordered(self):
+        result = []
+        lists = (self.parlist, self.f1list, self.samplelist)
+        [result.extend(l) for l in lists if l]
+        return result
+
     def read_genotype_file(self):
         '''Read genotype from .geno file instead of database'''
         #if self.group == 'BXD300':
@@ -633,7 +700,7 @@ class PhenotypeDataSet(DataSet):
                                'sequence', 'units', 'comments']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['',
+        self.header_fields = ['Index',
                             'ID',
                             'Description',
                             'Authors',
@@ -737,7 +804,7 @@ class PhenotypeDataSet(DataSet):
 
                         this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
                         this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
-                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr %s: %.4f Mb' % (LRS_Chr, float(LRS_Mb))
+                        this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
                         
     def retrieve_sample_data(self, trait):
         query = """
@@ -753,11 +820,11 @@ class PhenotypeDataSet(DataSet):
                     WHERE
                             PublishXRef.InbredSetId = PublishFreeze.InbredSetId AND
                             PublishData.Id = PublishXRef.DataId AND PublishXRef.Id = %s AND
-                            PublishFreeze.Id = %d AND PublishData.StrainId = Strain.Id
+                            PublishFreeze.Id = %s AND PublishData.StrainId = Strain.Id
                     Order BY
                             Strain.Name
-                    """ % (trait, self.id)
-        results = g.db.execute(query).fetchall()
+                    """
+        results = g.db.execute(query, (trait, self.id)).fetchall()
         return results
 
 
@@ -777,7 +844,7 @@ class GenotypeDataSet(DataSet):
                                'sequence']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['',
+        self.header_fields = ['Index',
                               'ID',
                               'Location']
 
@@ -828,7 +895,7 @@ class GenotypeDataSet(DataSet):
                     else:
                         trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
 
-                this_trait.location_repr = 'Chr%s: %.4f' % (this_trait.chr, float(this_trait.mb) )
+                this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
                 this_trait.location_value = trait_location_value
                 
     def retrieve_sample_data(self, trait):
@@ -840,15 +907,17 @@ class GenotypeDataSet(DataSet):
                     left join GenoSE on
                             (GenoSE.DataId = GenoData.Id AND GenoSE.StrainId = GenoData.StrainId)
                     WHERE
-                            Geno.SpeciesId = %s AND Geno.Name = '%s' AND GenoXRef.GenoId = Geno.Id AND
+                            Geno.SpeciesId = %s AND Geno.Name = %s AND GenoXRef.GenoId = Geno.Id AND
                             GenoXRef.GenoFreezeId = GenoFreeze.Id AND
-                            GenoFreeze.Name = '%s' AND
+                            GenoFreeze.Name = %s AND
                             GenoXRef.DataId = GenoData.Id AND
                             GenoData.StrainId = Strain.Id
                     Order BY
                             Strain.Name
-                    """ % (webqtlDatabaseFunction.retrieve_species_id(self.group.name), trait, self.name)
-        results = g.db.execute(query).fetchall()
+                    """
+        results = g.db.execute(query,
+                               (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
+                                trait, self.name)).fetchall()
         return results
 
 
@@ -893,7 +962,7 @@ class MrnaAssayDataSet(DataSet):
                                'flag']
 
         # Fields displayed in the search results table header
-        self.header_fields = ['',
+        self.header_fields = ['Index',
                              'ID',
                              'Symbol',
                              'Description',
@@ -1055,7 +1124,7 @@ class MrnaAssayDataSet(DataSet):
                 #                               this_trait.mb)
 
                 #ZS: Put this in function currently called "convert_location_to_value"
-                this_trait.location_repr = 'Chr %s: %.4f Mb' % (this_trait.chr,
+                this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr,
                                                                 float(this_trait.mb))
                 this_trait.location_value = trait_location_value
 
@@ -1111,7 +1180,7 @@ class MrnaAssayDataSet(DataSet):
 
                     this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
                     this_trait.LRS_score_value = this_trait.lrs
-                    this_trait.LRS_location_repr = 'Chr %s: %.4f Mb' % (lrs_chr, float(lrs_mb))
+                    this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
       
 
     def convert_location_to_value(self, chromosome, mb):