Merge branch 'testing' of https://github.com/genenetwork/genenetwork2 into production

author: zsloan 2018-08-14 20:16:32 +0000
committer: zsloan 2018-08-14 20:16:32 +0000
commit: 838362c116b02c090dadeb76cda27e9902a6626a (patch)
tree: a6be104cc73e3bc9e271f9b5ca854dd32f3b810d /wqflask/base/data_set.py
parent: 0bead53661ea701ffd9f9d565e4d2ecbbed81a8e (diff)
parent: 85defabb17ecdef1c7b8e92fa2e06b44d1e9ca49 (diff)
download: genenetwork2-838362c116b02c090dadeb76cda27e9902a6626a.tar.gz
1 files changed, 23 insertions, 147 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index a4eaaa2e..4a422ee4 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -29,7 +29,6 @@ import json
 import gzip
 import cPickle as pickle
 import itertools
-from operator import itemgetter
 
 from redis import Redis
 Redis = Redis()
@@ -170,8 +169,27 @@ class Markers(object):
     """Todo: Build in cacheing so it saves us reading the same file more than once"""
     def __init__(self, name):
         json_data_fh = open(locate(name + ".json",'genotype/json'))
+
         try:
-            markers = json.load(json_data_fh)
+            markers = []
+            with open(locate(name + "_snps.txt", 'r')) as bimbam_fh:
+                marker = {}
+                if len(bimbam_fh[0].split(", ")) > 2:
+                    delimiter = ", "
+                elif len(bimbam_fh[0].split(",")) > 2:
+                    delimiter = ","
+                elif len(bimbam_fh[0].split("\t")) > 2:
+                    delimiter = "\t"
+                else:
+                    delimiter = " "
+                for line in bimbam_fh:
+                    marker['name'] = line.split(delimiter)[0]
+                    marker['Mb']
+                    marker['chr'] = line.split(delimiter)[2]
+                    marker['cM']
+                    markers.append(marker)
+        #try:
+        #    markers = json.load(json_data_fh)
         except:
             markers = []
 
@@ -181,8 +199,6 @@ class Markers(object):
             marker['Mb'] = float(marker['Mb'])
 
         self.markers = markers
-        #logger.debug("self.markers:", self.markers)
-
 
     def add_pvalues(self, p_values):
         logger.debug("length of self.markers:", len(self.markers))
@@ -316,9 +332,6 @@ class DatasetGroup(object):
 
         return mapping_id, mapping_names
 
-    def get_specified_markers(self, markers = []):
-        self.markers = HumanMarkers(self.name, markers)
-
     def get_markers(self):
         logger.debug("self.species is:", self.species)
 
@@ -449,7 +462,6 @@ def datasets(group_name, this_group = None):
               group_name, webqtlConfig.PUBLICTHRESH,
               "'" + group_name + "'", webqtlConfig.PUBLICTHRESH))
 
-    #for tissue_name, dataset in itertools.groupby(the_results, itemgetter(0)):
     for dataset_item in the_results:
         tissue_name = dataset_item[0]
         dataset = dataset_item[1]
@@ -457,14 +469,10 @@ def datasets(group_name, this_group = None):
         if tissue_name in ['#PublishFreeze', '#GenoFreeze']:
             dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)]))
         else:
-            dataset_sub_menu = [item[1:] for item in dataset]
-
             tissue_already_exists = False
-            tissue_position = None
             for i, tissue_dict in enumerate(dataset_menu):
                 if tissue_dict['tissue'] == tissue_name:
                     tissue_already_exists = True
-                    tissue_position = i
                     break
 
             if tissue_already_exists:
@@ -719,20 +727,6 @@ class PhenotypeDataSet(DataSet):
         # (Urgently?) Need to write this
         pass
 
-    def get_trait_list(self):
-        query = """
-            select PublishXRef.Id
-            from PublishXRef, PublishFreeze
-            where PublishFreeze.InbredSetId=PublishXRef.InbredSetId
-            and PublishFreeze.Id = {}
-            """.format(escape(str(self.id)))
-        logger.sql(query)
-        results = g.db.execute(query).fetchall()
-        trait_data = {}
-        for trait in results:
-            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
-        return trait_data
-
     def get_trait_info(self, trait_list, species = ''):
         for this_trait in trait_list:
 
@@ -746,7 +740,7 @@ class PhenotypeDataSet(DataSet):
             #of the post-publication description
             if this_trait.confidential:
                 this_trait.description_display = ""
-                continue   # for now
+                continue   # for now, because no authorization features
 
                 if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
                         privilege=self.privilege,
@@ -770,9 +764,7 @@ class PhenotypeDataSet(DataSet):
 
             #LRS and its location
             this_trait.LRS_score_repr = "N/A"
-            this_trait.LRS_score_value = 0
             this_trait.LRS_location_repr = "N/A"
-            this_trait.LRS_location_value = 1000000
 
             if this_trait.lrs:
                 query = """
@@ -789,17 +781,7 @@ class PhenotypeDataSet(DataSet):
                         LRS_Chr = result[0]
                         LRS_Mb = result[1]
 
-                        #XZ: LRS_location_value is used for sorting
-                        try:
-                            LRS_location_value = int(LRS_Chr)*1000 + float(LRS_Mb)
-                        except:
-                            if LRS_Chr.upper() == 'X':
-                                LRS_location_value = 20*1000 + float(LRS_Mb)
-                            else:
-                                LRS_location_value = ord(str(LRS_chr).upper()[0])*1000 + float(LRS_Mb)
-
                         this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
-                        this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
                         this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
 
     def retrieve_sample_data(self, trait):
@@ -861,40 +843,13 @@ class GenotypeDataSet(DataSet):
     def check_confidentiality(self):
         return geno_mrna_confidentiality(self)
 
-    def get_trait_list(self):
-        query = """
-            select Geno.Name
-            from Geno, GenoXRef
-            where GenoXRef.GenoId = Geno.Id
-            and GenoFreezeId = {}
-            """.format(escape(str(self.id)))
-        logger.sql(query)
-        results = g.db.execute(query).fetchall()
-        trait_data = {}
-        for trait in results:
-            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
-        return trait_data
-
     def get_trait_info(self, trait_list, species=None):
         for this_trait in trait_list:
             if not this_trait.haveinfo:
                 this_trait.retrieveInfo()
 
-            #XZ: trait_location_value is used for sorting
-            trait_location_repr = 'N/A'
-            trait_location_value = 1000000
-
             if this_trait.chr and this_trait.mb:
-                try:
-                    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
-                except:
-                    if this_trait.chr.upper() == 'X':
-                        trait_location_value = 20*1000 + this_trait.mb
-                    else:
-                        trait_location_value = ord(str(this_trait.chr).upper()[0])*1000 + this_trait.mb
-
                 this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
-                this_trait.location_value = trait_location_value
 
     def retrieve_sample_data(self, trait):
         query = """
@@ -989,20 +944,6 @@ class MrnaAssayDataSet(DataSet):
     def check_confidentiality(self):
         return geno_mrna_confidentiality(self)
 
-    def get_trait_list_1(self):
-        query = """
-            select ProbeSet.Name
-            from ProbeSet, ProbeSetXRef
-            where ProbeSetXRef.ProbeSetId = ProbeSet.Id
-            and ProbeSetFreezeId = {}
-            """.format(escape(str(self.id)))
-        logger.sql(query)
-        results = g.db.execute(query).fetchall()
-        trait_data = {}
-        for trait in results:
-            trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
-        return trait_data
-
     def get_trait_info(self, trait_list=None, species=''):
 
         #  Note: setting trait_list to [] is probably not a great idea.
@@ -1034,27 +975,8 @@ class MrnaAssayDataSet(DataSet):
             # Save it for the jinja2 template
             this_trait.description_display = description_display
 
-            #XZ: trait_location_value is used for sorting
-            trait_location_repr = 'N/A'
-            trait_location_value = 1000000
-
             if this_trait.chr and this_trait.mb:
-                #Checks if the chromosome number can be cast to an int (i.e. isn't "X" or "Y")
-                #This is so we can convert the location to a number used for sorting
-                trait_location_value = self.convert_location_to_value(this_trait.chr, this_trait.mb)
-                #try:
-                #    trait_location_value = int(this_trait.chr)*1000 + this_trait.mb
-                #except ValueError:
-                #    if this_trait.chr.upper() == 'X':
-                #        trait_location_value = 20*1000 + this_trait.mb
-                #    else:
-                #        trait_location_value = (ord(str(this_trait.chr).upper()[0])*1000 +
-                #                               this_trait.mb)
-
-                #ZS: Put this in function currently called "convert_location_to_value"
-                this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr,
-                                                                float(this_trait.mb))
-                this_trait.location_value = trait_location_value
+                this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb))
 
             #Get mean expression value
             query = (
@@ -1076,9 +998,7 @@ class MrnaAssayDataSet(DataSet):
 
             #LRS and its location
             this_trait.LRS_score_repr = 'N/A'
-            this_trait.LRS_score_value = 0
             this_trait.LRS_location_repr = 'N/A'
-            this_trait.LRS_location_value = 1000000
 
             #Max LRS and its Locus location
             if this_trait.lrs and this_trait.locus:
@@ -1093,40 +1013,10 @@ class MrnaAssayDataSet(DataSet):
 
                 if result:
                     lrs_chr, lrs_mb = result
-                    #XZ: LRS_location_value is used for sorting
-                    lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
                     this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
-                    this_trait.LRS_score_value = this_trait.lrs
                     this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
 
-
-    def convert_location_to_value(self, chromosome, mb):
-        try:
-            location_value = int(chromosome)*1000 + float(mb)
-        except ValueError:
-            if chromosome.upper() == 'X':
-                location_value = 20*1000 + float(mb)
-            else:
-                location_value = (ord(str(chromosome).upper()[0])*1000 +
-                                  float(mb))
-
-        return location_value
-
-    def get_sequence(self):
-        query = """
-                    SELECT
-                            ProbeSet.BlatSeq
-                    FROM
-                            ProbeSet, ProbeSetFreeze, ProbeSetXRef
-                    WHERE
-                            ProbeSet.Id=ProbeSetXRef.ProbeSetId and
-                            ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
-                            ProbeSet.Name = %s
-                            ProbeSetFreeze.Name = %s
-                """ % (escape(self.name), escape(self.dataset.name))
-        logger.sql(query)
-        results = g.db.execute(query).fetchone()
-        return results[0]
+        return trait_list
 
     def retrieve_sample_data(self, trait):
         query = """
@@ -1150,7 +1040,6 @@ class MrnaAssayDataSet(DataSet):
         #logger.debug("RETRIEVED RESULTS HERE:", results)
         return results
 
-
     def retrieve_genes(self, column_name):
         query = """
                     select ProbeSet.Name, ProbeSet.%s
@@ -1204,19 +1093,6 @@ class TempDataSet(DataSet):
         desc = self.handle_pca(desc)
         return desc
 
-    def get_group(self):
-        query = """
-                    SELECT
-                            InbredSet.Name, InbredSet.Id
-                    FROM
-                            InbredSet, Temp
-                    WHERE
-                            Temp.InbredSetId = InbredSet.Id AND
-                            Temp.Name = "%s"
-            """ % self.name
-        logger.sql(query)
-        self.group, self.group_id = g.db.execute(query).fetchone()
-
     def retrieve_sample_data(self, trait):
         query = """
                 SELECT
author	zsloan	2018-08-14 20:16:32 +0000
committer	zsloan	2018-08-14 20:16:32 +0000
commit	838362c116b02c090dadeb76cda27e9902a6626a (patch)
tree	a6be104cc73e3bc9e271f9b5ca854dd32f3b810d /wqflask/base/data_set.py
parent	0bead53661ea701ffd9f9d565e4d2ecbbed81a8e (diff)
parent	85defabb17ecdef1c7b8e92fa2e06b44d1e9ca49 (diff)
download	genenetwork2-838362c116b02c090dadeb76cda27e9902a6626a.tar.gz