about summary refs log tree commit diff
path: root/wqflask
diff options
context:
space:
mode:
Diffstat (limited to 'wqflask')
-rwxr-xr-x wqflask/base/data_set.py 228
1 files changed, 0 insertions, 228 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 8b2a9f01..52ac95f0 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -58,25 +58,13 @@ DS_NAME_MAP = {}
 def create_dataset(dataset_name, dataset_type = None, get_samplelist = True):
     if not dataset_type:
         dataset_type = Dataset_Getter(dataset_name)
-        #dataset_type = get_dataset_type_from_json(dataset_name)
 
         print("dataset_type is:", dataset_type)
-        #query = """
-        #    SELECT DBType.Name
-        #    FROM DBList, DBType
-        #    WHERE DBList.Name = '{}' and
-        #          DBType.Id = DBList.DBTypeId
-        #    """.format(escape(dataset_name))
-        #dataset_type = g.db.execute(query).fetchone().Name
-
 
     dataset_ob = DS_NAME_MAP[dataset_type]
     dataset_class = globals()[dataset_ob]
     return dataset_class(dataset_name, get_samplelist)
 
-
-#def get_dataset_type_from_json(dataset_name):
-
 class Dataset_Types(object):
 
     def __init__(self):
@@ -90,8 +78,6 @@ class Dataset_Types(object):
             for group in data['datasets'][species]:
                 for dataset_type in data['datasets'][species][group]:
                     for dataset in data['datasets'][species][group][dataset_type]:
-                        #print("dataset is:", dataset)
-
                         short_dataset_name = dataset[1]
                         if dataset_type == "Phenotypes":
                             new_type = "Publish"
@@ -107,10 +93,6 @@ class Dataset_Types(object):
 # Do the intensive work at startup one time only
 Dataset_Getter = Dataset_Types()
 
-#
-#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611"))
-
-
 def create_datasets_list():
     key = "all_datasets"
     result = Redis.get(key)
@@ -215,13 +197,6 @@ class Markers(object):
                     #del self.markers[i]
             self.markers = filtered_markers
 
-
-        #for i, marker in enumerate(self.markers):
-        #    if not 'p_value' in marker:
-        #        #print("self.markers[i]", self.markers[i])
-        #        del self.markers[i]
-        #        #self.markers.remove(self.markers[i])
-
 class HumanMarkers(Markers):
 
     def __init__(self, name, specified_markers = []):
@@ -249,26 +224,8 @@ class HumanMarkers(Markers):
 
 
     def add_pvalues(self, p_values):
-        #for marker, p_value in itertools.izip(self.markers, p_values):
-        #    if marker['Mb'] <= 0 and marker['chr'] == 0:
-        #        continue
-        #    marker['p_value'] = p_value
-        #    print("p_value is:", marker['p_value'])
-        #    marker['lod_score'] = -math.log10(marker['p_value'])
-        #    #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
-        #    marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
-
-        #print("p_values2:", pf(p_values))
         super(HumanMarkers, self).add_pvalues(p_values)
 
-        #with Bench("deleting markers"):
-        #    markers = []
-        #    for marker in self.markers:
-        #        if not marker['Mb'] <= 0 and not marker['chr'] == 0:
-        #            markers.append(marker)
-        #    self.markers = markers
-
-
 
 class DatasetGroup(object):
     """
@@ -311,12 +268,6 @@ class DatasetGroup(object):
     def datasets(self):
         key = "group_dataset_menu:v2:" + self.name
         print("key is2:", key)
-        #with Bench("Loading cache"):
-        #    result = Redis.get(key)
-        #if result:
-        #    self._datasets = pickle.loads(result)
-        #    return self._datasets
-
         dataset_menu = []
         print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH)
         print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH))
@@ -425,11 +376,6 @@ class DatasetGroup(object):
 
     def read_genotype_file(self):
         '''Read genotype from .geno file instead of database'''
-        #if self.group == 'BXD300':
-        #    self.group = 'BXD'
-        #
-        #assert self.group, "self.group needs to be set"
-
         #genotype_1 is Dataset Object without parents and f1
         #genotype_2 is Dataset Object with parents and f1 (not for intercross)
 
@@ -446,40 +392,16 @@ class DatasetGroup(object):
 
         #determine default genotype object
         if self.incparentsf1 and genotype_1.type != "intercross":
-            #self.genotype = genotype_2
             genotype = genotype_2
         else:
             self.incparentsf1 = 0
-            #self.genotype = genotype_1
             genotype = genotype_1
 
-        #self.samplelist = list(self.genotype.prgy)
         self.samplelist = list(genotype.prgy)
 
         return genotype
 
 
-#class DataSets(object):
-#    """Builds a list of DataSets"""
-#
-#    def __init__(self):
-#        self.datasets = list()
-#
-
-
-        #query = """SELECT Name FROM ProbeSetFreeze
-        #           UNION
-        #           SELECT Name From PublishFreeze
-        #           UNION
-        #           SELECT Name From GenoFreeze"""
-        #
-        #for result in g.db.execute(query).fetchall():
-        #    dataset = DataSet(result.Name)
-        #    self.datasets.append(dataset)
-
-#ds = DataSets()
-#print("[orange] ds:", ds.datasets)
-
 class DataSet(object):
     """
     DataSet class defines a dataset in webqtl, can be either Microarray,
@@ -512,32 +434,11 @@ class DataSet(object):
         """Gets overridden later, at least for Temp...used by trait's get_given_name"""
         return None
 
-    #@staticmethod
-    #def get_by_trait_id(trait_id):
-    #    """Gets the dataset object given the trait id"""
-    #
-    #
-    #
-    #    name = g.db.execute(""" SELECT
-    #
-    #                        """)
-    #
-    #    return DataSet(name)
-
     # Delete this eventually
     @property
     def riset():
         Weve_Renamed_This_As_Group
 
-
-    #@property
-    #def group(self):
-    #    if not self._group:
-    #        self.get_group()
-    #
-    #    return self._group
-
-
     def retrieve_other_names(self):
         """
         If the data set name parameter is not found in the 'Name' field of the data set table,
@@ -609,21 +510,6 @@ class DataSet(object):
         number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
         trait_sample_data = []
         for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
-
-        #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
-        #tempTable = None
-        #if GeneId and db.type == "ProbeSet":
-        #    if method == "3":
-        #        tempTable = self.getTempLiteratureTable(species=species,
-        #                                                input_species_geneid=GeneId,
-        #                                                returnNumber=returnNumber)
-        #
-        #    if method == "4" or method == "5":
-        #        tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
-        #                                        TissueProbeSetFreezeId=tissueProbeSetFreezeId,
-        #                                        method=method,
-        #                                        returnNumber=returnNumber)
-
             if self.type == "Publish":
                 dataset_type = "Phenotype"
             else:
@@ -1013,83 +899,11 @@ class MrnaAssayDataSet(DataSet):
             and ProbeSetFreezeId = {}
             """.format(escape(str(self.id)))
         results = g.db.execute(query).fetchall()
-        #print("After get_trait_list query")
         trait_data = {}
         for trait in results:
-            #print("Retrieving sample_data for ", trait[0])
             trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
-        #print("After retrieve_sample_data")
         return trait_data
 
-    #def get_trait_data(self):
-    #    self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
-    #    query = """
-    #        SELECT Strain.Name, Strain.Id FROM Strain, Species
-    #        WHERE Strain.Name IN {}
-    #        and Strain.SpeciesId=Species.Id
-    #        and Species.name = '{}'
-    #        """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
-    #    results = dict(g.db.execute(query).fetchall())
-    #    sample_ids = [results[item] for item in self.samplelist]
-    #
-    #    # MySQL limits the number of tables that can be used in a join to 61,
-    #    # so we break the sample ids into smaller chunks
-    #    # Postgres doesn't have that limit, so we can get rid of this after we transition
-    #    chunk_size = 50
-    #    number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
-    #    trait_sample_data = []
-    #    for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
-    #
-    #    #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
-    #    #tempTable = None
-    #    #if GeneId and db.type == "ProbeSet":
-    #    #    if method == "3":
-    #    #        tempTable = self.getTempLiteratureTable(species=species,
-    #    #                                                input_species_geneid=GeneId,
-    #    #                                                returnNumber=returnNumber)
-    #    #
-    #    #    if method == "4" or method == "5":
-    #    #        tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
-    #    #                                        TissueProbeSetFreezeId=tissueProbeSetFreezeId,
-    #    #                                        method=method,
-    #    #                                        returnNumber=returnNumber)
-    #
-    #        temp = ['T%s.value' % item for item in sample_ids_step]
-    #        query = "SELECT {}.Name,".format(escape(self.type))
-    #        data_start_pos = 1
-    #        query += string.join(temp, ', ')
-    #        query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
-    #                                                                 self.type,
-    #                                                                 self.type))
-    #
-    #        for item in sample_ids_step:
-    #            query += """
-    #                    left join {}Data as T{} on T{}.Id = {}XRef.DataId
-    #                    and T{}.StrainId={}\n
-    #                    """.format(*mescape(self.type, item, item, self.type, item, item))
-    #
-    #        query += """
-    #                WHERE {}XRef.{}FreezeId = {}Freeze.Id
-    #                and {}Freeze.Name = '{}'
-    #                and {}.Id = {}XRef.{}Id
-    #                order by {}.Id
-    #                """.format(*mescape(self.type, self.type, self.type, self.type,
-    #                           self.name, self.type, self.type, self.type, self.type))
-    #        results = g.db.execute(query).fetchall()
-    #        trait_sample_data.append(results)
-    #
-    #    trait_count = len(trait_sample_data[0])
-    #    self.trait_data = collections.defaultdict(list)
-    #
-    #    # put all of the separate data together into a dictionary where the keys are
-    #    # trait names and values are lists of sample values
-    #    for trait_counter in range(trait_count):
-    #        trait_name = trait_sample_data[0][trait_counter][0]
-    #        for chunk_counter in range(int(number_chunks)):
-    #            self.trait_data[trait_name] += (
-    #                trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
-
-
     def get_trait_info(self, trait_list=None, species=''):
 
         #  Note: setting trait_list to [] is probably not a great idea.
@@ -1178,22 +992,9 @@ class MrnaAssayDataSet(DataSet):
                 result = g.db.execute(query).fetchone()
 
                 if result:
-                    #if result[0] and result[1]:
-                    #    lrs_chr = result[0]
-                    #    lrs_mb = result[1]
                     lrs_chr, lrs_mb = result
                     #XZ: LRS_location_value is used for sorting
                     lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
-
-                    #try:
-                    #    lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb)
-                    #except:
-                    #    if lrs_chr.upper() == 'X':
-                    #        lrs_location_value = 20*1000 + float(lrs_mb)
-                    #    else:
-                    #        lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 +
-                    #                              float(lrs_mb))
-
                     this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
                     this_trait.LRS_score_value = this_trait.lrs
                     this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
@@ -1259,35 +1060,6 @@ class MrnaAssayDataSet(DataSet):
 
         return dict(results)
 
-    #def retrieve_gene_symbols(self):
-    #    query = """
-    #                select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId
-    #                from ProbeSet,ProbeSetXRef
-    #                where ProbeSetXRef.ProbeSetFreezeId = %s and
-    #                ProbeSetXRef.ProbeSetId=ProbeSet.Id;
-    #            """ % (self.id)
-    #    results = g.db.execute(query).fetchall()
-    #    symbol_dict = {}
-    #    for item in results:
-    #        symbol_dict[item[0]] = item[1]
-    #    return symbol_dict
-    #
-    #def retrieve_gene_ids(self):
-    #    query = """
-    #                select ProbeSet.Name, ProbeSet.GeneId
-    #                from ProbeSet,ProbeSetXRef
-    #                where ProbeSetXRef.ProbeSetFreezeId = %s and
-    #                ProbeSetXRef.ProbeSetId=ProbeSet.Id;
-    #            """ % (self.id)
-    #    return process_and_run_query(query)
-    #    results = g.db.execute(query).fetchall()
-    #    symbol_dict = {}
-    #    for item in results:
-    #        symbol_dict[item[0]] = item[1]
-    #    return symbol_dict
-
-
-
 
 class TempDataSet(DataSet):
     '''Temporary user-generated data set'''