1 files changed, 139 insertions, 98 deletions
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 457eb902..e5e94c7e 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -19,20 +19,22 @@ from utility import helper_functions
 from utility.authentication_tools import check_owner_or_admin
 from utility.tools import locate_ignore_error
 from utility.redis_tools import get_redis_conn, get_resource_id
+from utility.logger import getLogger
+
+
 Redis = get_redis_conn()
 ONE_YEAR = 60 * 60 * 24 * 365
 
-
-from utility.logger import getLogger
-logger = getLogger(__name__ )
+logger = getLogger(__name__)
 
 ###############################################
 #
-# Todo: Put in security to ensure that user has permission to access confidential data sets
-# And add i.p.limiting as necessary
+# TODO: Add security checks to ensure that the user has permission to access
+# confidential data sets, and add IP rate limiting as necessary.
 #
 ##############################################
 
+
 class ShowTrait(object):
 
     def __init__(self, kw):
@@ -41,13 +43,16 @@ class ShowTrait(object):
             self.trait_id = kw['trait_id']
             helper_functions.get_species_dataset_trait(self, kw)
             self.resource_id = get_resource_id(self.dataset, self.trait_id)
-            self.admin_status = check_owner_or_admin(resource_id=self.resource_id)
+            self.admin_status = check_owner_or_admin(
+                resource_id=self.resource_id)
         elif 'group' in kw:
             self.temp_trait = True
-            self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
+            self.trait_id = "Temp_"+kw['species'] + "_" + kw['group'] + \
+                "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
             self.temp_species = kw['species']
             self.temp_group = kw['group']
-            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
+            self.dataset = data_set.create_dataset(
+                dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group)
 
             # Put values in Redis so they can be looked up later if added to a collection
             Redis.set(self.trait_id, kw['trait_paste'], ex=ONE_YEAR)
@@ -56,25 +61,28 @@ class ShowTrait(object):
                                            name=self.trait_id,
                                            cellid=None)
 
-            self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id)
+            self.admin_status = check_owner_or_admin(
+                dataset=self.dataset, trait_id=self.trait_id)
         else:
             self.temp_trait = True
             self.trait_id = kw['trait_id']
             self.temp_species = self.trait_id.split("_")[1]
             self.temp_group = self.trait_id.split("_")[2]
-            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
+            self.dataset = data_set.create_dataset(
+                dataset_name="Temp", dataset_type="Temp", group_name=self.temp_group)
             self.this_trait = create_trait(dataset=self.dataset,
                                            name=self.trait_id,
                                            cellid=None)
 
             self.trait_vals = Redis.get(self.trait_id).split()
-            self.admin_status = check_owner_or_admin(dataset=self.dataset, trait_id=self.trait_id)
+            self.admin_status = check_owner_or_admin(
+                dataset=self.dataset, trait_id=self.trait_id)
 
-        #ZS: Get verify/rna-seq link URLs
+        # ZS: Get verify/rna-seq link URLs
         try:
             blatsequence = self.this_trait.blatseq
             if not blatsequence:
-                #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
+                # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
                 query1 = """SELECT Probe.Sequence, Probe.Name
                             FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                             WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
@@ -91,10 +99,10 @@ class ShowTrait(object):
                         if int(seqt[1][-1]) % 2 == 1:
                             blatsequence += string.strip(seqt[0])
 
-            #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
+            # Hongqiang added this part so that we BLAT not only the ProbeSet but also each Probe
             blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
 
-            #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
+            # XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
             query2 = """SELECT Probe.Sequence, Probe.Name
                         FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                         WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
@@ -105,24 +113,29 @@ class ShowTrait(object):
 
             seqs = g.db.execute(query2).fetchall()
             for seqt in seqs:
-                if int(seqt[1][-1]) %2 == 1:
-                    blatsequence += '%3EProbe_' + seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A'
+                if int(seqt[1][-1]) % 2 == 1:
+                    blatsequence += '%3EProbe_' + \
+                        seqt[1].strip() + '%0A' + seqt[0].strip() + '%0A'
 
             if self.dataset.group.species == "rat":
-                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn6', blatsequence)
+                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
+                    'rat', 'rn6', blatsequence)
                 self.UTHSC_BLAT_URL = ""
             elif self.dataset.group.species == "mouse":
-                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('mouse', 'mm10', blatsequence)
-                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence)
+                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
+                    'mouse', 'mm10', blatsequence)
+                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % (
+                    'mouse', 'mm10', blatsequence)
             elif self.dataset.group.species == "human":
-                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('human', 'hg38', blatsequence)
+                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % (
+                    'human', 'hg38', blatsequence)
                 self.UTHSC_BLAT_URL = ""
             else:
                 self.UCSC_BLAT_URL = ""
                 self.UTHSC_BLAT_URL = ""
         except:
-           self.UCSC_BLAT_URL = ""
-           self.UTHSC_BLAT_URL = ""
+            self.UCSC_BLAT_URL = ""
+            self.UTHSC_BLAT_URL = ""
 
         if self.dataset.type == "ProbeSet":
             self.show_probes = "True"
@@ -136,7 +149,8 @@ class ShowTrait(object):
         # Get nearest marker for composite mapping
         if not self.temp_trait:
             if check_if_attr_exists(self.this_trait, 'locus_chr') and self.dataset.type != "Geno" and self.dataset.type != "Publish":
-                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
+                self.nearest_marker = get_nearest_marker(
+                    self.this_trait, self.dataset)
             else:
                 self.nearest_marker = ""
 
@@ -159,22 +173,25 @@ class ShowTrait(object):
         categorical_var_list = []
         if not self.temp_trait:
             # ZS: Only using first samplelist, since I think mapping only uses those samples
-            categorical_var_list = get_categorical_variables(self.this_trait, self.sample_groups[0])
+            categorical_var_list = get_categorical_variables(
+                self.this_trait, self.sample_groups[0])
 
         # ZS: Get list of chromosomes to select for mapping
         self.chr_list = [["All", -1]]
         for i, this_chr in enumerate(self.dataset.species.chromosomes.chromosomes):
-            self.chr_list.append([self.dataset.species.chromosomes.chromosomes[this_chr].name, i])
+            self.chr_list.append(
+                [self.dataset.species.chromosomes.chromosomes[this_chr].name, i])
 
         self.genofiles = self.dataset.group.get_genofiles()
 
         # ZS: No need to grab scales from .geno file unless it's using
         # a mapping method that reads .geno files
-        if "QTLReaper" or "R/qtl" in dataset.group.mapping_names: 
+        if "QTLReaper" or "R/qtl" in dataset.group.mapping_names:
             if self.genofiles:
                 self.scales_in_geno = get_genotype_scales(self.genofiles)
             else:
-                self.scales_in_geno = get_genotype_scales(self.dataset.group.name + ".geno")
+                self.scales_in_geno = get_genotype_scales(
+                    self.dataset.group.name + ".geno")
         else:
             self.scales_in_geno = {}
 
@@ -211,7 +228,8 @@ class ShowTrait(object):
 
         sample_column_width = max_samplename_width * 8
 
-        self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups, sample_column_width, self.has_num_cases)
+        self.stats_table_width, self.trait_table_width = get_table_widths(
+            self.sample_groups, sample_column_width, self.has_num_cases)
 
         if self.num_values >= 5000:
             self.maf = 0.01
@@ -242,9 +260,9 @@ class ShowTrait(object):
         hddn['dataset'] = self.dataset.name
         hddn['temp_trait'] = False
         if self.temp_trait:
-           hddn['temp_trait'] = True
-           hddn['group'] = self.temp_group
-           hddn['species'] = self.temp_species
+            hddn['temp_trait'] = True
+            hddn['group'] = self.temp_group
+            hddn['species'] = self.temp_species
         hddn['use_outliers'] = False
         hddn['method'] = "gemma"
         hddn['selected_chr'] = -1
@@ -264,41 +282,46 @@ class ShowTrait(object):
         hddn['export_data'] = ""
         hddn['export_format'] = "excel"
         if len(self.scales_in_geno) < 2:
-            hddn['mapping_scale'] = self.scales_in_geno[list(self.scales_in_geno.keys())[0]][0][0]
+            hddn['mapping_scale'] = self.scales_in_geno[list(
+                self.scales_in_geno.keys())[0]][0][0]
 
         # We'll need access to this_trait and hddn in the Jinja2
         # Template, so we put it inside self
         self.hddn = hddn
 
-        js_data = dict(trait_id = self.trait_id,
-                       trait_symbol = trait_symbol,
-                       short_description = short_description,
-                       unit_type = trait_units,
-                       dataset_type = self.dataset.type,
-                       species = self.dataset.group.species,
-                       scales_in_geno = self.scales_in_geno,
-                       data_scale = self.dataset.data_scale,
-                       sample_group_types = self.sample_group_types,
-                       sample_lists = sample_lists,
-                       se_exists = self.sample_groups[0].se_exists,
-                       has_num_cases = self.has_num_cases,
-                       attributes = self.sample_groups[0].attributes,
-                       categorical_attr_exists = self.categorical_attr_exists,
-                       categorical_vars = ",".join(categorical_var_list),
-                       num_values = self.num_values,
-                       qnorm_values = self.qnorm_vals,
-                       zscore_values = self.z_scores,
-                       sample_column_width = sample_column_width,
-                       temp_uuid = self.temp_uuid)
+        js_data = dict(trait_id=self.trait_id,
+                       trait_symbol=trait_symbol,
+                       short_description=short_description,
+                       unit_type=trait_units,
+                       dataset_type=self.dataset.type,
+                       species=self.dataset.group.species,
+                       scales_in_geno=self.scales_in_geno,
+                       data_scale=self.dataset.data_scale,
+                       sample_group_types=self.sample_group_types,
+                       sample_lists=sample_lists,
+                       se_exists=self.sample_groups[0].se_exists,
+                       has_num_cases=self.has_num_cases,
+                       attributes=self.sample_groups[0].attributes,
+                       categorical_attr_exists=self.categorical_attr_exists,
+                       categorical_vars=",".join(categorical_var_list),
+                       num_values=self.num_values,
+                       qnorm_values=self.qnorm_vals,
+                       zscore_values=self.z_scores,
+                       sample_column_width=sample_column_width,
+                       temp_uuid=self.temp_uuid)
         self.js_data = js_data
 
     def get_external_links(self):
         # ZS: There's some weirdness here because some fields don't
         # exist while others are empty strings
-        self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists(self.this_trait, 'pubmed_id') else None
-        self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists(self.this_trait, 'geneid') else None
-        self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists(self.this_trait, 'omim') else None
-        self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists(self.this_trait, 'homologeneid') else None
+        self.pubmed_link = webqtlConfig.PUBMEDLINK_URL % self.this_trait.pubmed_id if check_if_attr_exists(
+            self.this_trait, 'pubmed_id') else None
+        self.ncbi_gene_link = webqtlConfig.NCBI_LOCUSID % self.this_trait.geneid if check_if_attr_exists(
+            self.this_trait, 'geneid') else None
+        self.omim_link = webqtlConfig.OMIM_ID % self.this_trait.omim if check_if_attr_exists(
+            self.this_trait, 'omim') else None
+        self.homologene_link = webqtlConfig.HOMOLOGENE_ID % self.this_trait.homologeneid if check_if_attr_exists(
+            self.this_trait, 'homologeneid') else None
 
         self.genbank_link = None
         if check_if_attr_exists(self.this_trait, 'genbankid'):
@@ -322,11 +345,14 @@ class ShowTrait(object):
             self.protein_atlas_link = webqtlConfig.PROTEIN_ATLAS_URL % self.this_trait.symbol
 
             if self.dataset.group.species == "mouse" or self.dataset.group.species == "human":
-                self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize())
+                self.rgd_link = webqtlConfig.RGD_URL % (
+                    self.this_trait.symbol, self.dataset.group.species.capitalize())
                 if self.dataset.group.species == "mouse":
-                    self.genemania_link = webqtlConfig.GENEMANIA_URL % ("mus-musculus", self.this_trait.symbol)
+                    self.genemania_link = webqtlConfig.GENEMANIA_URL % (
+                        "mus-musculus", self.this_trait.symbol)
                 else:
-                    self.genemania_link = webqtlConfig.GENEMANIA_URL % ("homo-sapiens", self.this_trait.symbol)
+                    self.genemania_link = webqtlConfig.GENEMANIA_URL % (
+                        "homo-sapiens", self.this_trait.symbol)
 
                 if self.dataset.group.species == "mouse":
                     self.aba_link = webqtlConfig.ABA_URL % self.this_trait.symbol
@@ -344,12 +370,16 @@ class ShowTrait(object):
                     if chr and transcript_start and transcript_end and self.this_trait.refseq_transcriptid:
                         transcript_start = int(transcript_start*1000000)
                         transcript_end = int(transcript_end*1000000)
-                        self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end)
+                        self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % (
+                            'mm10', self.this_trait.refseq_transcriptid, chr, transcript_start, transcript_end)
 
             if self.dataset.group.species == "rat":
-                self.rgd_link = webqtlConfig.RGD_URL % (self.this_trait.symbol, self.dataset.group.species.capitalize())
-                self.phenogen_link = webqtlConfig.PHENOGEN_URL % (self.this_trait.symbol)
-                self.genemania_link = webqtlConfig.GENEMANIA_URL % ("rattus-norvegicus", self.this_trait.symbol)
+                self.rgd_link = webqtlConfig.RGD_URL % (
+                    self.this_trait.symbol, self.dataset.group.species.capitalize())
+                self.phenogen_link = webqtlConfig.PHENOGEN_URL % (
+                    self.this_trait.symbol)
+                self.genemania_link = webqtlConfig.GENEMANIA_URL % (
+                    "rattus-norvegicus", self.this_trait.symbol)
 
                 query = """SELECT kgID, chromosome, txStart, txEnd
                         FROM GeneList_rn33
@@ -362,12 +392,15 @@ class ShowTrait(object):
                     kgId = chr = transcript_start = transcript_end = None
 
                 if chr and transcript_start and transcript_end and kgId:
-                    transcript_start = int(transcript_start*1000000) # Convert to bases from megabases
+                    # Convert to bases from megabases
+                    transcript_start = int(transcript_start*1000000)
                     transcript_end = int(transcript_end*1000000)
-                    self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % ('rn6', kgId, chr, transcript_start, transcript_end)
+                    self.ucsc_blat_link = webqtlConfig.UCSC_REFSEQ % (
+                        'rn6', kgId, chr, transcript_start, transcript_end)
 
             if self.this_trait.geneid and (self.dataset.group.species == "mouse" or self.dataset.group.species == "rat" or self.dataset.group.species == "human"):
-                self.biogps_link = webqtlConfig.BIOGPS_URL % (self.dataset.group.species, self.this_trait.geneid)
+                self.biogps_link = webqtlConfig.BIOGPS_URL % (
+                    self.dataset.group.species, self.this_trait.geneid)
                 self.gemma_link = webqtlConfig.GEMMA_URL % self.this_trait.geneid
 
                 if self.dataset.group.species == "human":
@@ -388,40 +421,43 @@ class ShowTrait(object):
             if self.temp_trait == True:
                 dataset_menu = data_set.datasets(this_group)
             else:
-                dataset_menu = data_set.datasets(this_group, self.dataset.group)
+                dataset_menu = data_set.datasets(
+                    this_group, self.dataset.group)
             dataset_menu_selected = None
             if len(dataset_menu):
                 if self.dataset:
                     dataset_menu_selected = self.dataset.name
 
-            return_results_menu = (100, 200, 500, 1000, 2000, 5000, 10000, 15000, 20000)
+            return_results_menu = (100, 200, 500, 1000,
+                                   2000, 5000, 10000, 15000, 20000)
             return_results_menu_selected = 500
 
-            self.corr_tools = dict(dataset_menu = dataset_menu,
-                                          dataset_menu_selected = dataset_menu_selected,
-                                          return_results_menu = return_results_menu,
-                                          return_results_menu_selected = return_results_menu_selected,)
+            self.corr_tools = dict(dataset_menu=dataset_menu,
+                                   dataset_menu_selected=dataset_menu_selected,
+                                   return_results_menu=return_results_menu,
+                                   return_results_menu_selected=return_results_menu_selected,)
 
     def make_sample_lists(self):
 
         all_samples_ordered = self.dataset.group.all_samples_ordered()
-        
+
         parent_f1_samples = []
         if self.dataset.group.parlist and self.dataset.group.f1list:
             parent_f1_samples = self.dataset.group.parlist + self.dataset.group.f1list
 
         primary_sample_names = list(all_samples_ordered)
 
-
         if not self.temp_trait:
             other_sample_names = []
 
             for sample in list(self.this_trait.data.keys()):
                 if (self.this_trait.data[sample].name2 != self.this_trait.data[sample].name):
                     if ((self.this_trait.data[sample].name2 in primary_sample_names) and
-                        (self.this_trait.data[sample].name not in primary_sample_names)):
-                        primary_sample_names.append(self.this_trait.data[sample].name)
-                        primary_sample_names.remove(self.this_trait.data[sample].name2)
+                            (self.this_trait.data[sample].name not in primary_sample_names)):
+                        primary_sample_names.append(
+                            self.this_trait.data[sample].name)
+                        primary_sample_names.remove(
+                            self.this_trait.data[sample].name2)
 
                 all_samples_set = set(all_samples_ordered)
                 if sample not in all_samples_set:
@@ -440,34 +476,34 @@ class ShowTrait(object):
             else:
                 primary_header = "Samples"
 
-            primary_samples = SampleList(dataset = self.dataset,
-                                            sample_names=primary_sample_names,
-                                            this_trait=self.this_trait,
-                                            sample_group_type='primary',
-                                            header=primary_header)
+            primary_samples = SampleList(dataset=self.dataset,
+                                         sample_names=primary_sample_names,
+                                         this_trait=self.this_trait,
+                                         sample_group_type='primary',
+                                         header=primary_header)
 
             # if other_sample_names and self.dataset.group.species !=
             # "human" and self.dataset.group.name != "CFW":
             if len(other_sample_names) > 0:
-                other_sample_names.sort() #Sort other samples
+                other_sample_names.sort()  # Sort other samples
                 if parent_f1_samples:
                     other_sample_names = parent_f1_samples + other_sample_names
 
                 other_samples = SampleList(dataset=self.dataset,
-                                            sample_names=other_sample_names,
-                                            this_trait=self.this_trait,
-                                            sample_group_type='other',
-                                            header="Other")
+                                           sample_names=other_sample_names,
+                                           this_trait=self.this_trait,
+                                           sample_group_type='other',
+                                           header="Other")
 
                 self.sample_groups = (primary_samples, other_samples)
             else:
                 self.sample_groups = (primary_samples,)
         else:
-            primary_samples = SampleList(dataset = self.dataset,
-                                            sample_names=primary_sample_names,
-                                            this_trait=self.trait_vals,
-                                            sample_group_type='primary',
-                                            header="%s Only" % (self.dataset.group.name))
+            primary_samples = SampleList(dataset=self.dataset,
+                                         sample_names=primary_sample_names,
+                                         this_trait=self.trait_vals,
+                                         sample_group_type='primary',
+                                         header="%s Only" % (self.dataset.group.name))
             self.sample_groups = (primary_samples,)
 
         self.primary_sample_names = primary_sample_names
@@ -622,8 +658,10 @@ def get_ncbi_summary(this_trait):
         # ZS: Need to switch this try/except to something that checks
         # the output later
         try:
-            response = requests.get("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid)
-            summary = json.loads(response.content)['result'][this_trait.geneid]['summary']
+            response = requests.get(
+                "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gene&id=%s&retmode=json" % this_trait.geneid)
+            summary = json.loads(response.content)[
+                'result'][this_trait.geneid]['summary']
             return summary
         except:
             return None
@@ -639,13 +677,15 @@ def get_categorical_variables(this_trait, sample_list):
             attribute_vals = []
             for sample_name in list(this_trait.data.keys()):
                 if sample_list.attributes[attribute].name in this_trait.data[sample_name].extra_attributes:
-                    attribute_vals.append(this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name])
+                    attribute_vals.append(
+                        this_trait.data[sample_name].extra_attributes[sample_list.attributes[attribute].name])
                 else:
                     attribute_vals.append("N/A")
             num_distinct = len(set(attribute_vals))
 
             if num_distinct < 10:
-                categorical_var_list.append(sample_list.attributes[attribute].name)
+                categorical_var_list.append(
+                    sample_list.attributes[attribute].name)
 
     return categorical_var_list
 
@@ -655,7 +695,8 @@ def get_genotype_scales(genofiles):
     if isinstance(genofiles, list):
         for the_file in genofiles:
             file_location = the_file['location']
-            geno_scales[file_location] = get_scales_from_genofile(file_location)
+            geno_scales[file_location] = get_scales_from_genofile(
+                file_location)
     else:
         geno_scales[genofiles] = get_scales_from_genofile(genofiles)