Merge branch 'testing' of github.com:genenetwork/genenetwork2 into feature/remove_trait_creation_from_search

author: zsloan 2021-10-18 17:50:26 +0000
committer: zsloan 2021-10-18 17:50:26 +0000
commit: e36eaf0003a598bc5aa688803dd1b36c24a4c051 (patch)
tree: a59b7dadf02241575eb0774f97c6048e2425c053 /wqflask/base
parent: bd421438f1f0b4de913fa40cd49cfcda27e6b16f (diff)
parent: 04f3d13aceeaec2e52b94037d59f08ed6dc6a8bb (diff)
download: genenetwork2-e36eaf0003a598bc5aa688803dd1b36c24a4c051.tar.gz
7 files changed, 294 insertions, 252 deletions
diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py
index 249195e2..ce8e60b8 100644
--- a/wqflask/base/GeneralObject.py
+++ b/wqflask/base/GeneralObject.py
@@ -62,5 +62,5 @@ class GeneralObject:
         return s
 
     def __eq__(self, other):
-        return (len(list(self.__dict__.keys())) ==
-                len(list(other.__dict__.keys())))
+        return (len(list(self.__dict__.keys()))
+                == len(list(other.__dict__.keys())))
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 178234fe..8906ab69 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -17,7 +17,10 @@
 # at rwilliams@uthsc.edu and xzhou15@uthsc.edu
 #
 # This module is used by GeneNetwork project (www.genenetwork.org)
-
+from dataclasses import dataclass
+from dataclasses import field
+from dataclasses import InitVar
+from typing import Optional, Dict
 from db.call import fetchall, fetchone, fetch1
 from utility.logger import getLogger
 from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL
@@ -59,7 +62,8 @@ logger = getLogger(__name__)
 DS_NAME_MAP = {}
 
 
-def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_name=None):
+def create_dataset(dataset_name, dataset_type=None,
+                   get_samplelist=True, group_name=None):
     if dataset_name == "Temp":
         dataset_type = "Temp"
 
@@ -74,11 +78,10 @@ def create_dataset(dataset_name, dataset_type=None, get_samplelist=True, group_n
         return dataset_class(dataset_name, get_samplelist)
 
 
+@dataclass
 class DatasetType:
-
-    def __init__(self, redis_instance):
-        """Create a dictionary of samples where the value is set to Geno,
-Publish or ProbeSet. E.g.
+    """Create a dictionary of samples where the value is set to Geno,
+    Publish or ProbeSet. E.g.
 
         {'AD-cases-controls-MyersGeno': 'Geno',
          'AD-cases-controls-MyersPublish': 'Publish',
@@ -89,21 +92,28 @@ Publish or ProbeSet. E.g.
          'All Phenotypes': 'Publish',
          'B139_K_1206_M': 'ProbeSet',
          'B139_K_1206_R': 'ProbeSet' ...
-
+        }
         """
+    redis_instance: InitVar[Redis]
+    datasets: Optional[Dict] = field(init=False, default_factory=dict)
+    data: Optional[Dict] = field(init=False)
+
+    def __post_init__(self, redis_instance):
         self.redis_instance = redis_instance
-        self.datasets = {}
-        data = self.redis_instance.get("dataset_structure")
+        data = redis_instance.get("dataset_structure")
         if data:
             self.datasets = json.loads(data)
-        else:  # ZS: I don't think this should ever run unless Redis is emptied
+        else:
+            # ZS: I don't think this should ever run unless Redis is
+            # emptied
             try:
                 data = json.loads(requests.get(
-                    GN2_BASE_URL + "/api/v_pre1/gen_dropdown", timeout=5).content)
-                for species in data['datasets']:
-                    for group in data['datasets'][species]:
-                        for dataset_type in data['datasets'][species][group]:
-                            for dataset in data['datasets'][species][group][dataset_type]:
+                    GN2_BASE_URL + "/api/v_pre1/gen_dropdown",
+                    timeout=5).content)
+                for _species in data['datasets']:
+                    for group in data['datasets'][_species]:
+                        for dataset_type in data['datasets'][_species][group]:
+                            for dataset in data['datasets'][_species][group][dataset_type]:
                                 short_dataset_name = dataset[1]
                                 if dataset_type == "Phenotypes":
                                     new_type = "Publish"
@@ -112,15 +122,16 @@ Publish or ProbeSet. E.g.
                                 else:
                                     new_type = "ProbeSet"
                                 self.datasets[short_dataset_name] = new_type
-            except:
+            except Exception:  # Do nothing
                 pass
 
-            self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+            self.redis_instance.set("dataset_structure",
+                                    json.dumps(self.datasets))
+        self.data = data
 
     def set_dataset_key(self, t, name):
-        """If name is not in the object's dataset dictionary, set it, and update
-        dataset_structure in Redis
-
+        """If name is not in the object's dataset dictionary, set it, and
+        update dataset_structure in Redis
         args:
           t: Type of dataset structure which can be: 'mrna_expr', 'pheno',
              'other_pheno', 'geno'
@@ -128,19 +139,20 @@ Publish or ProbeSet. E.g.
 
         """
         sql_query_mapping = {
-            'mrna_expr': ("""SELECT ProbeSetFreeze.Id FROM """ +
-                          """ProbeSetFreeze WHERE ProbeSetFreeze.Name = "{}" """),
-            'pheno': ("""SELECT InfoFiles.GN_AccesionId """ +
-                      """FROM InfoFiles, PublishFreeze, InbredSet """ +
-                      """WHERE InbredSet.Name = '{}' AND """ +
-                      """PublishFreeze.InbredSetId = InbredSet.Id AND """ +
-                      """InfoFiles.InfoPageName = PublishFreeze.Name"""),
-            'other_pheno': ("""SELECT PublishFreeze.Name """ +
-                            """FROM PublishFreeze, InbredSet """ +
-                            """WHERE InbredSet.Name = '{}' AND """ +
-                            """PublishFreeze.InbredSetId = InbredSet.Id"""),
-            'geno':  ("""SELECT GenoFreeze.Id FROM GenoFreeze WHERE """ +
-                      """GenoFreeze.Name = "{}" """)
+            'mrna_expr': ("SELECT ProbeSetFreeze.Id FROM "
+                          "ProbeSetFreeze WHERE "
+                          "ProbeSetFreeze.Name = \"%s\" "),
+            'pheno': ("SELECT InfoFiles.GN_AccesionId "
+                      "FROM InfoFiles, PublishFreeze, InbredSet "
+                      "WHERE InbredSet.Name = '%s' AND "
+                      "PublishFreeze.InbredSetId = InbredSet.Id AND "
+                      "InfoFiles.InfoPageName = PublishFreeze.Name"),
+            'other_pheno': ("SELECT PublishFreeze.Name "
+                            "FROM PublishFreeze, InbredSet "
+                            "WHERE InbredSet.Name = '%s' AND "
+                            "PublishFreeze.InbredSetId = InbredSet.Id"),
+            'geno': ("SELECT GenoFreeze.Id FROM GenoFreeze WHERE "
+                     "GenoFreeze.Name = \"%s\" ")
         }
 
         dataset_name_mapping = {
@@ -154,22 +166,23 @@ Publish or ProbeSet. E.g.
         if t in ['pheno', 'other_pheno']:
             group_name = name.replace("Publish", "")
 
-        results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone()
+        results = g.db.execute(sql_query_mapping[t] % group_name).fetchone()
         if results:
             self.datasets[name] = dataset_name_mapping[t]
-            self.redis_instance.set("dataset_structure", json.dumps(self.datasets))
+            self.redis_instance.set(
+                "dataset_structure", json.dumps(self.datasets))
             return True
-
         return None
 
     def __call__(self, name):
-
         if name not in self.datasets:
             for t in ["mrna_expr", "pheno", "other_pheno", "geno"]:
-                # This has side-effects, with the end result being a truth-y value
+                # This has side-effects, with the end result being a
+                # truth-y value
                 if(self.set_dataset_key(t, name)):
                     break
-        return self.datasets.get(name, None)  # Return None if name has not been set
+        # Return None if name has not been set
+        return self.datasets.get(name, None)
 
 
 # Do the intensive work at startup one time only
@@ -204,12 +217,12 @@ def create_datasets_list():
 
         if USE_REDIS:
             r.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
-            r.expire(key, 60*60)
+            r.expire(key, 60 * 60)
 
     return datasets
 
 
-class Markers(object):
+class Markers:
     """Todo: Build in cacheing so it saves us reading the same file more than once"""
 
     def __init__(self, name):
@@ -228,7 +241,8 @@ class Markers(object):
             for line in bimbam_fh:
                 marker = {}
                 marker['name'] = line.split(delimiter)[0].rstrip()
-                marker['Mb'] = float(line.split(delimiter)[1].rstrip())/1000000
+                marker['Mb'] = float(line.split(delimiter)[
+                                     1].rstrip()) / 1000000
                 marker['chr'] = line.split(delimiter)[2].rstrip()
                 markers.append(marker)
 
@@ -262,10 +276,7 @@ class Markers(object):
         elif isinstance(p_values, dict):
             filtered_markers = []
             for marker in self.markers:
-                #logger.debug("marker[name]", marker['name'])
-                #logger.debug("p_values:", p_values)
                 if marker['name'] in p_values:
-                    #logger.debug("marker {} IS in p_values".format(i))
                     marker['p_value'] = p_values[marker['name']]
                     if math.isnan(marker['p_value']) or (marker['p_value'] <= 0):
                         marker['lod_score'] = 0
@@ -276,10 +287,6 @@ class Markers(object):
                         marker['lrs_value'] = - \
                             math.log10(marker['p_value']) * 4.61
                     filtered_markers.append(marker)
-                # else:
-                    #logger.debug("marker {} NOT in p_values".format(i))
-                    # self.markers.remove(marker)
-                    #del self.markers[i]
             self.markers = filtered_markers
 
 
@@ -290,7 +297,6 @@ class HumanMarkers(Markers):
         self.markers = []
         for line in marker_data_fh:
             splat = line.strip().split()
-            #logger.debug("splat:", splat)
             if len(specified_markers) > 0:
                 if splat[1] in specified_markers:
                     marker = {}
@@ -306,13 +312,11 @@ class HumanMarkers(Markers):
                 marker['Mb'] = float(splat[3]) / 1000000
             self.markers.append(marker)
 
-        #logger.debug("markers is: ", pf(self.markers))
-
     def add_pvalues(self, p_values):
         super(HumanMarkers, self).add_pvalues(p_values)
 
 
-class DatasetGroup(object):
+class DatasetGroup:
     """
     Each group has multiple datasets; each species has multiple groups.
 
@@ -365,8 +369,8 @@ class DatasetGroup(object):
     def get_markers(self):
         def check_plink_gemma():
             if flat_file_exists("mapping"):
-                MAPPING_PATH = flat_files("mapping")+"/"
-                if os.path.isfile(MAPPING_PATH+self.name+".bed"):
+                MAPPING_PATH = flat_files("mapping") + "/"
+                if os.path.isfile(MAPPING_PATH + self.name + ".bed"):
                     return True
             return False
 
@@ -392,6 +396,15 @@ class DatasetGroup(object):
         if maternal and paternal:
             self.parlist = [maternal, paternal]
 
+    def get_study_samplelists(self):
+        """Return the parsed <group name>.json study sample lists, or [] if unreadable."""
+        study_sample_file = locate_ignore_error(self.name + ".json", 'study_sample_lists')
+        try:
+            with open(study_sample_file) as f:  # closes the handle once parsed
+                return json.load(f)
+        except (OSError, TypeError):  # unreadable path; presumably None if not located
+            return []
+
     def get_genofiles(self):
         jsonfile = "%s/%s.json" % (webqtlConfig.GENODIR, self.name)
         try:
@@ -412,7 +425,7 @@ class DatasetGroup(object):
         else:
             logger.debug("Cache not hit")
 
-            genotype_fn = locate_ignore_error(self.name+".geno", 'genotype')
+            genotype_fn = locate_ignore_error(self.name + ".geno", 'genotype')
             if genotype_fn:
                 self.samplelist = get_group_samplelists.get_samplelist(
                     "geno", genotype_fn)
@@ -421,7 +434,7 @@ class DatasetGroup(object):
 
             if USE_REDIS:
                 r.set(key, json.dumps(self.samplelist))
-                r.expire(key, 60*5)
+                r.expire(key, 60 * 5)
 
     def all_samples_ordered(self):
         result = []
@@ -434,7 +447,6 @@ class DatasetGroup(object):
         # genotype_1 is Dataset Object without parents and f1
         # genotype_2 is Dataset Object with parents and f1 (not for intercross)
 
-        #genotype_1 = reaper.Dataset()
 
         # reaper barfs on unicode filenames, so here we ensure it's a string
         if self.genofile:
@@ -520,7 +532,6 @@ def datasets(group_name, this_group=None):
                     break
 
             if tissue_already_exists:
-                #logger.debug("dataset_menu:", dataset_menu[i]['datasets'])
                 dataset_menu[i]['datasets'].append((dataset, dataset_short))
             else:
                 dataset_menu.append(dict(tissue=tissue_name,
@@ -528,7 +539,7 @@ def datasets(group_name, this_group=None):
 
     if USE_REDIS:
         r.set(key, pickle.dumps(dataset_menu, pickle.HIGHEST_PROTOCOL))
-        r.expire(key, 60*5)
+        r.expire(key, 60 * 5)
 
     if this_group != None:
         this_group._datasets = dataset_menu
@@ -537,7 +548,7 @@ def datasets(group_name, this_group=None):
         return dataset_menu
 
 
-class DataSet(object):
+class DataSet:
     """
     DataSet class defines a dataset in webqtl, can be either Microarray,
     Published phenotype, genotype, or user input dataset(temp)
@@ -553,6 +564,7 @@ class DataSet(object):
         self.fullname = None
         self.type = None
         self.data_scale = None  # ZS: For example log2
+        self.accession_id = None
 
         self.setup()
 
@@ -569,14 +581,16 @@ class DataSet(object):
             self.group.get_samplelist()
         self.species = species.TheSpecies(self)
 
-    def get_desc(self):
-        """Gets overridden later, at least for Temp...used by trait's get_given_name"""
-        return None
-
-    # Delete this eventually
-    @property
-    def riset():
-        Weve_Renamed_This_As_Group
+    def as_dict(self):
+        """Return the dataset's descriptive fields as a plain dict."""
+        # The group object is represented by its name only.
+        return dict(name=self.name,
+                    shortname=self.shortname,
+                    fullname=self.fullname,
+                    type=self.type,
+                    data_scale=self.data_scale,
+                    group=self.group.name,
+                    accession_id=self.accession_id)
 
     def get_accession_id(self):
         if self.type == "Publish":
@@ -628,7 +642,7 @@ class DataSet(object):
     WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id
     AND ProbeFreeze.TissueId = Tissue.Id
     AND (ProbeSetFreeze.Name = '%s' OR ProbeSetFreeze.FullName = '%s' OR ProbeSetFreeze.ShortName = '%s')
-                """ % (query_args), "/dataset/"+self.name+".json",
+                """ % (query_args), "/dataset/" + self.name + ".json",
                     lambda r: (r["id"], r["name"], r["full_name"],
                                r["short_name"], r["data_scale"], r["tissue"])
                 )
@@ -651,6 +665,69 @@ class DataSet(object):
                 "Dataset {} is not yet available in GeneNetwork.".format(self.name))
             pass
 
+    def chunk_dataset(self, dataset, n):
+        """Split raw ProbeSetData rows into per-trait value lists.
+
+        `dataset` is consumed in consecutive slices of `n` rows. The first
+        column of a slice's first row is looked up as a DataId to get the
+        trait name; the third column of every row in the slice is its value.
+        Returns a dict mapping trait name to that list of values.
+        """
+        query = """
+                SELECT ProbeSetXRef.DataId,ProbeSet.Name
+                FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze
+                WHERE ProbeSetFreeze.Name = '{}' AND
+                      ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                      ProbeSetXRef.ProbeSetId = ProbeSet.Id
+        """.format(self.name)
+        # TODO: cache this DataId -> trait name lookup
+        name_by_data_id = dict(g.db.execute(query).fetchall())
+        chunked = {}
+        for start in range(0, len(dataset), n):
+            rows = dataset[start:start + n]
+            trait = name_by_data_id[rows[0][0]]
+            chunked[trait] = [value for (_id, _strain, value) in rows]
+        return chunked
+
+    def get_probeset_data(self, sample_list=None, trait_ids=None):
+        """Bulk-load this dataset's trait values into self.trait_data.
+
+        Uses `sample_list` when truthy, else the group's list; `trait_ids` is
+        unused. self.samplelist is finally set to strain names ordered by id.
+        """
+        if sample_list:
+            self.samplelist = sample_list
+        else:
+            self.samplelist = self.group.samplelist
+
+        if self.group.parlist is not None and self.group.f1list is not None:
+            # NOTE(review): checks the combined list is an *element*; confirm intent.
+            if (self.group.parlist + self.group.f1list) in self.samplelist:
+                self.samplelist += self.group.parlist + self.group.f1list
+
+        query = """
+            SELECT Strain.Name, Strain.Id FROM Strain, Species
+            WHERE Strain.Name IN {}
+            and Strain.SpeciesId=Species.Id
+            and Species.name = '{}'
+            """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
+        results = dict(g.db.execute(query).fetchall())
+        sample_ids = [results[item] for item in self.samplelist]
+        sorted_samplelist = [strain_name for strain_name, strain_id in sorted(
+            results.items(), key=lambda item: item[1])]
+
+        query = """SELECT * from ProbeSetData
+                where StrainID in {}
+                and id in (SELECT ProbeSetXRef.DataId
+                FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze)
+                WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id
+                and ProbeSetFreeze.Name = '{}'
+                and ProbeSet.Id = ProbeSetXRef.ProbeSetId)""".format(create_in_clause(sample_ids), self.name)
+        query_results = list(g.db.execute(query).fetchall())
+        data_results = self.chunk_dataset(query_results, len(sample_ids))
+        self.samplelist = sorted_samplelist
+        self.trait_data = data_results
+
     def get_trait_data(self, sample_list=None):
         if sample_list:
             self.samplelist = sample_list
@@ -667,7 +744,6 @@ class DataSet(object):
             and Strain.SpeciesId=Species.Id
             and Species.name = '{}'
             """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
-        logger.sql(query)
         results = dict(g.db.execute(query).fetchall())
         sample_ids = [results[item] for item in self.samplelist]
 
@@ -735,9 +811,6 @@ class PhenotypeDataSet(DataSet):
     DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
 
     def setup(self):
-
-        #logger.debug("IS A PHENOTYPEDATASET")
-
         # Fields in the database table
         self.search_fields = ['Phenotype.Post_publication_description',
                               'Phenotype.Pre_publication_description',
@@ -841,7 +914,6 @@ class PhenotypeDataSet(DataSet):
                         Geno.Name = '%s' and
                         Geno.SpeciesId = Species.Id
                 """ % (species, this_trait.locus)
-                logger.sql(query)
                 result = g.db.execute(query).fetchone()
 
                 if result:
@@ -871,7 +943,6 @@ class PhenotypeDataSet(DataSet):
                     Order BY
                             Strain.Name
                     """
-        logger.sql(query)
         results = g.db.execute(query, (trait, self.id)).fetchall()
         return results
 
@@ -938,7 +1009,6 @@ class GenotypeDataSet(DataSet):
                     Order BY
                             Strain.Name
                     """
-        logger.sql(query)
         results = g.db.execute(query,
                                (webqtlDatabaseFunction.retrieve_species_id(self.group.name),
                                 trait, self.name)).fetchall()
@@ -1040,8 +1110,8 @@ class MrnaAssayDataSet(DataSet):
             else:
                 description_display = this_trait.symbol
 
-            if (len(description_display) > 1 and description_display != 'N/A' and
-                    len(target_string) > 1 and target_string != 'None'):
+            if (len(description_display) > 1 and description_display != 'N/A'
+                    and len(target_string) > 1 and target_string != 'None'):
                 description_display = description_display + '; ' + target_string.strip()
 
             # Save it for the jinja2 template
@@ -1059,9 +1129,6 @@ class MrnaAssayDataSet(DataSet):
                 ProbeSet.Name = '%s'
             """ % (escape(str(this_trait.dataset.id)),
                    escape(this_trait.name)))
-
-            #logger.debug("query is:", pf(query))
-            logger.sql(query)
             result = g.db.execute(query).fetchone()
 
             mean = result[0] if result else 0
@@ -1081,7 +1148,6 @@ class MrnaAssayDataSet(DataSet):
                         Geno.Name = '{}' and
                         Geno.SpeciesId = Species.Id
                 """.format(species, this_trait.locus)
-                logger.sql(query)
                 result = g.db.execute(query).fetchone()
 
                 if result:
@@ -1097,7 +1163,8 @@ class MrnaAssayDataSet(DataSet):
                     SELECT
                             Strain.Name, ProbeSetData.value, ProbeSetSE.error, NStrain.count, Strain.Name2
                     FROM
-                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                            (ProbeSetData, ProbeSetFreeze,
+                             Strain, ProbeSet, ProbeSetXRef)
                     left join ProbeSetSE on
                             (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
                     left join NStrain on
@@ -1112,9 +1179,7 @@ class MrnaAssayDataSet(DataSet):
                     Order BY
                             Strain.Name
                     """ % (escape(trait), escape(self.name))
-        logger.sql(query)
         results = g.db.execute(query).fetchall()
-        #logger.debug("RETRIEVED RESULTS HERE:", results)
         return results
 
     def retrieve_genes(self, column_name):
@@ -1124,7 +1189,6 @@ class MrnaAssayDataSet(DataSet):
                     where ProbeSetXRef.ProbeSetFreezeId = %s and
                     ProbeSetXRef.ProbeSetId=ProbeSet.Id;
                 """ % (column_name, escape(str(self.id)))
-        logger.sql(query)
         results = g.db.execute(query).fetchall()
 
         return dict(results)
@@ -1155,11 +1219,19 @@ class TempDataSet(DataSet):
 
 def geno_mrna_confidentiality(ob):
     dataset_table = ob.type + "Freeze"
-    #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
 
     query = '''SELECT Id, Name, FullName, confidentiality,
                         AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table, ob.name)
-    logger.sql(query)
+    result = g.db.execute(query)
+
+    (dataset_id,
+     name,
+     full_name,
+     confidential,
+     authorized_users) = result.fetchall()[0]
+
+    if confidential:
+        return True
     result = g.db.execute(query)
 
     (dataset_id,
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..8f8e2b0a 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,12 +6,14 @@ from utility import db_tools
 from utility import Bunch
 
 from utility.db_tools import escape
+from gn3.db_utils import database_connector
 
 
 from utility.logger import getLogger
-logger = getLogger(__name__ )
+logger = getLogger(__name__)
 
-class MrnaAssayTissueData(object):
+
+class MrnaAssayTissueData:
 
     def __init__(self, gene_symbols=None):
         self.gene_symbols = gene_symbols
@@ -20,7 +22,7 @@ class MrnaAssayTissueData(object):
 
         self.data = collections.defaultdict(Bunch)
 
-        query =  '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
+        query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
                         from (
                         select Symbol, max(Mean) as maxmean
                         from TissueProbeSetXRef
@@ -31,29 +33,31 @@ class MrnaAssayTissueData(object):
         # Due to the limit size of TissueProbeSetFreezeId table in DB,
         # performance of inner join is acceptable.MrnaAssayTissueData(gene_symbols=symbol_list)
         if len(gene_symbols) == 0:
-            query +=  '''Symbol!='' and Symbol Is Not Null group by Symbol)
+            query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
                 as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                 and t.Mean = x.maxmean;
                     '''
         else:
             in_clause = db_tools.create_in_clause(gene_symbols)
 
-            #ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
+            # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
             query += ''' Symbol in {} group by Symbol)
                 as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
                 and t.Mean = x.maxmean;
                     '''.format(in_clause)
 
-        results = g.db.execute(query).fetchall()
 
-        lower_symbols = []
+        # lower_symbols = []
+        lower_symbols = {}
         for gene_symbol in gene_symbols:
+            # lower_symbols[gene_symbol.lower()] = True
             if gene_symbol != None:
-                lower_symbols.append(gene_symbol.lower())
-
+                lower_symbols[gene_symbol.lower()] = True
+        results = list(g.db.execute(query).fetchall())
         for result in results:
             symbol = result[0]
-            if symbol.lower() in lower_symbols:
+            if symbol  is not None and lower_symbols.get(symbol.lower()):
+
                 symbol = symbol.lower()
 
                 self.data[symbol].gene_id = result.GeneId
@@ -64,16 +68,16 @@ class MrnaAssayTissueData(object):
                 self.data[symbol].probe_target_description = result.Probe_Target_Description
 
     ###########################################################################
-    #Input: cursor, symbolList (list), dataIdDict(Dict)
-    #output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
+    # Input: cursor, symbolList (list), dataIdDict(Dict)
+    # output: symbolValuepairDict (dictionary):one dictionary of Symbol and Value Pair,
     #        key is symbol, value is one list of expression values of one probeSet;
-    #function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
-    #Attention! All keys are lower case!
+    # function: get one dictionary whose key is gene symbol and value is tissue expression data (list type).
+    # Attention! All keys are lower case!
     ###########################################################################
 
     def get_symbol_values_pairs(self):
         id_list = [self.data[symbol].data_id for symbol in self.data]
-        
+
         symbol_values_dict = {}
 
         if len(id_list) > 0:
@@ -82,11 +86,13 @@ class MrnaAssayTissueData(object):
                        WHERE TissueProbeSetData.Id IN {} and
                              TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
 
+
             results = g.db.execute(query).fetchall()
             for result in results:
                 if result.Symbol.lower() not in symbol_values_dict:
                     symbol_values_dict[result.Symbol.lower()] = [result.value]
                 else:
-                    symbol_values_dict[result.Symbol.lower()].append(result.value)
+                    symbol_values_dict[result.Symbol.lower()].append(
+                        result.value)
 
         return symbol_values_dict
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index 2771d116..f303aabb 100644
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -1,55 +1,66 @@
-import collections
+from collections import OrderedDict
+from dataclasses import dataclass
+from dataclasses import InitVar
+from typing import Optional, Dict
+from flask import g
 
-from flask import Flask, g
 
+@dataclass
+class TheSpecies:
+    """Data related to species."""
+    dataset: Optional[Dict] = None
+    species_name: Optional[str] = None
 
-from utility.logger import getLogger
-logger = getLogger(__name__ )
-
-class TheSpecies(object):
-    def __init__(self, dataset=None, species_name=None):
-        if species_name != None:
-            self.name = species_name
+    def __post_init__(self):
+        if self.species_name is not None:
+            self.name = self.species_name
             self.chromosomes = Chromosomes(species=self.name)
         else:
-            self.dataset = dataset
             self.chromosomes = Chromosomes(dataset=self.dataset)
 
-class IndChromosome(object):
-    def __init__(self, name, length):
-        self.name = name
-        self.length = length
+
+@dataclass
+class IndChromosome:
+    """Data related to IndChromosome"""
+    name: str
+    length: int
 
     @property
     def mb_length(self):
-        """Chromosome length in megabases"""
+        """Chromosome length in mega-bases"""
         return self.length / 1000000
 
-class Chromosomes(object):
-    def __init__(self, dataset=None, species=None):
-        self.chromosomes = collections.OrderedDict()
-        if species != None:
-            query = """
-                Select
-                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
-                where
-                        Chr_Length.SpeciesId = Species.SpeciesId AND
-                        Species.Name = '%s'
-                Order by OrderId
-                """ % species.capitalize()
-        else:
+
+@dataclass
+class Chromosomes:
+    """Data related to a chromosome"""
+    dataset: InitVar[Dict] = None
+    species: Optional[str] = None
+
+    def __post_init__(self, dataset):
+        if self.species is None:
             self.dataset = dataset
 
-            query = """
-                Select
-                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
-                where
-                        Chr_Length.SpeciesId = InbredSet.SpeciesId AND
-                        InbredSet.Name = '%s'
-                Order by OrderId
-                """ % self.dataset.group.name
-        logger.sql(query)
+    @property
+    def chromosomes(self):
+        """Lazily fetch the chromosomes"""
+        chromosomes = OrderedDict()
+        if self.species is not None:
+            query = (
+                "SELECT Chr_Length.Name, Chr_Length.OrderId, Length "
+                "FROM Chr_Length, Species WHERE "
+                "Chr_Length.SpeciesId = Species.SpeciesId AND "
+                "Species.Name = "
+                "'%s' ORDER BY OrderId" % self.species.capitalize())
+        else:
+            query = (
+                "SELECT Chr_Length.Name, Chr_Length.OrderId, "
+                "Length FROM Chr_Length, InbredSet WHERE "
+                "Chr_Length.SpeciesId = InbredSet.SpeciesId AND "
+                "InbredSet.Name = "
+                "'%s' ORDER BY OrderId" % self.dataset.group.name)
         results = g.db.execute(query).fetchall()
-
         for item in results:
-            self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
+            chromosomes[item.OrderId] = IndChromosome(
+                item.Name, item.Length)
+        return chromosomes
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 41e2603c..96a09302 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -27,11 +27,13 @@ def create_trait(**kw):
 
     assert bool(kw.get('name')), "Needs trait name"
 
-    if kw.get('dataset_name'):
+    if bool(kw.get('dataset')):
+        dataset = kw.get('dataset')
+    else:
         if kw.get('dataset_name') != "Temp":
             dataset = create_dataset(kw.get('dataset_name'))
-    else:
-        dataset = kw.get('dataset')
+        else:
+            dataset = create_dataset("Temp", group_name=kw.get('group_name'))
 
     if dataset.type == 'Publish':
         permissions = check_resource_availability(
@@ -51,7 +53,7 @@ def create_trait(**kw):
         return None
 
 
-class GeneralTrait(object):
+class GeneralTrait:
     """
     Trait class defines a trait in webqtl, can be either Microarray,
     Published phenotype, genotype, or user input trait
@@ -284,17 +286,19 @@ def get_sample_data():
         return None
 
 
-def jsonable(trait):
+def jsonable(trait, dataset=None):
     """Return a dict suitable for using as json
 
     Actual turning into json doesn't happen here though"""
 
-    dataset = create_dataset(dataset_name=trait.dataset.name,
-                             dataset_type=trait.dataset.type,
-                             group_name=trait.dataset.group.name)
+    if not dataset:
+        dataset = create_dataset(dataset_name=trait.dataset.name,
+                                dataset_type=trait.dataset.type,
+                                group_name=trait.dataset.group.name)
 
     if dataset.type == "ProbeSet":
         return dict(name=trait.name,
+                    view=trait.view,
                     symbol=trait.symbol,
                     dataset=dataset.name,
                     dataset_name=dataset.shortname,
@@ -308,103 +312,46 @@ def jsonable(trait):
     elif dataset.type == "Publish":
         if trait.pubmed_id:
             return dict(name=trait.name,
+                        view=trait.view,
                         dataset=dataset.name,
                         dataset_name=dataset.shortname,
                         description=trait.description_display,
                         abbreviation=trait.abbreviation,
                         authors=trait.authors,
+                        pubmed_id=trait.pubmed_id,
                         pubmed_text=trait.pubmed_text,
                         pubmed_link=trait.pubmed_link,
+                        mean=trait.mean,
                         lrs_score=trait.LRS_score_repr,
                         lrs_location=trait.LRS_location_repr,
                         additive=trait.additive
                         )
         else:
             return dict(name=trait.name,
+                        view=trait.view,
                         dataset=dataset.name,
                         dataset_name=dataset.shortname,
                         description=trait.description_display,
                         abbreviation=trait.abbreviation,
                         authors=trait.authors,
                         pubmed_text=trait.pubmed_text,
+                        mean=trait.mean,
                         lrs_score=trait.LRS_score_repr,
                         lrs_location=trait.LRS_location_repr,
                         additive=trait.additive
                         )
     elif dataset.type == "Geno":
         return dict(name=trait.name,
+                    view=trait.view,
                     dataset=dataset.name,
                     dataset_name=dataset.shortname,
                     location=trait.location_repr
                     )
-    else:
-        return dict()
-
-
-def jsonable_table_row(trait, dataset_name, index):
-    """Return a list suitable for json and intended to be displayed in a table
-
-    Actual turning into json doesn't happen here though"""
-
-    dataset = create_dataset(dataset_name)
-
-    if dataset.type == "ProbeSet":
-        if trait.mean == "":
-            mean = "N/A"
-        else:
-            mean = "%.3f" % round(float(trait.mean), 2)
-        if trait.additive == "":
-            additive = "N/A"
-        else:
-            additive = "%.3f" % round(float(trait.additive), 2)
-        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
-                index,
-                '<a href="/show_trait?trait_id=' +
-                str(trait.name)+'&dataset='+dataset.name +
-                '">'+str(trait.name)+'</a>',
-                trait.symbol,
-                trait.description_display,
-                trait.location_repr,
-                mean,
-                trait.LRS_score_repr,
-                trait.LRS_location_repr,
-                additive]
-    elif dataset.type == "Publish":
-        if trait.additive == "":
-            additive = "N/A"
-        else:
-            additive = "%.2f" % round(float(trait.additive), 2)
-        if trait.pubmed_id:
-            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
-                    index,
-                    '<a href="/show_trait?trait_id=' +
-                    str(trait.name)+'&dataset='+dataset.name +
-                    '">'+str(trait.name)+'</a>',
-                    trait.description_display,
-                    trait.authors,
-                    '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
-                    trait.LRS_score_repr,
-                    trait.LRS_location_repr,
-                    additive]
-        else:
-            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
-                    index,
-                    '<a href="/show_trait?trait_id=' +
-                    str(trait.name)+'&dataset='+dataset.name +
-                    '">'+str(trait.name)+'</a>',
-                    trait.description_display,
-                    trait.authors,
-                    trait.pubmed_text,
-                    trait.LRS_score_repr,
-                    trait.LRS_location_repr,
-                    additive]
-    elif dataset.type == "Geno":
-        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
-                index,
-                '<a href="/show_trait?trait_id=' +
-                str(trait.name)+'&dataset='+dataset.name +
-                '">'+str(trait.name)+'</a>',
-                trait.location_repr]
+    elif dataset.name == "Temp":
+        return dict(name=trait.name,
+                    view=trait.view,
+                    dataset="Temp",
+                    dataset_name="Temp")
     else:
         return dict()
 
@@ -516,10 +463,11 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
             # If the dataset is confidential and the user has access to confidential
             # phenotype traits, then display the pre-publication description instead
             # of the post-publication description
-            trait.description_display = ""
+            trait.description_display = "N/A"
             if not trait.pubmed_id:
                 trait.abbreviation = trait.pre_publication_abbreviation
-                trait.description_display = trait.pre_publication_description
+                if trait.pre_publication_description:
+                    trait.description_display = trait.pre_publication_description
             else:
                 trait.abbreviation = trait.post_publication_abbreviation
                 if description:
@@ -542,9 +490,9 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
             else:
                 description_display = trait.symbol
 
-            if (str(description_display or "") != "" and
-                description_display != 'N/A' and
-                    str(target_string or "") != "" and target_string != 'None'):
+            if (str(description_display or "") != ""
+                and description_display != 'N/A'
+                    and str(target_string or "") != "" and target_string != 'None'):
                 description_display = description_display + '; ' + target_string.strip()
 
             # Save it for the jinja2 template
@@ -638,6 +586,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
                 if str(trait.lrs or "") != "":
                     trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
     else:
-        raise KeyError(repr(trait.name) +
-                       ' information is not found in the database.')
+        raise KeyError(repr(trait.name)
+                       + ' information is not found in the database.')
     return trait
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index aa55470f..25b6cb8a 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -28,17 +28,20 @@ import utility.tools
 
 utility.tools.show_settings()
 
+
 class webqtlCaseData:
     """one case data in one trait"""
 
     def __init__(self, name, value=None, variance=None, num_cases=None, name2=None):
         self.name = name
-        self.name2 = name2                  # Other name (for traits like BXD65a)
+        # Other name (for traits like BXD65a)
+        self.name2 = name2
         self.value = value                  # Trait Value
         self.variance = variance            # Trait Variance
         self.num_cases = num_cases          # Number of individuals/cases
         self.extra_attributes = None
-        self.this_id = None   # Set a sane default (can't be just "id" cause that's a reserved word)
+        # Set a sane default (can't be just "id" cause that's a reserved word)
+        self.this_id = None
         self.outlier = None   # Not set to True/False until later
 
     def __repr__(self):
@@ -78,4 +81,4 @@ class webqtlCaseData:
     def display_num_cases(self):
         if self.num_cases is not None:
             return "%s" % self.num_cases
-        return "x"
-\ No newline at end of file
+        return "x"
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index aee8616a..39947158 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -1,4 +1,4 @@
-#########################################'
+# '
 #      Environment Variables - public
 #
 # Note: much of this needs to handled by the settings/environment
@@ -10,35 +10,35 @@
 
 from utility.tools import valid_path, mk_dir, assert_dir, assert_writable_dir, flat_files, TEMPDIR
 
-#Debug Level
-#1 for debug, mod python will reload import each time
+# Debug Level
+# 1 for debug, mod python will reload import each time
 DEBUG = 1
 
-#USER privilege
-USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4}
+# USER privilege
+USERDICT = {'guest': 1, 'user': 2, 'admin': 3, 'root': 4}
 
-#Set privileges
+# Set privileges
 SUPER_PRIVILEGES = {'data': 'edit', 'metadata': 'edit', 'admin': 'edit-admins'}
 DEFAULT_PRIVILEGES = {'data': 'view', 'metadata': 'view', 'admin': 'not-admin'}
 
-#minimum number of informative strains
+# minimum number of informative strains
 KMININFORMATIVE = 5
 
-#Daily download limit from one IP
+# Daily download limit from one IP
 DAILYMAXIMUM = 1000
 
-#maximum LRS value
+# maximum LRS value
 MAXLRS = 460.0
 
-#MINIMUM Database public value
+# MINIMUM Database public value
 PUBLICTHRESH = 0
 
-#EXTERNAL LINK ADDRESSES
+# EXTERNAL LINK ADDRESSES
 PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
 UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
 UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
 UTHSC_BLAT2 = 'http://ucscbrowserbeta.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
-GENOMEBROWSER_URL="https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
+GENOMEBROWSER_URL = "https://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=%s"
 NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&dopt=Graphics&list_uids=%s"
 GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
 OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
@@ -56,26 +56,28 @@ GEMMA_URL = "http://www.chibi.ubc.ca/Gemma/gene/showGene.html?ncbiid=%s"
 ABA_URL = "http://mouse.brain-map.org/search/show?search_type=gene&search_term=%s"
 EBIGWAS_URL = "https://www.ebi.ac.uk/gwas/search?query=%s"
 WIKI_PI_URL = "http://severus.dbmi.pitt.edu/wiki-pi/index.php/search?q=%s"
-ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
+ENSEMBLETRANSCRIPT_URL = "http://useast.ensembl.org/Mus_musculus/Transcript/Idhistory?t=%s"
 DBSNP = 'http://ensembl.org/Mus_musculus/Variation/Population?v=%s'
 PROTEIN_ATLAS_URL = "http://www.proteinatlas.org/search/%s"
 OPEN_TARGETS_URL = "https://genetics.opentargets.org/gene/%s"
 UNIPROT_URL = "https://www.uniprot.org/uniprot/%s"
 RGD_URL = "https://rgd.mcw.edu/rgdweb/elasticResults.html?term=%s&category=Gene&species=%s"
 PHENOGEN_URL = "https://phenogen.org/gene.jsp?speciesCB=Rn&auto=Y&geneTxt=%s&genomeVer=rn6&section=geneEQTL"
+RRID_MOUSE_URL = "https://www.jax.org/strain/%s"
+RRID_RAT_URL = "https://rgd.mcw.edu/rgdweb/report/strain/main.html?id=%s"
 
 # Temporary storage (note that this TMPDIR can be set as an
 # environment variable - use utility.tools.TEMPDIR when you
 # want to reach this base dir
 assert_writable_dir(TEMPDIR)
 
-TMPDIR               = mk_dir(TEMPDIR+'/gn2/')
+TMPDIR = mk_dir(TEMPDIR + '/gn2/')
 assert_writable_dir(TMPDIR)
 
-CACHEDIR             = mk_dir(TMPDIR+'/cache/')
+CACHEDIR = mk_dir(TMPDIR + '/cache/')
 # We can no longer write into the git tree:
-GENERATED_IMAGE_DIR  = mk_dir(TMPDIR+'generated/')
-GENERATED_TEXT_DIR   = mk_dir(TMPDIR+'generated_text/')
+GENERATED_IMAGE_DIR = mk_dir(TMPDIR + 'generated/')
+GENERATED_TEXT_DIR = mk_dir(TMPDIR + 'generated_text/')
 
 # Make sure we have permissions to access these
 assert_writable_dir(CACHEDIR)
@@ -83,12 +85,12 @@ assert_writable_dir(GENERATED_IMAGE_DIR)
 assert_writable_dir(GENERATED_TEXT_DIR)
 
 # Flat file directories
-GENODIR              = flat_files('genotype')+'/'
+GENODIR = flat_files('genotype') + '/'
 assert_dir(GENODIR)
 # assert_dir(GENODIR+'bimbam') # for gemma
 
 # JSON genotypes are OBSOLETE
-JSON_GENODIR         = flat_files('genotype/json')+'/'
+JSON_GENODIR = flat_files('genotype/json') + '/'
 if not valid_path(JSON_GENODIR):
     # fall back on old location (move the dir, FIXME)
     JSON_GENODIR = flat_files('json')
@@ -96,4 +98,4 @@ if not valid_path(JSON_GENODIR):
 # Are we using the following...?
 PORTADDR = "http://50.16.251.170"
 INFOPAGEHREF = '/dbdoc/%s.html'
-CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
+CGIDIR = '/webqtl/'  # XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
author	zsloan	2021-10-18 17:50:26 +0000
committer	zsloan	2021-10-18 17:50:26 +0000
commit	e36eaf0003a598bc5aa688803dd1b36c24a4c051 (patch)
tree	a59b7dadf02241575eb0774f97c6048e2425c053 /wqflask/base
parent	bd421438f1f0b4de913fa40cd49cfcda27e6b16f (diff)
parent	04f3d13aceeaec2e52b94037d59f08ed6dc6a8bb (diff)
download	genenetwork2-e36eaf0003a598bc5aa688803dd1b36c24a4c051.tar.gz