Merge branch 'testing' of github.com:genenetwork/genenetwork2 into reaper_bootstrap_fix

author: zsloan 2020-10-29 14:35:09 -0500
committer: zsloan 2020-10-29 14:35:09 -0500
commit: 7c1c9e2a519ba662e9f293eea73eb7922b2160e4 (patch)
tree: f6d4db465d338c1433bbb126e911062a6c31748b /wqflask/base
parent: 5a1f69aa85809768577069ae63d92c9ef6aecc02 (diff)
parent: 6e6911b466c2727b16a190d8b714f55d7842d7e2 (diff)
download: genenetwork2-7c1c9e2a519ba662e9f293eea73eb7922b2160e4.tar.gz
6 files changed, 66 insertions, 114 deletions
diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py
index 0fccaab3..0122ee32 100644
--- a/wqflask/base/GeneralObject.py
+++ b/wqflask/base/GeneralObject.py
@@ -33,7 +33,7 @@ class GeneralObject:
 
     def __init__(self, *args, **kw):
         self.contents = list(args)
-        for name, value in kw.items():
+        for name, value in list(kw.items()):
             setattr(self, name, value)
 
     def __setitem__(self, key, value):
@@ -50,16 +50,17 @@ class GeneralObject:
 
     def __str__(self):
         s = ''
-        for key in self.__dict__.keys():
+        for key in list(self.__dict__.keys()):
             if key != 'contents':
                 s += '%s = %s\n' % (key, self.__dict__[key])
         return s
 
     def __repr__(self):
         s = ''
-        for key in self.__dict__.keys():
+        for key in list(self.__dict__.keys()):
             s += '%s = %s\n' % (key, self.__dict__[key])
         return s
 
-    def __cmp__(self, other):
-        return len(self.__dict__.keys()).__cmp__(len(other.__dict__.keys()))
+    def __eq__(self, other):
+        return (len(list(self.__dict__.keys())) ==
+                len(list(other.__dict__.keys())))
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 2f1549ae..0d4ac24b 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -18,13 +18,14 @@
 #
 # This module is used by GeneNetwork project (www.genenetwork.org)
 
-from __future__ import absolute_import, print_function, division
 from db.call import fetchall, fetchone, fetch1
 from utility.logger import getLogger
 from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL
 from db.gn_server import menu_main
 from pprint import pformat as pf
-from MySQLdb import escape_string as escape
+from utility.db_tools import escape
+from utility.db_tools import mescape
+from utility.db_tools import create_in_clause
 from maintenance import get_group_samplelists
 from utility.tools import locate, locate_ignore_error, flat_files
 from utility import gen_geno_ob
@@ -34,7 +35,6 @@ from utility import webqtlUtil
 from db import webqtlDatabaseFunction
 from base import species
 from base import webqtlConfig
-import reaper
 from flask import Flask, g
 import os
 import math
@@ -45,7 +45,7 @@ import codecs
 import json
 import requests
 import gzip
-import cPickle as pickle
+import pickle as pickle
 import itertools
 
 from redis import Redis
@@ -209,20 +209,6 @@ def create_datasets_list():
     return datasets
 
 
-def create_in_clause(items):
-    """Create an in clause for mysql"""
-    in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
-    in_clause = '( {} )'.format(in_clause)
-    return in_clause
-
-
-def mescape(*items):
-    """Multiple escape"""
-    escaped = [escape(str(item)) for item in items]
-    #logger.debug("escaped is:", escaped)
-    return escaped
-
-
 class Markers(object):
     """Todo: Build in cacheing so it saves us reading the same file more than once"""
 
@@ -257,12 +243,12 @@ class Markers(object):
         logger.debug("length of self.markers:", len(self.markers))
         logger.debug("length of p_values:", len(p_values))
 
-        if type(p_values) is list:
+        if isinstance(p_values, list):
             # THIS IS only needed for the case when we are limiting the number of p-values calculated
             # if len(self.markers) > len(p_values):
             #    self.markers = self.markers[:len(p_values)]
 
-            for marker, p_value in itertools.izip(self.markers, p_values):
+            for marker, p_value in zip(self.markers, p_values):
                 if not p_value:
                     continue
                 marker['p_value'] = float(p_value)
@@ -273,7 +259,7 @@ class Markers(object):
                     marker['lod_score'] = -math.log10(marker['p_value'])
                     # Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
                     marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
-        elif type(p_values) is dict:
+        elif isinstance(p_values, dict):
             filtered_markers = []
             for marker in self.markers:
                 #logger.debug("marker[name]", marker['name'])
@@ -459,12 +445,7 @@ class DatasetGroup(object):
                 full_filename = str(locate(self.genofile, 'genotype'))
         else:
             full_filename = str(locate(self.name + '.geno', 'genotype'))
-
-        if use_reaper:
-            genotype_1 = reaper.Dataset()
-            genotype_1.read(full_filename)
-        else:
-            genotype_1 = gen_geno_ob.genotype(full_filename)
+        genotype_1 = gen_geno_ob.genotype(full_filename)
 
         if genotype_1.type == "group" and self.parlist:
             genotype_2 = genotype_1.add(
@@ -707,7 +688,7 @@ class DataSet(object):
             else:
                 query = "SELECT {}.Name,".format(escape(dataset_type))
             data_start_pos = 1
-            query += string.join(temp, ', ')
+            query += ', '.join(temp)
             query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type,
                                                                      self.type,
                                                                      self.type))
@@ -1053,9 +1034,9 @@ class MrnaAssayDataSet(DataSet):
 
             # XZ, 12/08/2008: description
             # XZ, 06/05/2009: Rob asked to add probe target description
-            description_string = unicode(
+            description_string = str(
                 str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8')
-            target_string = unicode(
+            target_string = str(
                 str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
 
             if len(description_string) > 1 and description_string != 'None':
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index 6fec5dcd..f1929518 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, print_function, division
-
 import collections
 
 from flask import g
@@ -7,9 +5,8 @@ from flask import g
 from utility import db_tools
 from utility import Bunch
 
-from MySQLdb import escape_string as escape
+from utility.db_tools import escape
 
-from pprint import pformat as pf
 
 from utility.logger import getLogger
 logger = getLogger(__name__ )
@@ -92,4 +89,4 @@ class MrnaAssayTissueData(object):
                 else:
                     symbol_values_dict[result.Symbol.lower()].append(result.value)
 
-        return symbol_values_dict
\ No newline at end of file
+        return symbol_values_dict
diff --git a/wqflask/base/species.py b/wqflask/base/species.py
index 6d99af65..2771d116 100644
--- a/wqflask/base/species.py
+++ b/wqflask/base/species.py
@@ -1,14 +1,7 @@
-from __future__ import absolute_import, print_function, division
-
 import collections
 
 from flask import Flask, g
 
-#from MySQLdb import escape_string as escape
-
-from utility import Bunch
-
-from pprint import pformat as pf
 
 from utility.logger import getLogger
 logger = getLogger(__name__ )
@@ -59,4 +52,4 @@ class Chromosomes(object):
         results = g.db.execute(query).fetchall()
 
         for item in results:
-            self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
\ No newline at end of file
+            self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index 7ebbc4bb..cfc02f8b 100644
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -1,42 +1,30 @@
-from __future__ import absolute_import, division, print_function
-from utility.logger import getLogger
-from flask import Flask, g, request, url_for, redirect, make_response, render_template
-from pprint import pformat as pf
-from MySQLdb import escape_string as escape
+import requests
 import simplejson as json
 from wqflask import app
 
-import os
-import string
-import resource
-import codecs
-import requests
-import random
-import urllib
-
 from base import webqtlConfig
 from base.webqtlCaseData import webqtlCaseData
 from base.data_set import create_dataset
-from db import webqtlDatabaseFunction
-from utility import webqtlUtil
 from utility import hmac
 from utility.authentication_tools import check_resource_availability
-from utility.tools import GN2_BASE_URL, GN_VERSION
-from utility.redis_tools import get_redis_conn
-from utility.redis_tools import get_resource_id
-from utility.redis_tools import get_resource_info
+from utility.tools import GN2_BASE_URL
+from utility.redis_tools import get_redis_conn, get_resource_id
 
-Redis = get_redis_conn()
+from utility.db_tools import escape
+
+from flask import g, request, url_for
 
+from utility.logger import getLogger
 
 logger = getLogger(__name__)
 
+Redis = get_redis_conn()
+
 
 def create_trait(**kw):
     assert bool(kw.get('dataset')) != bool(
         kw.get('dataset_name')), "Needs dataset ob. or name"
 
-    permitted = True
     if kw.get('name'):
         if kw.get('dataset_name'):
             if kw.get('dataset_name') != "Temp":
@@ -55,7 +43,9 @@ def create_trait(**kw):
         the_trait = GeneralTrait(**kw)
         if the_trait.dataset.type != "Temp":
             the_trait = retrieve_trait_info(
-                the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info'))
+                the_trait,
+                the_trait.dataset,
+                get_qtl_info=kw.get('get_qtl_info'))
         return the_trait
     else:
         return None
@@ -78,7 +68,9 @@ class GeneralTrait(object):
             if kw.get('dataset_name') == "Temp":
                 temp_group = self.name.split("_")[2]
                 self.dataset = create_dataset(
-                    dataset_name="Temp", dataset_type="Temp", group_name=temp_group)
+                    dataset_name="Temp",
+                    dataset_type="Temp",
+                    group_name=temp_group)
             else:
                 self.dataset = create_dataset(kw.get('dataset_name'))
         else:
@@ -113,9 +105,10 @@ class GeneralTrait(object):
             elif len(name2) == 3:
                 self.dataset, self.name, self.cellid = name2
 
-        # Todo: These two lines are necessary most of the time, but perhaps not all of the time
-        # So we could add a simple if statement to short-circuit this if necessary
-        if get_sample_info != False:
+        # Todo: These two lines are necessary most of the time, but
+        # perhaps not all of the time. So we could add a simple if
+        # statement to short-circuit this if necessary.
+        if get_sample_info is not False:
             self = retrieve_sample_data(self, self.dataset)
 
     def export_informative(self, include_variance=0):
@@ -128,9 +121,9 @@ class GeneralTrait(object):
         vals = []
         the_vars = []
         sample_aliases = []
-        for sample_name, sample_data in self.data.items():
-            if sample_data.value != None:
-                if not include_variance or sample_data.variance != None:
+        for sample_name, sample_data in list(self.data.items()):
+            if sample_data.value is not None:
+                if not include_variance or sample_data.variance is not None:
                     samples.append(sample_name)
                     vals.append(sample_data.value)
                     the_vars.append(sample_data.variance)
@@ -154,7 +147,8 @@ class GeneralTrait(object):
                 formatted = self.post_publication_description
         else:
             formatted = "Not available"
-
+        if isinstance(formatted, bytes):
+            formatted = formatted.decode("utf-8")
         return formatted
 
     @property
@@ -163,8 +157,8 @@ class GeneralTrait(object):
 
         alias = 'Not available'
         if getattr(self, "alias", None):
-            alias = string.replace(self.alias, ";", " ")
-            alias = string.join(string.split(alias), ", ")
+            alias = self.alias.replace(";", " ")
+            alias = ", ".join(alias.split())
 
         return alias
 
@@ -183,7 +177,8 @@ class GeneralTrait(object):
 
             if human_response and mouse_response and other_response:
                 alias_list = json.loads(human_response.content) + json.loads(
-                    mouse_response.content) + json.loads(other_response.content)
+                    mouse_response.content) + \
+                    json.loads(other_response.content)
 
                 filtered_aliases = []
                 seen = set()
@@ -201,7 +196,8 @@ class GeneralTrait(object):
     def location_fmt(self):
         """Return a text formatted location
 
-        While we're at it we set self.location in case we need it later (do we?)
+        While we're at it we set self.location in case we need it
+        later (do we?)
 
         """
 
@@ -223,7 +219,7 @@ class GeneralTrait(object):
 
 
 def retrieve_sample_data(trait, dataset, samplelist=None):
-    if samplelist == None:
+    if samplelist is None:
         samplelist = []
 
     if dataset.type == "Temp":
@@ -278,7 +274,9 @@ def get_sample_data():
                 trait_dict['pubmed_link'] = trait_ob.pubmed_link
             trait_dict['pubmed_text'] = trait_ob.pubmed_text
 
-        return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems()}])
+        return json.dumps([trait_dict, {key: value.value for
+                                        key, value in list(
+                                            trait_ob.data.items())}])
     else:
         return None
 
@@ -289,7 +287,8 @@ def jsonable(trait):
     Actual turning into json doesn't happen here though"""
 
     dataset = create_dataset(dataset_name=trait.dataset.name,
-                             dataset_type=trait.dataset.type, group_name=trait.dataset.group.name)
+                             dataset_type=trait.dataset.type,
+                             group_name=trait.dataset.group.name)
 
     if dataset.type == "ProbeSet":
         return dict(name=trait.name,
@@ -471,8 +470,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
         # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name
         # to avoid the problem of same marker name from different species.
         elif dataset.type == 'Geno':
-            display_fields_string = string.join(
-                dataset.display_fields, ',Geno.')
+            display_fields_string = ',Geno.'.join(dataset.display_fields)
             display_fields_string = 'Geno.' + display_fields_string
             query = """
                     SELECT %s
@@ -491,13 +489,15 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
             query = """SELECT %s FROM %s WHERE Name = %s"""
             logger.sql(query)
             trait_info = g.db.execute(query,
-                                      (string.join(dataset.display_fields, ','),
-                                       dataset.type, trait.name)).fetchone()
+                                      ','.join(dataset.display_fields),
+                                      dataset.type, trait.name).fetchone()
 
     if trait_info:
         trait.haveinfo = True
         for i, field in enumerate(dataset.display_fields):
             holder = trait_info[i]
+            if isinstance(holder, bytes):
+                holder = holder.decode("utf-8", errors="ignore")
             setattr(trait, field, holder)
 
         if dataset.type == 'Publish':
@@ -523,13 +523,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
                 else:
                     trait.description_display = ""
 
-            trait.abbreviation = unicode(str(trait.abbreviation).strip(
-                codecs.BOM_UTF8), 'utf-8', errors="replace")
-            trait.description_display = unicode(str(trait.description_display).strip(
-                codecs.BOM_UTF8), 'utf-8', errors="replace")
-            trait.authors = unicode(str(trait.authors).strip(
-                codecs.BOM_UTF8), 'utf-8', errors="replace")
-
             if not trait.year.isdigit():
                 trait.pubmed_text = "N/A"
             else:
@@ -539,10 +532,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
                 trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id
 
         if dataset.type == 'ProbeSet' and dataset.group:
-            description_string = unicode(
-                str(trait.description).strip(codecs.BOM_UTF8), 'utf-8')
-            target_string = unicode(
-                str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8')
+            description_string = trait.description
+            target_string = trait.probe_target_description
 
             if str(description_string or "") != "" and description_string != 'None':
                 description_display = description_string
@@ -645,6 +636,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False):
                 if str(trait.lrs or "") != "":
                     trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs
     else:
-        raise KeyError, `trait.name`+ ' information is not found in the database.'
-
+        raise KeyError(repr(trait.name) +
+                       ' information is not found in the database.')
     return trait
diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py
index 3cf2d80d..aa55470f 100644
--- a/wqflask/base/webqtlCaseData.py
+++ b/wqflask/base/webqtlCaseData.py
@@ -41,8 +41,6 @@ class webqtlCaseData:
         self.this_id = None   # Set a sane default (can't be just "id" cause that's a reserved word)
         self.outlier = None   # Not set to True/False until later
 
-        self.first_attr_col = self.get_first_attr_col()
-
     def __repr__(self):
         case_data_string = "<webqtlCaseData> "
         if self.value is not None:
@@ -80,13 +78,4 @@ class webqtlCaseData:
     def display_num_cases(self):
         if self.num_cases is not None:
             return "%s" % self.num_cases
-        return "x"
-
-    def get_first_attr_col(self):
-        col_num = 4
-        if self.variance is not None:
-            col_num += 2
-        if self.num_cases is not None:
-            col_num += 1
-
-        return col_num
\ No newline at end of file
+        return "x"
\ No newline at end of file
author	zsloan	2020-10-29 14:35:09 -0500
committer	zsloan	2020-10-29 14:35:09 -0500
commit	7c1c9e2a519ba662e9f293eea73eb7922b2160e4 (patch)
tree	f6d4db465d338c1433bbb126e911062a6c31748b /wqflask/base
parent	5a1f69aa85809768577069ae63d92c9ef6aecc02 (diff)
parent	6e6911b466c2727b16a190d8b714f55d7842d7e2 (diff)
download	genenetwork2-7c1c9e2a519ba662e9f293eea73eb7922b2160e4.tar.gz