diff options
author | zsloan | 2020-10-29 14:35:09 -0500 |
---|---|---|
committer | zsloan | 2020-10-29 14:35:09 -0500 |
commit | 7c1c9e2a519ba662e9f293eea73eb7922b2160e4 (patch) | |
tree | f6d4db465d338c1433bbb126e911062a6c31748b /wqflask/base | |
parent | 5a1f69aa85809768577069ae63d92c9ef6aecc02 (diff) | |
parent | 6e6911b466c2727b16a190d8b714f55d7842d7e2 (diff) | |
download | genenetwork2-7c1c9e2a519ba662e9f293eea73eb7922b2160e4.tar.gz |
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into reaper_bootstrap_fix
Diffstat (limited to 'wqflask/base')
-rw-r--r-- | wqflask/base/GeneralObject.py | 11 | ||||
-rw-r--r-- | wqflask/base/data_set.py | 41 | ||||
-rw-r--r-- | wqflask/base/mrna_assay_tissue_data.py | 7 | ||||
-rw-r--r-- | wqflask/base/species.py | 9 | ||||
-rw-r--r-- | wqflask/base/trait.py | 99 | ||||
-rw-r--r-- | wqflask/base/webqtlCaseData.py | 13 |
6 files changed, 66 insertions, 114 deletions
diff --git a/wqflask/base/GeneralObject.py b/wqflask/base/GeneralObject.py index 0fccaab3..0122ee32 100644 --- a/wqflask/base/GeneralObject.py +++ b/wqflask/base/GeneralObject.py @@ -33,7 +33,7 @@ class GeneralObject: def __init__(self, *args, **kw): self.contents = list(args) - for name, value in kw.items(): + for name, value in list(kw.items()): setattr(self, name, value) def __setitem__(self, key, value): @@ -50,16 +50,17 @@ class GeneralObject: def __str__(self): s = '' - for key in self.__dict__.keys(): + for key in list(self.__dict__.keys()): if key != 'contents': s += '%s = %s\n' % (key, self.__dict__[key]) return s def __repr__(self): s = '' - for key in self.__dict__.keys(): + for key in list(self.__dict__.keys()): s += '%s = %s\n' % (key, self.__dict__[key]) return s - def __cmp__(self, other): - return len(self.__dict__.keys()).__cmp__(len(other.__dict__.keys())) + def __eq__(self, other): + return (len(list(self.__dict__.keys())) == + len(list(other.__dict__.keys()))) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 2f1549ae..0d4ac24b 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -18,13 +18,14 @@ # # This module is used by GeneNetwork project (www.genenetwork.org) -from __future__ import absolute_import, print_function, division from db.call import fetchall, fetchone, fetch1 from utility.logger import getLogger from utility.tools import USE_GN_SERVER, USE_REDIS, flat_files, flat_file_exists, GN2_BASE_URL from db.gn_server import menu_main from pprint import pformat as pf -from MySQLdb import escape_string as escape +from utility.db_tools import escape +from utility.db_tools import mescape +from utility.db_tools import create_in_clause from maintenance import get_group_samplelists from utility.tools import locate, locate_ignore_error, flat_files from utility import gen_geno_ob @@ -34,7 +35,6 @@ from utility import webqtlUtil from db import webqtlDatabaseFunction from base import species from base import webqtlConfig -import reaper from flask import Flask, g import os import math @@ -45,7 +45,7 @@ import codecs import json import requests import gzip -import cPickle as pickle +import pickle as pickle import itertools from redis import Redis @@ -209,20 +209,6 @@ def create_datasets_list(): return datasets -def create_in_clause(items): - """Create an in clause for mysql""" - in_clause = ', '.join("'{}'".format(x) for x in mescape(*items)) - in_clause = '( {} )'.format(in_clause) - return in_clause - - -def mescape(*items): - """Multiple escape""" - escaped = [escape(str(item)) for item in items] - #logger.debug("escaped is:", escaped) - return escaped - - class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" @@ -257,12 +243,12 @@ class Markers(object): logger.debug("length of self.markers:", len(self.markers)) logger.debug("length of p_values:", len(p_values)) - if type(p_values) is list: + if isinstance(p_values, list): # THIS IS only needed for the case when we are limiting the number of p-values calculated # if len(self.markers) > len(p_values): # self.markers = self.markers[:len(p_values)] - for marker, p_value in itertools.izip(self.markers, p_values): + for marker, p_value in zip(self.markers, p_values): if not p_value: continue marker['p_value'] = float(p_value) @@ -273,7 +259,7 @@ class Markers(object): marker['lod_score'] = -math.log10(marker['p_value']) # Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - elif type(p_values) is dict: + elif isinstance(p_values, dict): filtered_markers = [] for marker in self.markers: #logger.debug("marker[name]", marker['name']) @@ -459,12 +445,7 @@ class DatasetGroup(object): full_filename = str(locate(self.genofile, 'genotype')) else: full_filename = str(locate(self.name + '.geno', 'genotype')) - - if use_reaper: - genotype_1 = reaper.Dataset() - genotype_1.read(full_filename) - else: - genotype_1 = gen_geno_ob.genotype(full_filename) + genotype_1 = gen_geno_ob.genotype(full_filename) if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add( @@ -707,7 +688,7 @@ class DataSet(object): else: query = "SELECT {}.Name,".format(escape(dataset_type)) data_start_pos = 1 - query += string.join(temp, ', ') + query += ', '.join(temp) query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(dataset_type, self.type, self.type)) @@ -1053,9 +1034,9 @@ class MrnaAssayDataSet(DataSet): # XZ, 12/08/2008: description # XZ, 06/05/2009: Rob asked to add probe target description - description_string = unicode( + description_string = str( str(this_trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = unicode( + target_string = str( str(this_trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') if len(description_string) > 1 and description_string != 'None': diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 6fec5dcd..f1929518 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, print_function, division - import collections from flask import g @@ -7,9 +5,8 @@ from flask import g from utility import db_tools from utility import Bunch -from MySQLdb import escape_string as escape +from utility.db_tools import escape -from pprint import pformat as pf from utility.logger import getLogger logger = getLogger(__name__ ) @@ -92,4 +89,4 @@ class MrnaAssayTissueData(object): else: symbol_values_dict[result.Symbol.lower()].append(result.value) - return symbol_values_dict
\ No newline at end of file + return symbol_values_dict diff --git a/wqflask/base/species.py b/wqflask/base/species.py index 6d99af65..2771d116 100644 --- a/wqflask/base/species.py +++ b/wqflask/base/species.py @@ -1,14 +1,7 @@ -from __future__ import absolute_import, print_function, division - import collections from flask import Flask, g -#from MySQLdb import escape_string as escape - -from utility import Bunch - -from pprint import pformat as pf from utility.logger import getLogger logger = getLogger(__name__ ) @@ -59,4 +52,4 @@ class Chromosomes(object): results = g.db.execute(query).fetchall() for item in results: - self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)
\ No newline at end of file + self.chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length) diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index 7ebbc4bb..cfc02f8b 100644 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -1,42 +1,30 @@ -from __future__ import absolute_import, division, print_function -from utility.logger import getLogger -from flask import Flask, g, request, url_for, redirect, make_response, render_template -from pprint import pformat as pf -from MySQLdb import escape_string as escape +import requests import simplejson as json from wqflask import app -import os -import string -import resource -import codecs -import requests -import random -import urllib - from base import webqtlConfig from base.webqtlCaseData import webqtlCaseData from base.data_set import create_dataset -from db import webqtlDatabaseFunction -from utility import webqtlUtil from utility import hmac from utility.authentication_tools import check_resource_availability -from utility.tools import GN2_BASE_URL, GN_VERSION -from utility.redis_tools import get_redis_conn -from utility.redis_tools import get_resource_id -from utility.redis_tools import get_resource_info +from utility.tools import GN2_BASE_URL +from utility.redis_tools import get_redis_conn, get_resource_id -Redis = get_redis_conn() +from utility.db_tools import escape + +from flask import g, request, url_for +from utility.logger import getLogger logger = getLogger(__name__) +Redis = get_redis_conn() + def create_trait(**kw): assert bool(kw.get('dataset')) != bool( kw.get('dataset_name')), "Needs dataset ob. or name" - permitted = True if kw.get('name'): if kw.get('dataset_name'): if kw.get('dataset_name') != "Temp": @@ -55,7 +43,9 @@ def create_trait(**kw): the_trait = GeneralTrait(**kw) if the_trait.dataset.type != "Temp": the_trait = retrieve_trait_info( - the_trait, the_trait.dataset, get_qtl_info=kw.get('get_qtl_info')) + the_trait, + the_trait.dataset, + get_qtl_info=kw.get('get_qtl_info')) return the_trait else: return None @@ -78,7 +68,9 @@ class GeneralTrait(object): if kw.get('dataset_name') == "Temp": temp_group = self.name.split("_")[2] self.dataset = create_dataset( - dataset_name="Temp", dataset_type="Temp", group_name=temp_group) + dataset_name="Temp", + dataset_type="Temp", + group_name=temp_group) else: self.dataset = create_dataset(kw.get('dataset_name')) else: @@ -113,9 +105,10 @@ class GeneralTrait(object): elif len(name2) == 3: self.dataset, self.name, self.cellid = name2 - # Todo: These two lines are necessary most of the time, but perhaps not all of the time - # So we could add a simple if statement to short-circuit this if necessary - if get_sample_info != False: + # Todo: These two lines are necessary most of the time, but + # perhaps not all of the time So we could add a simple if + # statement to short-circuit this if necessary + if get_sample_info is not False: self = retrieve_sample_data(self, self.dataset) def export_informative(self, include_variance=0): @@ -128,9 +121,9 @@ class GeneralTrait(object): vals = [] the_vars = [] sample_aliases = [] - for sample_name, sample_data in self.data.items(): - if sample_data.value != None: - if not include_variance or sample_data.variance != None: + for sample_name, sample_data in list(self.data.items()): + if sample_data.value is not None: + if not include_variance or sample_data.variance is not None: samples.append(sample_name) vals.append(sample_data.value) the_vars.append(sample_data.variance) @@ -154,7 +147,8 @@ class GeneralTrait(object): formatted = self.post_publication_description else: formatted = "Not available" - + if isinstance(formatted, bytes): + formatted = formatted.decode("utf-8") return formatted @property @@ -163,8 +157,8 @@ class GeneralTrait(object): alias = 'Not available' if getattr(self, "alias", None): - alias = string.replace(self.alias, ";", " ") - alias = string.join(string.split(alias), ", ") + alias = self.alias.replace(";", " ") + alias = ", ".join(alias.split()) return alias @@ -183,7 +177,8 @@ class GeneralTrait(object): if human_response and mouse_response and other_response: alias_list = json.loads(human_response.content) + json.loads( - mouse_response.content) + json.loads(other_response.content) + mouse_response.content) + \ + json.loads(other_response.content) filtered_aliases = [] seen = set() @@ -201,7 +196,8 @@ class GeneralTrait(object): def location_fmt(self): """Return a text formatted location - While we're at it we set self.location in case we need it later (do we?) + While we're at it we set self.location in case we need it + later (do we?) """ @@ -223,7 +219,7 @@ class GeneralTrait(object): def retrieve_sample_data(trait, dataset, samplelist=None): - if samplelist == None: + if samplelist is None: samplelist = [] if dataset.type == "Temp": @@ -278,7 +274,9 @@ def get_sample_data(): trait_dict['pubmed_link'] = trait_ob.pubmed_link trait_dict['pubmed_text'] = trait_ob.pubmed_text - return json.dumps([trait_dict, {key: value.value for key, value in trait_ob.data.iteritems()}]) + return json.dumps([trait_dict, {key: value.value for + key, value in list( + trait_ob.data.items())}]) else: return None @@ -289,7 +287,8 @@ def jsonable(trait): Actual turning into json doesn't happen here though""" dataset = create_dataset(dataset_name=trait.dataset.name, - dataset_type=trait.dataset.type, group_name=trait.dataset.group.name) + dataset_type=trait.dataset.type, + group_name=trait.dataset.group.name) if dataset.type == "ProbeSet": return dict(name=trait.name, @@ -471,8 +470,7 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): # XZ, 05/08/2009: We also should use Geno.Id to find marker instead of just using Geno.Name # to avoid the problem of same marker name from different species. elif dataset.type == 'Geno': - display_fields_string = string.join( - dataset.display_fields, ',Geno.') + display_fields_string = ',Geno.'.join(dataset.display_fields) display_fields_string = 'Geno.' + display_fields_string query = """ SELECT %s @@ -491,13 +489,15 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): query = """SELECT %s FROM %s WHERE Name = %s""" logger.sql(query) trait_info = g.db.execute(query, - (string.join(dataset.display_fields, ','), - dataset.type, trait.name)).fetchone() + ','.join(dataset.display_fields), + dataset.type, trait.name).fetchone() if trait_info: trait.haveinfo = True for i, field in enumerate(dataset.display_fields): holder = trait_info[i] + if isinstance(holder, bytes): + holder = holder.decode("utf-8", errors="ignore") setattr(trait, field, holder) if dataset.type == 'Publish': @@ -523,13 +523,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): else: trait.description_display = "" - trait.abbreviation = unicode(str(trait.abbreviation).strip( - codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.description_display = unicode(str(trait.description_display).strip( - codecs.BOM_UTF8), 'utf-8', errors="replace") - trait.authors = unicode(str(trait.authors).strip( - codecs.BOM_UTF8), 'utf-8', errors="replace") - if not trait.year.isdigit(): trait.pubmed_text = "N/A" else: @@ -539,10 +532,8 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): trait.pubmed_link = webqtlConfig.PUBMEDLINK_URL % trait.pubmed_id if dataset.type == 'ProbeSet' and dataset.group: - description_string = unicode( - str(trait.description).strip(codecs.BOM_UTF8), 'utf-8') - target_string = unicode( - str(trait.probe_target_description).strip(codecs.BOM_UTF8), 'utf-8') + description_string = trait.description + target_string = trait.probe_target_description if str(description_string or "") != "" and description_string != 'None': description_display = description_string @@ -645,6 +636,6 @@ def retrieve_trait_info(trait, dataset, get_qtl_info=False): if str(trait.lrs or "") != "": trait.LRS_score_repr = LRS_score_repr = '%3.1f' % trait.lrs else: - raise KeyError, `trait.name`+ ' information is not found in the database.' - + raise KeyError(repr(trait.name) + + ' information is not found in the database.') return trait diff --git a/wqflask/base/webqtlCaseData.py b/wqflask/base/webqtlCaseData.py index 3cf2d80d..aa55470f 100644 --- a/wqflask/base/webqtlCaseData.py +++ b/wqflask/base/webqtlCaseData.py @@ -41,8 +41,6 @@ class webqtlCaseData: self.this_id = None # Set a sane default (can't be just "id" cause that's a reserved word) self.outlier = None # Not set to True/False until later - self.first_attr_col = self.get_first_attr_col() - def __repr__(self): case_data_string = "<webqtlCaseData> " if self.value is not None: @@ -80,13 +78,4 @@ class webqtlCaseData: def display_num_cases(self): if self.num_cases is not None: return "%s" % self.num_cases - return "x" - - def get_first_attr_col(self): - col_num = 4 - if self.variance is not None: - col_num += 2 - if self.num_cases is not None: - col_num += 1 - - return col_num
\ No newline at end of file + return "x"
\ No newline at end of file |