diff options
Diffstat (limited to 'gn3/utility')
-rw-r--r-- | gn3/utility/__init__.py | 0 | ||||
-rw-r--r-- | gn3/utility/bunch.py | 16 | ||||
-rw-r--r-- | gn3/utility/chunks.py | 32 | ||||
-rw-r--r-- | gn3/utility/corr_result_helpers.py | 45 | ||||
-rw-r--r-- | gn3/utility/db_tools.py | 19 | ||||
-rw-r--r-- | gn3/utility/get_group_samplelists.py | 47 | ||||
-rw-r--r-- | gn3/utility/helper_functions.py | 24 | ||||
-rw-r--r-- | gn3/utility/hmac.py | 50 | ||||
-rw-r--r-- | gn3/utility/logger.py | 163 | ||||
-rw-r--r-- | gn3/utility/species.py | 71 | ||||
-rw-r--r-- | gn3/utility/tools.py | 37 | ||||
-rw-r--r-- | gn3/utility/webqtlUtil.py | 66 |
12 files changed, 570 insertions, 0 deletions
# ---------------------------------------------------------------------------
# gn3/utility/__init__.py  (new, empty file)
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# gn3/utility/bunch.py
# Module contains the Bunch class: dictionary-like with object notation.
# ---------------------------------------------------------------------------

from pprint import pformat as pf


class Bunch:
    """Like a dictionary but using object notation.

    Every keyword argument given to the constructor becomes an attribute.
    """

    def __init__(self, **kw):
        # The keyword dict *is* the instance dict, so attributes map 1:1
        # onto the keywords that were passed in.
        self.__dict__ = kw

    def __repr__(self):
        """Return the pretty-printed attribute dictionary."""
        return pf(self.__dict__)

    def __str__(self):
        """Return just the class name."""
        return self.__class__.__name__


# ---------------------------------------------------------------------------
# gn3/utility/chunks.py
# Module for chunk functions.
# ---------------------------------------------------------------------------

import math


def divide_into_chunks(the_list, number_chunks):
    """Divide a list into approximately number_chunks smaller lists.

    The chunk size is ceil(len(the_list) / number_chunks), so fewer than
    number_chunks lists may come back. An empty input yields ``[[]]``.

    Raises:
        ValueError: if the list is non-empty and number_chunks is below 1
            (previously a ZeroDivisionError or a silently empty result).

    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
    [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
    [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
    [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
    >>>

    """
    length = len(the_list)

    if length == 0:
        return [[]]

    if number_chunks < 1:
        raise ValueError("number_chunks must be at least 1")

    # Never ask for more chunks than there are items.
    number_chunks = min(number_chunks, length)
    chunksize = int(math.ceil(length / number_chunks))

    return [the_list[start:start + chunksize]
            for start in range(0, length, chunksize)]


# ---------------------------------------------------------------------------
# gn3/utility/corr_result_helpers.py
# Module contains helper functions for correlation results.
# ---------------------------------------------------------------------------

# pylint:disable=C0103
# The disable above silences the snake_case check for variable names;
# TODO: refactor the names instead.
def normalize_values(a_values, b_values):
    """Trim two lists of values to contain only the values they both share.

    Given two lists of sample values, trim each list so that it contains
    only the samples that contain a value in both lists. Also returns
    the number of such samples.

    A value counts as present when it is not None; a numeric zero is a
    valid sample value and is kept.

    >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
    ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)

    """
    a_new = []
    b_new = []
    for a, b in zip(a_values, b_values):
        # Both sides are compared against None explicitly: testing the
        # truthiness of `a` would drop legitimate 0 / 0.0 measurements.
        if a is not None and b is not None:
            a_new.append(a)
            b_new.append(b)
    return a_new, b_new, len(a_new)


def common_keys(a_samples, b_samples):
    """Return the set of keys present in both mappings.

    >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
    >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
    >>> sorted(common_keys(a, b))
    ['BXD1', 'BXD14']
    """
    return set(a_samples.keys()).intersection(set(b_samples.keys()))


def normalize_values_with_samples(a_samples, b_samples):
    """Restrict two sample->value mappings to the samples they share.

    Returns the two trimmed dictionaries and the number of shared samples.
    """
    common_samples = common_keys(a_samples, b_samples)
    a_new = {sample: a_samples[sample] for sample in common_samples}
    b_new = {sample: b_samples[sample] for sample in common_samples}

    return a_new, b_new, len(a_new)


# ---------------------------------------------------------------------------
# gn3/utility/db_tools.py
# Module for db_tools.
# ---------------------------------------------------------------------------

def _mysql_escape(value):
    """Escape one value with MySQLdb.escape_string and decode it as UTF-8.

    MySQLdb is imported at call time so the pure-Python helpers above stay
    usable when the MySQL driver is not installed.
    """
    from MySQLdb import escape_string as escape_
    return escape_(value).decode('utf8')


def create_in_clause(items):
    """Create an in clause for mysql.

    Every item is escaped, wrapped in single quotes and joined with commas
    inside a pair of parentheses.
    """
    in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
    in_clause = '( {} )'.format(in_clause)
    return in_clause


def mescape(*items):
    """Multiple escape: return a list with each item str()-ed and escaped."""
    return [_mysql_escape(str(item)) for item in items]


def escape(string_):
    """Escape a single string for use in a MySQL statement."""
    return _mysql_escape(string_)
# ---------------------------------------------------------------------------
# gn3/utility/get_group_samplelists.py
# Module for group sample lists.
# ---------------------------------------------------------------------------
import gzip
import os


def get_samplelist(file_type, geno_file):
    """Return the sample list of a genotype file.

    file_type selects the parser: "geno" or "plink". Any other value
    returns None.
    """
    if file_type == "geno":
        return get_samplelist_from_geno(geno_file)
    if file_type == "plink":
        return get_samplelist_from_plink(geno_file)
    return None


def get_samplelist_from_geno(genofilename):
    """Return the sample names from the header row of a .geno file.

    If ``genofilename + '.gz'`` exists, that gzip-compressed file is read
    instead. Blank lines and lines starting with "#" or "@" are skipped;
    the first remaining line is the tab-separated header. Samples start
    after the "Mb" column when the fourth column is "Mb", otherwise from
    the fourth column on.

    Returns an empty list when the file has no header line.
    """
    if os.path.isfile(genofilename + '.gz'):
        genofilename += '.gz'
        # Text mode: the prefixes compared below are str, not bytes.
        genofile = gzip.open(genofilename, "rt")
    else:
        genofile = open(genofilename)

    header_line = None
    with genofile:  # make sure the handle is closed again
        for line in genofile:
            line = line.strip()
            if not line or line.startswith(("#", "@")):
                continue
            header_line = line
            break

    if header_line is None:
        return []

    headers = header_line.split("\t")

    if len(headers) > 3 and headers[3] == "Mb":
        return headers[4:]
    return headers[3:]


def get_samplelist_from_plink(genofilename):
    """Return the second space-separated column of every line of a plink file.

    Blank lines are skipped and the line terminator is removed before
    splitting.
    """
    samplelist = []
    with open(genofilename) as genofile:
        for line in genofile:
            line = line.rstrip("\r\n")
            if not line:
                continue
            samplelist.append(line.split(" ")[1])

    return samplelist


# ---------------------------------------------------------------------------
# gn3/utility/helper_functions.py
# Module contains general helper functions.
# ---------------------------------------------------------------------------

def get_species_dataset_trait(self, start_vars):
    """Populate self.dataset, self.species and self.this_trait from start_vars.

    A temporary dataset is created when start_vars['temp_trait'] is the
    string "True" (it then also needs start_vars['group']); otherwise the
    dataset named by start_vars['dataset'] is used. The trait is looked up
    by start_vars['trait_id'].
    """
    # Project imports are resolved at call time so the sample-list and hmac
    # helpers in this listing do not require gn3.base to be importable.
    from gn3.base.data_set import create_dataset
    from gn3.base.trait import create_trait
    from gn3.base.species import TheSpecies

    use_temp = ("temp_trait" in start_vars
                and start_vars['temp_trait'] == "True")
    if use_temp:
        self.dataset = create_dataset(
            dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
    else:
        self.dataset = create_dataset(start_vars['dataset'])

    self.species = TheSpecies(dataset=self.dataset)

    self.this_trait = create_trait(dataset=self.dataset,
                                   name=start_vars['trait_id'],
                                   cellid=None,
                                   get_qtl_info=True)


# ---------------------------------------------------------------------------
# gn3/utility/hmac.py
# Module for hmac.
# ---------------------------------------------------------------------------

# pylint: disable-all
import hmac
import hashlib

# xtodo work on this file

# from main import app


def hmac_creation(stringy):
    """Helper function to create the actual hmac.

    Returns the first 20 hex characters of the HMAC-SHA1 of stringy.
    """

    # secret = app.config['SECRET_HMAC_CODE']
    # TODO: put the secret in config; a hard-coded key gives no protection.
    secret = "my secret"
    hmaced = hmac.new(bytearray(secret, "latin-1"),
                      bytearray(stringy, "utf-8"),
                      hashlib.sha1)
    # ZS: Leaving the below comment here to ask Pjotr about
    # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output."
    # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html
    return hmaced.hexdigest()[:20]


def data_hmac(stringy):
    """Takes arbitrary data string and appends :hmac so we know data hasn't been tampered with"""
    return stringy + ":" + hmac_creation(stringy)


def url_for_hmac(endpoint, **values):
    """Like url_for but adds an hmac at the end to ensure the url hasn't been tampered with.

    The digest is appended as the ``hm`` query parameter.
    """
    # url_for was referenced without being imported; resolve it from Flask
    # at call time.
    from flask import url_for

    url = url_for(endpoint, **values)

    combiner = "&" if '?' in url else "?"
    return url + combiner + "hm=" + hmac_creation(url)


# todo
# app.jinja_env.globals.update(url_for_hmac=url_for_hmac,
#                              data_hmac=data_hmac)

# ---------------------------------------------------------------------------
# gn3/utility/logger.py
# ---------------------------------------------------------------------------
# GeneNetwork logger
#
# The standard python logging module is very good. This logger adds a
# few facilities on top of that. Main one being that it picks up
# settings for log levels (global and by module) and (potentially)
# offers some fine grained log levels for the standard levels.
#
# All behaviour is defined here.
# Global settings (defined in default_settings.py).
#
# To use logging and settings put this at the top of a module:
#
#   import utility.logger
#   logger = utility.logger.getLogger(__name__ )
#
# To override global behaviour set the LOG_LEVEL in default_settings.py
# or use an environment variable, e.g.
#
#    env LOG_LEVEL=INFO ./bin/genenetwork2
#
# To override log level for a module replace that with, for example,
#
#   import logging
#   import utility.logger
#   logger = utility.logger.getLogger(__name__,level=logging.DEBUG)
#
# We'll add more overrides soon.

# todo incomplete file

# pylint: disable-all
import logging
import datetime
from inspect import isfunction
from inspect import stack

from pprint import pformat as pf


# from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL

LOG_SQL = True
# Stand-in until the settings import above is restored; debug20/debugf
# referenced this name without it being defined anywhere.
LOG_LEVEL_DEBUG = 0


class GNLogger:
    """A logger class with some additional functionality, such as
    multiple parameter logging, SQL logging, timing, colors, and lazy
    functions.

    """

    def __init__(self, name):
        self.logger = logging.getLogger(name)

    @staticmethod
    def _render(value):
        """Return value unchanged if it is a str, else pretty-print it."""
        if isinstance(value, str):
            return value
        return pf(value, width=160)

    def setLevel(self, value):
        """Set the underlying log level"""
        self.logger.setLevel(value)

    def debug(self, *args):
        """Call logging.debug for multiple args. Use (lazy) debugf and
        level=num to filter on LOG_LEVEL_DEBUG.

        """
        self.collect(self.logger.debug, *args)

    def debug20(self, *args):
        """Call logging.debug for multiple args, but only when the
        effective level is below 20 (INFO).

        Filtering on LOG_LEVEL_DEBUG is not yet implemented here: the
        method takes no level argument, and the old comparison against an
        undefined `level` name raised NameError on every call.

        """
        if self.logger.getEffectiveLevel() < 20:
            self.collect(self.logger.debug, *args)

    def info(self, *args):
        """Call logging.info for multiple args"""
        self.collect(self.logger.info, *args)

    def warning(self, *args):
        """Call logging.warning for multiple args"""
        self.collect(self.logger.warning, *args)

    def error(self, *args):
        """Call logging.error for multiple args, prefixed with a UTC timestamp"""
        now = datetime.datetime.now(datetime.timezone.utc)
        time_str = now.strftime('%H:%M:%S UTC %Y%m%d')
        self.collect(self.logger.error, *([time_str] + list(args)))

    def infof(self, *args):
        """Call logging.info for multiple args lazily"""
        # Only evaluate functions when the message will be emitted. The
        # message goes out at INFO level; it used to be sent through
        # logger.debug and was therefore dropped whenever the level was INFO.
        if self.logger.getEffectiveLevel() < 30:
            self.collectf(self.logger.info, *args)

    def debugf(self, level=0, *args):
        """Call logging.debug for multiple args lazily and handle
        LOG_LEVEL_DEBUG correctly.

        Note that the first positional argument is the level, not a
        message part.

        """
        # only evaluate function when logging
        if level <= LOG_LEVEL_DEBUG:
            if self.logger.getEffectiveLevel() < 20:
                self.collectf(self.logger.debug, *args)

    def sql(self, sqlcommand, fun=None):
        """Log SQL command, optionally invoking fun on it.

        Returns the result of fun(sqlcommand), or None when no fun is given.
        """
        if LOG_SQL:
            caller = stack()[1][3]
            # Report the caller of the fetch helper, not the helper itself.
            if caller in ['fetchone', 'fetch1', 'fetchall']:
                caller = stack()[2][3]
            self.info(caller, sqlcommand)
        result = None
        if fun:
            result = fun(sqlcommand)
            if LOG_SQL:
                self.info(result)
        return result

    def collect(self, fun, *args):
        """Collect arguments and use fun to output"""
        # stack()[2] is the caller of the public logging method that called
        # collect; the frame lookup must stay in this method so the depth
        # is right.
        out = "." + stack()[2][3]
        for a in args:
            if len(out) > 1:
                out += ": "
            out += self._render(a)
        fun(out)

    def collectf(self, fun, *args):
        """Collect arguments, calling any that are functions, and use fun
        to output"""
        out = "." + stack()[2][3]
        for a in args:
            if len(out) > 1:
                out += ": "
            if isfunction(a):
                # Render the result too: a lazy argument may return a
                # non-string, which cannot be concatenated directly.
                out += self._render(a())
            else:
                out += self._render(a)
        fun(out)

# Get the module logger. You can override log levels at the
# module level


def getLogger(name, level=None):
    """Return a GNLogger for name, optionally setting its log level.

    When level is None the level of the underlying logger is left as is.
    """
    gnlogger = GNLogger(name)

    if level is not None:
        gnlogger.setLevel(level)
    # else:
    #     logger.setLevel(LOG_LEVEL)

    return gnlogger


# ---------------------------------------------------------------------------
# gn3/utility/species.py
# Module contains species and chromosomes classes.
# ---------------------------------------------------------------------------
import collections

# In the separate module this logger comes from
# `from gn3.utility.logger import getLogger`; getLogger is defined just
# above in this listing.
logger = getLogger(__name__)

# pylint: disable=too-few-public-methods
# intentionally disabled check for few public methods


class TheSpecies:
    """Class for Species.

    Built either from a species name or from a dataset; the matching
    Chromosomes object is stored on self.chromosomes.
    """

    def __init__(self, dataset=None, species_name=None):
        if species_name is not None:
            self.name = species_name
            self.chromosomes = Chromosomes(species=self.name)
        else:
            self.dataset = dataset
            self.chromosomes = Chromosomes(dataset=self.dataset)


class IndChromosome:
    """Class for an individual chromosome (name and length)."""

    def __init__(self, name, length):
        self.name = name
        self.length = length

    @property
    def mb_length(self):
        """Chromosome length in megabases"""
        return self.length / 1000000


class Chromosomes:
    """Class for Chromosomes.

    Loads the Chr_Length rows for a species (by name) or for the group of
    a dataset into self.chromosomes, keyed by OrderId.
    """

    def __init__(self, dataset=None, species=None):
        # flask.g is resolved at call time so the classes and helpers in
        # this listing can be imported without Flask.
        from flask import g

        self.chromosomes = collections.OrderedDict()
        # NOTE(review): both queries are built by string interpolation. If
        # the species or group name can come from user input this is an SQL
        # injection risk -- switch to bound parameters once the g.db API is
        # confirmed.
        if species is not None:
            query = """
                Select
                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
                where
                        Chr_Length.SpeciesId = Species.SpeciesId AND
                        Species.Name = '%s'
                Order by OrderId
                """ % species.capitalize()
        else:
            self.dataset = dataset

            query = """
                Select
                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
                where
                        Chr_Length.SpeciesId = InbredSet.SpeciesId AND
                        InbredSet.Name = '%s'
                Order by OrderId
                """ % self.dataset.group.name
        logger.sql(query)
        results = g.db.execute(query).fetchall()

        for item in results:
            self.chromosomes[item.OrderId] = IndChromosome(
                item.Name, item.Length)


# ---------------------------------------------------------------------------
# gn3/utility/tools.py
# Module contains general tools for genenetwork.
# ---------------------------------------------------------------------------

import os


def valid_file(file_name):
    """Return file_name if it is an existing file, else None."""
    if os.path.isfile(file_name):
        return file_name
    return None


def valid_path(dir_name):
    """Return dir_name if it is an existing directory, else None."""
    if os.path.isdir(dir_name):
        return dir_name
    return None


def locate_ignore_error(name, subdir=None):
    """
    Locate a static flat file in the GENENETWORK_FILES environment.

    This function does not throw an error when the file is not found
    but returns None.
    """
    # Resolved at call time so the rest of this listing does not need the
    # default_settings module to be importable.
    from default_settings import GENENETWORK_FILES

    base = GENENETWORK_FILES
    if subdir:
        base = base + "/" + subdir
    if valid_path(base):
        lookfor = base + "/" + name
        if valid_file(lookfor):
            return lookfor

    return None


# ---------------------------------------------------------------------------
# gn3/utility/webqtlUtil.py
# ---------------------------------------------------------------------------
# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License
# as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero General Public License for more details.
#
# This program is available from Source Forge: at GeneNetwork Project
# (sourceforge.net/projects/genenetwork/).
#
# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
#
#
#
# This module is used by GeneNetwork project (www.genenetwork.org)
#
# Created by GeneNetwork Core Team 2010/08/10
#
# Last updated by GeneNetwork Core Team 2010/10/20

# from base import webqtlConfig

# NL, 07/27/2010. moved from webqtlForm.py
# Dict of Parents and F1 information, keyed by group name.
# NOTE(review): the original comment gave the order as [F1, Mat, Pat], but
# every entry has four items -- presumably
# [F1, reciprocal F1, maternal parent, paternal parent]; confirm with callers.
ParInfo = {
    'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'],
    'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
    'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
    'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'],
    'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
    'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
    'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
    'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
    'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
    'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
    'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
    'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'],
    'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
    'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
    'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
    'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
    'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'],
    'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
    'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'],
    'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'],
    'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'],
    'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'],
    'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
}


def has_access_to_confidentail_phenotype_trait(privilege, username, authorized_users):
    """Report whether a user may see a confidential phenotype trait.

    Placeholder: further implementation is needed. The arguments are not
    consulted yet and the function always returns 0 (no access). The
    misspelt name is kept because callers import it under this name.
    """
    # pylint: disable=unused-argument
    access_to_confidential_phenotype_trait = 0
    return access_to_confidential_phenotype_trait