Diffstat (limited to 'gn3/utility')
-rw-r--r--  gn3/utility/__init__.py                 0
-rw-r--r--  gn3/utility/bunch.py                   16
-rw-r--r--  gn3/utility/chunks.py                  32
-rw-r--r--  gn3/utility/corr_result_helpers.py     45
-rw-r--r--  gn3/utility/db_tools.py                19
-rw-r--r--  gn3/utility/get_group_samplelists.py   47
-rw-r--r--  gn3/utility/helper_functions.py        24
-rw-r--r--  gn3/utility/hmac.py                    50
-rw-r--r--  gn3/utility/logger.py                 163
-rw-r--r--  gn3/utility/species.py                 71
-rw-r--r--  gn3/utility/tools.py                   37
-rw-r--r--  gn3/utility/webqtlUtil.py              66
12 files changed, 570 insertions, 0 deletions
diff --git a/gn3/utility/__init__.py b/gn3/utility/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gn3/utility/__init__.py
diff --git a/gn3/utility/bunch.py b/gn3/utility/bunch.py
new file mode 100644
index 0000000..c1fd907
--- /dev/null
+++ b/gn3/utility/bunch.py
@@ -0,0 +1,16 @@
+"""Module containing the Bunch class, a dictionary-like container that uses object (attribute) notation"""
+
+from pprint import pformat as pf
+
+
+class Bunch:
+ """Like a dictionary but using object notation"""
+
+ def __init__(self, **kw):
+ self.__dict__ = kw
+
+ def __repr__(self):
+ return pf(self.__dict__)
+
+ def __str__(self):
+ return self.__class__.__name__
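+
+
+# Assumed usage sketch (illustration only, not part of the original commit):
+if __name__ == "__main__":
+    options = Bunch(species="mouse", chromosomes=20)
+    print(options.species)   # attribute access instead of options["species"]
+    print(repr(options))     # pretty-printed dict of the stored attributes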
diff --git a/gn3/utility/chunks.py b/gn3/utility/chunks.py
new file mode 100644
index 0000000..fa27a39
--- /dev/null
+++ b/gn3/utility/chunks.py
@@ -0,0 +1,32 @@
+"""Module with helper functions for splitting lists into chunks"""
+
+import math
+
+
+def divide_into_chunks(the_list, number_chunks):
+ """Divides a list into approximately number_chunks smaller lists
+
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
+ [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
+ [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+ >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
+ [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
+ >>>
+
+ """
+ length = len(the_list)
+
+ if length == 0:
+ return [[]]
+
+ if length <= number_chunks:
+ number_chunks = length
+
+ chunksize = int(math.ceil(length / number_chunks))
+
+ chunks = []
+ for counter in range(0, length, chunksize):
+ chunks.append(the_list[counter:counter+chunksize])
+
+ return chunks
diff --git a/gn3/utility/corr_result_helpers.py b/gn3/utility/corr_result_helpers.py
new file mode 100644
index 0000000..a68308e
--- /dev/null
+++ b/gn3/utility/corr_result_helpers.py
@@ -0,0 +1,45 @@
+"""module contains helper function for corr results"""
+
+# pylint: disable=C0103
+# the disable above allows the short, non-snake_case variable names below; todo: refactor
+def normalize_values(a_values, b_values):
+ """
+ Trim two lists of values to contain only the values they both share
+
+ Given two lists of sample values, trim each list so that it contains
+ only the samples that contain a value in both lists. Also returns
+ the number of such samples.
+
+ >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
+ ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
+
+ """
+ a_new = []
+ b_new = []
+ for a, b in zip(a_values, b_values):
+        # explicit None checks so that zero sample values are not dropped
+        if a is not None and b is not None:
+ a_new.append(a)
+ b_new.append(b)
+ return a_new, b_new, len(a_new)
+
+
+def common_keys(a_samples, b_samples):
+ """
+ >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
+ >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
+ >>> sorted(common_keys(a, b))
+ ['BXD1', 'BXD14']
+ """
+ return set(a_samples.keys()).intersection(set(b_samples.keys()))
+
+
+def normalize_values_with_samples(a_samples, b_samples):
+    """Trim two dicts of sample values down to the samples (keys) they share"""
+ common_samples = common_keys(a_samples, b_samples)
+ a_new = {}
+ b_new = {}
+ for sample in common_samples:
+ a_new[sample] = a_samples[sample]
+ b_new[sample] = b_samples[sample]
+
+ return a_new, b_new, len(a_new)
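+
+
+# Assumed usage sketch (illustration only):
+if __name__ == "__main__":
+    a_samples = {"BXD1": 9.113, "BXD2": 9.825, "BXD14": 8.985}
+    b_samples = {"BXD1": 9.723, "BXD3": 9.825, "BXD14": 9.124}
+    # returns the two dicts trimmed to the shared samples (BXD1, BXD14) and the count 2
+    print(normalize_values_with_samples(a_samples, b_samples))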
diff --git a/gn3/utility/db_tools.py b/gn3/utility/db_tools.py
new file mode 100644
index 0000000..446acda
--- /dev/null
+++ b/gn3/utility/db_tools.py
@@ -0,0 +1,19 @@
+"""module for db_tools"""
+from MySQLdb import escape_string as escape_
+
+
+def create_in_clause(items):
+    """Create a quoted, escaped SQL IN clause from a sequence of items"""
+ in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
+ in_clause = '( {} )'.format(in_clause)
+ return in_clause
+
+
+def mescape(*items):
+    """Escape multiple items and return them as utf-8 strings"""
+ return [escape_(str(item)).decode('utf8') for item in items]
+
+
+def escape(string_):
+    """Escape a single string for use in an SQL statement"""
+ return escape_(string_).decode('utf8')
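+
+
+# Assumed usage sketch (illustration only; relies on the same MySQLdb setup
+# the module itself imports):
+if __name__ == "__main__":
+    trait_names = ["10001", "10002", "O'Brien"]
+    print("SELECT * FROM PublishXRef WHERE Name IN " + create_in_clause(trait_names))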
diff --git a/gn3/utility/get_group_samplelists.py b/gn3/utility/get_group_samplelists.py
new file mode 100644
index 0000000..8fb322a
--- /dev/null
+++ b/gn3/utility/get_group_samplelists.py
@@ -0,0 +1,47 @@
+"""module for getting the group samplelist from a genotype file"""
+import os
+import gzip
+
+
+def get_samplelist(file_type, geno_file):
+    """Return the samplelist for the given genotype file type ("geno" or "plink")"""
+    if file_type == "geno":
+        return get_samplelist_from_geno(geno_file)
+    if file_type == "plink":
+        return get_samplelist_from_plink(geno_file)
+    return None
+
+
+def get_samplelist_from_geno(genofilename):
+    """Read the sample names from the header line of a .geno file"""
+    if os.path.isfile(genofilename + '.gz'):
+        genofilename += '.gz'
+        genofile = gzip.open(genofilename, "rt")
+    else:
+        genofile = open(genofilename)
+
+    with genofile:
+        for line in genofile:
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith(("#", "@")):
+                continue
+            break
+
+    headers = line.split("\t")
+
+    if headers[3] == "Mb":
+        samplelist = headers[4:]
+    else:
+        samplelist = headers[3:]
+    return samplelist
+
+
+def get_samplelist_from_plink(genofilename):
+    """Read the sample IDs (second column) from a plink pedigree file"""
+    samplelist = []
+    with open(genofilename) as genofile:
+        for line in genofile:
+            line = line.split(" ")
+            samplelist.append(line[1])
+
+    return samplelist
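+
+
+# Assumed usage sketch (illustration only; the genotype file path is hypothetical):
+if __name__ == "__main__":
+    samples = get_samplelist("geno", "/path/to/BXD.geno")
+    print(len(samples), "samples")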
diff --git a/gn3/utility/helper_functions.py b/gn3/utility/helper_functions.py
new file mode 100644
index 0000000..f5a8b80
--- /dev/null
+++ b/gn3/utility/helper_functions.py
@@ -0,0 +1,24 @@
+"""module contains general helper functions """
+from gn3.base.data_set import create_dataset
+from gn3.base.trait import create_trait
+from gn3.base.species import TheSpecies
+
+
+def get_species_dataset_trait(self, start_vars):
+    """Set the dataset, species and this_trait attributes on the given object
+    from the request's start_vars"""
+    if start_vars.get('temp_trait') == "True":
+        self.dataset = create_dataset(
+            dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
+    else:
+        self.dataset = create_dataset(start_vars['dataset'])
+ self.species = TheSpecies(dataset=self.dataset)
+
+ self.this_trait = create_trait(dataset=self.dataset,
+ name=start_vars['trait_id'],
+ cellid=None,
+ get_qtl_info=True)
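+
+
+# Assumed calling convention (sketch, not part of the original commit): the
+# function populates attributes on a host object, e.g. a view/page class.
+class ExampleView:  # illustration only
+    """Sketch of an object that delegates its setup to get_species_dataset_trait"""
+    def __init__(self, start_vars):
+        # after this call self.dataset, self.species and self.this_trait are set
+        get_species_dataset_trait(self, start_vars)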
diff --git a/gn3/utility/hmac.py b/gn3/utility/hmac.py
new file mode 100644
index 0000000..eb39e59
--- /dev/null
+++ b/gn3/utility/hmac.py
@@ -0,0 +1,50 @@
+"""Module for signing data and URLs with an HMAC"""
+
+# pylint: disable-all
+import hmac
+import hashlib
+
+from flask import url_for
+
+# xtodo work on this file
+
+# from main import app
+
+
+def hmac_creation(stringy):
+ """Helper function to create the actual hmac"""
+
+    # secret = app.config['SECRET_HMAC_CODE']
+    # todo: read the secret from the application config instead of hard-coding it
+    secret = "my secret"
+ hmaced = hmac.new(bytearray(secret, "latin-1"),
+ bytearray(stringy, "utf-8"),
+ hashlib.sha1)
+ hm = hmaced.hexdigest()
+ # ZS: Leaving the below comment here to ask Pjotr about
+ # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output."
+ # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html
+ hm = hm[:20]
+ return hm
+
+
+def data_hmac(stringy):
+    """Take an arbitrary data string and append ":<hmac>" so we can verify it hasn't been tampered with"""
+ return stringy + ":" + hmac_creation(stringy)
+
+
+def url_for_hmac(endpoint, **values):
+    """Like url_for, but appends an hmac so the url can't be tampered with undetected"""
+
+ url = url_for(endpoint, **values)
+
+ hm = hmac_creation(url)
+ if '?' in url:
+ combiner = "&"
+ else:
+ combiner = "?"
+ return url + combiner + "hm=" + hm
+
+
+
+# todo
+# app.jinja_env.globals.update(url_for_hmac=url_for_hmac,
+# data_hmac=data_hmac)
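+
+
+# Assumed usage sketch (illustration only, not part of the original commit):
+if __name__ == "__main__":
+    signed = data_hmac("trait:12345")
+    print(signed)  # "trait:12345:" followed by the 20-character truncated hmac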
diff --git a/gn3/utility/logger.py b/gn3/utility/logger.py
new file mode 100644
index 0000000..4245a02
--- /dev/null
+++ b/gn3/utility/logger.py
@@ -0,0 +1,163 @@
+"""
+# GeneNetwork logger
+#
+# The standard python logging module is very good. This logger adds a
+# few facilities on top of that. Main one being that it picks up
+# settings for log levels (global and by module) and (potentially)
+# offers some fine grained log levels for the standard levels.
+#
+# All behaviour is defined here; global settings come from
+# default_settings.py.
+#
+# To use logging and settings put this at the top of a module:
+#
+# import gn3.utility.logger
+# logger = gn3.utility.logger.getLogger(__name__)
+#
+# To override global behaviour set the LOG_LEVEL in default_settings.py
+# or use an environment variable, e.g.
+#
+# env LOG_LEVEL=INFO ./bin/genenetwork2
+#
+# To override log level for a module replace that with, for example,
+#
+# import logging
+# import gn3.utility.logger
+# logger = gn3.utility.logger.getLogger(__name__, level=logging.DEBUG)
+#
+# We'll add more overrides soon.
+"""
+# todo incomplete file
+
+# pylint: disable-all
+import logging
+import datetime
+from inspect import isfunction
+from inspect import stack
+
+from pprint import pformat as pf
+
+
+# from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL
+
+# fallback values until the settings import above is wired up
+LOG_SQL = True
+LOG_LEVEL_DEBUG = logging.DEBUG
+
+
+class GNLogger:
+ """A logger class with some additional functionality, such as
+ multiple parameter logging, SQL logging, timing, colors, and lazy
+ functions.
+
+ """
+
+ def __init__(self, name):
+ self.logger = logging.getLogger(name)
+
+ def setLevel(self, value):
+        """Set the underlying log level"""
+ self.logger.setLevel(value)
+
+    def debug(self, *args):
+        """Call logging.debug for multiple args. Use (lazy) debugf and
+        level=num to filter on LOG_LEVEL_DEBUG.
+
+        """
+        self.collect(self.logger.debug, *args)
+
+    def debug20(self, level=0, *args):
+        """Call logging.debug for multiple args. Use level=num to filter on
+        LOG_LEVEL_DEBUG.
+
+        """
+        if level <= LOG_LEVEL_DEBUG:
+            if self.logger.getEffectiveLevel() < 20:
+                self.collect(self.logger.debug, *args)
+
+ def info(self, *args):
+ """Call logging.info for multiple args"""
+ self.collect(self.logger.info, *args)
+
+ def warning(self, *args):
+ """Call logging.warning for multiple args"""
+ self.collect(self.logger.warning, *args)
+ # self.logger.warning(self.collect(*args))
+
+ def error(self, *args):
+ """Call logging.error for multiple args"""
+ now = datetime.datetime.utcnow()
+ time_str = now.strftime('%H:%M:%S UTC %Y%m%d')
+ l = [time_str]+list(args)
+ self.collect(self.logger.error, *l)
+
+    def infof(self, *args):
+        """Call logging.info for multiple args lazily"""
+        # only evaluate the arguments when info-level logging is enabled
+        if self.logger.getEffectiveLevel() < 30:
+            self.collectf(self.logger.info, *args)
+
+ def debugf(self, level=0, *args):
+ """Call logging.debug for multiple args lazily and handle
+ LOG_LEVEL_DEBUG correctly
+
+ """
+ # only evaluate function when logging
+ if level <= LOG_LEVEL_DEBUG:
+ if self.logger.getEffectiveLevel() < 20:
+ self.collectf(self.logger.debug, *args)
+
+ def sql(self, sqlcommand, fun=None):
+ """Log SQL command, optionally invoking a timed fun"""
+ if LOG_SQL:
+ caller = stack()[1][3]
+ if caller in ['fetchone', 'fetch1', 'fetchall']:
+ caller = stack()[2][3]
+ self.info(caller, sqlcommand)
+ if fun:
+ result = fun(sqlcommand)
+ if LOG_SQL:
+ self.info(result)
+ return result
+
+ def collect(self, fun, *args):
+ """Collect arguments and use fun to output"""
+ out = "."+stack()[2][3]
+ for a in args:
+ if len(out) > 1:
+ out += ": "
+ if isinstance(a, str):
+ out = out + a
+ else:
+ out = out + pf(a, width=160)
+ fun(out)
+
+ def collectf(self, fun, *args):
+ """Collect arguments and use fun to output one by one"""
+ out = "."+stack()[2][3]
+ for a in args:
+ if len(out) > 1:
+ out += ": "
+ if isfunction(a):
+ out += a()
+ else:
+ if isinstance(a, str):
+ out = out + a
+ else:
+ out = out + pf(a, width=160)
+ fun(out)
+
+# Get the module logger. You can override log levels at the
+# module level
+
+
+def getLogger(name, level=None):
+    """Return a GNLogger for the named module, optionally overriding its log level"""
+    gnlogger = GNLogger(name)
+
+    if level:
+        gnlogger.setLevel(level)
+    # todo: default to LOG_LEVEL from the settings once that import is restored
+
+    return gnlogger
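+
+
+# Assumed usage sketch (illustration only, not part of the original commit):
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO)
+    log = getLogger(__name__)
+    log.info("logger ready for", __name__)
+    log.error("example error entry with a timestamp prefix")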
diff --git a/gn3/utility/species.py b/gn3/utility/species.py
new file mode 100644
index 0000000..0140d41
--- /dev/null
+++ b/gn3/utility/species.py
@@ -0,0 +1,71 @@
+"""module contains species and chromosomes classes"""
+import collections
+
+from flask import g
+
+
+from gn3.utility.logger import getLogger
+logger = getLogger(__name__)
+
+# pylint: disable=too-few-public-methods
+# intentionally disabled check for too few public methods
+
+class TheSpecies:
+ """class for Species"""
+
+ def __init__(self, dataset=None, species_name=None):
+ if species_name is not None:
+ self.name = species_name
+ self.chromosomes = Chromosomes(species=self.name)
+ else:
+ self.dataset = dataset
+ self.chromosomes = Chromosomes(dataset=self.dataset)
+
+
+class IndChromosome:
+ """class for IndChromosome"""
+
+ def __init__(self, name, length):
+ self.name = name
+ self.length = length
+
+ @property
+ def mb_length(self):
+ """Chromosome length in megabases"""
+ return self.length / 1000000
+
+
+class Chromosomes:
+ """class for Chromosomes"""
+
+ def __init__(self, dataset=None, species=None):
+ self.chromosomes = collections.OrderedDict()
+ if species is not None:
+ query = """
+ Select
+ Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
+ where
+ Chr_Length.SpeciesId = Species.SpeciesId AND
+ Species.Name = '%s'
+ Order by OrderId
+ """ % species.capitalize()
+ else:
+ self.dataset = dataset
+
+ query = """
+ Select
+ Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
+ where
+ Chr_Length.SpeciesId = InbredSet.SpeciesId AND
+ InbredSet.Name = '%s'
+ Order by OrderId
+ """ % self.dataset.group.name
+ logger.sql(query)
+ results = g.db.execute(query).fetchall()
+
+ for item in results:
+ self.chromosomes[item.OrderId] = IndChromosome(
+ item.Name, item.Length)
diff --git a/gn3/utility/tools.py b/gn3/utility/tools.py
new file mode 100644
index 0000000..85df9f6
--- /dev/null
+++ b/gn3/utility/tools.py
@@ -0,0 +1,37 @@
+"""module contains general tools for genenetwork"""
+
+import os
+
+from default_settings import GENENETWORK_FILES
+
+
+def valid_file(file_name):
+ """check if file is valid"""
+ if os.path.isfile(file_name):
+ return file_name
+ return None
+
+
+def valid_path(dir_name):
+ """check if path is valid"""
+ if os.path.isdir(dir_name):
+ return dir_name
+ return None
+
+
+def locate_ignore_error(name, subdir=None):
+ """
+ Locate a static flat file in the GENENETWORK_FILES environment.
+
+ This function does not throw an error when the file is not found
+ but returns None.
+ """
+ base = GENENETWORK_FILES
+ if subdir:
+ base = base+"/"+subdir
+ if valid_path(base):
+ lookfor = base + "/" + name
+ if valid_file(lookfor):
+ return lookfor
+
+ return None
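+
+
+# Assumed usage sketch (illustration only; the file and subdir names are hypothetical):
+if __name__ == "__main__":
+    found = locate_ignore_error("BXD.geno", subdir="genotype")
+    print(found or "not found under GENENETWORK_FILES")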
diff --git a/gn3/utility/webqtlUtil.py b/gn3/utility/webqtlUtil.py
new file mode 100644
index 0000000..1c76410
--- /dev/null
+++ b/gn3/utility/webqtlUtil.py
@@ -0,0 +1,66 @@
+"""
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+# from base import webqtlConfig
+
+# NL, 07/27/2010. moved from webqtlForm.py
+# Dict of Parents and F1 information, in the order [F1, reciprocal F1, Mat, Pat]
+
+"""
+ParInfo = {
+ 'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'],
+ 'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
+ 'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+ 'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'],
+ 'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+ 'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
+ 'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
+ 'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
+ 'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+ 'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
+ 'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+ 'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'],
+ 'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
+ 'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
+ 'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
+ 'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
+ 'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'],
+ 'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
+ 'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'],
+ 'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'],
+ 'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'],
+ 'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'],
+ 'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv']
+}
+
+
+def has_access_to_confidentail_phenotype_trait(privilege, username, authorized_users):
+    """Check whether a user may access a confidential phenotype trait
+    (stub: further implementation needed)"""
+    access_to_confidential_phenotype_trait = 0
+
+    # arguments are currently unused by the stub; kept for the intended signature
+    _ = (privilege, username, authorized_users)
+    return access_to_confidential_phenotype_trait
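+
+
+# Assumed usage sketch (illustration only):
+if __name__ == "__main__":
+    f1, f1_reversed, maternal, paternal = ParInfo['BXD']
+    print(maternal, "x", paternal, "->", f1, "/", f1_reversed)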