about summary refs log tree commit diff
path: root/gn3/utility
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/utility')
-rw-r--r--gn3/utility/__init__.py0
-rw-r--r--gn3/utility/bunch.py16
-rw-r--r--gn3/utility/chunks.py32
-rw-r--r--gn3/utility/corr_result_helpers.py45
-rw-r--r--gn3/utility/db_tools.py19
-rw-r--r--gn3/utility/get_group_samplelists.py47
-rw-r--r--gn3/utility/helper_functions.py24
-rw-r--r--gn3/utility/hmac.py50
-rw-r--r--gn3/utility/logger.py163
-rw-r--r--gn3/utility/species.py71
-rw-r--r--gn3/utility/tools.py37
-rw-r--r--gn3/utility/webqtlUtil.py66
12 files changed, 570 insertions, 0 deletions
diff --git a/gn3/utility/__init__.py b/gn3/utility/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gn3/utility/__init__.py
diff --git a/gn3/utility/bunch.py b/gn3/utility/bunch.py
new file mode 100644
index 0000000..c1fd907
--- /dev/null
+++ b/gn3/utility/bunch.py
@@ -0,0 +1,16 @@
+"""module contains Bunch class a dictionary like with object notation """
+
+from pprint import pformat as pf
+
+
+class Bunch:
+    """Namespace-style container: keyword arguments become attributes.
+
+    ``Bunch(a=1).a`` evaluates to ``1``. ``repr()`` pretty-prints the
+    stored attributes, while ``str()`` yields only the class name.
+    """
+
+    def __init__(self, **kw):
+        # The keyword dict itself becomes the instance namespace.
+        self.__dict__ = kw
+
+    def __repr__(self):
+        attributes = vars(self)
+        return pf(attributes)
+
+    def __str__(self):
+        return type(self).__name__
diff --git a/gn3/utility/chunks.py b/gn3/utility/chunks.py
new file mode 100644
index 0000000..fa27a39
--- /dev/null
+++ b/gn3/utility/chunks.py
@@ -0,0 +1,32 @@
+"""module for chunks functions"""
+
+import math
+
+
+def divide_into_chunks(the_list, number_chunks):
+    """Split ``the_list`` into at most ``number_chunks`` consecutive slices.
+
+    Every slice has the same size (the ceiling of ``len / number_chunks``)
+    except possibly the last one, so fewer chunks than requested may be
+    returned. An empty input yields ``[[]]``.
+
+    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
+    [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
+    [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
+    >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
+    [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
+    >>>
+
+    """
+    total = len(the_list)
+    if not total:
+        return [[]]
+
+    # Never ask for more chunks than there are elements.
+    wanted = min(number_chunks, total)
+    step = int(math.ceil(total / wanted))
+
+    return [the_list[start:start + step] for start in range(0, total, step)]
diff --git a/gn3/utility/corr_result_helpers.py b/gn3/utility/corr_result_helpers.py
new file mode 100644
index 0000000..a68308e
--- /dev/null
+++ b/gn3/utility/corr_result_helpers.py
@@ -0,0 +1,45 @@
+"""module contains helper function for corr results"""
+
+#pylint:disable=C0103
+# The directive above disables the snake_case naming check for variables; todo: refactor
+def normalize_values(a_values, b_values):
+    """
+    Trim two lists of values to contain only the values they both share
+
+    Given two lists of sample values, trim each list so that it contains
+    only the samples that contain a value in both lists. Also returns
+    the number of such samples. Only ``None`` marks a missing value; a
+    measurement of 0 is kept.
+
+    >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
+    ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
+    >>> normalize_values([0, 1.5, None], [2.0, None, 3.0])
+    ([0], [2.0], 1)
+
+    """
+    a_new = []
+    b_new = []
+    for a, b in zip(a_values, b_values):
+        # Compare against None explicitly: a plain truth test on `a` would
+        # also discard legitimate zero values.
+        if a is not None and b is not None:
+            a_new.append(a)
+            b_new.append(b)
+    return a_new, b_new, len(a_new)
+
+
+def common_keys(a_samples, b_samples):
+    """Return the set of keys that occur in both mappings.
+
+    >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
+    >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
+    >>> sorted(common_keys(a, b))
+    ['BXD1', 'BXD14']
+    """
+    keys_a = set(a_samples.keys())
+    keys_b = set(b_samples.keys())
+    return keys_a & keys_b
+
+
+def normalize_values_with_samples(a_samples, b_samples):
+    """Restrict both sample mappings to the samples they have in common.
+
+    Returns the two filtered dictionaries followed by the number of
+    shared samples.
+    """
+    shared = common_keys(a_samples, b_samples)
+    a_new = {name: a_samples[name] for name in shared}
+    b_new = {name: b_samples[name] for name in shared}
+    return a_new, b_new, len(shared)
diff --git a/gn3/utility/db_tools.py b/gn3/utility/db_tools.py
new file mode 100644
index 0000000..446acda
--- /dev/null
+++ b/gn3/utility/db_tools.py
@@ -0,0 +1,19 @@
+"""module for db_tools"""
+from MySQLdb import escape_string as escape_
+
+
+def create_in_clause(items):
+    """Build the value list of a mysql IN clause.
+
+    Each item is escaped, wrapped in single quotes and the results are
+    joined with commas inside ``( ... )``.
+    """
+    quoted = ["'{}'".format(value) for value in mescape(*items)]
+    return '( {} )'.format(', '.join(quoted))
+
+
+def mescape(*items):
+    """Escape several values at once.
+
+    Every item is converted with ``str``, escaped and decoded from utf8;
+    the results are returned as a list in argument order.
+    """
+    escaped = []
+    for item in items:
+        raw = escape_(str(item))
+        escaped.append(raw.decode('utf8'))
+    return escaped
+
+
+def escape(string_):
+    """Escape a single value and return the result decoded from utf8"""
+    escaped_raw = escape_(string_)
+    return escaped_raw.decode('utf8')
diff --git a/gn3/utility/get_group_samplelists.py b/gn3/utility/get_group_samplelists.py
new file mode 100644
index 0000000..8fb322a
--- /dev/null
+++ b/gn3/utility/get_group_samplelists.py
@@ -0,0 +1,47 @@
+
+"""module for group samplelist"""
+import os
+
+#todo close the files after opening
+def get_samplelist(file_type, geno_file):
+    """Return the sample list of ``geno_file``, read according to ``file_type``.
+
+    ``"geno"`` and ``"plink"`` are handled; any other ``file_type``
+    yields ``None``.
+    """
+    if file_type == "plink":
+        return get_samplelist_from_plink(geno_file)
+    if file_type == "geno":
+        return get_samplelist_from_geno(geno_file)
+    return None
+
+def get_samplelist_from_geno(genofilename):
+    """Read the sample names from the header line of a geno file.
+
+    When ``genofilename + '.gz'`` exists, that gzip-compressed file is
+    read instead. Blank lines and lines starting with ``#`` or ``@`` are
+    skipped; the first remaining line is split on tabs as the header.
+    Sample names start at the fifth column when the fourth column is
+    ``Mb`` and at the fourth column otherwise. A file without a header
+    line yields an empty list.
+    """
+    # Local import: this module does not import gzip at the top.
+    import gzip
+
+    if os.path.isfile(genofilename + '.gz'):
+        # Text mode is required: gzip.open defaults to binary and the
+        # str-based parsing below would fail on bytes.
+        genofile = gzip.open(genofilename + '.gz', "rt")
+    else:
+        genofile = open(genofilename)
+
+    header = ""
+    # The context manager closes the file even if parsing fails.
+    with genofile:
+        for line in genofile:
+            line = line.strip()
+            if line and not line.startswith(("#", "@")):
+                header = line
+                break
+
+    headers = header.split("\t")
+    if len(headers) > 3 and headers[3] == "Mb":
+        return headers[4:]
+    return headers[3:]
+
+
+
+def get_samplelist_from_plink(genofilename):
+    """get samplelist from plink
+
+    Returns the second whitespace-separated field of every line in
+    ``genofilename``. Lines with fewer than two fields (for example
+    blank lines) are skipped instead of raising IndexError.
+    """
+    samplelist = []
+    # The context manager guarantees the file is closed after reading.
+    with open(genofilename) as genofile:
+        for line in genofile:
+            fields = line.split()
+            if len(fields) > 1:
+                samplelist.append(fields[1])
+
+    return samplelist
diff --git a/gn3/utility/helper_functions.py b/gn3/utility/helper_functions.py
new file mode 100644
index 0000000..f5a8b80
--- /dev/null
+++ b/gn3/utility/helper_functions.py
@@ -0,0 +1,24 @@
+"""module contains general helper functions """
+from gn3.base.data_set import create_dataset
+from gn3.base.trait import create_trait
+from gn3.base.species import TheSpecies
+
+
+def get_species_dataset_trait(self, start_vars):
+    """Attach the dataset, species and trait described by start_vars to self.
+
+    Sets ``self.dataset``, ``self.species`` and ``self.this_trait``. A
+    "Temp" dataset is created for ``start_vars['group']`` when
+    ``start_vars['temp_trait']`` equals ``"True"``; in every other case the
+    dataset is created from ``start_vars['dataset']``.
+    """
+    wants_temp = ("temp_trait" in list(start_vars.keys())
+                  and start_vars['temp_trait'] == "True")
+
+    if wants_temp:
+        self.dataset = create_dataset(
+            dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
+    else:
+        self.dataset = create_dataset(start_vars['dataset'])
+
+    self.species = TheSpecies(dataset=self.dataset)
+
+    trait_options = {
+        "dataset": self.dataset,
+        "name": start_vars['trait_id'],
+        "cellid": None,
+        "get_qtl_info": True,
+    }
+    self.this_trait = create_trait(**trait_options)
diff --git a/gn3/utility/hmac.py b/gn3/utility/hmac.py
new file mode 100644
index 0000000..eb39e59
--- /dev/null
+++ b/gn3/utility/hmac.py
@@ -0,0 +1,50 @@
+"""module for hmac """
+
+# pylint: disable-all
+import hmac
+import hashlib
+
+# xtodo work on this file
+
+# from main import app
+
+
+def hmac_creation(stringy):
+    """Return the first 20 hex characters of the HMAC-SHA1 of ``stringy``."""
+
+    # secret = app.config['SECRET_HMAC_CODE']
+    # put in config
+    key = bytes("my secret", "latin-1")
+    message = bytes(stringy, "utf-8")
+    full_digest = hmac.new(key, message, hashlib.sha1).hexdigest()
+    # ZS: Leaving the below comment here to ask Pjotr about
+    # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output."
+    # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html
+    return full_digest[:20]
+
+
+def data_hmac(stringy):
+    """Return ``stringy`` followed by ``:`` and its hmac, so tampering with the data can be detected"""
+    signature = hmac_creation(stringy)
+    return ":".join((stringy, signature))
+
+
+def url_for_hmac(endpoint, **values):
+    """Like url_for but adds an hmac at the end to ensure the url hasn't been tampered with
+
+    The url is built with flask's ``url_for(endpoint, **values)`` and the
+    hmac of that url is appended as the ``hm`` query parameter, using ``&``
+    when the url already has a query string and ``?`` otherwise.
+    """
+    # Local import: url_for was never brought into scope in this module,
+    # so every call used to raise NameError.
+    from flask import url_for
+
+    url = url_for(endpoint, **values)
+
+    combiner = "&" if '?' in url else "?"
+    return url + combiner + "hm=" + hmac_creation(url)
+
+
+
+# todo
+# app.jinja_env.globals.update(url_for_hmac=url_for_hmac,
+#                              data_hmac=data_hmac)
diff --git a/gn3/utility/logger.py b/gn3/utility/logger.py
new file mode 100644
index 0000000..4245a02
--- /dev/null
+++ b/gn3/utility/logger.py
@@ -0,0 +1,163 @@
+"""
+# GeneNetwork logger
+#
+# The standard python logging module is very good. This logger adds a
+# few facilities on top of that. Main one being that it picks up
+# settings for log levels (global and by module) and (potentially)
+# offers some fine grained log levels for the standard levels.
+#
+# All behaviour is defined here.  Global settings (defined in
+# default_settings.py).
+#
+# To use logging and settings put this at the top of a module:
+#
+#   import utility.logger
+#   logger = utility.logger.getLogger(__name__ )
+#
+# To override global behaviour set the LOG_LEVEL in default_settings.py
+# or use an environment variable, e.g.
+#
+#    env LOG_LEVEL=INFO ./bin/genenetwork2
+#
+# To override log level for a module replace that with, for example,
+#
+#   import logging
+#   import utility.logger
+#   logger = utility.logger.getLogger(__name__,level=logging.DEBUG)
+#
+# We'll add more overrides soon.
+"""
+# todo incomplete file
+
+# pylint: disable-all
+import logging
+import datetime
+from inspect import isfunction
+from inspect import stack
+
+from pprint import pformat as pf
+
+
+# from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL
+
+LOG_SQL = True
+
+
+class GNLogger:
+    """A logger class with some additional functionality, such as
+    multiple parameter logging, SQL logging and lazy functions.
+
+    Every message starts with ``.<function name>`` taken two frames above
+    collect/collectf, followed by the arguments joined with ``": "``.
+    Arguments that are not strings are rendered with ``pprint.pformat``.
+    """
+
+    def __init__(self, name):
+        self.logger = logging.getLogger(name)
+
+    def setLevel(self, value):
+        """Set the underlying log level"""
+        self.logger.setLevel(value)
+
+    def _debug_level_enabled(self, level):
+        """Return True when debug output at ``level`` should be emitted"""
+        # LOG_LEVEL_DEBUG is not defined in this module (its settings
+        # import is commented out), so fall back to 0 instead of raising
+        # NameError; a module-level definition is picked up when present.
+        threshold = globals().get("LOG_LEVEL_DEBUG", 0)
+        return (level <= threshold
+                and self.logger.getEffectiveLevel() < logging.INFO)
+
+    @staticmethod
+    def _render(value):
+        """Return ``value`` unchanged if it is a str, else pretty-printed"""
+        return value if isinstance(value, str) else pf(value, width=160)
+
+    def debug(self, *args):
+        """Call logging.debug for multiple args. Use (lazy) debugf and
+        level=num to filter on LOG_LEVEL_DEBUG.
+
+        """
+        self.collect(self.logger.debug, *args)
+
+    def debug20(self, *args, level=0):
+        """Call logging.debug for multiple args. Use the keyword-only
+        level=num to filter on LOG_LEVEL_DEBUG.
+
+        """
+        if self._debug_level_enabled(level):
+            self.collect(self.logger.debug, *args)
+
+    def info(self, *args):
+        """Call logging.info for multiple args"""
+        self.collect(self.logger.info, *args)
+
+    def warning(self, *args):
+        """Call logging.warning for multiple args"""
+        self.collect(self.logger.warning, *args)
+
+    def error(self, *args):
+        """Call logging.error for multiple args, prefixed with the UTC time"""
+        now = datetime.datetime.now(datetime.timezone.utc)
+        time_str = now.strftime('%H:%M:%S UTC %Y%m%d')
+        self.collect(self.logger.error, time_str, *args)
+
+    def infof(self, *args):
+        """Call logging.info for multiple args lazily"""
+        # only evaluate function when logging
+        if self.logger.getEffectiveLevel() < logging.WARNING:
+            self.collectf(self.logger.info, *args)
+
+    def debugf(self, level=0, *args):
+        """Call logging.debug for multiple args lazily and handle
+        LOG_LEVEL_DEBUG correctly
+
+        Note that the first positional argument is always taken as
+        ``level``; the message arguments follow it.
+        """
+        # only evaluate function when logging
+        if self._debug_level_enabled(level):
+            self.collectf(self.logger.debug, *args)
+
+    def sql(self, sqlcommand, fun=None):
+        """Log SQL command, optionally invoking fun on it
+
+        When LOG_SQL is set, the name of the calling function and the
+        command are logged at info level, as is the result of ``fun``.
+        Returns ``fun(sqlcommand)`` when ``fun`` is given, otherwise None.
+        """
+        if LOG_SQL:
+            caller = stack()[1][3]
+            # Look one frame further up when called through a fetch helper.
+            if caller in ['fetchone', 'fetch1', 'fetchall']:
+                caller = stack()[2][3]
+            self.info(caller, sqlcommand)
+        if fun:
+            result = fun(sqlcommand)
+            if LOG_SQL:
+                self.info(result)
+            return result
+        return None
+
+    def collect(self, fun, *args):
+        """Collect arguments and use fun to output"""
+        # stack()[0] is collect itself and stack()[1] its caller (one of the
+        # logging methods above), so stack()[2] names the code being logged.
+        parts = ["." + stack()[2][3]]
+        for a in args:
+            parts.append(self._render(a))
+        fun(": ".join(parts))
+
+    def collectf(self, fun, *args):
+        """Collect arguments, calling any function argument to obtain its
+        value, and use fun to output
+
+        """
+        parts = ["." + stack()[2][3]]
+        for a in args:
+            # Lazy arguments are evaluated here; their result is rendered
+            # like any other argument, so non-string results are allowed.
+            parts.append(self._render(a() if isfunction(a) else a))
+        fun(": ".join(parts))
+
+# Get the module logger. You can override log levels at the
+# module level
+
+
+def getLogger(name, level=None):
+    """Create a GNLogger wrapping the standard logger called ``name``.
+
+    When ``level`` is given it is applied to the underlying logger, which
+    allows a per-module override such as ``level=logging.DEBUG``. Without
+    it the logger's level is left untouched.
+    """
+    gnlogger = GNLogger(name)
+
+    # todo: fall back to the configured LOG_LEVEL once the settings
+    # import is available in this module
+    if level is not None:
+        gnlogger.setLevel(level)
+
+    return gnlogger
diff --git a/gn3/utility/species.py b/gn3/utility/species.py
new file mode 100644
index 0000000..0140d41
--- /dev/null
+++ b/gn3/utility/species.py
@@ -0,0 +1,71 @@
+"""module contains species and chromosomes classes"""
+import collections
+
+from flask import g
+
+
+from gn3.utility.logger import getLogger
+logger = getLogger(__name__)
+
+ # pylint: disable=too-few-public-methods
+ # intentionally disabled check for few public methods
+
+class TheSpecies:
+    """Species wrapper giving access to the chromosomes of a species.
+
+    Built either from an explicit species name (stored as ``name``) or,
+    when no name is given, from a dataset (stored as ``dataset``). In both
+    cases ``chromosomes`` holds the matching Chromosomes object.
+    """
+
+    def __init__(self, dataset=None, species_name=None):
+        if species_name is None:
+            self.dataset = dataset
+            self.chromosomes = Chromosomes(dataset=self.dataset)
+            return
+
+        self.name = species_name
+        self.chromosomes = Chromosomes(species=self.name)
+
+
+
+class IndChromosome:
+    """A single chromosome described by its name and its length"""
+
+    def __init__(self, name, length):
+        self.length = length
+        self.name = name
+
+    @property
+    def mb_length(self):
+        """Chromosome length in megabases"""
+        per_megabase = 1000000
+        return self.length / per_megabase
+
+
+
+
+class Chromosomes:
+    """Ordered collection of chromosomes loaded from the database.
+
+    ``chromosomes`` maps the OrderId of every result row to an
+    IndChromosome built from the row's Name and Length. Rows are selected
+    by species name when ``species`` is given, otherwise by
+    ``dataset.group.name``.
+    """
+
+    def __init__(self, dataset=None, species=None):
+        self.chromosomes = collections.OrderedDict()
+        # NOTE(review): the name is interpolated directly into the SQL
+        # text - confirm it never carries untrusted input, or switch to
+        # bound parameters.
+        if species is None:
+            self.dataset = dataset
+
+            query = """
+                Select
+                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
+                where
+                        Chr_Length.SpeciesId = InbredSet.SpeciesId AND
+                        InbredSet.Name = '%s'
+                Order by OrderId
+                """ % self.dataset.group.name
+        else:
+            query = """
+                Select
+                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
+                where
+                        Chr_Length.SpeciesId = Species.SpeciesId AND
+                        Species.Name = '%s'
+                Order by OrderId
+                """ % species.capitalize()
+        logger.sql(query)
+        rows = g.db.execute(query).fetchall()
+
+        for row in rows:
+            chromosome = IndChromosome(row.Name, row.Length)
+            self.chromosomes[row.OrderId] = chromosome
diff --git a/gn3/utility/tools.py b/gn3/utility/tools.py
new file mode 100644
index 0000000..85df9f6
--- /dev/null
+++ b/gn3/utility/tools.py
@@ -0,0 +1,37 @@
+"""module contains general tools forgenenetwork"""
+
+import os
+
+from default_settings import GENENETWORK_FILES
+
+
+def valid_file(file_name):
+    """Return ``file_name`` when it names an existing regular file, else None"""
+    return file_name if os.path.isfile(file_name) else None
+
+
+def valid_path(dir_name):
+    """Return ``dir_name`` when it names an existing directory, else None"""
+    return dir_name if os.path.isdir(dir_name) else None
+
+
+def locate_ignore_error(name, subdir=None):
+    """
+    Look up a static flat file below GENENETWORK_FILES.
+
+    The file is searched in GENENETWORK_FILES itself or, when ``subdir``
+    is given, in that sub directory. The full path is returned when both
+    the directory and the file exist; otherwise None is returned and no
+    error is raised.
+    """
+    search_dir = GENENETWORK_FILES
+    if subdir:
+        search_dir = search_dir + "/" + subdir
+
+    if not valid_path(search_dir):
+        return None
+
+    # valid_file hands back the path itself or None.
+    return valid_file(search_dir + "/" + name)
diff --git a/gn3/utility/webqtlUtil.py b/gn3/utility/webqtlUtil.py
new file mode 100644
index 0000000..1c76410
--- /dev/null
+++ b/gn3/utility/webqtlUtil.py
@@ -0,0 +1,66 @@
+"""
+# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU Affero General Public License
+# as published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# See the GNU Affero General Public License for more details.
+#
+# This program is available from Source Forge: at GeneNetwork Project
+# (sourceforge.net/projects/genenetwork/).
+#
+# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
+# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
+#
+#
+#
+# This module is used by GeneNetwork project (www.genenetwork.org)
+#
+# Created by GeneNetwork Core Team 2010/08/10
+#
+# Last updated by GeneNetwork Core Team 2010/10/20
+
+# from base import webqtlConfig
+
+# NL, 07/27/2010. moved from webqtlForm.py
+# Dict of Parents and F1 information, In the order of [F1, Mat, Pat]
+
+"""
+# Parent and F1 strain names, keyed by group name.
+# NOTE(review): every entry holds four names - presumably
+# [F1, reciprocal F1, maternal strain, paternal strain]; confirm the order.
+ParInfo = {
+    'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'],
+    'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
+    'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+    'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'],
+    'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+    'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
+    'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
+    'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
+    'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+    'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
+    'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
+    'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'],
+    'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
+    'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
+    'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
+    'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
+    'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'],
+    'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
+    'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'],
+    'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'],
+    'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'],
+    'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'],
+    'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv']
+}
+
+
+def has_access_to_confidentail_phenotype_trait(privilege, username, authorized_users):
+    """Placeholder access check for confidential phenotype traits.
+
+    The arguments are not consulted yet and 0 is always returned; further
+    implementation is needed.
+    """
+    # The parameters are bundled here but not evaluated.
+    _ = (privilege, username, authorized_users)
+    no_access = 0
+    return no_access