path: root/gn3/utility
author      Alexander Kabui    2021-03-16 11:38:13 +0300
committer   GitHub             2021-03-16 11:38:13 +0300
commit      56ce88ad31dec3cece63e9370ca4e4c02139753b (patch)
tree        766504dfaca75a14cc91fc3d88c41d1e775d415f /gn3/utility
parent      43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c (diff)
download    genenetwork3-56ce88ad31dec3cece63e9370ca4e4c02139753b.tar.gz
delete unwanted correlation stuff (#5)
* delete unwanted correlation stuff
* Refactor/clean up correlations (#4)
  * initial commit for Refactor/clean-up-correlation
  * add python scipy dependency
  * initial commit for sample correlation
  * initial commit for sample correlation endpoint
  * initial commit for integration and unit tests
  * initial commit for registering correlation blueprint
  * add and modify unit tests and integration tests for correlation
  * add compute_all_sample_corr method for correlation
  * add scipy to requirements.txt file
  * add tissue correlation for trait list
  * add unit tests for tissue correlation
  * add lit correlation for trait list
  * add unit tests for lit correlation for trait list
  * modify lit correlation for trait list
  * add unit tests for lit correlation for trait list
  * add correlation method in dynamic url
  * add file format for expected structure input while doing sample correlation
  * modify input data structure -> add trait id
  * update tests for sample r correlation
  * add compute all lit correlation method
  * add endpoint for computing lit_corr
  * add unit and integration tests for computing lit corr
  * add /api/correlation/tissue_corr/{corr_method} endpoint for tissue correlation
  * add unit and integration tests for tissue correlation

  Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>

* update guix scm file
* fix pylint error for correlations api

Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>
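The message above names the /api/correlation/tissue_corr/{corr_method} endpoint added by the refactor. Below is a minimal, hedged sketch of how a client might call it; the host/port and the exact JSON payload shape (primary tissue values plus target tissue values) are assumptions for illustration, not taken from this commit.

    # Hedged client sketch for the tissue-correlation endpoint named in the commit message.
    import requests  # third-party; pip install requests

    GN3_BASE = "http://localhost:8080"   # assumed local gn3 instance
    corr_method = "pearson"              # the {corr_method} URL parameter

    payload = {                          # assumed payload structure, illustrative only
        "primary_tissue": {"trait_id": "1455696_at", "tissue_values": [7.2, 7.9, 8.1]},
        "target_tissues": {"1442332_at": [6.8, 7.1, 7.6]},
    }

    response = requests.post(
        f"{GN3_BASE}/api/correlation/tissue_corr/{corr_method}", json=payload)
    print(response.status_code, response.json())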
Diffstat (limited to 'gn3/utility')
-rw-r--r--  gn3/utility/__init__.py                 0
-rw-r--r--  gn3/utility/bunch.py                   16
-rw-r--r--  gn3/utility/chunks.py                  32
-rw-r--r--  gn3/utility/corr_result_helpers.py     45
-rw-r--r--  gn3/utility/db_tools.py                19
-rw-r--r--  gn3/utility/get_group_samplelists.py   47
-rw-r--r--  gn3/utility/helper_functions.py        24
-rw-r--r--  gn3/utility/hmac.py                    50
-rw-r--r--  gn3/utility/logger.py                 163
-rw-r--r--  gn3/utility/species.py                 71
-rw-r--r--  gn3/utility/tools.py                   37
-rw-r--r--  gn3/utility/webqtlUtil.py              66
12 files changed, 0 insertions, 570 deletions
diff --git a/gn3/utility/__init__.py b/gn3/utility/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/gn3/utility/__init__.py
+++ /dev/null
diff --git a/gn3/utility/bunch.py b/gn3/utility/bunch.py
deleted file mode 100644
index c1fd907..0000000
--- a/gn3/utility/bunch.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""module contains Bunch class a dictionary like with object notation """
-
-from pprint import pformat as pf
-
-
-class Bunch:
- """Like a dictionary but using object notation"""
-
- def __init__(self, **kw):
- self.__dict__ = kw
-
- def __repr__(self):
- return pf(self.__dict__)
-
- def __str__(self):
- return self.__class__.__name__
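For reference, the deleted Bunch class above is a thin attribute-notation wrapper over a dict. A self-contained usage sketch (the field names are illustrative, not from the codebase):

    from pprint import pformat as pf

    class Bunch:
        """Like a dictionary but using object notation (copy of the deleted helper)."""
        def __init__(self, **kw):
            self.__dict__ = kw

        def __repr__(self):
            return pf(self.__dict__)

    trait = Bunch(name="1427571_at", mean=8.42)  # illustrative fields only
    print(trait.name)    # attribute access instead of trait["name"]
    print(repr(trait))   # pretty-printed underlying dict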
diff --git a/gn3/utility/chunks.py b/gn3/utility/chunks.py
deleted file mode 100644
index fa27a39..0000000
--- a/gn3/utility/chunks.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""module for chunks functions"""
-
-import math
-
-
-def divide_into_chunks(the_list, number_chunks):
- """Divides a list into approximately number_chunks smaller lists
-
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
- [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
- >>>
-
- """
- length = len(the_list)
-
- if length == 0:
- return [[]]
-
- if length <= number_chunks:
- number_chunks = length
-
- chunksize = int(math.ceil(length / number_chunks))
-
- chunks = []
- for counter in range(0, length, chunksize):
- chunks.append(the_list[counter:counter+chunksize])
-
- return chunks
diff --git a/gn3/utility/corr_result_helpers.py b/gn3/utility/corr_result_helpers.py
deleted file mode 100644
index a68308e..0000000
--- a/gn3/utility/corr_result_helpers.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""module contains helper function for corr results"""
-
-#pylint:disable=C0103
-# the above disables snake_case naming checks for variables; todo: refactor
-def normalize_values(a_values, b_values):
- """
- Trim two lists of values to contain only the values they both share
-
- Given two lists of sample values, trim each list so that it contains
- only the samples that contain a value in both lists. Also returns
- the number of such samples.
-
- >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
- ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
-
- """
- a_new = []
- b_new = []
- for a, b in zip(a_values, b_values):
- if a is not None and b is not None:
- a_new.append(a)
- b_new.append(b)
- return a_new, b_new, len(a_new)
-
-
-def common_keys(a_samples, b_samples):
- """
- >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
- >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
- >>> sorted(common_keys(a, b))
- ['BXD1', 'BXD14']
- """
- return set(a_samples.keys()).intersection(set(b_samples.keys()))
-
-
-def normalize_values_with_samples(a_samples, b_samples):
- """function to normalize values with samples"""
- common_samples = common_keys(a_samples, b_samples)
- a_new = {}
- b_new = {}
- for sample in common_samples:
- a_new[sample] = a_samples[sample]
- b_new[sample] = b_samples[sample]
-
- return a_new, b_new, len(a_new)
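A short usage sketch of the helpers above, reusing the strain data from the common_keys doctest; the only assumption is that both dicts map strain names to expression values.

    a = {"BXD1": 9.113, "BXD2": 9.825, "BXD14": 8.985, "BXD15": 9.300}
    b = {"BXD1": 9.723, "BXD3": 9.825, "BXD14": 9.124, "BXD16": 9.300}

    # Simplified, behaviorally equivalent copy of the deleted normalize_values_with_samples()
    def normalize_values_with_samples(a_samples, b_samples):
        common = set(a_samples) & set(b_samples)
        a_new = {sample: a_samples[sample] for sample in common}
        b_new = {sample: b_samples[sample] for sample in common}
        return a_new, b_new, len(a_new)

    a_new, b_new, count = normalize_values_with_samples(a, b)
    print(sorted(a_new))  # ['BXD1', 'BXD14'] -- only the shared strains survive
    print(count)          # 2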
diff --git a/gn3/utility/db_tools.py b/gn3/utility/db_tools.py
deleted file mode 100644
index 446acda..0000000
--- a/gn3/utility/db_tools.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""module for db_tools"""
-from MySQLdb import escape_string as escape_
-
-
-def create_in_clause(items):
- """Create an in clause for mysql"""
- in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
- in_clause = '( {} )'.format(in_clause)
- return in_clause
-
-
-def mescape(*items):
- """Multiple escape"""
- return [escape_(str(item)).decode('utf8') for item in items]
-
-
-def escape(string_):
- """escape function"""
- return escape_(string_).decode('utf8')
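A standalone sketch of the IN-clause construction above; it assumes the mysqlclient package (which provides MySQLdb.escape_string, exactly as the deleted module imports it) is installed.

    from MySQLdb import escape_string  # provided by the mysqlclient package

    def mescape(*items):
        """Escape several values at once (mirrors the deleted helper)."""
        return [escape_string(str(item)).decode("utf8") for item in items]

    def create_in_clause(items):
        """Build a quoted SQL IN (...) clause from a list of values."""
        return "( {} )".format(", ".join("'{}'".format(x) for x in mescape(*items)))

    print(create_in_clause(["BXD1", "BXD2", "BXD14"]))
    # -> ( 'BXD1', 'BXD2', 'BXD14' )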
diff --git a/gn3/utility/get_group_samplelists.py b/gn3/utility/get_group_samplelists.py
deleted file mode 100644
index 8fb322a..0000000
--- a/gn3/utility/get_group_samplelists.py
+++ /dev/null
@@ -1,47 +0,0 @@
-
-"""module for group samplelist"""
-import os
-
-#todo close the files after opening
-def get_samplelist(file_type, geno_file):
- """get samplelist function"""
- if file_type == "geno":
- return get_samplelist_from_geno(geno_file)
- elif file_type == "plink":
- return get_samplelist_from_plink(geno_file)
-
-def get_samplelist_from_geno(genofilename):
- if os.path.isfile(genofilename + '.gz'):
- genofilename += '.gz'
- genofile = gzip.open(genofilename)
- else:
- genofile = open(genofilename)
-
- for line in genofile:
- line = line.strip()
- if not line:
- continue
- if line.startswith(("#", "@")):
- continue
- break
-
- headers = line.split("\t")
-
- if headers[3] == "Mb":
- samplelist = headers[4:]
- else:
- samplelist = headers[3:]
- return samplelist
-
-
-
-def get_samplelist_from_plink(genofilename):
- """get samplelist from plink"""
- genofile = open(genofilename)
-
- samplelist = []
- for line in genofile:
- line = line.split(" ")
- samplelist.append(line[1])
-
- return samplelist
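Note that the deleted get_samplelist_from_geno above calls gzip.open without importing gzip, and neither reader closes its file handle. A corrected, self-contained sketch of the same .geno header parsing (missing import added, text mode, context manager) might look like this:

    import gzip
    import os

    def get_samplelist_from_geno(genofilename):
        """Return the sample names from the header row of a .geno file."""
        if os.path.isfile(genofilename + ".gz"):
            genofile = gzip.open(genofilename + ".gz", "rt")  # text mode, unlike the deleted code
        else:
            genofile = open(genofilename)

        with genofile:
            for line in genofile:
                line = line.strip()
                if not line or line.startswith(("#", "@")):
                    continue
                headers = line.split("\t")
                break
            else:
                return []  # no header row found

        # Samples start after the Chr/Locus/cM(/Mb) columns.
        return headers[4:] if headers[3] == "Mb" else headers[3:]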
diff --git a/gn3/utility/helper_functions.py b/gn3/utility/helper_functions.py
deleted file mode 100644
index f5a8b80..0000000
--- a/gn3/utility/helper_functions.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""module contains general helper functions """
-from gn3.base.data_set import create_dataset
-from gn3.base.trait import create_trait
-from gn3.base.species import TheSpecies
-
-
-def get_species_dataset_trait(self, start_vars):
- """function to get species dataset and trait"""
- if "temp_trait" in list(start_vars.keys()):
- if start_vars['temp_trait'] == "True":
- self.dataset = create_dataset(
- dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
-
- else:
- self.dataset = create_dataset(start_vars['dataset'])
-
- else:
- self.dataset = create_dataset(start_vars['dataset'])
- self.species = TheSpecies(dataset=self.dataset)
-
- self.this_trait = create_trait(dataset=self.dataset,
- name=start_vars['trait_id'],
- cellid=None,
- get_qtl_info=True)
diff --git a/gn3/utility/hmac.py b/gn3/utility/hmac.py
deleted file mode 100644
index eb39e59..0000000
--- a/gn3/utility/hmac.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""module for hmac """
-
-# pylint: disable-all
-import hmac
-import hashlib
-
-# xtodo work on this file
-
-# from main import app
-
-
-def hmac_creation(stringy):
- """Helper function to create the actual hmac"""
-
- # secret = app.config['SECRET_HMAC_CODE']
- # put in config
- secret = "my secret"
- hmaced = hmac.new(bytearray(secret, "latin-1"),
- bytearray(stringy, "utf-8"),
- hashlib.sha1)
- hm = hmaced.hexdigest()
- # ZS: Leaving the below comment here to ask Pjotr about
- # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output."
- # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html
- hm = hm[:20]
- return hm
-
-
-def data_hmac(stringy):
- """Takes arbitrary data string and appends :hmac so we know data hasn't been tampered with"""
- return stringy + ":" + hmac_creation(stringy)
-
-
-def url_for_hmac(endpoint, **values):
- """Like url_for but adds an hmac at the end to insure the url hasn't been tampered with"""
-
- url = url_for(endpoint, **values)
-
- hm = hmac_creation(url)
- if '?' in url:
- combiner = "&"
- else:
- combiner = "?"
- return url + combiner + "hm=" + hm
-
-
-
-# todo
-# app.jinja_env.globals.update(url_for_hmac=url_for_hmac,
-# data_hmac=data_hmac)
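A self-contained sketch of how the truncated-HMAC tagging above can be verified on the receiving side; the secret is the same placeholder the deleted module hard-codes, and real deployments would read it from configuration.

    import hashlib
    import hmac

    SECRET = "my secret"  # placeholder, as in the deleted module

    def hmac_creation(stringy):
        """SHA1 HMAC, truncated to 20 hex characters (mirrors the deleted helper)."""
        return hmac.new(bytearray(SECRET, "latin-1"),
                        bytearray(stringy, "utf-8"),
                        hashlib.sha1).hexdigest()[:20]

    def data_hmac(stringy):
        """Append ':<hmac>' so tampering can be detected later."""
        return stringy + ":" + hmac_creation(stringy)

    def verify_data_hmac(tagged):
        """Split off the tag and recompute it, comparing in constant time."""
        data, _, tag = tagged.rpartition(":")
        return hmac.compare_digest(tag, hmac_creation(data))

    tagged = data_hmac("trait_id=1427571_at")  # illustrative data string
    print(tagged)
    print(verify_data_hmac(tagged))            # True
    print(verify_data_hmac(tagged + "x"))      # False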
diff --git a/gn3/utility/logger.py b/gn3/utility/logger.py
deleted file mode 100644
index 4245a02..0000000
--- a/gn3/utility/logger.py
+++ /dev/null
@@ -1,163 +0,0 @@
-"""
-# GeneNetwork logger
-#
-# The standard python logging module is very good. This logger adds a
-# few facilities on top of that. Main one being that it picks up
-# settings for log levels (global and by module) and (potentially)
-# offers some fine grained log levels for the standard levels.
-#
-# All behaviour is defined here. Global settings (defined in
-# default_settings.py).
-#
-# To use logging and settings put this at the top of a module:
-#
-# import utility.logger
-# logger = utility.logger.getLogger(__name__ )
-#
-# To override global behaviour set the LOG_LEVEL in default_settings.py
-# or use an environment variable, e.g.
-#
-# env LOG_LEVEL=INFO ./bin/genenetwork2
-#
-# To override log level for a module replace that with, for example,
-#
-# import logging
-# import utility.logger
-# logger = utility.logger.getLogger(__name__,level=logging.DEBUG)
-#
-# We'll add more overrides soon.
-"""
-# todo incomplete file
-
-# pylint: disable-all
-import logging
-import datetime
-from inspect import isfunction
-from inspect import stack
-
-from pprint import pformat as pf
-
-
-# from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL
-
-LOG_SQL = True
-
-
-class GNLogger:
- """A logger class with some additional functionality, such as
- multiple parameter logging, SQL logging, timing, colors, and lazy
- functions.
-
- """
-
- def __init__(self, name):
- self.logger = logging.getLogger(name)
-
- def setLevel(self, value):
- """Set the undelying log level"""
- self.logger.setLevel(value)
-
- def debug(self, *args):
- """Call logging.debug for multiple args. Use (lazy) debugf and
-level=num to filter on LOG_LEVEL_DEBUG.
-
- """
- self.collect(self.logger.debug, *args)
-
- def debug20(self, *args):
- """Call logging.debug for multiple args. Use level=num to filter on
-LOG_LEVEL_DEBUG (NYI).
-
- """
- if level <= LOG_LEVEL_DEBUG:
- if self.logger.getEffectiveLevel() < 20:
- self.collect(self.logger.debug, *args)
-
- def info(self, *args):
- """Call logging.info for multiple args"""
- self.collect(self.logger.info, *args)
-
- def warning(self, *args):
- """Call logging.warning for multiple args"""
- self.collect(self.logger.warning, *args)
- # self.logger.warning(self.collect(*args))
-
- def error(self, *args):
- """Call logging.error for multiple args"""
- now = datetime.datetime.utcnow()
- time_str = now.strftime('%H:%M:%S UTC %Y%m%d')
- l = [time_str]+list(args)
- self.collect(self.logger.error, *l)
-
- def infof(self, *args):
- """Call logging.info for multiple args lazily"""
- # only evaluate function when logging
- if self.logger.getEffectiveLevel() < 30:
- self.collectf(self.logger.debug, *args)
-
- def debugf(self, level=0, *args):
- """Call logging.debug for multiple args lazily and handle
- LOG_LEVEL_DEBUG correctly
-
- """
- # only evaluate function when logging
- if level <= LOG_LEVEL_DEBUG:
- if self.logger.getEffectiveLevel() < 20:
- self.collectf(self.logger.debug, *args)
-
- def sql(self, sqlcommand, fun=None):
- """Log SQL command, optionally invoking a timed fun"""
- if LOG_SQL:
- caller = stack()[1][3]
- if caller in ['fetchone', 'fetch1', 'fetchall']:
- caller = stack()[2][3]
- self.info(caller, sqlcommand)
- if fun:
- result = fun(sqlcommand)
- if LOG_SQL:
- self.info(result)
- return result
-
- def collect(self, fun, *args):
- """Collect arguments and use fun to output"""
- out = "."+stack()[2][3]
- for a in args:
- if len(out) > 1:
- out += ": "
- if isinstance(a, str):
- out = out + a
- else:
- out = out + pf(a, width=160)
- fun(out)
-
- def collectf(self, fun, *args):
- """Collect arguments and use fun to output one by one"""
- out = "."+stack()[2][3]
- for a in args:
- if len(out) > 1:
- out += ": "
- if isfunction(a):
- out += a()
- else:
- if isinstance(a, str):
- out = out + a
- else:
- out = out + pf(a, width=160)
- fun(out)
-
-# Get the module logger. You can override log levels at the
-# module level
-
-
-def getLogger(name, level=None):
- """method to get logger"""
- gnlogger = GNLogger(name)
- _logger = gnlogger.logger
-
- # if level:
- # logger.setLevel(level)
- # else:
- # logger.setLevel(LOG_LEVEL)
-
- # logger.info("Log level of "+name+" set to "+logging.getLevelName(logger.getEffectiveLevel()))
- return gnlogger
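The module docstring above shows the intended import pattern. A minimal usage sketch, assuming the module were still importable under its pre-deletion path (gn3.utility.logger); note that the deleted getLogger never applies a level, so the caller still has to configure the standard logging machinery for anything to be emitted.

    import logging
    logging.basicConfig(level=logging.INFO)    # GNLogger does not install handlers itself

    from gn3.utility.logger import getLogger   # pre-deletion import path (assumed)

    logger = getLogger(__name__)
    logger.info("computing sample correlation", {"method": "pearson"})  # multiple args are collected
    logger.sql("SELECT 1")                     # logged because LOG_SQL is hard-coded to True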
diff --git a/gn3/utility/species.py b/gn3/utility/species.py
deleted file mode 100644
index 0140d41..0000000
--- a/gn3/utility/species.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""module contains species and chromosomes classes"""
-import collections
-
-from flask import g
-
-
-from gn3.utility.logger import getLogger
-logger = getLogger(__name__)
-
- # pylint: disable=too-few-public-methods
- # intentionally disabled check for few public methods
-
-class TheSpecies:
- """class for Species"""
-
- def __init__(self, dataset=None, species_name=None):
- if species_name is not None:
- self.name = species_name
- self.chromosomes = Chromosomes(species=self.name)
- else:
- self.dataset = dataset
- self.chromosomes = Chromosomes(dataset=self.dataset)
-
-
-
-class IndChromosome:
- """class for IndChromosome"""
-
- def __init__(self, name, length):
- self.name = name
- self.length = length
-
- @property
- def mb_length(self):
- """Chromosome length in megabases"""
- return self.length / 1000000
-
-
-
-
-class Chromosomes:
- """class for Chromosomes"""
-
- def __init__(self, dataset=None, species=None):
- self.chromosomes = collections.OrderedDict()
- if species is not None:
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
- where
- Chr_Length.SpeciesId = Species.SpeciesId AND
- Species.Name = '%s'
- Order by OrderId
- """ % species.capitalize()
- else:
- self.dataset = dataset
-
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
- where
- Chr_Length.SpeciesId = InbredSet.SpeciesId AND
- InbredSet.Name = '%s'
- Order by OrderId
- """ % self.dataset.group.name
- logger.sql(query)
- results = g.db.execute(query).fetchall()
-
- for item in results:
- self.chromosomes[item.OrderId] = IndChromosome(
- item.Name, item.Length)
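A self-contained sketch of the data structure Chromosomes builds above, using made-up rows in place of the Chr_Length query results so no database connection is needed; the chromosome lengths are illustrative only.

    import collections
    from collections import namedtuple

    Row = namedtuple("Row", ["Name", "OrderId", "Length"])

    class IndChromosome:
        def __init__(self, name, length):
            self.name = name
            self.length = length

        @property
        def mb_length(self):
            """Chromosome length in megabases."""
            return self.length / 1000000

    results = [Row("1", 1, 195471971), Row("2", 2, 182113224)]  # stand-ins for g.db rows

    chromosomes = collections.OrderedDict()
    for item in results:
        chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)

    print(chromosomes[1].name, round(chromosomes[1].mb_length, 2), "Mb")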
diff --git a/gn3/utility/tools.py b/gn3/utility/tools.py
deleted file mode 100644
index 85df9f6..0000000
--- a/gn3/utility/tools.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""module contains general tools forgenenetwork"""
-
-import os
-
-from default_settings import GENENETWORK_FILES
-
-
-def valid_file(file_name):
- """check if file is valid"""
- if os.path.isfile(file_name):
- return file_name
- return None
-
-
-def valid_path(dir_name):
- """check if path is valid"""
- if os.path.isdir(dir_name):
- return dir_name
- return None
-
-
-def locate_ignore_error(name, subdir=None):
- """
- Locate a static flat file in the GENENETWORK_FILES environment.
-
- This function does not throw an error when the file is not found
- but returns None.
- """
- base = GENENETWORK_FILES
- if subdir:
- base = base+"/"+subdir
- if valid_path(base):
- lookfor = base + "/" + name
- if valid_file(lookfor):
- return lookfor
-
- return None
diff --git a/gn3/utility/webqtlUtil.py b/gn3/utility/webqtlUtil.py
deleted file mode 100644
index 1c76410..0000000
--- a/gn3/utility/webqtlUtil.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-# from base import webqtlConfig
-
-# NL, 07/27/2010. moved from webqtlForm.py
-# Dict of Parents and F1 information, In the order of [F1, Mat, Pat]
-
-"""
-ParInfo = {
- 'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'],
- 'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
- 'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'],
- 'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
- 'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
- 'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
- 'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
- 'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'],
- 'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
- 'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
- 'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
- 'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
- 'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'],
- 'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
- 'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'],
- 'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'],
- 'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'],
- 'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'],
- 'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv']
-}
-
-
-def has_access_to_confidentail_phenotype_trait(privilege, username, authorized_users):
- """function to access to confidential phenotype Traits further implementation needed"""
- access_to_confidential_phenotype_trait = 0
-
- results = (privilege, username, authorized_users)
- return access_to_confidential_phenotype_trait
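The ParInfo table above maps a mapping-group name to four strain names; the header comment describes the order as [F1, Mat, Pat], and the entries appear to carry both F1 directions plus the two parental strains, so treat that reading as an assumption. A small lookup sketch using an entry present in the table:

    # One ParInfo entry copied from the deleted module; remaining groups omitted.
    ParInfo = {
        "BXD": ["B6D2F1", "D2B6F1", "C57BL/6J", "DBA/2J"],
    }

    def parent_strains(group):
        """Return the two parental strain names for a group, or None if unknown."""
        info = ParInfo.get(group)
        return info[-2:] if info else None

    print(parent_strains("BXD"))  # ['C57BL/6J', 'DBA/2J'] -- the BXD parental strains
    print(parent_strains("XYZ"))  # None -- group not in the table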