path: root/gn3/utility
author      Alexander Kabui    2021-03-16 11:38:13 +0300
committer   GitHub             2021-03-16 11:38:13 +0300
commit      56ce88ad31dec3cece63e9370ca4e4c02139753b (patch)
tree        766504dfaca75a14cc91fc3d88c41d1e775d415f /gn3/utility
parent      43d1bb7f6cd2b5890d5b3eb7c357caafda25a35c (diff)
download    genenetwork3-56ce88ad31dec3cece63e9370ca4e4c02139753b.tar.gz
delete unwanted correlation stuff (#5)
* delete unwanted correlation stuff
* Refactor/clean up correlations (#4)
  * initial commit for Refactor/clean-up-correlation
  * add python scipy dependency
  * initial commit for sample correlation
  * initial commit for sample correlation endpoint
  * initial commit for integration and unit tests
  * initial commit for registering correlation blueprint
  * add and modify unit tests and integration tests for correlation
  * add compute_all_sample_corr method for correlation
  * add scipy to requirements.txt file
  * add tissue correlation for trait list
  * add unit tests for tissue correlation
  * add lit correlation for trait list
  * add unit tests for lit correlation for trait list
  * modify lit correlation for trait list
  * add unit tests for lit correlation for trait list
  * add correlation method in dynamic url
  * add file format for expected structure input while doing sample correlation
  * modify input data structure -> add trait id
  * update tests for sample r correlation
  * add compute all lit correlation method
  * add endpoint for computing lit_corr
  * add unit and integration tests for computing lit corr
  * add /api/correlation/tissue_corr/{corr_method} endpoint for tissue correlation
  * add unit and integration tests for tissue correlation

  Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>

* update guix scm file
* fix pylint error for correlations api

Co-authored-by: BonfaceKilz <bonfacemunyoki@gmail.com>
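The message above names the /api/correlation/tissue_corr/{corr_method} endpoint added by the refactor. Below is a minimal, hedged sketch of how a client might call it; the host/port and the exact JSON payload shape (primary tissue values plus target tissue values) are assumptions for illustration, not taken from this commit.

    # Hedged client sketch for the tissue-correlation endpoint named in the commit message.
    import requests  # third-party; pip install requests

    GN3_BASE = "http://localhost:8080"   # assumed local gn3 instance
    corr_method = "pearson"              # the {corr_method} URL parameter

    payload = {                          # assumed payload structure, illustrative only
        "primary_tissue": {"trait_id": "1455696_at", "tissue_values": [7.2, 7.9, 8.1]},
        "target_tissues": {"1442332_at": [6.8, 7.1, 7.6]},
    }

    response = requests.post(
        f"{GN3_BASE}/api/correlation/tissue_corr/{corr_method}", json=payload)
    print(response.status_code, response.json())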
Diffstat (limited to 'gn3/utility')
-rw-r--r--  gn3/utility/__init__.py                 0
-rw-r--r--  gn3/utility/bunch.py                   16
-rw-r--r--  gn3/utility/chunks.py                  32
-rw-r--r--  gn3/utility/corr_result_helpers.py     45
-rw-r--r--  gn3/utility/db_tools.py                19
-rw-r--r--  gn3/utility/get_group_samplelists.py   47
-rw-r--r--  gn3/utility/helper_functions.py        24
-rw-r--r--  gn3/utility/hmac.py                    50
-rw-r--r--  gn3/utility/logger.py                 163
-rw-r--r--  gn3/utility/species.py                 71
-rw-r--r--  gn3/utility/tools.py                   37
-rw-r--r--  gn3/utility/webqtlUtil.py              66
12 files changed, 0 insertions, 570 deletions
diff --git a/gn3/utility/__init__.py b/gn3/utility/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/gn3/utility/__init__.py
+++ /dev/null
diff --git a/gn3/utility/bunch.py b/gn3/utility/bunch.py
deleted file mode 100644
index c1fd907..0000000
--- a/gn3/utility/bunch.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""module contains Bunch class a dictionary like with object notation """
-
-from pprint import pformat as pf
-
-
-class Bunch:
- """Like a dictionary but using object notation"""
-
- def __init__(self, **kw):
- self.__dict__ = kw
-
- def __repr__(self):
- return pf(self.__dict__)
-
- def __str__(self):
- return self.__class__.__name__
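For reference, the deleted Bunch class above is a thin attribute-notation wrapper over a dict. A self-contained usage sketch (the field names are illustrative, not from the codebase):

    from pprint import pformat as pf

    class Bunch:
        """Like a dictionary but using object notation (copy of the deleted helper)."""
        def __init__(self, **kw):
            self.__dict__ = kw

        def __repr__(self):
            return pf(self.__dict__)

    trait = Bunch(name="1427571_at", mean=8.42)  # illustrative fields only
    print(trait.name)    # attribute access instead of trait["name"]
    print(repr(trait))   # pretty-printed underlying dict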
diff --git a/gn3/utility/chunks.py b/gn3/utility/chunks.py
deleted file mode 100644
index fa27a39..0000000
--- a/gn3/utility/chunks.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""module for chunks functions"""
-
-import math
-
-
-def divide_into_chunks(the_list, number_chunks):
- """Divides a list into approximately number_chunks smaller lists
-
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 3)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 4)
- [[1, 2, 7], [3, 22, 8], [5, 22, 333]]
- >>> divide_into_chunks([1, 2, 7, 3, 22, 8, 5, 22, 333], 5)
- [[1, 2], [7, 3], [22, 8], [5, 22], [333]]
- >>>
-
- """
- length = len(the_list)
-
- if length == 0:
- return [[]]
-
- if length <= number_chunks:
- number_chunks = length
-
- chunksize = int(math.ceil(length / number_chunks))
-
- chunks = []
- for counter in range(0, length, chunksize):
- chunks.append(the_list[counter:counter+chunksize])
-
- return chunks
diff --git a/gn3/utility/corr_result_helpers.py b/gn3/utility/corr_result_helpers.py
deleted file mode 100644
index a68308e..0000000
--- a/gn3/utility/corr_result_helpers.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""module contains helper function for corr results"""
-
-#pylint:disable=C0103
-# the above disables snake_case naming checks for variables; todo: refactor
-def normalize_values(a_values, b_values):
- """
- Trim two lists of values to contain only the values they both share
-
- Given two lists of sample values, trim each list so that it contains
- only the samples that contain a value in both lists. Also returns
- the number of such samples.
-
- >>> normalize_values([2.3, None, None, 3.2, 4.1, 5], [3.4, 7.2, 1.3, None, 6.2, 4.1])
- ([2.3, 4.1, 5], [3.4, 6.2, 4.1], 3)
-
- """
- a_new = []
- b_new = []
- for a, b in zip(a_values, b_values):
- if a is not None and b is not None:
- a_new.append(a)
- b_new.append(b)
- return a_new, b_new, len(a_new)
-
-
-def common_keys(a_samples, b_samples):
- """
- >>> a = dict(BXD1 = 9.113, BXD2 = 9.825, BXD14 = 8.985, BXD15 = 9.300)
- >>> b = dict(BXD1 = 9.723, BXD3 = 9.825, BXD14 = 9.124, BXD16 = 9.300)
- >>> sorted(common_keys(a, b))
- ['BXD1', 'BXD14']
- """
- return set(a_samples.keys()).intersection(set(b_samples.keys()))
-
-
-def normalize_values_with_samples(a_samples, b_samples):
- """function to normalize values with samples"""
- common_samples = common_keys(a_samples, b_samples)
- a_new = {}
- b_new = {}
- for sample in common_samples:
- a_new[sample] = a_samples[sample]
- b_new[sample] = b_samples[sample]
-
- return a_new, b_new, len(a_new)
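A short usage sketch of the helpers above, reusing the strain data from the common_keys doctest; the only assumption is that both dicts map strain names to expression values.

    a = {"BXD1": 9.113, "BXD2": 9.825, "BXD14": 8.985, "BXD15": 9.300}
    b = {"BXD1": 9.723, "BXD3": 9.825, "BXD14": 9.124, "BXD16": 9.300}

    # Simplified, behaviorally equivalent copy of the deleted normalize_values_with_samples()
    def normalize_values_with_samples(a_samples, b_samples):
        common = set(a_samples) & set(b_samples)
        a_new = {sample: a_samples[sample] for sample in common}
        b_new = {sample: b_samples[sample] for sample in common}
        return a_new, b_new, len(a_new)

    a_new, b_new, count = normalize_values_with_samples(a, b)
    print(sorted(a_new))  # ['BXD1', 'BXD14'] -- only the shared strains survive
    print(count)          # 2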
diff --git a/gn3/utility/db_tools.py b/gn3/utility/db_tools.py
deleted file mode 100644
index 446acda..0000000
--- a/gn3/utility/db_tools.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""module for db_tools"""
-from MySQLdb import escape_string as escape_
-
-
-def create_in_clause(items):
- """Create an in clause for mysql"""
- in_clause = ', '.join("'{}'".format(x) for x in mescape(*items))
- in_clause = '( {} )'.format(in_clause)
- return in_clause
-
-
-def mescape(*items):
- """Multiple escape"""
- return [escape_(str(item)).decode('utf8') for item in items]
-
-
-def escape(string_):
- """escape function"""
- return escape_(string_).decode('utf8')
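A standalone sketch of the IN-clause construction above; it assumes the mysqlclient package (which provides MySQLdb.escape_string, exactly as the deleted module imports it) is installed.

    from MySQLdb import escape_string  # provided by the mysqlclient package

    def mescape(*items):
        """Escape several values at once (mirrors the deleted helper)."""
        return [escape_string(str(item)).decode("utf8") for item in items]

    def create_in_clause(items):
        """Build a quoted SQL IN (...) clause from a list of values."""
        return "( {} )".format(", ".join("'{}'".format(x) for x in mescape(*items)))

    print(create_in_clause(["BXD1", "BXD2", "BXD14"]))
    # -> ( 'BXD1', 'BXD2', 'BXD14' )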
diff --git a/gn3/utility/get_group_samplelists.py b/gn3/utility/get_group_samplelists.py
deleted file mode 100644
index 8fb322a..0000000
--- a/gn3/utility/get_group_samplelists.py
+++ /dev/null
@@ -1,47 +0,0 @@
-
-"""module for group samplelist"""
-import os
-
-#todo close the files after opening
-def get_samplelist(file_type, geno_file):
- """get samplelist function"""
- if file_type == "geno":
- return get_samplelist_from_geno(geno_file)
- elif file_type == "plink":
- return get_samplelist_from_plink(geno_file)
-
-def get_samplelist_from_geno(genofilename):
- if os.path.isfile(genofilename + '.gz'):
- genofilename += '.gz'
- genofile = gzip.open(genofilename)
- else:
- genofile = open(genofilename)
-
- for line in genofile:
- line = line.strip()
- if not line:
- continue
- if line.startswith(("#", "@")):
- continue
- break
-
- headers = line.split("\t")
-
- if headers[3] == "Mb":
- samplelist = headers[4:]
- else:
- samplelist = headers[3:]
- return samplelist
-
-
-
-def get_samplelist_from_plink(genofilename):
- """get samplelist from plink"""
- genofile = open(genofilename)
-
- samplelist = []
- for line in genofile:
- line = line.split(" ")
- samplelist.append(line[1])
-
- return samplelist
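Note that the deleted get_samplelist_from_geno above calls gzip.open without importing gzip, and neither reader closes its file handle. A corrected, self-contained sketch of the same .geno header parsing (missing import added, text mode, context manager) might look like this:

    import gzip
    import os

    def get_samplelist_from_geno(genofilename):
        """Return the sample names from the header row of a .geno file."""
        if os.path.isfile(genofilename + ".gz"):
            genofile = gzip.open(genofilename + ".gz", "rt")  # text mode, unlike the deleted code
        else:
            genofile = open(genofilename)

        with genofile:
            for line in genofile:
                line = line.strip()
                if not line or line.startswith(("#", "@")):
                    continue
                headers = line.split("\t")
                break
            else:
                return []  # no header row found

        # Samples start after the Chr/Locus/cM(/Mb) columns.
        return headers[4:] if headers[3] == "Mb" else headers[3:]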
diff --git a/gn3/utility/helper_functions.py b/gn3/utility/helper_functions.py
deleted file mode 100644
index f5a8b80..0000000
--- a/gn3/utility/helper_functions.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""module contains general helper functions """
-from gn3.base.data_set import create_dataset
-from gn3.base.trait import create_trait
-from gn3.base.species import TheSpecies
-
-
-def get_species_dataset_trait(self, start_vars):
- """function to get species dataset and trait"""
- if "temp_trait" in list(start_vars.keys()):
- if start_vars['temp_trait'] == "True":
- self.dataset = create_dataset(
- dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group'])
-
- else:
- self.dataset = create_dataset(start_vars['dataset'])
-
- else:
- self.dataset = create_dataset(start_vars['dataset'])
- self.species = TheSpecies(dataset=self.dataset)
-
- self.this_trait = create_trait(dataset=self.dataset,
- name=start_vars['trait_id'],
- cellid=None,
- get_qtl_info=True)
diff --git a/gn3/utility/hmac.py b/gn3/utility/hmac.py
deleted file mode 100644
index eb39e59..0000000
--- a/gn3/utility/hmac.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""module for hmac """
-
-# pylint: disable-all
-import hmac
-import hashlib
-
-# xtodo work on this file
-
-# from main import app
-
-
-def hmac_creation(stringy):
- """Helper function to create the actual hmac"""
-
- # secret = app.config['SECRET_HMAC_CODE']
- # put in config
- secret = "my secret"
- hmaced = hmac.new(bytearray(secret, "latin-1"),
- bytearray(stringy, "utf-8"),
- hashlib.sha1)
- hm = hmaced.hexdigest()
- # ZS: Leaving the below comment here to ask Pjotr about
- # "Conventional wisdom is that you don't lose much in terms of security if you throw away up to half of the output."
- # http://www.w3.org/QA/2009/07/hmac_truncation_in_xml_signatu.html
- hm = hm[:20]
- return hm
-
-
-def data_hmac(stringy):
- """Takes arbitrary data string and appends :hmac so we know data hasn't been tampered with"""
- return stringy + ":" + hmac_creation(stringy)
-
-
-def url_for_hmac(endpoint, **values):
- """Like url_for but adds an hmac at the end to insure the url hasn't been tampered with"""
-
- url = url_for(endpoint, **values)
-
- hm = hmac_creation(url)
- if '?' in url:
- combiner = "&"
- else:
- combiner = "?"
- return url + combiner + "hm=" + hm
-
-
-
-# todo
-# app.jinja_env.globals.update(url_for_hmac=url_for_hmac,
-# data_hmac=data_hmac)
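A self-contained sketch of how the truncated-HMAC tagging above can be verified on the receiving side; the secret is the same placeholder the deleted module hard-codes, and real deployments would read it from configuration.

    import hashlib
    import hmac

    SECRET = "my secret"  # placeholder, as in the deleted module

    def hmac_creation(stringy):
        """SHA1 HMAC, truncated to 20 hex characters (mirrors the deleted helper)."""
        return hmac.new(bytearray(SECRET, "latin-1"),
                        bytearray(stringy, "utf-8"),
                        hashlib.sha1).hexdigest()[:20]

    def data_hmac(stringy):
        """Append ':<hmac>' so tampering can be detected later."""
        return stringy + ":" + hmac_creation(stringy)

    def verify_data_hmac(tagged):
        """Split off the tag and recompute it, comparing in constant time."""
        data, _, tag = tagged.rpartition(":")
        return hmac.compare_digest(tag, hmac_creation(data))

    tagged = data_hmac("trait_id=1427571_at")  # illustrative data string
    print(tagged)
    print(verify_data_hmac(tagged))            # True
    print(verify_data_hmac(tagged + "x"))      # False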
diff --git a/gn3/utility/logger.py b/gn3/utility/logger.py
deleted file mode 100644
index 4245a02..0000000
--- a/gn3/utility/logger.py
+++ /dev/null
@@ -1,163 +0,0 @@
-"""
-# GeneNetwork logger
-#
-# The standard python logging module is very good. This logger adds a
-# few facilities on top of that. Main one being that it picks up
-# settings for log levels (global and by module) and (potentially)
-# offers some fine grained log levels for the standard levels.
-#
-# All behaviour is defined here. Global settings (defined in
-# default_settings.py).
-#
-# To use logging and settings put this at the top of a module:
-#
-# import utility.logger
-# logger = utility.logger.getLogger(__name__ )
-#
-# To override global behaviour set the LOG_LEVEL in default_settings.py
-# or use an environment variable, e.g.
-#
-# env LOG_LEVEL=INFO ./bin/genenetwork2
-#
-# To override log level for a module replace that with, for example,
-#
-# import logging
-# import utility.logger
-# logger = utility.logger.getLogger(__name__,level=logging.DEBUG)
-#
-# We'll add more overrides soon.
-"""
-# todo incomplete file
-
-# pylint: disable-all
-import logging
-import datetime
-from inspect import isfunction
-from inspect import stack
-
-from pprint import pformat as pf
-
-
-# from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL
-
-LOG_SQL = True
-
-
-class GNLogger:
- """A logger class with some additional functionality, such as
- multiple parameter logging, SQL logging, timing, colors, and lazy
- functions.
-
- """
-
- def __init__(self, name):
- self.logger = logging.getLogger(name)
-
- def setLevel(self, value):
- """Set the undelying log level"""
- self.logger.setLevel(value)
-
- def debug(self, *args):
- """Call logging.debug for multiple args. Use (lazy) debugf and
-level=num to filter on LOG_LEVEL_DEBUG.
-
- """
- self.collect(self.logger.debug, *args)
-
- def debug20(self, *args):
- """Call logging.debug for multiple args. Use level=num to filter on
-LOG_LEVEL_DEBUG (NYI).
-
- """
- if level <= LOG_LEVEL_DEBUG:
- if self.logger.getEffectiveLevel() < 20:
- self.collect(self.logger.debug, *args)
-
- def info(self, *args):
- """Call logging.info for multiple args"""
- self.collect(self.logger.info, *args)
-
- def warning(self, *args):
- """Call logging.warning for multiple args"""
- self.collect(self.logger.warning, *args)
- # self.logger.warning(self.collect(*args))
-
- def error(self, *args):
- """Call logging.error for multiple args"""
- now = datetime.datetime.utcnow()
- time_str = now.strftime('%H:%M:%S UTC %Y%m%d')
- l = [time_str]+list(args)
- self.collect(self.logger.error, *l)
-
- def infof(self, *args):
- """Call logging.info for multiple args lazily"""
- # only evaluate function when logging
- if self.logger.getEffectiveLevel() < 30:
- self.collectf(self.logger.debug, *args)
-
- def debugf(self, level=0, *args):
- """Call logging.debug for multiple args lazily and handle
- LOG_LEVEL_DEBUG correctly
-
- """
- # only evaluate function when logging
- if level <= LOG_LEVEL_DEBUG:
- if self.logger.getEffectiveLevel() < 20:
- self.collectf(self.logger.debug, *args)
-
- def sql(self, sqlcommand, fun=None):
- """Log SQL command, optionally invoking a timed fun"""
- if LOG_SQL:
- caller = stack()[1][3]
- if caller in ['fetchone', 'fetch1', 'fetchall']:
- caller = stack()[2][3]
- self.info(caller, sqlcommand)
- if fun:
- result = fun(sqlcommand)
- if LOG_SQL:
- self.info(result)
- return result
-
- def collect(self, fun, *args):
- """Collect arguments and use fun to output"""
- out = "."+stack()[2][3]
- for a in args:
- if len(out) > 1:
- out += ": "
- if isinstance(a, str):
- out = out + a
- else:
- out = out + pf(a, width=160)
- fun(out)
-
- def collectf(self, fun, *args):
- """Collect arguments and use fun to output one by one"""
- out = "."+stack()[2][3]
- for a in args:
- if len(out) > 1:
- out += ": "
- if isfunction(a):
- out += a()
- else:
- if isinstance(a, str):
- out = out + a
- else:
- out = out + pf(a, width=160)
- fun(out)
-
-# Get the module logger. You can override log levels at the
-# module level
-
-
-def getLogger(name, level=None):
- """method to get logger"""
- gnlogger = GNLogger(name)
- _logger = gnlogger.logger
-
- # if level:
- # logger.setLevel(level)
- # else:
- # logger.setLevel(LOG_LEVEL)
-
- # logger.info("Log level of "+name+" set to "+logging.getLevelName(logger.getEffectiveLevel()))
- return gnlogger
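The module docstring above shows the intended import pattern. A minimal usage sketch, assuming the module were still importable under its pre-deletion path (gn3.utility.logger); note that the deleted getLogger never applies a level, so the caller still has to configure the standard logging machinery for anything to be emitted.

    import logging
    logging.basicConfig(level=logging.INFO)    # GNLogger does not install handlers itself

    from gn3.utility.logger import getLogger   # pre-deletion import path (assumed)

    logger = getLogger(__name__)
    logger.info("computing sample correlation", {"method": "pearson"})  # multiple args are collected
    logger.sql("SELECT 1")                     # logged because LOG_SQL is hard-coded to True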
diff --git a/gn3/utility/species.py b/gn3/utility/species.py
deleted file mode 100644
index 0140d41..0000000
--- a/gn3/utility/species.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""module contains species and chromosomes classes"""
-import collections
-
-from flask import g
-
-
-from gn3.utility.logger import getLogger
-logger = getLogger(__name__)
-
- # pylint: disable=too-few-public-methods
- # intentionally disabled check for few public methods
-
-class TheSpecies:
- """class for Species"""
-
- def __init__(self, dataset=None, species_name=None):
- if species_name is not None:
- self.name = species_name
- self.chromosomes = Chromosomes(species=self.name)
- else:
- self.dataset = dataset
- self.chromosomes = Chromosomes(dataset=self.dataset)
-
-
-
-class IndChromosome:
- """class for IndChromosome"""
-
- def __init__(self, name, length):
- self.name = name
- self.length = length
-
- @property
- def mb_length(self):
- """Chromosome length in megabases"""
- return self.length / 1000000
-
-
-
-
-class Chromosomes:
- """class for Chromosomes"""
-
- def __init__(self, dataset=None, species=None):
- self.chromosomes = collections.OrderedDict()
- if species is not None:
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
- where
- Chr_Length.SpeciesId = Species.SpeciesId AND
- Species.Name = '%s'
- Order by OrderId
- """ % species.capitalize()
- else:
- self.dataset = dataset
-
- query = """
- Select
- Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
- where
- Chr_Length.SpeciesId = InbredSet.SpeciesId AND
- InbredSet.Name = '%s'
- Order by OrderId
- """ % self.dataset.group.name
- logger.sql(query)
- results = g.db.execute(query).fetchall()
-
- for item in results:
- self.chromosomes[item.OrderId] = IndChromosome(
- item.Name, item.Length)
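A self-contained sketch of the data structure Chromosomes builds above, using made-up rows in place of the Chr_Length query results so no database connection is needed; the chromosome lengths are illustrative only.

    import collections
    from collections import namedtuple

    Row = namedtuple("Row", ["Name", "OrderId", "Length"])

    class IndChromosome:
        def __init__(self, name, length):
            self.name = name
            self.length = length

        @property
        def mb_length(self):
            """Chromosome length in megabases."""
            return self.length / 1000000

    results = [Row("1", 1, 195471971), Row("2", 2, 182113224)]  # stand-ins for g.db rows

    chromosomes = collections.OrderedDict()
    for item in results:
        chromosomes[item.OrderId] = IndChromosome(item.Name, item.Length)

    print(chromosomes[1].name, round(chromosomes[1].mb_length, 2), "Mb")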
diff --git a/gn3/utility/tools.py b/gn3/utility/tools.py
deleted file mode 100644
index 85df9f6..0000000
--- a/gn3/utility/tools.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""module contains general tools forgenenetwork"""
-
-import os
-
-from default_settings import GENENETWORK_FILES
-
-
-def valid_file(file_name):
- """check if file is valid"""
- if os.path.isfile(file_name):
- return file_name
- return None
-
-
-def valid_path(dir_name):
- """check if path is valid"""
- if os.path.isdir(dir_name):
- return dir_name
- return None
-
-
-def locate_ignore_error(name, subdir=None):
- """
- Locate a static flat file in the GENENETWORK_FILES environment.
-
- This function does not throw an error when the file is not found
- but returns None.
- """
- base = GENENETWORK_FILES
- if subdir:
- base = base+"/"+subdir
- if valid_path(base):
- lookfor = base + "/" + name
- if valid_file(lookfor):
- return lookfor
-
- return None
diff --git a/gn3/utility/webqtlUtil.py b/gn3/utility/webqtlUtil.py
deleted file mode 100644
index 1c76410..0000000
--- a/gn3/utility/webqtlUtil.py
+++ /dev/null
@@ -1,66 +0,0 @@
-"""
-# Copyright (C) University of Tennessee Health Science Center, Memphis, TN.
-#
-# This program is free software: you can redistribute it and/or modify it
-# under the terms of the GNU Affero General Public License
-# as published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU Affero General Public License for more details.
-#
-# This program is available from Source Forge: at GeneNetwork Project
-# (sourceforge.net/projects/genenetwork/).
-#
-# Contact Drs. Robert W. Williams and Xiaodong Zhou (2010)
-# at rwilliams@uthsc.edu and xzhou15@uthsc.edu
-#
-#
-#
-# This module is used by GeneNetwork project (www.genenetwork.org)
-#
-# Created by GeneNetwork Core Team 2010/08/10
-#
-# Last updated by GeneNetwork Core Team 2010/10/20
-
-# from base import webqtlConfig
-
-# NL, 07/27/2010. moved from webqtlForm.py
-# Dict of Parents and F1 information, In the order of [F1, Mat, Pat]
-
-"""
-ParInfo = {
- 'BXH': ['BHF1', 'HBF1', 'C57BL/6J', 'C3H/HeJ'],
- 'AKXD': ['AKF1', 'KAF1', 'AKR/J', 'DBA/2J'],
- 'BXD': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'C57BL-6JxC57BL-6NJF2': ['', '', 'C57BL/6J', 'C57BL/6NJ'],
- 'BXD300': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'B6BTBRF2': ['B6BTBRF1', 'BTBRB6F1', 'C57BL/6J', 'BTBRT<+>tf/J'],
- 'BHHBF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
- 'BHF2': ['B6HF2', 'HB6F2', 'C57BL/6J', 'C3H/HeJ'],
- 'B6D2F2': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'BDF2-1999': ['B6D2F2', 'D2B6F2', 'C57BL/6J', 'DBA/2J'],
- 'BDF2-2005': ['B6D2F1', 'D2B6F1', 'C57BL/6J', 'DBA/2J'],
- 'CTB6F2': ['CTB6F2', 'B6CTF2', 'C57BL/6J', 'Castaneous'],
- 'CXB': ['CBF1', 'BCF1', 'C57BL/6ByJ', 'BALB/cByJ'],
- 'AXBXA': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
- 'AXB': ['ABF1', 'BAF1', 'C57BL/6J', 'A/J'],
- 'BXA': ['BAF1', 'ABF1', 'C57BL/6J', 'A/J'],
- 'LXS': ['LSF1', 'SLF1', 'ISS', 'ILS'],
- 'HXBBXH': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv'],
- 'BayXSha': ['BayXShaF1', 'ShaXBayF1', 'Bay-0', 'Shahdara'],
- 'ColXBur': ['ColXBurF1', 'BurXColF1', 'Col-0', 'Bur-0'],
- 'ColXCvi': ['ColXCviF1', 'CviXColF1', 'Col-0', 'Cvi'],
- 'SXM': ['SMF1', 'MSF1', 'Steptoe', 'Morex'],
- 'HRDP': ['SHR_BNF1', 'BN_SHRF1', 'BN-Lx/Cub', 'SHR/OlaIpcv']
-}
-
-
-def has_access_to_confidentail_phenotype_trait(privilege, username, authorized_users):
- """function to access to confidential phenotype Traits further implementation needed"""
- access_to_confidential_phenotype_trait = 0
-
- results = (privilege, username, authorized_users)
- return access_to_confidential_phenotype_trait
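The ParInfo table above maps a mapping-group name to four strain names; the header comment describes the order as [F1, Mat, Pat], and the entries appear to carry both F1 directions plus the two parental strains, so treat that reading as an assumption. A small lookup sketch using an entry present in the table:

    # One ParInfo entry copied from the deleted module; remaining groups omitted.
    ParInfo = {
        "BXD": ["B6D2F1", "D2B6F1", "C57BL/6J", "DBA/2J"],
    }

    def parent_strains(group):
        """Return the two parental strain names for a group, or None if unknown."""
        info = ParInfo.get(group)
        return info[-2:] if info else None

    print(parent_strains("BXD"))  # ['C57BL/6J', 'DBA/2J'] -- the BXD parental strains
    print(parent_strains("XYZ"))  # None -- group not in the table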