about summary refs log tree commit diff
path: root/gn3/utility/species.py
diff options
context:
space:
mode:
authorAlexander Kabui2021-03-13 13:04:33 +0300
committerGitHub2021-03-13 13:04:33 +0300
commit236ca06dc4c84baecb7b090b8724db997a5d988a (patch)
tree7fce724ae007dacfe3cf0f7511756b6064026ea3 /gn3/utility/species.py
parent7f9a293929be021eb73aec35defe254351557dcb (diff)
downloadgenenetwork3-236ca06dc4c84baecb7b090b8724db997a5d988a.tar.gz
Correlation api (#2)
* add file for correlation api

* register initial correlation api

* add correlation package

* add function  for getting page data

* delete loading page api

* modify code for correlation

* add tests folder for correlations

* fix error in correlation api

* add tests for correlation

* add tests for  correlation loading data

* add module for correlation computations

* modify api to return json when computing correlation

* add tests for computing correlation

* modify code for loading correlation data

* modify tests for correlation computation

* test loading correlation data using api endpoint

* add tests for asserting error in creating Correlation object

* add do correlation method

* add dummy tests for do_correlation method

* delete unused modules

* add tests for creating trait and dataset

* add integration test for correlation api

* add tests for correlation api

* edit docorrelation method

* modify integration tests for correlation api

* modify tests for show_corr_results

* add create dataset function

* pep8 formatting and fix return value for api

* add more test data for doing correlation

* modify tests for correlation

* pep8 formatting

* add getting formatted corr type method

* import json library

add process samples method for correlation

* fix issue with sample_vals key_error

* create utility module for correlation

* refactor endpoint for /corr_compute

* add test and mocks for compute_correlation function

* add compute correlation function  and pep8 formatting

* move get genofile samplelist to utility module

* refactor code for CorrelationResults object

* pep8 formatting for module

* remove CorrelationResults from Api

* add base package

initialize data_set module with create_dataset,redis and Dataset_Getter

* set dataset_structure if redis is empty

* add callable for DatasetType

* add set_dataset_key method If name is not in the object's dataset dictionary

* add Dataset object and MrnaAssayDataSet

* add db_tools

* add mysql client

* add DatasetGroup object

* add species module

* get mapping method

* import helper functions and new dataset

* add connection to db before request

* add helper functions

* add logger module

* add get_group_samplelists module

* add logger for debug

* add code for adding sample_data

* pep8 formatting

* Add chunks module

* add correlation helper module

* add  get_sample_r_and_p_values method

add get_header_fields function

* add generate corr json method

* add function to retrieve_trait_info

* remove comments and clean up code in show_corr_results

* remove comments and clean up code for data_set module

* pep8 formatting for helper_functions module

* pep8 formatting for trait module

* add module for species

* add Temp Dataset Object

* add Phenotype Dataset

* add Genotype Dataset

* add retrieve sample_sample_data method

* add webqtlUtil module

* add do lit correlation for all traits

* add webqtlCaseData:Settings not ported

* return the_trait for create trait method

* add correlation_test json data

* add tests for show corr results

* add dictfier package

* add tests for show_corr_results

* add assertion for trait_id

* refactor code for show_corr_results

* add test file for compute_corr integration tests

* add scipy dependency

* refactor show_corr_results object

add do lit correlation for trait_list

* add hmac module

* add bunch module:Dictionary using object notation

* add correlation functions

* add rpy2 dependency

* add hmac module

* add MrnaAssayTissueData object and get_symbol_values_pairs function

* add config module

* add get json_results method

* pep8 formatting remove comments

* add config file

* add db package

* refactor correlation computation module

* add do tissue correlation for trait list

* add  do lit correlation for all traits

* add do tissue correlation for all traits

* add do_bicor for bicor method

* raise error for when initial start vars is None

* add support for both form and json data for correlation input

* remove print statement and pep8 formatting

* add default settings file

* add tools module for locate_ignore_error

* refactor code remove comments for trait module

* Add new test data for  computing correlation

* pep8 formatting and use pickle

* refactor function for filtering form/json data

* remove unused imports

* remove mock functions in correlation_utility module

* refactor tests for compute correlation and pep8 formatting

* add tests for show_correlation results

* modify tests for show_corr_results

* add json files for tests

* pep8 formatting for show_corr_results

* Todo:Lint base files

* pylint for integration tests

* add test module for test_corr_helpers

* Add test chunk module

* lint utility package

* refactoring and pep8 formatting

* implement simple metric for correlation

* add  hmac utility file

* add correlation prefix

* fix merge conflict

* minor fixes for endpoints

* import:python-scipy,python-sqlalchemy from guix

* add python mysqlclient

* remove pkg-resources from requirements

* add python-rpy3 from guix

* refactor code for species module

* pep8 formatting and refactor code

* add tests for generating correlation results

* lint correlation functions

* fix failing tests for show_corr_results

* add new correlation test data fix errors

* fix issues related to getting group samplelists

* refactor integration tests for correlation

* add todo  for refactoring_wanted_inputs

* replace custom Attribute setter with SimpleNamespace

* comparison of sample r correlation results btwn genenetwork2 and genenetwork3

* delete AttributeSetter

* test request for /api/correlation/compute_correlation took 18.55710196495056 Seconds

* refactor tests and show_correlation results

* remove unnecessary comments and print statements

* edit requirement txt file

* api/correlation took 114.29814600944519 Seconds for correlation results:20000

 - corr-type:lit

- corr-method:pearson

corr-dataset:corr_dataset:HC_M2_0606_P

* capture SQL_URI and GENENETWORK FILES path

* pep8 formatting edit && remove print statements

* delete filter_input function

update test and data for correlation

* add docstring for required correlation_input

* /api/correlation took 12.905632972717285 Seconds

 *  pearson

 * lit

 *dataset:HX_M2_0606_P

trait_id :1444666

p_range:(lower->-0.60,upper->0.74)

corr_return_results: 100

* update integration and unittest for correlation

* add simple markdown docs for correlation

* update docs

* add tests and catch for invalid correlation_input

* minor fix for api

* Remove jupyter from deps

* guix.scm: Remove duplicate entry

* guix.scm: Add extra action items as comments

* Trim requirements.txt file

Co-authored-by: BonfaceKilz <me@bonfacemunyoki.com>
Diffstat (limited to 'gn3/utility/species.py')
-rw-r--r--gn3/utility/species.py71
1 files changed, 71 insertions, 0 deletions
diff --git a/gn3/utility/species.py b/gn3/utility/species.py
new file mode 100644
index 0000000..0140d41
--- /dev/null
+++ b/gn3/utility/species.py
@@ -0,0 +1,71 @@
+"""module contains species and chromosomes classes"""
+import collections
+
+from flask import g
+
+
+from gn3.utility.logger import getLogger
+# Module-level logger named after this module; used below to record the
+# chromosome query via ``logger.sql`` before it is executed.
+logger = getLogger(__name__)
+
+ # pylint: disable=too-few-public-methods
+ # intentionally disabled check for few public methods
+
+class TheSpecies:
+    """class for Species"""
+
+    def __init__(self, dataset=None, species_name=None):
+        if species_name is not None:
+            self.name = species_name
+            self.chromosomes = Chromosomes(species=self.name)
+        else:
+            self.dataset = dataset
+            self.chromosomes = Chromosomes(dataset=self.dataset)
+
+
+
+class IndChromosome:
+    """class for IndChromosome"""
+
+    def __init__(self, name, length):
+        self.name = name
+        self.length = length
+
+    @property
+    def mb_length(self):
+        """Chromosome length in megabases"""
+        return self.length / 1000000
+
+
+
+
+class Chromosomes:
+    """class for Chromosomes"""
+
+    def __init__(self, dataset=None, species=None):
+        self.chromosomes = collections.OrderedDict()
+        if species is not None:
+            query = """
+                Select
+                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, Species
+                where
+                        Chr_Length.SpeciesId = Species.SpeciesId AND
+                        Species.Name = '%s'
+                Order by OrderId
+                """ % species.capitalize()
+        else:
+            self.dataset = dataset
+
+            query = """
+                Select
+                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
+                where
+                        Chr_Length.SpeciesId = InbredSet.SpeciesId AND
+                        InbredSet.Name = '%s'
+                Order by OrderId
+                """ % self.dataset.group.name
+        logger.sql(query)
+        results = g.db.execute(query).fetchall()
+
+        for item in results:
+            self.chromosomes[item.OrderId] = IndChromosome(
+                item.Name, item.Length)