From 236ca06dc4c84baecb7b090b8724db997a5d988a Mon Sep 17 00:00:00 2001
From: Alexander Kabui
Date: Sat, 13 Mar 2021 13:04:33 +0300
Subject: Correlation api (#2)

* add file for correlation api

* register initial correlation api

* add correlation package

* add function  for getting page data

* delete loading page api

* modify code for correlation

* add tests folder for correlations

* fix error in correlation api

* add tests for correlation

* add tests for  correlation loading data

* add module for correlation computations

* modify api to return json when computing correlation

* add tests for computing correlation

* modify code for loading correlation data

* modify tests for correlation computation

* test loading correlation data using api endpoint

* add tests for asserting error in creating Correlation object

* add do correlation method

* add dummy tests for do_correlation method

* delete unused modules

* add tests for creating trait and dataset

* add integration test for correlation api

* add tests for correlation api

* edit docorrelation method

* modify integration tests for correlation api

* modify tests for show_corr_results

* add create dataset function

* pep8 formatting and fix return value for api

* add more test data for doing correlation

* modify tests for correlation

* pep8 formatting

* add getting formatted corr type method

* import json library

add process samples method for correlation

* fix issue with sample_vals key_error

* create utility module for correlation

* refactor endpoint for /corr_compute

* add test and mocks for compute_correlation function

* add compute correlation function  and pep8 formatting

* move get genofile samplelist to utility module

* refactor code for CorrelationResults object

* pep8 formatting for module

* remove CorrelationResults from Api

* add base package

initialize data_set module with create_dataset,redis and Dataset_Getter

* set dataset_structure if redis is empty

* add callable for DatsetType

* add set_dataset_key method If name is not in the object's dataset dictionary

* add Dataset object and MrnaAssayDataSet

* add db_tools

* add mysql client

* add DatasetGroup object

* add species module

* get mapping method

* import helper functions and new dataset

* add connection to db before request

* add helper functions

* add logger module

* add get_group_samplelists module

* add logger for debug

* add code for adding sample_data

* pep8 formatting

* Add chunks module

* add correlation helper module

* add  get_sample_r_and_p_values method

add get_header_fields function

* add generate corr json method

* add function to retrieve_trait_info

* remove comments and clean up code in show_corr_results

* remove comments and clean up code for data_set module

* pep8 formatting for helper_functions module

* pep8 formatting for trait module

* add module for species

* add Temp Dataset Object

* add Phenotype Dataset

* add Genotype Dataset

* add rettrieve sample_sample_data method

* add webqtlUtil module

* add do lit correlation for all traits

* add webqtlCaseData:Settings not ported

* return the_trait for create trait method

* add correlation_test json data

* add tests for show corr results

* add dictfier package

* add tests for show_corr_results

* add assertion for trait_id

* refactor code for show_corr_results

* add test file for compute_corr integration tests

* add scipy dependency

* refactor show_corr_results object

add do lit correlation for trait_list

* add hmac module

* add bunch module:Dictionary using object notation

* add correlation functions

* add rpy2 dependency

* add hmac module

* add MrnaAssayTissueData object and get_symbol_values_pairs function

* add config module

* add get json_results method

* pep8 formatting remove comments

* add config file

* add db package

* refactor correlation computation module

* add do tissue correlation for trait list

* add  do lit correlation for all traits

* add do tissue correlation for all traits

* add do_bicor for bicor method

* raise error when initial start vars is None

* add support for both form and json data when for correlation input

* remove print statement and pep8 formatting

* add default settings file

* add tools module for locate_ignore_error

* refactor code remove comments for trait module

* Add new test data for  computing correlation

* pep8 formatting and use pickle

* refactor function for filtering form/json data

* remove unused imports

* remove mock functions in correlation_utility module

* refactor tests for compute correlation and pep8 formatting

* add tests for show_correlation results

* modify tests for show_corr_results

* add json files for tests

* pep8 formatting for show_corr_results

* Todo:Lint base files

* pylint for integration tests

* add test module for test_corr_helpers

* Add test chunk module

* lint utility package

* refactoring and pep8 formatting

* implement simple metric for correlation

* add  hmac utility file

* add correlation prefix

* fix merge conflict

* minor fixes for endpoints

* import:python-scipy,python-sqlalchemy from guix

* add python mysqlclient

* remove pkg-resources from requirements

* add python-rpy3 from guix

* refactor code for species module

* pep8 formatting and refactor code

* add tests for generating correlation results

* lint correlation functions

* fix failing tests for show_corr_results

* add new correlation test data fix errors

* fix issues related to getting group samplelists

* refactor integration tests for correlation

* add todo  for refactoring_wanted_inputs

* replace custom Attribute setter with SimpleNamespace

* comparison of sample r correlation results between genenetwork2 and genenetwork3

* delete AttributeSetter

* test request for /api/correlation/compute_correlation took 18.55710196495056 Seconds

* refactor tests and show_correlation results

* remove unnecessary comments and print statements

* edit requirement txt file

* api/correlation took 114.29814600944519 Seconds for correlation results:20000

 - corr-type:lit

- corr-method:pearson

corr-dataset:corr_dataset:HC_M2_0606_P

* capture SQL_URI and GENENETWORK FILES path

* pep8 formatting edit && remove print statements

* delete filter_input function

update test and data for correlation

* add docstring for required correlation_input

* /api/correlation took 12.905632972717285 Seconds

 *  pearson

 * lit

 *dataset:HX_M2_0606_P

trait_id :1444666

p_range:(lower->-0.60,upper->0.74)

corr_return_results: 100

* update integration and unittest for correlation

* add simple markdown docs for correlation

* update docs

* add tests and catch for invalid correlation_input

* minor fix for api

* Remove jupyter from deps

* guix.scm: Remove duplicate entry

* guix.scm: Add extra action items as comments

* Trim requirements.txt file

Co-authored-by: BonfaceKilz <me@bonfacemunyoki.com>
---
 gn3/base/mrna_assay_tissue_data.py | 94 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 gn3/base/mrna_assay_tissue_data.py

(limited to 'gn3/base/mrna_assay_tissue_data.py')

diff --git a/gn3/base/mrna_assay_tissue_data.py b/gn3/base/mrna_assay_tissue_data.py
new file mode 100644
index 0000000..0f51ade
--- /dev/null
+++ b/gn3/base/mrna_assay_tissue_data.py
@@ -0,0 +1,94 @@
+
+# pylint: disable-all
+import collections
+
+from flask import g
+
+from gn3.utility.db_tools import create_in_clause
+from gn3.utility.db_tools import escape
+from gn3.utility.bunch import Bunch
+
+
+# from utility.logger import getLogger
+# logger = getLogger(__name__ )
+
+class MrnaAssayTissueData(object):
+    """Tissue expression metadata per gene symbol; all keys are lower case."""
+
+    def __init__(self, gene_symbols=None):
+        """Load the highest-Mean TissueProbeSetXRef row for each gene symbol.
+
+        gene_symbols: list of symbol strings (None entries are skipped when
+        matching results); None is treated as an empty list.
+        Runs one query through flask's g.db and fills self.data, a
+        defaultdict mapping lower-case symbol -> Bunch of row fields.
+        """
+        # Normalise once: len()/iteration on a None argument used to raise.
+        gene_symbols = [] if gene_symbols is None else gene_symbols
+        self.gene_symbols = gene_symbols
+        self.data = collections.defaultdict(Bunch)
+
+        query = '''select t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, t.description, t.Probe_Target_Description
+                        from (
+                        select Symbol, max(Mean) as maxmean
+                        from TissueProbeSetXRef
+                        where TissueProbeSetFreezeId=1 and '''
+
+        # The inner join is needed to get, per symbol group, the distinct
+        # record with the highest mean value. The original author notes the
+        # table is small enough for the join to perform acceptably.
+        if len(gene_symbols) == 0:
+            query += '''Symbol!='' and Symbol Is Not Null group by Symbol)
+                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+                and t.Mean = x.maxmean;
+                    '''
+        else:
+            in_clause = create_in_clause(gene_symbols)
+
+            # ZS: This was in the query, not sure why: http://docs.python.org/2/library/string.html?highlight=lower#string.lower
+            query += ''' Symbol in {} group by Symbol)
+                as x inner join TissueProbeSetXRef as t on t.Symbol = x.Symbol
+                and t.Mean = x.maxmean;
+                    '''.format(in_clause)
+
+        results = g.db.execute(query).fetchall()
+
+        # A set keeps the per-row membership test O(1).
+        lower_symbols = {
+            gene_symbol.lower() for gene_symbol in gene_symbols
+            if gene_symbol is not None}
+
+        # NOTE(review): with no gene_symbols every fetched row is dropped
+        # below, so self.data stays empty -- confirm that is intended.
+        for result in results:
+            symbol = result[0].lower()
+            if symbol not in lower_symbols:
+                continue
+            entry = self.data[symbol]
+            entry.gene_id = result.GeneId
+            entry.data_id = result.DataId
+            entry.chr = result.Chr
+            entry.mb = result.Mb
+            entry.description = result.description
+            entry.probe_target_description = result.Probe_Target_Description
+
+    def get_symbol_values_pairs(self):
+        """Return {lower-case symbol: [tissue expression values]}.
+
+        Values are read from TissueProbeSetData for the data_ids held in
+        self.data; when self.data is empty no query is run and {} results.
+        """
+        id_list = [self.data[symbol].data_id for symbol in self.data]
+        symbol_values_dict = {}
+
+        if id_list:
+            query = """SELECT TissueProbeSetXRef.Symbol, TissueProbeSetData.value
+                       FROM TissueProbeSetXRef, TissueProbeSetData
+                       WHERE TissueProbeSetData.Id IN {} and
+                             TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(create_in_clause(id_list))
+
+            for result in g.db.execute(query).fetchall():
+                symbol_values_dict.setdefault(
+                    result.Symbol.lower(), []).append(result.value)
+
+        return symbol_values_dict
-- 
cgit v1.2.3