From f914dc21577253f293b50b890ea0ac4bd2fd5d1b Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 21 Feb 2022 09:07:31 +0300 Subject: Test partial corrs API with mix of existing and non-existing control traits Test that the partial correlations endpoint handles a mix of existing and non-existing control traits gracefully and issues a warning to the user. Summary of changes: * gn3/computations/partial_correlations.py: Issue a warning for all non-existing control traits * gn3/db/partial_correlations.py: update queries - use `INNER JOIN` for tables instead of comma-separated list of tables * tests/integration/conftest.py: Add `db_conn` fixture to provide a database connection to the tests. This will probably be changed in the future to connect to a temporary database for tests. * tests/integration/test_partial_correlations.py: Add test to check for correct behaviour with a mix of existing and non-existing control traits --- gn3/computations/partial_correlations.py | 6 ++++ gn3/db/partial_correlations.py | 18 +++++------ tests/integration/conftest.py | 10 +++++++ tests/integration/test_partial_correlations.py | 41 +++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 1cc969c..3633a59 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -628,6 +628,12 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911] return { "status": "not-found", "message": "None of the requested control traits were found."} + for trait in cntrl_traits: + if trait["haveinfo"] == False: + warnings.warn( + (f"Control traits {trait['trait_fullname']} was not found " + "- continuing without it."), + category=UserWarning) group = primary_trait["db"]["group"] primary_trait_data = all_traits_data[primary_trait["trait_name"]] diff --git a/gn3/db/partial_correlations.py 
b/gn3/db/partial_correlations.py index 0931f09..0075cad 100644 --- a/gn3/db/partial_correlations.py +++ b/gn3/db/partial_correlations.py @@ -347,10 +347,11 @@ def probeset_traits_info( "probe_set_note_by_rw", "flag") query = ( "SELECT ProbeSet.Name AS trait_name, {columns} " - "FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef " - "WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id " - "AND ProbeSetXRef.ProbeSetId = ProbeSet.Id " - "AND ProbeSetFreeze.Name IN ({dataset_names}) " + "FROM ProbeSet INNER JOIN ProbeSetXRef " + "ON ProbeSetXRef.ProbeSetId = ProbeSet.Id " + "INNER JOIN ProbeSetFreeze " + "ON ProbeSetFreeze.Id = ProbeSetXRef.ProbeSetFreezeId " + "WHERE ProbeSetFreeze.Name IN ({dataset_names}) " "AND ProbeSet.Name IN ({trait_names})").format( columns=", ".join(["ProbeSet.{}".format(x) for x in keys]), dataset_names=", ".join(["%s"] * len(dataset_names)), @@ -376,11 +377,10 @@ def geno_traits_info( "SELECT " "Geno.Name AS trait_name, {columns} " "FROM " - "Geno, GenoFreeze, GenoXRef " - "WHERE " - "GenoXRef.GenoFreezeId = GenoFreeze.Id AND GenoXRef.GenoId = Geno.Id AND " - "GenoFreeze.Name IN ({dataset_names}) AND " - "Geno.Name IN ({trait_names})").format( + "Geno INNER JOIN GenoXRef ON GenoXRef.GenoId = Geno.Id " + "INNER JOIN GenoFreeze ON GenoFreeze.Id = GenoXRef.GenoFreezeId " + "WHERE GenoFreeze.Name IN ({dataset_names}) " + "AND Geno.Name IN ({trait_names})").format( columns=", ".join(["Geno.{}".format(x) for x in keys]), dataset_names=", ".join(["%s"] * len(dataset_names)), trait_names=", ".join(["%s"] * len(traits))) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index be58d80..e1d1c37 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,12 +1,22 @@ +"""Module that holds fixtures for integration tests""" import pytest from gn3.app import create_app +from gn3.db_utils import database_connector @pytest.fixture(scope="session") def client(): + """Create a test client fixture for tests""" # Do 
some setup app = create_app() app.config.update({"TESTING": True}) app.testing = True yield app.test_client() # Do some teardown/cleanup + + +@pytest.fixture +def db_conn(): + """Create a db connection fixture for tests""" + ## Update this to use temp db once that is in place + return database_connector()[0] diff --git a/tests/integration/test_partial_correlations.py b/tests/integration/test_partial_correlations.py index ff6d771..7f9ff30 100644 --- a/tests/integration/test_partial_correlations.py +++ b/tests/integration/test_partial_correlations.py @@ -1,7 +1,7 @@ """Test partial correlations""" import pytest -from tests.integration.conftest import client +from gn3.computations.partial_correlations import partial_correlations_entry @pytest.mark.integration_test @pytest.mark.parametrize( @@ -176,3 +176,42 @@ def test_partial_correlation_api_with_non_existent_control_traits(client, post_d assert ( response.status_code == 404 and response.is_json and response.json.get("status") != "error") + +@pytest.mark.integration_test +@pytest.mark.slow +@pytest.mark.parametrize( + "primary,controls,method,target", ( + (# Probeset + "UCLA_BXDBXH_CARTILAGE_V2::ILM103710672", ( + "UCLA_BXDBXH_CARTILAGE_V2::nonExisting01", + "UCLA_BXDBXH_CARTILAGE_V2::nonExisting02", + "UCLA_BXDBXH_CARTILAGE_V2::ILM380019"), + "Genetic Correlation, Pearson's r", "BXDPublish"), + (# Publish + "BXDPublish::17937", ( + "BXDPublish::17940", + "BXDPublish::nonExisting03"), + "Genetic Correlation, Spearman's rho", "BXDPublish"), + (# Geno + "AKXDGeno::D4Mit16", ( + "AKXDGeno::D1Mit170", + "AKXDGeno::nonExisting04", + "AKXDGeno::D1Mit135", + "AKXDGeno::nonExisting05", + "AKXDGeno::nonExisting06"), + "SGO Literature Correlation", "BXDPublish") + ) + # Temp -- the data in the database for these is ephemeral, making it + # difficult to test for these without a temp database with the temp + # traits data set to something we are in control of + ) +def 
test_part_corr_api_with_mix_of_existing_and_non_existing_control_traits( + db_conn, primary, controls, method, target): + """ + Check that calling the function with a mix of existing and missing control + traits raises a warning. + """ + criteria = 10 + with pytest.warns(UserWarning): + partial_correlations_entry( + db_conn, primary, controls, method, criteria, target) -- cgit v1.2.3