From 83a7aa7533f8f4ecac049dc0e93aff6429e6e5ae Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Fri, 18 Feb 2022 07:17:50 +0300
Subject: Test partial correlations endpoint with non-existent primary traits

Test that the partial correlations endpoint responds with an appropriate
"not-found" message and the corresponding 404 status code when the
requested primary trait does not exist in the database.

Summary of the changes in each file:

* gn3/api/correlation.py: Generalise the building of the response

* gn3/computations/partial_correlations.py: Return a "not-found" response
  if the primary trait does not exist in the database

* gn3/db/partial_correlations.py: Fix a number of bugs that raised
  exceptions when the primary trait did not exist

* pytest.ini: Register a `slow` pytest marker

* tests/integration/test_partial_correlations.py: Add a new test that
  checks for an appropriate 404 response when the primary trait does not
  exist in the database
---
 gn3/api/correlation.py                         |  26 +++---
 gn3/computations/partial_correlations.py       |   5 ++
 gn3/db/partial_correlations.py                 | 116 ++++++++++++++-----------
 pytest.ini                                     |   1 +
 tests/integration/test_partial_correlations.py |  51 ++++++++++-
 5 files changed, 133 insertions(+), 66 deletions(-)

diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py
index 57b808e..cbe01d8 100644
--- a/gn3/api/correlation.py
+++ b/gn3/api/correlation.py
@@ -118,25 +118,25 @@ def partial_correlation():
                 return str(o, encoding="utf-8")
             return json.JSONEncoder.default(self, o)
 
+    def __build_response__(data):
+        status_codes = {"error": 400, "not-found": 404, "success": 200}
+        response = make_response(
+            json.dumps(data, cls=OutputEncoder),
+            status_codes[data["status"]])
+        response.headers["Content-Type"] = "application/json"
+        return response
+
     args = request.get_json()
     request_errors = __errors__(
         args, ("primary_trait", "control_traits", "target_db", "method"))
     if request_errors:
-        response = make_response(
-            json.dumps({
-                "status": "error",
-                "messages": request_errors,
-                "error_type": "Client Error"}),
-            400)
-        response.headers["Content-Type"] = "application/json"
-        return response
+        return __build_response__({
+            "status": "error",
+            "messages": request_errors,
+            "error_type": "Client Error"})
     conn, _cursor_object = database_connector()
     corr_results = partial_correlations_entry(
         conn, trait_fullname(args["primary_trait"]),
         tuple(trait_fullname(trait) for trait in args["control_traits"]),
         args["method"], int(args.get("criteria", 500)), args["target_db"])
-    response = make_response(
-        json.dumps(corr_results, cls=OutputEncoder),
-        400 if "error" in corr_results.keys() else 200)
-    response.headers["Content-Type"] = "application/json"
-    return response
+    return __build_response__(corr_results)
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 85e3c11..16cbbdb 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -616,6 +616,11 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
     primary_trait = tuple(
         trait for trait in all_traits
         if trait["trait_fullname"] == primary_trait_name)[0]
+    if not primary_trait["haveinfo"]:
+        return {
+            "status": "not-found",
+            "message": f"Could not find primary trait {primary_trait['trait_fullname']}"
+        }
     group = primary_trait["db"]["group"]
     primary_trait_data = all_traits_data[primary_trait["trait_name"]]
     primary_samples, primary_values, _primary_variances = export_informative(
diff --git a/gn3/db/partial_correlations.py b/gn3/db/partial_correlations.py
index 157f8ee..3e77367 100644
--- a/gn3/db/partial_correlations.py
+++ b/gn3/db/partial_correlations.py
@@ -62,7 +62,9 @@ def publish_traits_data(conn, traits):
     """
     Retrieve trait data for `Publish` traits.
     """
-    dataset_ids = tuple(set(trait["db"]["dataset_id"] for trait in traits))
+    dataset_ids = tuple(set(
+        trait["db"]["dataset_id"] for trait in traits
+        if trait["db"].get("dataset_id") is not None))
     query = (
         "SELECT "
         "PublishXRef.Id AS trait_name, Strain.Name AS sample_name, "
@@ -83,12 +85,13 @@ def publish_traits_data(conn, traits):
         "ORDER BY Strain.Name").format(
             trait_names=", ".join(["%s"] * len(traits)),
             dataset_ids=", ".join(["%s"] * len(dataset_ids)))
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(
-            query,
-            tuple(trait["trait_name"] for trait in traits) +
-            tuple(dataset_ids))
-        return organise_trait_data_by_trait(cursor.fetchall())
+    if len(dataset_ids) > 0:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                query,
+                tuple(trait["trait_name"] for trait in traits) +
+                tuple(dataset_ids))
+            return organise_trait_data_by_trait(cursor.fetchall())
     return {}
 
 def cellid_traits_data(conn, traits):
@@ -161,15 +164,18 @@ def species_ids(conn, traits):
     """
     Retrieve the IDS of the related species from the given list of traits.
     """
-    groups = tuple(set(trait["db"]["group"] for trait in traits))
+    groups = tuple(set(
+        trait["db"]["group"] for trait in traits
+        if trait["db"].get("group") is not None))
     query = (
         "SELECT Name AS `group`, SpeciesId AS species_id "
         "FROM InbredSet "
         "WHERE Name IN ({groups})").format(
             groups=", ".join(["%s"] * len(groups)))
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(query, groups)
-        return tuple(row for row in cursor.fetchall())
+    if len(groups) > 0:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(query, groups)
+            return tuple(row for row in cursor.fetchall())
     return tuple()
 
 def geno_traits_data(conn, traits):
@@ -194,12 +200,13 @@ def geno_traits_data(conn, traits):
             species_ids=sp_ids,
             trait_names=", ".join(["%s"] * len(traits)),
             dataset_names=", ".join(["%s"] * len(dataset_names)))
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(
-            query,
-            tuple(trait["trait_name"] for trait in traits) +
-            tuple(dataset_names))
-        return organise_trait_data_by_trait(cursor.fetchall())
+    if len(sp_ids) > 0 and len(dataset_names) > 0:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                query,
+                tuple(trait["trait_name"] for trait in traits) +
+                tuple(dataset_names))
+            return organise_trait_data_by_trait(cursor.fetchall())
     return {}
 
 def traits_data(
@@ -283,7 +290,9 @@ def publish_traits_info(
     this one fetches multiple items in a single query, unlike the original
     that fetches one item per query.
     """
-    trait_dataset_ids = set(trait["db"]["dataset_id"] for trait in traits)
+    trait_dataset_ids = set(
+        trait["db"]["dataset_id"] for trait in traits
+        if trait["db"].get("dataset_id") is not None)
     columns = (
         "PublishXRef.Id, Publication.PubMed_ID, "
         "Phenotype.Pre_publication_description, "
@@ -311,13 +320,14 @@ def publish_traits_info(
             columns=columns,
             trait_names=", ".join(["%s"] * len(traits)),
             trait_dataset_ids=", ".join(["%s"] * len(trait_dataset_ids)))
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(
-            query,
-            (
-                tuple(trait["trait_name"] for trait in traits) +
-                tuple(trait_dataset_ids)))
-        return merge_traits_and_info(traits, cursor.fetchall())
+    if trait_dataset_ids:
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(
+                query,
+                (
+                    tuple(trait["trait_name"] for trait in traits) +
+                    tuple(trait_dataset_ids)))
+            return merge_traits_and_info(traits, cursor.fetchall())
     return tuple({**trait, "haveinfo": False} for trait in traits)
 
 def probeset_traits_info(
@@ -728,33 +738,35 @@ def set_homologene_id(conn, traits):
     """
     Retrieve and set the 'homologene_id' values for ProbeSet traits.
     """
-    geneids = set(trait["geneid"] for trait in traits)
-    groups = set(trait["db"]["group"] for trait in traits)
-    query = (
-        "SELECT InbredSet.Name AS `group`, Homologene.GeneId AS geneid, "
-        "HomologeneId "
-        "FROM Homologene, Species, InbredSet "
-        "WHERE Homologene.GeneId IN ({geneids}) "
-        "AND InbredSet.Name IN ({groups}) "
-        "AND InbredSet.SpeciesId = Species.Id "
-        "AND Species.TaxonomyId = Homologene.TaxonomyId").format(
-            geneids=", ".join(["%s"] * len(geneids)),
-            groups=", ".join(["%s"] * len(groups)))
-    with conn.cursor(cursorclass=DictCursor) as cursor:
-        cursor.execute(query, (tuple(geneids) + tuple(groups)))
-        results = {
-            row["group"]: {
-                row["geneid"]: {
-                    key: val for key, val in row.items()
-                    if key not in ("group", "geneid")
-                }
-            } for row in cursor.fetchall()
-        }
-        return tuple(
-            {
-                **trait, **results.get(
-                    trait["db"]["group"], {}).get(trait["geneid"], {})
-            } for trait in traits)
+    geneids = set(trait.get("geneid") for trait in traits if trait["haveinfo"])
+    groups = set(
+        trait["db"].get("group") for trait in traits if trait["haveinfo"])
+    if len(geneids) > 1 and len(groups) > 1:
+        query = (
+            "SELECT InbredSet.Name AS `group`, Homologene.GeneId AS geneid, "
+            "HomologeneId "
+            "FROM Homologene, Species, InbredSet "
+            "WHERE Homologene.GeneId IN ({geneids}) "
+            "AND InbredSet.Name IN ({groups}) "
+            "AND InbredSet.SpeciesId = Species.Id "
+            "AND Species.TaxonomyId = Homologene.TaxonomyId").format(
+                geneids=", ".join(["%s"] * len(geneids)),
+                groups=", ".join(["%s"] * len(groups)))
+        with conn.cursor(cursorclass=DictCursor) as cursor:
+            cursor.execute(query, (tuple(geneids) + tuple(groups)))
+            results = {
+                row["group"]: {
+                    row["geneid"]: {
+                        key: val for key, val in row.items()
+                        if key not in ("group", "geneid")
+                    }
+                } for row in cursor.fetchall()
+            }
+            return tuple(
+                {
+                    **trait, **results.get(
+                        trait["db"]["group"], {}).get(trait["geneid"], {})
+                } for trait in traits)
     return traits
 
 def traits_datasets(conn, threshold, traits):
diff --git a/pytest.ini b/pytest.ini
index 58eba11..ba87787 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,6 +1,7 @@
 [pytest]
 addopts = --strict-markers
 markers =
+    slow
     unit_test
     integration_test
     performance_test
\ No newline at end of file
diff --git a/tests/integration/test_partial_correlations.py b/tests/integration/test_partial_correlations.py
index 5b520e0..17ea539 100644
--- a/tests/integration/test_partial_correlations.py
+++ b/tests/integration/test_partial_correlations.py
@@ -83,8 +83,57 @@ from tests.integration.conftest import client
         "target_db": None
     }))
 def test_partial_correlation_api_with_missing_request_data(client, post_data):
-    "Test /api/correlations/partial"
+    """
+    Test /api/correlations/partial endpoint with various expected request data
+    missing.
+    """
     response = client.post("/api/correlation/partial", json=post_data)
     assert (
         response.status_code == 400 and response.is_json and
         response.json.get("status") == "error")
+
+
+@pytest.mark.integration_test
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "post_data",
+    ({# ProbeSet
+        "primary_trait": {"dataset": "a_dataset", "name": "a_name"},
+        "control_traits": [
+            {"dataset": "a_dataset", "name": "a_name"},
+            {"dataset": "a_dataset2", "name": "a_name2"}],
+        "method": "a_method",
+        "target_db": "a_db"
+    }, {# Publish
+        "primary_trait": {"dataset": "a_Publish_dataset", "name": "a_name"},
+        "control_traits": [
+            {"dataset": "a_dataset", "name": "a_name"},
+            {"dataset": "a_dataset2", "name": "a_name2"}],
+        "method": "a_method",
+        "target_db": "a_db"
+    }, {# Geno
+        "primary_trait": {"dataset": "a_Geno_dataset", "name": "a_name"},
+        "control_traits": [
+            {"dataset": "a_dataset", "name": "a_name"},
+            {"dataset": "a_dataset2", "name": "a_name2"}],
+        "method": "a_method",
+        "target_db": "a_db"
+    }, {# Temp -- Fails due to missing table. Remove this sample if it is
+        # confirmed that the deletion of the database table is on purpose, and
+        # that Temp traits are no longer a thing
+        "primary_trait": {"dataset": "a_Temp_dataset", "name": "a_name"},
+        "control_traits": [
+            {"dataset": "a_dataset", "name": "a_name"},
+            {"dataset": "a_dataset2", "name": "a_name2"}],
+        "method": "a_method",
+        "target_db": "a_db"
+    }))
+def test_partial_correlation_api_with_non_existent_traits(client, post_data):
+    """
+    Check that the system responds appropriately in the case where the user
+    makes a request with a non-existent primary trait.
+    """
+    response = client.post("/api/correlation/partial", json=post_data)
+    assert (
+        response.status_code == 404 and response.is_json and
+        response.json.get("status") != "error")
-- 
cgit v1.2.3
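
For illustration, here is a minimal client-side sketch of the behaviour this
patch introduces. It is not part of the patch: the base URL is an assumption
about where a GN3 instance might be running, and the dataset, trait and method
names are placeholders borrowed from the new integration test.

import requests

# Placeholder payload mirroring the new integration test; these datasets and
# traits are assumed not to exist in the database.
payload = {
    "primary_trait": {"dataset": "a_Publish_dataset", "name": "a_name"},
    "control_traits": [
        {"dataset": "a_dataset", "name": "a_name"},
        {"dataset": "a_dataset2", "name": "a_name2"}],
    "method": "a_method",
    "target_db": "a_db"}

# Assumed base URL of a locally running GN3 instance.
response = requests.post(
    "http://localhost:8080/api/correlation/partial", json=payload)

# With this patch applied, a non-existent primary trait should produce an
# HTTP 404 whose JSON body has "status" set to "not-found", instead of an
# unhandled exception.
assert response.status_code == 404
assert response.json()["status"] == "not-found"

This is the same behaviour the new
test_partial_correlation_api_with_non_existent_traits test asserts through the
Flask test client.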