about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 1
-rw-r--r-- README.md 92
-rw-r--r-- conftest.py 89
-rw-r--r-- pyproject.toml 18
-rw-r--r-- tests/__init__.py 0
-rw-r--r-- tests/test_gn2_smoke.py 167
-rw-r--r-- tests/test_gn3_smoke.py 215
-rw-r--r-- tests/test_gn_auth_smoke.py 216
8 files changed, 798 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..301718d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/**/__pycache__
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..69ba217
--- /dev/null
+++ b/README.md
@@ -0,0 +1,92 @@
+# gn-integration-tests
+
+Integration and end-to-end tests for the GeneNetwork stack
+([gn2](https://github.com/genenetwork/genenetwork2),
+[gn3](https://git.genenetwork.org/genenetwork3),
+[gn-auth](https://git.genenetwork.org/gn-auth)).
+
+Tests run against a live deployment (CD or production) — no mocking, no
+local stack required for the smoke suite.
+
+## Quick start
+
+```sh
+pip install -e .          # or: pip install requests pytest
+pytest -m "smoke"         # run all smoke tests (no credentials needed)
+```
+
+By default tests target **CD** (`cd.genenetwork.org` / `auth-cd.genenetwork.org`).
+Override with environment variables:
+
+| Variable | Default | Description |
+|---|---|---|
+| `GN2_BASE_URL` | `https://cd.genenetwork.org` | genenetwork2 root |
+| `GN3_BASE_URL` | `https://cd.genenetwork.org/api3` | gn3 API proxy path |
+| `GN_AUTH_BASE_URL` | `https://auth-cd.genenetwork.org` | gn-auth server |
+
+To target **production**:
+
+```sh
+GN2_BASE_URL=https://genenetwork.org \
+GN3_BASE_URL=https://genenetwork.org/api3 \
+GN_AUTH_BASE_URL=https://auth.genenetwork.org \
+pytest -m "smoke"
+```
+
+## Test marks
+
+| Mark | Description |
+|---|---|
+| `smoke` | Public endpoints, no credentials, fast |
+| `gn2` | Tests targeting genenetwork2 |
+| `gn3` | Tests targeting the gn3 REST API |
+| `gn_auth` | Tests targeting gn-auth |
+| `auth_flow` | Requires valid test-user credentials (see below) |
+
+Run a specific service:
+
+```sh
+pytest -m "gn_auth and smoke"
+pytest -m "gn3 and smoke"
+pytest -m "gn2 and smoke"
+```
+
+## Auth-flow tests (Phase 2)
+
+Set the following environment variables to enable credential-backed tests:
+
+```sh
+export GN_TEST_EMAIL=testuser@example.com
+export GN_TEST_PASSWORD=secret
+export GN_OAUTH2_CLIENT_ID=<uuid>
+export GN_OAUTH2_CLIENT_SECRET=<secret>
+pytest -m "auth_flow"
+```
+
+Without these variables the `auth_flow` tests are automatically skipped.
+
+## CI integration (planned)
+
+Once wired up, each component repo's CI will trigger only its relevant mark
+after a successful per-repo build:
+
+- genenetwork2 CI → `pytest -m gn2`
+- genenetwork3 CI → `pytest -m gn3`
+- gn-auth CI → `pytest -m gn_auth`
+
+Target: `cd.genenetwork.org` before merges; nightly against production.
+
+## URL structure notes
+
+nginx on the deployment host rewrites `/api3/<path>` → `/api/<path>` before
+proxying to gn3, and gn-auth lives on a separate subdomain:
+
+```
+https://cd.genenetwork.org/          → gn2
+https://cd.genenetwork.org/api3/     → gn3 (/api3/foo → gn3's /api/foo)
+https://auth-cd.genenetwork.org/     → gn-auth
+
+https://genenetwork.org/             → gn2  (production)
+https://genenetwork.org/api3/        → gn3  (production)
+https://auth.genenetwork.org/        → gn-auth (production)
+```
diff --git a/conftest.py b/conftest.py
new file mode 100644
index 0000000..5b5eb87
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,89 @@
+"""
+Shared fixtures for the GeneNetwork integration test suite.
+
+Base URLs default to the CD environment.  Override via environment variables:
+
+    GN2_BASE_URL     e.g. https://genenetwork.org          (default: CD)
+    GN3_BASE_URL     e.g. https://genenetwork.org/api3     (default: CD)
+    GN_AUTH_BASE_URL e.g. https://auth.genenetwork.org     (default: CD)
+
+For auth-flow tests also set:
+
+    GN_TEST_EMAIL    registered test-user e-mail address
+    GN_TEST_PASSWORD password for GN_TEST_EMAIL
+    GN_OAUTH2_CLIENT_ID     OAuth2 client UUID
+    GN_OAUTH2_CLIENT_SECRET OAuth2 client secret
+"""
+
+import os
+import pytest
+import requests
+
+
+_GN2_DEFAULT = "https://cd.genenetwork.org"
+_GN3_DEFAULT = "https://cd.genenetwork.org/api3"
+_GN_AUTH_DEFAULT = "https://auth-cd.genenetwork.org"
+
+
+@pytest.fixture(scope="session")
+def gn2_url() -> str:
+    return os.environ.get("GN2_BASE_URL", _GN2_DEFAULT).rstrip("/")
+
+
+@pytest.fixture(scope="session")
+def gn3_url() -> str:
+    return os.environ.get("GN3_BASE_URL", _GN3_DEFAULT).rstrip("/")
+
+
+@pytest.fixture(scope="session")
+def gn_auth_url() -> str:
+    return os.environ.get("GN_AUTH_BASE_URL", _GN_AUTH_DEFAULT).rstrip("/")
+
+
+@pytest.fixture(scope="session")
+def http() -> requests.Session:
+    """Shared requests.Session with a JSON Accept header; pass timeout= per call."""
+    with requests.Session() as session:
+        session.headers.update({"Accept": "application/json"})
+        yield session
+
+
+# ---------------------------------------------------------------------------
+# Auth-flow helpers (Phase 2 tests)
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(scope="session")
+def oauth2_credentials():
+    """Returns (email, password, client_id, client_secret) or skips the test."""
+    email = os.environ.get("GN_TEST_EMAIL")
+    password = os.environ.get("GN_TEST_PASSWORD")
+    client_id = os.environ.get("GN_OAUTH2_CLIENT_ID")
+    client_secret = os.environ.get("GN_OAUTH2_CLIENT_SECRET")
+    if not all([email, password, client_id, client_secret]):
+        pytest.skip(
+            "Set GN_TEST_EMAIL, GN_TEST_PASSWORD, GN_OAUTH2_CLIENT_ID, and "
+            "GN_OAUTH2_CLIENT_SECRET to run auth-flow tests."
+        )
+    return email, password, client_id, client_secret
+
+
+@pytest.fixture(scope="session")
+def access_token(gn_auth_url, oauth2_credentials, http):
+    """Obtains a Bearer token via the password grant and caches it for the session."""
+    email, password, client_id, client_secret = oauth2_credentials
+    resp = http.post(
+        f"{gn_auth_url}/auth/token",
+        json={
+            "grant_type": "password",
+            "username": email,
+            "password": password,
+            "scope": "profile group resource",
+            "client_id": client_id,
+            "client_secret": client_secret,
+        },
+        timeout=30,
+    )
+    assert resp.status_code == 200, f"Token request failed: {resp.text}"
+    data = resp.json()
+    assert "access_token" in data
+    return data["access_token"]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..6a8f9a4
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "gn-integration-tests"
+version = "0.1.0"
+requires-python = ">=3.9"
+dependencies = [
+    "requests",
+    "pytest",
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+markers = [
+    "smoke: Quick sanity checks requiring no authentication",
+    "gn2: Tests exercising genenetwork2 directly",
+    "gn3: Tests exercising the genenetwork3 REST API",
+    "gn_auth: Tests exercising the gn-auth service",
+    "auth_flow: Tests requiring valid credentials (set GN_TEST_EMAIL, GN_TEST_PASSWORD, GN_OAUTH2_CLIENT_ID and GN_OAUTH2_CLIENT_SECRET)",
+]
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/test_gn2_smoke.py b/tests/test_gn2_smoke.py
new file mode 100644
index 0000000..7c23642
--- /dev/null
+++ b/tests/test_gn2_smoke.py
@@ -0,0 +1,167 @@
+"""
+Smoke tests for the genenetwork2 web frontend.
+
+These tests make HTTP requests to gn2 and assert on status codes and key
+page content.  They do not require authentication and do not parse full
+HTML — just enough to confirm the page rendered without error and the
+expected structural marker is present.
+
+Run with:
+
+    pytest -m "gn2 and smoke"
+"""
+
+import pytest
+
+
+pytestmark = [pytest.mark.gn2, pytest.mark.smoke]
+
+# ---------------------------------------------------------------------------
+# Known-good test data
+# ---------------------------------------------------------------------------
+_DATASET = "HC_M2_0606_P"
+_TRAIT_ID = "1435395_s_at"
+_SEARCH_SPECIES = "mouse"
+_SEARCH_GROUP = "BXD"
+_SEARCH_TYPE = "Hippocampus mRNA"
+
+
+# ---------------------------------------------------------------------------
+# Home page
+# ---------------------------------------------------------------------------
+
+class TestHomePage:
+    def test_returns_200(self, gn2_url, http):
+        resp = http.get(gn2_url + "/", timeout=30)
+        assert resp.status_code == 200
+
+    def test_content_type_is_html(self, gn2_url, http):
+        resp = http.get(gn2_url + "/", timeout=30)
+        assert "text/html" in resp.headers.get("Content-Type", "")
+
+    def test_search_form_present(self, gn2_url, http):
+        resp = http.get(gn2_url + "/", timeout=30)
+        # The search button has id="btsearch" (from existing gn2 test suite).
+        assert "btsearch" in resp.text, (
+            "Search button element (#btsearch) not found on home page"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Dataset search
+# ---------------------------------------------------------------------------
+
+class TestSearch:
+    def test_search_returns_200(self, gn2_url, http):
+        params = {
+            "species": _SEARCH_SPECIES,
+            "group": _SEARCH_GROUP,
+            "type": _SEARCH_TYPE,
+            "dataset": _DATASET,
+            "search_terms_or": "",
+            "search_terms_and": "MEAN=(15 16) LRS=(23 46)",
+        }
+        resp = http.get(gn2_url + "/search", params=params, timeout=60)
+        assert resp.status_code == 200
+
+    def test_search_shows_results_found(self, gn2_url, http):
+        params = {
+            "species": _SEARCH_SPECIES,
+            "group": _SEARCH_GROUP,
+            "type": _SEARCH_TYPE,
+            "dataset": _DATASET,
+            "search_terms_or": "",
+            "search_terms_and": "MEAN=(15 16) LRS=(23 46)",
+        }
+        resp = http.get(gn2_url + "/search", params=params, timeout=60)
+        assert "records found" in resp.text, (
+            "Search results page does not contain 'records found'"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Trait page
+# ---------------------------------------------------------------------------
+
+class TestTraitPage:
+    def test_known_trait_returns_200(self, gn2_url, http):
+        resp = http.get(
+            gn2_url + "/show_trait",
+            params={"trait_id": _TRAIT_ID, "dataset": _DATASET},
+            timeout=60,
+        )
+        assert resp.status_code == 200, (
+            f"Expected 200 for show_trait?trait_id={_TRAIT_ID}&dataset={_DATASET}, "
+            f"got {resp.status_code}"
+        )
+
+    def test_trait_page_has_data_form(self, gn2_url, http):
+        resp = http.get(
+            gn2_url + "/show_trait",
+            params={"trait_id": _TRAIT_ID, "dataset": _DATASET},
+            timeout=60,
+        )
+        assert 'id="trait_data_form"' in resp.text, (
+            "Trait page is missing the #trait_data_form element"
+        )
+
+    def test_trait_page_references_dataset(self, gn2_url, http):
+        resp = http.get(
+            gn2_url + "/show_trait",
+            params={"trait_id": _TRAIT_ID, "dataset": _DATASET},
+            timeout=60,
+        )
+        assert _DATASET in resp.text, (
+            f"Dataset name '{_DATASET}' not found in trait page"
+        )
+
+    def test_unknown_trait_does_not_500(self, gn2_url, http):
+        resp = http.get(
+            gn2_url + "/show_trait",
+            params={"trait_id": "nonexistent_00000", "dataset": "NONEXISTENT_XYZ"},
+            timeout=60,
+        )
+        assert resp.status_code != 500, (
+            f"Unknown trait/dataset caused 500: {resp.text[:500]}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# gn2 → gn-auth authorization call-through
+#
+# When gn2 renders the trait page for a public trait, it calls gn-auth's
+# POST /auth/data/authorisation internally.  A 200 response with the data
+# form present confirms that the gn2 → gn-auth auth call succeeded.
+# ---------------------------------------------------------------------------
+
+class TestGn2ToGnAuthAuthFlow:
+    def test_public_trait_page_renders_without_auth(self, gn2_url, http):
+        """
+        The trait page for a known public trait must render fully without
+        the user being logged in.  A 200 response with the data form confirms
+        that gn2 successfully called gn-auth's data/authorisation endpoint
+        and got a public-access response.
+        """
+        resp = http.get(
+            gn2_url + "/show_trait",
+            params={"trait_id": _TRAIT_ID, "dataset": _DATASET},
+            timeout=60,
+        )
+        assert resp.status_code == 200
+        assert 'id="trait_data_form"' in resp.text
+
+
+# ---------------------------------------------------------------------------
+# gn2 OAuth2 endpoints — basic reachability
+# ---------------------------------------------------------------------------
+
+class TestOAuth2Endpoints:
+    def test_login_page_reachable(self, gn2_url, http):
+        """The OAuth2 authorise redirect must be reachable (not 500)."""
+        resp = http.get(gn2_url + "/login", timeout=30, allow_redirects=True)
+        assert resp.status_code != 500
+
+    def test_oauth2_code_endpoint_without_code_does_not_500(self, gn2_url, http):
+        """The callback endpoint without a code param should 400/redirect, not 500."""
+        resp = http.get(gn2_url + "/oauth2/code", timeout=30, allow_redirects=False)
+        assert resp.status_code != 500
diff --git a/tests/test_gn3_smoke.py b/tests/test_gn3_smoke.py
new file mode 100644
index 0000000..b3e4e9f
--- /dev/null
+++ b/tests/test_gn3_smoke.py
@@ -0,0 +1,215 @@
+"""
+Smoke tests for the genenetwork3 REST API.
+
+All tests hit public, unauthenticated endpoints.  The gn3 API is proxied
+at /api3/ on the gn2 domain; nginx rewrites /api3/<path> → /api/<path>
+before forwarding to gn3, so the URL seen by the caller is:
+
+    https://cd.genenetwork.org/api3/metadata/species
+    (→ gn3 internal: /api/metadata/species)
+
+Response format note: gn3 metadata endpoints return JSON-LD, shaped as:
+
+    {"@context": {...}, "data": [...]}
+
+The actual payload is always in the "data" key.  On the CD environment the
+RDF/SPARQL store may be empty, so "data" can be []; production will have
+real entries.  Content assertions (e.g. "contains mouse") are therefore
+skipped when the data list is empty.
+
+Run with:
+
+    pytest -m "gn3 and smoke"
+
+Known CD/production bugs (as of 2026-05-27):
+  - POST /api3/metadata/datasets/edit returns 500 on both CD and production
+    due to TypeError in privileges_fulfill_specs() — tracked separately.
+"""
+
+import pytest
+
+
+pytestmark = [pytest.mark.gn3, pytest.mark.smoke]
+
+# ---------------------------------------------------------------------------
+# Known-good test data (sourced from existing gn2/gn3 test fixtures)
+# ---------------------------------------------------------------------------
+_KNOWN_DATASET = "HC_M2_0606_P"   # Hippocampus Consortium M430v2 Jun06 PDNN
+_KNOWN_SPECIES = "mouse"
+_KNOWN_GROUP = "BXD"
+_KNOWN_PROBESET = "1435395_s_at"
+_KNOWN_INBREDSET_ID = 1           # BXD inbredset_id in the database
+
+
+def _jsonld_data(resp):
+    """Return the 'data' list from a JSON-LD gn3 response."""
+    body = resp.json()
+    assert isinstance(body, dict), f"Expected JSON object, got: {type(body)}"
+    assert "data" in body, f"Missing 'data' key in JSON-LD response: {list(body.keys())}"
+    assert isinstance(body["data"], list), (
+        f"'data' should be a list, got {type(body['data'])}"
+    )
+    return body["data"]
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/metadata/species
+# ---------------------------------------------------------------------------
+
+class TestMetadataSpecies:
+    def test_returns_200(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species", timeout=30)
+        assert resp.status_code == 200
+
+    def test_response_is_jsonld_with_data_key(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species", timeout=30)
+        data = _jsonld_data(resp)  # asserts structure internally
+        assert isinstance(data, list)
+
+    def test_contains_mouse_when_data_populated(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species", timeout=30)
+        items = _jsonld_data(resp)
+        if not items:
+            pytest.skip("Species data is empty in this environment (RDF store not populated)")
+        all_names = [
+            s.get("fullName", s.get("name", s.get("shortName", "")))
+            for s in items
+        ]
+        assert any("mouse" in n.lower() or "mus" in n.lower() for n in all_names), (
+            f"Mouse not found in species list: {all_names}"
+        )
+
+    def test_species_entry_schema_when_data_populated(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species", timeout=30)
+        items = _jsonld_data(resp)
+        if not items:
+            pytest.skip("Species data is empty in this environment")
+        entry = items[0]
+        assert any(k in entry for k in ("fullName", "name", "shortName")), (
+            f"Species entry has no recognisable name field: {entry}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/metadata/species/<name>
+# ---------------------------------------------------------------------------
+
+class TestMetadataSpeciesByName:
+    def test_known_species_returns_200(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species/{_KNOWN_SPECIES}", timeout=30)
+        assert resp.status_code == 200, (
+            f"Expected 200 for species '{_KNOWN_SPECIES}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+    def test_unknown_species_does_not_500(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/species/nonexistent_xyz", timeout=30)
+        assert resp.status_code != 500, (
+            f"Unknown species caused 500: {resp.text}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/metadata/groups
+# ---------------------------------------------------------------------------
+
+class TestMetadataGroups:
+    def test_returns_200(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/groups", timeout=30)
+        assert resp.status_code == 200
+
+    def test_response_is_jsonld_with_data_key(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/groups", timeout=30)
+        data = _jsonld_data(resp)
+        assert isinstance(data, list)
+
+    def test_contains_bxd_when_data_populated(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/groups", timeout=30)
+        items = _jsonld_data(resp)
+        if not items:
+            pytest.skip("Groups data is empty in this environment")
+        names = [g.get("name", g.get("shortName", "")) for g in items]
+        assert "BXD" in names, f"BXD not found in groups: {names[:20]}"
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/metadata/datasets/<name>
+# ---------------------------------------------------------------------------
+
+class TestMetadataDatasets:
+    def test_known_dataset_returns_200(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/datasets/{_KNOWN_DATASET}", timeout=30)
+        assert resp.status_code == 200, (
+            f"Expected 200 for dataset '{_KNOWN_DATASET}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+    def test_known_dataset_response_is_jsonld(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/datasets/{_KNOWN_DATASET}", timeout=30)
+        body = resp.json()
+        assert isinstance(body, dict), f"Expected JSON object, got {type(body)}"
+        assert "@context" in body or "data" in body, (
+            f"Response does not look like JSON-LD: {list(body.keys())}"
+        )
+
+    def test_unknown_dataset_does_not_500(self, gn3_url, http):
+        resp = http.get(f"{gn3_url}/metadata/datasets/NONEXISTENT_XYZ", timeout=30)
+        assert resp.status_code != 500, (
+            f"Unknown dataset caused 500: {resp.text}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/metadata/probesets/<dataset>/<name>
+# ---------------------------------------------------------------------------
+
+class TestMetadataProbesets:
+    def test_known_probeset_returns_200(self, gn3_url, http):
+        resp = http.get(
+            f"{gn3_url}/metadata/probesets/{_KNOWN_DATASET}/{_KNOWN_PROBESET}",
+            timeout=30,
+        )
+        assert resp.status_code == 200, (
+            f"Expected 200 for probeset '{_KNOWN_DATASET}/{_KNOWN_PROBESET}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# GET /api3/case-attribute/<inbredset_id>
+# ---------------------------------------------------------------------------
+
+class TestCaseAttributes:
+    def test_known_inbredset_does_not_500(self, gn3_url, http):
+        resp = http.get(
+            f"{gn3_url}/case-attribute/{_KNOWN_INBREDSET_ID}",
+            timeout=30,
+        )
+        assert resp.status_code != 500, (
+            f"case-attribute inbredset_id={_KNOWN_INBREDSET_ID} caused 500: {resp.text}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Protected endpoints — known bug
+# ---------------------------------------------------------------------------
+
+@pytest.mark.xfail(
+    reason=(
+        "BUG: POST /api3/metadata/datasets/edit returns 500 on both CD and "
+        "production due to TypeError: privileges_fulfill_specs() missing 1 "
+        "required positional argument: 'system_privileges' "
+        "(gn3/api/metadata.py:279). Should return 401 without a token."
+    ),
+    strict=True,
+)
+def test_metadata_edit_without_token_returns_401(gn3_url, http):
+    resp = http.post(
+        f"{gn3_url}/metadata/datasets/edit",
+        json={},
+        timeout=30,
+    )
+    assert resp.status_code == 401, (
+        f"Expected 401 on /metadata/datasets/edit without token, "
+        f"got {resp.status_code}: {resp.text}"
+    )
diff --git a/tests/test_gn_auth_smoke.py b/tests/test_gn_auth_smoke.py
new file mode 100644
index 0000000..7dd9645
--- /dev/null
+++ b/tests/test_gn_auth_smoke.py
@@ -0,0 +1,216 @@
+"""
+Smoke tests for the gn-auth service.
+
+Run with no credentials against CD (or production):
+
+    pytest -m "gn_auth and smoke"
+
+These tests verify the public-facing contract of the authorisation endpoints
+called at request time by gn2 and gn3 (/auth/system/roles and
+/auth/data/authorisation), plus the JWKS endpoint used for JWT validation.
+"""
+
+import pytest
+
+
+pytestmark = [pytest.mark.gn_auth, pytest.mark.smoke]
+
+# ---------------------------------------------------------------------------
+# Known public trait used in authorisation tests.
+# HC_M2_0606_P (Hippocampus Consortium M430v2) is a public dataset with
+# extensive test coverage across gn2/gn3.
+# ---------------------------------------------------------------------------
+_PUBLIC_TRAIT = "HC_M2_0606_P::1435395_s_at"
+_PUBLIC_PHENOTYPE_TRAIT = "BXDPublish::10710"
+_NONEXISTENT_TRAIT = "NONEXISTENT_DATASET_XYZ::00000"
+
+
+# ---------------------------------------------------------------------------
+# GET /auth/public-jwks
+# ---------------------------------------------------------------------------
+
+class TestPublicJwks:
+    def test_returns_200(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/public-jwks", timeout=30)
+        assert resp.status_code == 200
+
+    def test_response_has_jwks_key(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/public-jwks", timeout=30)
+        data = resp.json()
+        assert "jwks" in data, f"Missing 'jwks' key in response: {data}"
+
+    def test_at_least_one_rsa_key(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/public-jwks", timeout=30)
+        jwks = resp.json().get("jwks", [])
+        assert len(jwks) >= 1, "Expected at least one key in JWKS"
+        key = jwks[-1]  # newest key
+        assert key.get("kty") == "RSA"
+        # "alg" is optional per RFC 7517 — gn-auth omits it
+        assert "n" in key and "e" in key, "RSA key missing modulus or exponent"
+        assert "kid" in key, "RSA key missing 'kid'"
+
+
+# ---------------------------------------------------------------------------
+# GET /auth/system/roles  (no token → public-view only)
+# ---------------------------------------------------------------------------
+
+class TestSystemRolesUnauthenticated:
+    def test_returns_200_without_token(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/system/roles", timeout=30)
+        assert resp.status_code == 200
+
+    def test_response_is_list(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/system/roles", timeout=30)
+        data = resp.json()
+        assert isinstance(data, list), f"Expected list, got: {type(data)}"
+
+    def test_contains_public_view_role(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/system/roles", timeout=30)
+        roles = resp.json()
+        names = [r.get("role_name") for r in roles]
+        assert "public-view" in names, (
+            f"Expected 'public-view' role when called without auth token; got: {names}"
+        )
+
+    def test_public_view_role_has_privileges(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/system/roles", timeout=30)
+        public_view = next(
+            (r for r in resp.json() if r.get("role_name") == "public-view"), None
+        )
+        assert public_view is not None
+        assert isinstance(public_view.get("privileges"), list)
+        assert len(public_view["privileges"]) >= 1
+
+    def test_role_schema(self, gn_auth_url, http):
+        resp = http.get(f"{gn_auth_url}/auth/system/roles", timeout=30)
+        for role in resp.json():
+            assert "role_id" in role, f"Role missing 'role_id': {role}"
+            assert "role_name" in role, f"Role missing 'role_name': {role}"
+            assert "privileges" in role, f"Role missing 'privileges': {role}"
+
+
+# ---------------------------------------------------------------------------
+# POST /auth/data/authorisation
+# ---------------------------------------------------------------------------
+
+class TestDataAuthorisation:
+    def test_known_public_probeset_trait_returns_200(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_TRAIT]},
+            timeout=30,
+        )
+        assert resp.status_code == 200, (
+            f"Expected 200 for known public trait '{_PUBLIC_TRAIT}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+    def test_known_public_trait_response_structure(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_TRAIT]},
+            timeout=30,
+        )
+        data = resp.json()
+        assert "authorisation" in data, f"Missing 'authorisation' key: {data}"
+        assert isinstance(data["authorisation"], list)
+        assert len(data["authorisation"]) >= 1
+
+    def test_known_public_trait_has_view_privilege(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_TRAIT]},
+            timeout=30,
+        )
+        resources = resp.json()["authorisation"]
+        all_privileges = [p for r in resources for p in r.get("privileges", [])]
+        assert any("view" in p for p in all_privileges), (
+            f"Expected a view privilege for public trait '{_PUBLIC_TRAIT}'; "
+            f"got privileges: {all_privileges}"
+        )
+
+    def test_resource_entry_schema(self, gn_auth_url, http):
+        # Actual response schema (verified against CD 2026-05-27):
+        #   {"resource_id": str, "resource_data": [...], "privileges": [...]}
+        # Note: "public" and "resource_name" fields are NOT present in the
+        # deployed response despite appearing in source comments.
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_TRAIT]},
+            timeout=30,
+        )
+        for resource in resp.json()["authorisation"]:
+            assert "resource_id" in resource, f"Missing resource_id: {resource}"
+            assert "resource_data" in resource, f"Missing resource_data: {resource}"
+            assert "privileges" in resource, f"Missing privileges: {resource}"
+            assert isinstance(resource["resource_data"], list)
+            assert isinstance(resource["privileges"], list)
+
+    def test_known_public_phenotype_trait(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_PHENOTYPE_TRAIT]},
+            timeout=30,
+        )
+        assert resp.status_code == 200, (
+            f"Expected 200 for '{_PUBLIC_PHENOTYPE_TRAIT}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+    def test_nonexistent_trait_returns_404(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_NONEXISTENT_TRAIT]},
+            timeout=30,
+        )
+        assert resp.status_code == 404, (
+            f"Expected 404 for nonexistent trait '{_NONEXISTENT_TRAIT}', "
+            f"got {resp.status_code}: {resp.text}"
+        )
+
+    def test_empty_traits_list_is_handled(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": []},
+            timeout=30,
+        )
+        # Must not be a 500; 200 with empty result or 400/404 are all acceptable.
+        assert resp.status_code != 500, (
+            f"Empty traits list caused 500: {resp.text}"
+        )
+
+    def test_multiple_traits_in_one_request(self, gn_auth_url, http):
+        resp = http.post(
+            f"{gn_auth_url}/auth/data/authorisation",
+            json={"traits": [_PUBLIC_TRAIT, _PUBLIC_PHENOTYPE_TRAIT]},
+            timeout=30,
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert "authorisation" in data
+
+
+# ---------------------------------------------------------------------------
+# GET /auth/system/roles  (with valid token — Phase 2)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.auth_flow
+class TestSystemRolesAuthenticated:
+    def test_returns_200_with_token(self, gn_auth_url, http, access_token):
+        resp = http.get(
+            f"{gn_auth_url}/auth/system/roles",
+            headers={"Authorization": f"Bearer {access_token}"},
+            timeout=30,
+        )
+        assert resp.status_code == 200
+
+    def test_authenticated_user_has_more_than_public_view(self, gn_auth_url, http, access_token):
+        resp = http.get(
+            f"{gn_auth_url}/auth/system/roles",
+            headers={"Authorization": f"Bearer {access_token}"},
+            timeout=30,
+        )
+        roles = resp.json()
+        names = [r.get("role_name") for r in roles]
+        # A logged-in user should see public-view plus at least their own roles.
+        assert "public-view" in names