diff options
| author | Munyoki Kilyungi (aider) | 2025-02-05 12:22:10 +0300 |
|---|---|---|
| committer | BonfaceKilz | 2025-02-10 12:57:22 +0300 |
| commit | 007a54501693fe28c25513ce8391da919638384d (patch) | |
| tree | b8c60e2dc9463812caf976be0874c21a050bf9e6 | |
| parent | d799cecfba78e29ad6984cb5bcd4eb7c70edd3c6 (diff) | |
| download | genenetwork3-007a54501693fe28c25513ce8391da919638384d.tar.gz | |
feat: Add LMDB sample data retrieval API endpoint with tests
Signed-off-by: Munyoki Kilyungi <me@bonfacemunyoki.com>
| -rw-r--r-- | gn3/api/lmdb_sample_data.py | 40 | ||||
| -rw-r--r-- | gn3/app.py | 2 | ||||
| -rw-r--r-- | gn3/settings.py | 6 | ||||
| -rw-r--r-- | tests/integration/conftest.py | 15 | ||||
| -rw-r--r-- | tests/integration/test_lmdb_sample_data.py | 31 | ||||
| -rwxr-xr-x | tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/data.mdb | bin | 0 -> 32768 bytes | |||
| -rwxr-xr-x | tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/lock.mdb | bin | 0 -> 8192 bytes | |||
| -rw-r--r-- | tests/unit/conftest.py | 12 |
8 files changed, 97 insertions, 9 deletions
diff --git a/gn3/api/lmdb_sample_data.py b/gn3/api/lmdb_sample_data.py new file mode 100644 index 0000000..eaa71c2 --- /dev/null +++ b/gn3/api/lmdb_sample_data.py @@ -0,0 +1,40 @@ +"""API endpoint for retrieving sample data from LMDB storage""" +import hashlib +from pathlib import Path + +import lmdb +from flask import Blueprint, current_app, jsonify + +lmdb_sample_data = Blueprint("lmdb_sample_data", __name__) + + +@lmdb_sample_data.route("/sample-data/<string:dataset>/<int:trait_id>", methods=["GET"]) +def get_sample_data(dataset: str, trait_id: int): + """Retrieve sample data from LMDB for a given dataset and trait. + + Path Parameters: + dataset: The name of the dataset + trait_id: The ID of the trait + + Returns: + JSON object mapping sample IDs to their values + """ + checksum = hashlib.md5( + f"{dataset}-{trait_id}".encode() + ).hexdigest() + + db_path = Path(current_app.config["LMDB_DATA_PATH"]) / checksum + if not db_path.exists(): + return jsonify(error="No data found for given dataset and trait"), 404 + try: + with lmdb.open(str(db_path), max_dbs=15, readonly=True) as env: + data = {} + with env.begin(write=False) as txn: + cursor = txn.cursor() + for key, value in cursor: + data[key.decode()] = float(value.decode()) + + return jsonify(data) + + except lmdb.Error as err: + return jsonify(error=f"LMDB error: {str(err)}"), 500 diff --git a/gn3/app.py b/gn3/app.py index 3841396..74bb5ab 100644 --- a/gn3/app.py +++ b/gn3/app.py @@ -30,6 +30,7 @@ from gn3.api.llm import gnqa from gn3.api.rqtl2 import rqtl2 from gn3.api.streaming import streaming from gn3.case_attributes import caseattr +from gn3.api.lmdb_sample_data import lmdb_sample_data class ConfigurationError(Exception): @@ -111,6 +112,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask: app.register_blueprint(gnqa, url_prefix="/api/llm") app.register_blueprint(rqtl2, url_prefix="/api/rqtl2") app.register_blueprint(streaming, url_prefix="/api/stream") + 
app.register_blueprint(lmdb_sample_data, url_prefix="/api/lmdb") register_error_handlers(app) return app diff --git a/gn3/settings.py b/gn3/settings.py index 9a3f7eb..e988106 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -19,9 +19,9 @@ SPARQL_ENDPOINT = os.environ.get( "SPARQL_ENDPOINT", "http://localhost:9082/sparql") -# LMDB path -LMDB_PATH = os.environ.get( - "LMDB_PATH", f"{os.environ.get('HOME')}/tmp/dataset") +# LMDB paths +LMDB_DATA_PATH = os.environ.get( + "LMDB_DATA_PATH", "/export5/lmdb-data-hashes") # SQL confs SQL_URI = os.environ.get( diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 8e39726..bdbab09 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,4 +1,5 @@ """Module that holds fixtures for integration tests""" +from pathlib import Path import pytest import MySQLdb @@ -6,19 +7,25 @@ from gn3.app import create_app from gn3.chancy import random_string from gn3.db_utils import parse_db_url, database_connection + @pytest.fixture(scope="session") def client(): """Create a test client fixture for tests""" # Do some setup - app = create_app() - app.config.update({"TESTING": True}) - app.testing = True + app = create_app({ + "TESTING": True, + "LMDB_DATA_PATH": str( + Path(__file__).parent.parent / + Path("test_data/lmdb-test-data") + ), + "AUTH_SERVER_URL": "http://127.0.0.1:8081", + }) yield app.test_client() # Do some teardown/cleanup @pytest.fixture(scope="session") -def db_conn(client): # pylint: disable=[redefined-outer-name] +def db_conn(client): # pylint: disable=[redefined-outer-name] """Create a db connection fixture for tests""" # 01) Generate random string to append to all test db artifacts for the session live_db_uri = client.application.config["SQL_URI"] diff --git a/tests/integration/test_lmdb_sample_data.py b/tests/integration/test_lmdb_sample_data.py new file mode 100644 index 0000000..30a23f4 --- /dev/null +++ b/tests/integration/test_lmdb_sample_data.py @@ -0,0 +1,31 
@@ +"""Tests for the LMDB sample data API endpoint""" +import pytest + + +@pytest.mark.unit_test +def test_nonexistent_data(client): + """Test endpoint returns 404 when data doesn't exist""" + response = client.get("/api/lmdb/sample-data/nonexistent/123") + assert response.status_code == 404 + assert response.json["error"] == "No data found for given dataset and trait" + + +@pytest.mark.unit_test +def test_successful_retrieval(client): + """Test successful data retrieval using test LMDB data""" + # Use known test data hash: 7308efbd84b33ad3d69d14b5b1f19ccc + response = client.get("/api/lmdb/sample-data/BXDPublish/10007") + assert response.status_code == 200 + + data = response.json + assert len(data) == 31 + # Verify some known values from the test database + assert data["BXD1"] == 18.700001 + assert data["BXD11"] == 18.9 + + +@pytest.mark.unit_test +def test_invalid_trait_id(client): + """Test endpoint handles invalid trait IDs appropriately""" + response = client.get("/api/lmdb/sample-data/BXDPublish/999999") + assert response.status_code == 404 diff --git a/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/data.mdb b/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/data.mdb new file mode 100755 index 0000000..5fa213b --- /dev/null +++ b/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/data.mdb Binary files differ diff --git a/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/lock.mdb b/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/lock.mdb new file mode 100755 index 0000000..116d824 --- /dev/null +++ b/tests/test_data/lmdb-test-data/7308efbd84b33ad3d69d14b5b1f19ccc/lock.mdb Binary files differ diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index d9d5492..5526d16 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -7,6 +7,7 @@ import pytest from gn3.app import create_app + @pytest.fixture(scope="session") def fxtr_app(): """Fixture: setup the test app""" @@ 
-16,6 +17,11 @@ def fxtr_app(): f'testdb_{datetime.now().strftime("%Y%m%dT%H%M%S")}') app = create_app({ "TESTING": True, + "LMDB_DATA_PATH": str( + Path(__file__).parent.parent / + Path("test_data/lmdb-test-data") + ), + "AUTH_SERVER_URL": "http://127.0.0.1:8081", "OAUTH2_ACCESS_TOKEN_GENERATOR": "tests.unit.auth.test_token.gen_token" }) app.testing = True @@ -23,13 +29,15 @@ def fxtr_app(): # Clean up after ourselves testdb.unlink(missing_ok=True) + @pytest.fixture(scope="session") -def client(fxtr_app): # pylint: disable=redefined-outer-name +def client(fxtr_app): # pylint: disable=redefined-outer-name """Create a test client fixture for tests""" with fxtr_app.app_context(): yield fxtr_app.test_client() + @pytest.fixture(scope="session") -def fxtr_app_config(client): # pylint: disable=redefined-outer-name +def fxtr_app_config(client): # pylint: disable=redefined-outer-name """Return the test application's configuration object""" return client.application.config |
