diff options
Diffstat (limited to 'gn3')
| -rw-r--r-- | gn3/api/lmdb_sample_data.py | 40 | ||||
| -rw-r--r-- | gn3/app.py | 2 | ||||
| -rw-r--r-- | gn3/settings.py | 6 |
3 files changed, 45 insertions, 3 deletions
diff --git a/gn3/api/lmdb_sample_data.py b/gn3/api/lmdb_sample_data.py
new file mode 100644
index 0000000..eaa71c2
--- /dev/null
+++ b/gn3/api/lmdb_sample_data.py
@@ -0,0 +1,40 @@
+"""API endpoint for retrieving sample data from LMDB storage"""
+import hashlib
+from pathlib import Path
+
+import lmdb
+from flask import Blueprint, current_app, jsonify
+
+lmdb_sample_data = Blueprint("lmdb_sample_data", __name__)
+
+
+@lmdb_sample_data.route("/sample-data/<string:dataset>/<int:trait_id>", methods=["GET"])
+def get_sample_data(dataset: str, trait_id: int):
+    """Retrieve sample data from LMDB for a given dataset and trait.
+
+    Path Parameters:
+        dataset: The name of the dataset
+        trait_id: The ID of the trait
+
+    Returns:
+        JSON object mapping sample IDs to their values
+    """
+    checksum = hashlib.md5(
+        f"{dataset}-{trait_id}".encode()
+    ).hexdigest()
+
+    db_path = Path(current_app.config["LMDB_DATA_PATH"]) / checksum
+    if not db_path.exists():
+        return jsonify(error="No data found for given dataset and trait"), 404
+    try:
+        with lmdb.open(str(db_path), max_dbs=15, readonly=True) as env:
+            data = {}
+            with env.begin(write=False) as txn:
+                cursor = txn.cursor()
+                for key, value in cursor:
+                    data[key.decode()] = float(value.decode())
+
+            return jsonify(data)
+
+    except lmdb.Error as err:
+        return jsonify(error=f"LMDB error: {str(err)}"), 500
diff --git a/gn3/app.py b/gn3/app.py
index 3841396..74bb5ab 100644
--- a/gn3/app.py
+++ b/gn3/app.py
@@ -30,6 +30,7 @@ from gn3.api.llm import gnqa
 from gn3.api.rqtl2 import rqtl2
 from gn3.api.streaming import streaming
 from gn3.case_attributes import caseattr
+from gn3.api.lmdb_sample_data import lmdb_sample_data
 
 
 class ConfigurationError(Exception):
@@ -111,6 +112,7 @@ def create_app(config: Union[Dict, str, None] = None) -> Flask:
     app.register_blueprint(gnqa, url_prefix="/api/llm")
     app.register_blueprint(rqtl2, url_prefix="/api/rqtl2")
     app.register_blueprint(streaming, url_prefix="/api/stream")
+    app.register_blueprint(lmdb_sample_data, url_prefix="/api/lmdb")
 
     register_error_handlers(app)
     return app
diff --git a/gn3/settings.py b/gn3/settings.py
index 9a3f7eb..e988106 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -19,9 +19,9 @@ SPARQL_ENDPOINT = os.environ.get(
     "SPARQL_ENDPOINT",
     "http://localhost:9082/sparql")
 
-# LMDB path
-LMDB_PATH = os.environ.get(
-    "LMDB_PATH", f"{os.environ.get('HOME')}/tmp/dataset")
+# LMDB paths
+LMDB_DATA_PATH = os.environ.get(
+    "LMDB_DATA_PATH", "/export5/lmdb-data-hashes")
 
 # SQL confs
 SQL_URI = os.environ.get(
