diff options
Diffstat (limited to 'gn_auth/auth/authorisation/data')
| -rw-r--r-- | gn_auth/auth/authorisation/data/genotypes.py | 41 | ||||
| -rw-r--r-- | gn_auth/auth/authorisation/data/mrna.py | 40 | ||||
| -rw-r--r-- | gn_auth/auth/authorisation/data/phenotypes.py | 166 | ||||
| -rw-r--r-- | gn_auth/auth/authorisation/data/views.py | 210 |
4 files changed, 362 insertions, 95 deletions
diff --git a/gn_auth/auth/authorisation/data/genotypes.py b/gn_auth/auth/authorisation/data/genotypes.py index ddb0add..d44cbfb 100644 --- a/gn_auth/auth/authorisation/data/genotypes.py +++ b/gn_auth/auth/authorisation/data/genotypes.py @@ -1,7 +1,9 @@ """Handle linking of Genotype data to the Auth(entic|oris)ation system.""" import uuid -from dataclasses import asdict +import logging from typing import Iterable +from functools import reduce +from dataclasses import asdict from gn_libs import mysqldb as gn3db from MySQLdb.cursors import DictCursor @@ -11,6 +13,9 @@ from gn_auth.auth.db import sqlite3 as authdb from gn_auth.auth.authorisation.checks import authorised_p from gn_auth.auth.authorisation.resources.groups.models import Group + +logger = logging.getLogger(__name__) + def linked_genotype_data(conn: authdb.DbConnection) -> Iterable[dict]: """Retrieve genotype data that is linked to user groups.""" with authdb.cursor(conn) as cursor: @@ -95,3 +100,37 @@ def link_genotype_data( "group": asdict(group), "datasets": datasets } + + +def resources_by_datasets_and_traits( + authconn: authdb.DbConnection, + dsets_traits: tuple[tuple[str, str], ...] +) -> tuple[dict, ...]: + """Fetch resources by their attached datasets and traits.""" + traits_by_datasets: dict[str, tuple[str, ...]] = reduce( + lambda acc, curr: { + **acc, + curr[0]: acc.get(curr[0], tuple()) + (curr[1],) + }, + dsets_traits, + {}) + paramstr = ", ".join(["?"] * len(dsets_traits)) + query = ( + "SELECT r.*, rc.*, lgd.dataset_name FROM linked_genotype_data AS lgd " + "INNER JOIN genotype_resources AS mr ON lgd.data_link_id=mr.data_link_id " + "INNER JOIN resources AS r ON mr.resource_id=r.resource_id " + "INNER JOIN resource_categories AS rc " + "ON r.resource_category_id=rc.resource_category_id " + "WHERE lgd.dataset_name " + f"IN ({paramstr})") + logger.debug("QUERY: %s", query) + with authdb.cursor(authconn) as cursor: + params = tuple(traits_by_datasets.keys()) + logger.debug("QUERY PARAMS: %s", params) + cursor.execute(query, tuple(traits_by_datasets.keys())) + return tuple({ + "resource_id": row["resource_id"], + "resource_data": tuple( + f'{row["dataset_name"]}::{trait_id}' + for trait_id in traits_by_datasets[row["dataset_name"]]) + } for row in cursor.fetchall()) diff --git a/gn_auth/auth/authorisation/data/mrna.py b/gn_auth/auth/authorisation/data/mrna.py index 0cc644e..fcf6ea3 100644 --- a/gn_auth/auth/authorisation/data/mrna.py +++ b/gn_auth/auth/authorisation/data/mrna.py @@ -1,7 +1,9 @@ """Handle linking of mRNA Assay data to the Auth(entic|oris)ation system.""" import uuid -from dataclasses import asdict +import logging from typing import Iterable +from functools import reduce +from dataclasses import asdict from gn_libs import mysqldb as gn3db from MySQLdb.cursors import DictCursor @@ -11,6 +13,10 @@ from gn_auth.auth.db import sqlite3 as authdb from gn_auth.auth.authorisation.checks import authorised_p from gn_auth.auth.authorisation.resources.groups.models import Group + +logger = logging.getLogger(__name__) + + def linked_mrna_data(conn: authdb.DbConnection) -> Iterable[dict]: """Retrieve mRNA Assay data that is linked to user groups.""" with authdb.cursor(conn) as cursor: @@ -100,3 +106,35 @@ def link_mrna_data( "group": asdict(group), "datasets": datasets } + + +def resources_by_datasets_and_traits( + authconn: authdb.DbConnection, + dsets_traits: tuple[tuple[str, str], ...] +) -> tuple[dict, ...]: + """Fetch resources by their attached datasets and traits.""" + traits_by_datasets: dict[str, tuple[str, ...]] = reduce( + lambda acc, curr: { + **acc, + curr[0]: acc.get(curr[0], tuple()) + (curr[1],) + }, + dsets_traits, + {}) + paramstr = ", ".join(["?"] * len(dsets_traits)) + query = ( + "SELECT r.*, rc.*, lmd.dataset_name FROM linked_mrna_data AS lmd " + "INNER JOIN mrna_resources AS mr ON lmd.data_link_id=mr.data_link_id " + "INNER JOIN resources AS r ON mr.resource_id=r.resource_id " + "INNER JOIN resource_categories AS rc " + "ON r.resource_category_id=rc.resource_category_id " + "WHERE lmd.dataset_name " + f"IN ({paramstr})") + logger.debug("QUERY: %s", query) + with authdb.cursor(authconn) as cursor: + cursor.execute(query, tuple(traits_by_datasets.keys())) + return tuple({ + "resource_id": row["resource_id"], + "resource_data": tuple( + f'{row["dataset_name"]}::{trait_id}' + for trait_id in traits_by_datasets[row["dataset_name"]]) + } for row in cursor.fetchall()) diff --git a/gn_auth/auth/authorisation/data/phenotypes.py b/gn_auth/auth/authorisation/data/phenotypes.py index 3e45af3..dddd5c9 100644 --- a/gn_auth/auth/authorisation/data/phenotypes.py +++ b/gn_auth/auth/authorisation/data/phenotypes.py @@ -1,20 +1,31 @@ """Handle linking of Phenotype data to the Auth(entic|oris)ation system.""" import uuid +import logging +from functools import reduce from dataclasses import asdict from typing import Any, Iterable from gn_libs import mysqldb as gn3db +from gn_libs import sqlite3 as authdb from MySQLdb.cursors import DictCursor +from flask import request, jsonify, Response, Blueprint, current_app as app -from gn_auth.auth.db import sqlite3 as authdb +from gn_auth.auth.authentication.oauth2.resource_server import require_oauth from gn_auth.auth.errors import AuthorisationError from gn_auth.auth.authorisation.checks import authorised_p +from gn_auth.auth.authorisation.resources.checks import can_delete from gn_auth.auth.authorisation.resources.system.models import system_resource from gn_auth.auth.authorisation.resources.groups.models import Group, group_resource + +from gn_auth.auth.authorisation.checks import require_json from gn_auth.auth.authorisation.resources.checks import authorised_for2 +logger = logging.getLogger(__name__) +phenosbp = Blueprint("phenotypes", __name__) + + def linked_phenotype_data( authconn: authdb.DbConnection, gn3conn: gn3db.Connection, species: str = "") -> Iterable[dict[str, Any]]: @@ -155,3 +166,156 @@ def link_phenotype_data( "group": asdict(group), "traits": params } + + +def unlink_from_resources( + cursor: authdb.DbCursor, + data_link_ids: tuple[uuid.UUID, ...] +) -> tuple[uuid.UUID, ...]: + """Unlink phenotypes from resources.""" + # TODO: Delete in batches + cursor.executemany("DELETE FROM phenotype_resources " + "WHERE data_link_id=? RETURNING resource_id", + tuple((str(_id),) for _id in data_link_ids)) + return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall()) + + +def delete_resources( + cursor: authdb.DbCursor, + resource_ids: tuple[uuid.UUID, ...] +) -> tuple[uuid.UUID, ...]: + """Delete the specified phenotype resources.""" + # TODO: Delete in batches + cursor.executemany("DELETE FROM resources " + "WHERE resource_id=? RETURNING resource_id", + tuple((str(_id),) for _id in resource_ids)) + return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall()) + + +def fetch_data_link_ids( + cursor: authdb.DbCursor, + species_id: int, + population_id: int, + dataset_id: int, + xref_ids: tuple[int, ...] +) -> tuple[uuid.UUID, ...]: + """Fetch `data_link_id` values for phenotypes.""" + paramstr = ", ".join(["(?, ?, ?, ?)"] * len(xref_ids)) + cursor.execute( + "SELECT data_link_id FROM linked_phenotype_data " + "WHERE (SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId) IN " + f"({paramstr})", + tuple(str(field) for arow in + ((species_id, population_id, dataset_id, xref_id) + for xref_id in xref_ids) + for field in arow)) + return tuple(uuid.UUID(row["data_link_id"]) for row in cursor.fetchall()) + + +def fetch_resource_id(cursor: authdb.DbCursor, + data_link_ids: tuple[uuid.UUID, ...]) -> uuid.UUID: + """Retrieve the ID of the resource where the data is linked to. + + RAISES: InvalidResourceError in the case where more the data_link_ids belong + to more than one resource.""" + _paramstr = ", ".join(["?"] * len(data_link_ids)) + cursor.execute( + "SELECT DISTINCT(resource_id) FROM phenotype_resources " + f"WHERE data_link_id IN ({_paramstr})", + tuple(str(_id) for _id in data_link_ids)) + _ids = tuple(uuid.UUID(row['resource_id']) for row in cursor.fetchall()) + if len(_ids) != 1: + raise AuthorisationError( + f"Expected data from 1 resource, got {len(_ids)} resources.") + return _ids[0] + + +def delete_linked_data( + cursor: authdb.DbCursor, + data_link_ids: tuple[uuid.UUID, ...] +) -> int: + """Delete the actual linked data.""" + # TODO: Delete in batches + cursor.executemany("DELETE FROM linked_phenotype_data " + "WHERE data_link_id=?", + tuple((str(_id),) for _id in data_link_ids)) + return cursor.rowcount + + +@phenosbp.route("/<int:species_id>/<int:population_id>/<int:dataset_id>/delete", + methods=["POST"]) +@require_json +def delete_linked_phenotypes_data( + species_id: int, + population_id: int, + dataset_id: int +) -> Response: + """Delete the linked phenotypes data from the database.""" + db_uri = app.config["AUTH_DB"] + with (require_oauth.acquire("profile group resource") as _token, + authdb.connection(db_uri) as auth_conn, + authdb.cursor(auth_conn) as cursor): + _deleted = 0 + xref_ids = tuple(request.json.get("xref_ids", []))#type: ignore[union-attr] + if len(xref_ids) > 0: + # TODO: Use background job, for huge number of xref_ids + data_link_ids = fetch_data_link_ids( + cursor, species_id, population_id, dataset_id, xref_ids) + resource_id = fetch_resource_id(cursor, data_link_ids) + # - Does user have DELETE privilege on the data + if not can_delete(auth_conn, _token.user.user_id, resource_id): + # - No: Raise `AuthorisationError` and bail! + raise AuthorisationError( + "You are not allowed to delete this resource's data.") + # - YES: go ahead and delete data as below. + _resources_ids = unlink_from_resources(cursor, data_link_ids) + delete_resources(cursor, _resources_ids) + _deleted = delete_linked_data(cursor, data_link_ids) + + return jsonify({ + # TODO: "status": "sent-to-background"/"completed"/"failed" + # TODO: "status-url": <status-check-uri> + "requested": len(xref_ids), + "deleted": _deleted + }) + + +def __organise_resources_data__(acc, curr) -> dict: + logger.debug("ORGANISING... %s", dict(curr)) + resource_row = acc.get(curr["resource_id"], { + "resource_id": curr["resource_id"], + "resource_data": tuple(), + }) + return { + **acc, + curr["resource_id"]: { + **resource_row, + "resource_data": resource_row["resource_data"] + ( + f'{curr["dataset_name"]}::{curr["trait_id"]}',) + } + } + + +def resources_by_datasets_and_traits( + authconn: authdb.DbConnection, + dsets_traits: tuple[tuple[str, str], ...] +) -> tuple[dict, ...]: + """Fetch resources by their attached datasets and traits.""" + paramstr = ", ".join(["(?, ?)"] * len(dsets_traits)) + query = ( + "SELECT r.*, rc.*, lpd.dataset_name, lpd.PublishXRefId AS trait_id " + "FROM linked_phenotype_data AS lpd " + "INNER JOIN phenotype_resources AS pr " + "ON lpd.data_link_id=pr.data_link_id " + "INNER JOIN resources AS r ON pr.resource_id=r.resource_id " + "INNER JOIN resource_categories AS rc " + "ON r.resource_category_id=rc.resource_category_id " + "WHERE (lpd.dataset_name, lpd.PublishXRefId) " + f"IN ({paramstr})") + with authdb.cursor(authconn) as cursor: + cursor.execute( + query, tuple(item for row in dsets_traits for item in row)) + return tuple(reduce( + __organise_resources_data__, + cursor.fetchall(), + {}).values()) diff --git a/gn_auth/auth/authorisation/data/views.py b/gn_auth/auth/authorisation/data/views.py index 9123949..ef3d119 100644 --- a/gn_auth/auth/authorisation/data/views.py +++ b/gn_auth/auth/authorisation/data/views.py @@ -2,9 +2,9 @@ import sys import uuid import json -from dataclasses import asdict +import logging from typing import Any -from functools import partial +from functools import reduce, partial import redis from MySQLdb.cursors import DictCursor @@ -13,6 +13,7 @@ from flask import request, jsonify, Response, Blueprint, current_app as app from gn_libs import mysqldb as gn3db +from gn_libs import sqlite3 as db from gn_auth import jobs from gn_auth.commands import run_async_cmd @@ -21,25 +22,32 @@ from gn_auth.auth.requests import request_json from gn_auth.auth.errors import InvalidData, NotFoundError from gn_auth.auth.authorisation.resources.groups.models import group_by_id -from ...db import sqlite3 as db -from ...db.sqlite3 import with_db_connection +from gn_auth.auth.db.sqlite3 import with_db_connection # Replace this with gn_libs alternative from ..checks import require_json -from ..users.models import user_resource_roles - -from ..resources.checks import authorised_for -from ..resources.models import ( - user_resources, public_resources, attach_resources_data) - from ...authentication.users import User from ...authentication.oauth2.resource_server import require_oauth -from ..data.mrna import link_mrna_data, ungrouped_mrna_data -from ..data.phenotypes import link_phenotype_data, pheno_traits_from_db -from ..data.genotypes import link_genotype_data, ungrouped_genotype_data - +from .mrna import ( + link_mrna_data, + ungrouped_mrna_data, + resources_by_datasets_and_traits as mrna_resources_by_datasets_and_traits) +from .genotypes import ( + link_genotype_data, + ungrouped_genotype_data, + resources_by_datasets_and_traits as geno_resources_by_datasets_and_traits) +from .phenotypes import ( + phenosbp, + link_phenotype_data, + pheno_traits_from_db, + resources_by_datasets_and_traits as pheno_resources_by_datasets_and_traits) + + +logger = logging.getLogger(__name__) data = Blueprint("data", __name__) +data.register_blueprint(phenosbp, url_prefix="/phenotypes") + def build_trait_name(trait_fullname): """ @@ -82,98 +90,116 @@ def list_species() -> Response: def authorisation() -> Response: """Retrieve the authorisation level for datasets/traits for the user.""" # Access endpoint with something like: - # curl -X POST http://127.0.0.1:8080/api/oauth2/data/authorisation \ + # curl -X POST http://127.0.0.1:8081/auth/data/authorisation \ # -H "Content-Type: application/json" \ # -d '{"traits": ["HC_M2_0606_P::1442370_at", "BXDGeno::01.001.695", # "BXDPublish::10001"]}' + def __organise_traits__(acc, curr): + dset, _trt = curr + key = "ProbeSet" + if dset.endswith("Publish"): + key = "Publish" + elif dset.endswith("Geno"): + key="Geno" + elif dset.endswith("Temp"): + key = "Temp" + else: + key = "ProbeSet" + + return { + **acc, + key: acc.get(key, tuple()) + (curr,) + } + _dset_traits: dict[str, tuple[tuple[str, str], ...]] = reduce( + __organise_traits__, + ( + (dset.strip(), trt.strip()) for dset, trt in + (trtstr.split("::") for trtstr in + request_json().get("traits", []))), + {key: tuple() for key in ("Publish", "ProbeSet", "Geno", "Temp")}) + db_uri = app.config["AUTH_DB"] - privileges = {} user = User(uuid.uuid4(), "anon@ymous.user", "Anonymous User") - with db.connection(db_uri) as auth_conn: + with (db.connection(db_uri) as authconn, db.cursor(authconn) as cursor): + _all_resources = { + _rrow["resource_id"]: _rrow + for _rtypes in ( + pheno_resources_by_datasets_and_traits( + authconn, _dset_traits["Publish"]), + geno_resources_by_datasets_and_traits( + authconn, _dset_traits["Geno"]), + mrna_resources_by_datasets_and_traits( + authconn, _dset_traits["ProbeSet"])) + for _rrow in _rtypes + } + if len(_all_resources.keys()) == 0: + raise NotFoundError( + "No resource(s) found for specified trait(s). Do(es) the " + "trait(s) actually exist?") + _resource_ids = tuple(_all_resources.keys()) + + + def __explode_resource_data__(trait_fullname): + _dset, _trt = trait_fullname.split("::") + return { + "dataset_name": _dset, + "dataset_type": ( + "Phenotype" if _dset.endswith("Publish") + else ("Genotype" if _dset.endswith("Geno") + else ("Temporary" if _dset.endswith("Temp") + else "mRNA"))), + "trait_name": _trt, + "trait_fullname": trait_fullname + } + + _paramstr = ", ".join(["?"] * len(_resource_ids)) try: with require_oauth.acquire("profile group resource") as _token: user = _token.user - resources = attach_resources_data( - auth_conn, user_resources(auth_conn, _token.user)) - resources_roles = user_resource_roles(auth_conn, _token.user) - privileges = { - resource_id: tuple( - privilege.privilege_id - for roles in resources_roles[resource_id] - for privilege in roles.privileges)#("group:resource:view-resource",) - for resource_id, is_authorised - in authorised_for( - auth_conn, _token.user, - ("group:resource:view-resource",), tuple( - resource.resource_id for resource in resources)).items() - if is_authorised - } + cursor.execute( + "SELECT ur.resource_id, r.role_id, rp.privilege_id " + "FROM user_roles AS ur " + "INNER JOIN roles AS r ON ur.role_id=r.role_id " + "INNER JOIN role_privileges AS rp ON r.role_id=rp.role_id " + "WHERE ur.user_id = ? " + f"AND ur.resource_id IN ({_paramstr})", + (str(user.user_id),) + _resource_ids + ) + _privileges_by_resource: dict[str, tuple[str, ...]] = reduce( + lambda acc, curr: { + **acc, + curr["resource_id"]: ( + acc.get(curr["resource_id"], tuple()) + + (curr["privilege_id"],)) + }, + cursor.fetchall(), + {}) except _HTTPException as exc: err_msg = json.loads(exc.body) if err_msg["error"] == "missing_authorization": - resources = attach_resources_data( - auth_conn, public_resources(auth_conn)) + cursor.execute( + "SELECT rsc.resource_id " + "FROM resources AS rsc " + "WHERE rsc.public = '1' " + f"AND rsc.resource_id IN ({_paramstr}) ", + _resource_ids) + _privileges_by_resource = { + row["resource_id"]: ('group:resource:view-resource',) + for row in cursor.fetchall() + } else: raise exc from None - def __gen_key__(resource, data_item): - if resource.resource_category.resource_category_key.lower() == "phenotype": - return ( - f"{resource.resource_category.resource_category_key.lower()}::" - f"{data_item['dataset_name']}::{data_item['PublishXRefId']}") - return ( - f"{resource.resource_category.resource_category_key.lower()}::" - f"{data_item['dataset_name']}") - - data_to_resource_map = { - __gen_key__(resource, data_item): resource.resource_id - for resource in resources - for data_item in resource.resource_data - } - privileges = { - **{ - resource.resource_id: ("system:resource:public-read",) - for resource in resources if resource.public - }, - **privileges} - - args = request.get_json() - traits_names = args["traits"] # type: ignore[index] - def __translate__(val): - return { - "Temp": "Temp", - "ProbeSet": "mRNA", - "Geno": "Genotype", - "Publish": "Phenotype" - }[val] - - def __trait_key__(trait): - dataset_type = __translate__(trait['db']['dataset_type']).lower() - dataset_name = trait["db"]["dataset_name"] - if dataset_type == "phenotype": - return f"{dataset_type}::{dataset_name}::{trait['trait_name']}" - return f"{dataset_type}::{dataset_name}" - - return jsonify(tuple( - { - "user": asdict(user), - **{key:trait[key] for key in ("trait_fullname", "trait_name")}, - "dataset_name": trait["db"]["dataset_name"], - "dataset_type": __translate__(trait["db"]["dataset_type"]), - "resource_id": data_to_resource_map.get(__trait_key__(trait)), - "privileges": privileges.get( - data_to_resource_map.get( - __trait_key__(trait), - uuid.UUID("4afa415e-94cb-4189-b2c6-f9ce2b6a878d")), - tuple()) + ( - # Temporary traits do not exist in db: Set them - # as public-read - ("system:resource:public-read",) - if trait["db"]["dataset_type"] == "Temp" - else tuple()) - } for trait in - (build_trait_name(trait_fullname) - for trait_fullname in traits_names))) + return jsonify({ + "authorisation": [{ + **resource, + "resource_data": [ + __explode_resource_data__(item) + for item in resource["resource_data"]], + "privileges": _privileges_by_resource.get(resource["resource_id"], tuple()) + } for resource in _all_resources.values()] + }) + def __search_mrna__(): query = __request_key__("query", "") |
