"""Handle linking of Phenotype data to the Auth(entic|oris)ation system.""" import uuid from dataclasses import asdict from typing import Any, Iterable from gn_libs import mysqldb as gn3db from gn_libs import sqlite3 as authdb from MySQLdb.cursors import DictCursor from flask import request, jsonify, Response, Blueprint, current_app as app from gn_auth.auth.authentication.oauth2.resource_server import require_oauth from gn_auth.auth.errors import AuthorisationError from gn_auth.auth.authorisation.checks import authorised_p from gn_auth.auth.authorisation.resources.system.models import system_resource from gn_auth.auth.authorisation.resources.checks import authorised_for_spec from gn_auth.auth.authorisation.resources.groups.models import Group, group_resource from gn_auth.auth.authorisation.checks import require_json from gn_auth.auth.authorisation.resources.checks import authorised_for2 phenosbp = Blueprint("phenotypes", __name__) def linked_phenotype_data( authconn: authdb.DbConnection, gn3conn: gn3db.Connection, species: str = "") -> Iterable[dict[str, Any]]: """Retrieve phenotype data linked to user groups.""" authkeys = ("SpeciesId", "InbredSetId", "PublishFreezeId", "PublishXRefId") with (authdb.cursor(authconn) as authcursor, gn3conn.cursor(DictCursor) as gn3cursor): authcursor.execute("SELECT * FROM linked_phenotype_data") linked = tuple(tuple(row[key] for key in authkeys) for row in authcursor.fetchall()) if len(linked) <= 0: return iter(()) paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(linked)) query = ( "SELECT spc.SpeciesId, spc.Name AS SpeciesName, iset.InbredSetId, " "iset.InbredSetName, pf.Id AS PublishFreezeId, " "pf.Name AS dataset_name, pf.FullName AS dataset_fullname, " "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " "FROM " "Species AS spc " "INNER JOIN InbredSet AS iset " "ON spc.SpeciesId=iset.SpeciesId " "INNER JOIN PublishFreeze AS pf " "ON iset.InbredSetId=pf.InbredSetId " "INNER JOIN PublishXRef AS pxr " "ON pf.InbredSetId=pxr.InbredSetId") + ( " WHERE" if (len(linked) > 0 or bool(species)) else "") + ( (" (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) " f"IN ({paramstr})") if len(linked) > 0 else "") + ( " AND"if len(linked) > 0 else "") + ( " spc.SpeciesName=%s" if bool(species) else "") params = tuple(item for sublist in linked for item in sublist) + ( (species,) if bool(species) else tuple()) gn3cursor.execute(query, params) return (item for item in gn3cursor.fetchall()) @authorised_p(("system:data:link-to-group",), error_description=( "You do not have sufficient privileges to link data to (a) " "group(s)."), oauth2_scope="profile group resource") def ungrouped_phenotype_data( authconn: authdb.DbConnection, gn3conn: gn3db.Connection): """Retrieve phenotype data that is not linked to any user group.""" with gn3conn.cursor() as cursor: params = tuple( (row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"], row["PublishXRefId"]) for row in linked_phenotype_data(authconn, gn3conn)) paramstr = ", ".join(["(?, ?, ?, ?)"] * len(params)) query = ( "SELECT spc.SpeciesId, spc.SpeciesName, iset.InbredSetId, " "iset.InbredSetName, pf.Id AS PublishFreezeId, " "pf.Name AS dataset_name, pf.FullName AS dataset_fullname, " "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " "FROM " "Species AS spc " "INNER JOIN InbredSet AS iset " "ON spc.SpeciesId=iset.SpeciesId " "INNER JOIN PublishFreeze AS pf " "ON iset.InbredSetId=pf.InbredSetId " "INNER JOIN PublishXRef AS pxr " "ON pf.InbredSetId=pxr.InbredSetId") if len(params) > 0: query = query + ( f" WHERE (iset.InbredSetId, pf.Id, pxr.Id) NOT IN ({paramstr})") cursor.execute(query, params) return tuple(dict(row) for row in cursor.fetchall()) return tuple() def pheno_traits_from_db(gn3conn: gn3db.Connection, params: tuple[dict, ...]) -> tuple[dict, ...]: """An internal utility function. Don't use outside of this module.""" if len(params) < 1: return tuple() paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(params)) with gn3conn.cursor(DictCursor) as cursor: cursor.execute( "SELECT spc.SpeciesId, iset.InbredSetId, pf.Id AS PublishFreezeId, " "pf.Name AS dataset_name, pf.FullName AS dataset_fullname, " "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " "FROM " "Species AS spc " "INNER JOIN InbredSet AS iset " "ON spc.SpeciesId=iset.SpeciesId " "INNER JOIN PublishFreeze AS pf " "ON iset.InbredSetId=pf.InbredSetId " "INNER JOIN PublishXRef AS pxr " "ON pf.InbredSetId=pxr.InbredSetId " "WHERE (spc.SpeciesName, iset.InbredSetName, pf.Name, pxr.Id) " f"IN ({paramstr})", tuple( itm for sublist in ( (item["species"], item["group"], item["dataset"], item["name"]) for item in params) for itm in sublist)) return cursor.fetchall() def link_phenotype_data( authconn: authdb.DbConnection, user, group: Group, traits: tuple[dict, ...] ) -> dict: """Link phenotype traits to a user group.""" if not (authorised_for2(authconn, user, system_resource(authconn), ("system:data:link-to-group",)) or authorised_for2(authconn, user, group_resource(authconn, group.group_id), ("group:data:link-to-group",)) ): raise AuthorisationError( "You do not have sufficient privileges to link data to group " f"'{group.group_name}'.") with authdb.cursor(authconn) as cursor: params = tuple({ "data_link_id": str(uuid.uuid4()), "group_id": str(group.group_id), **item } for item in traits) cursor.executemany( "INSERT INTO linked_phenotype_data " "VALUES (" ":data_link_id, :group_id, :SpeciesId, :InbredSetId, " ":PublishFreezeId, :dataset_name, :dataset_fullname, " ":dataset_shortname, :PublishXRefId" ")", params) return { "description": ( f"Successfully linked {len(traits)} traits to group."), "group": asdict(group), "traits": params } def unlink_from_resources( cursor: authdb.DbCursor, data_link_ids: tuple[uuid.UUID, ...] ) -> tuple[uuid.UUID, ...]: """Unlink phenotypes from resources.""" # TODO: Delete in batches cursor.executemany("DELETE FROM phenotype_resources " "WHERE data_link_id=? RETURNING resource_id", tuple((str(_id),) for _id in data_link_ids)) return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall()) def delete_resources( cursor: authdb.DbCursor, resource_ids: tuple[uuid.UUID, ...] ) -> tuple[uuid.UUID, ...]: """Delete the specified phenotype resources.""" # TODO: Delete in batches cursor.executemany("DELETE FROM resources " "WHERE resource_id=? RETURNING resource_id", tuple((str(_id),) for _id in resource_ids)) return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall()) def fetch_data_link_ids( cursor: authdb.DbCursor, species_id: int, population_id: int, dataset_id: int, xref_ids: tuple[int, ...] ) -> tuple[uuid.UUID, ...]: """Fetch `data_link_id` values for phenotypes.""" paramstr = ", ".join(["(?, ?, ?, ?)"] * len(xref_ids)) cursor.execute( "SELECT data_link_id FROM linked_phenotype_data " "WHERE (SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId) IN " f"({paramstr})", tuple(str(field) for arow in ((species_id, population_id, dataset_id, xref_id) for xref_id in xref_ids) for field in arow)) return tuple(uuid.UUID(row["data_link_id"]) for row in cursor.fetchall()) def fetch_resource_id(cursor: authdb.DbCursor, data_link_ids: tuple[uuid.UUID, ...]) -> uuid.UUID: """Retrieve the ID of the resource where the data is linked to. RAISES: InvalidResourceError in the case where more the data_link_ids belong to more than one resource.""" _paramstr = ", ".join(["?"] * len(data_link_ids)) cursor.execute( "SELECT DISTINCT(resource_id) FROM phenotype_resources " f"WHERE data_link_id IN ({_paramstr})", tuple(str(_id) for _id in data_link_ids)) _ids = tuple(uuid.UUID(row['resource_id']) for row in cursor.fetchall()) if len(_ids) != 1: raise AuthorisationError( f"Expected data from 1 resource, got {len(_ids)} resources.") return _ids[0] def delete_linked_data( cursor: authdb.DbCursor, data_link_ids: tuple[uuid.UUID, ...] ) -> int: """Delete the actual linked data.""" # TODO: Delete in batches cursor.executemany("DELETE FROM linked_phenotype_data " "WHERE data_link_id=?", tuple((str(_id),) for _id in data_link_ids)) return cursor.rowcount @phenosbp.route("////delete", methods=["POST"]) @require_json def delete_linked_phenotypes_data( species_id: int, population_id: int, dataset_id: int ) -> Response: """Delete the linked phenotypes data from the database.""" db_uri = app.config["AUTH_DB"] with (require_oauth.acquire("profile group resource") as _token, authdb.connection(db_uri) as auth_conn, authdb.cursor(auth_conn) as cursor): # - Does user have DELETE privilege on system (i.e. is data curator)? # YES: go ahead and delete data as below. # - Does user have DELETE privilege on resource(s)? # YES: Delete phenotypes by resource, checking privileges for each # resource. # - Neither: Raise `AuthorisationError` and bail! _deleted = 0 xref_ids = tuple(request.json.get("xref_ids", []))#type: ignore[union-attr] if len(xref_ids) > 0: # TODO: Use background job, for huge number of xref_ids data_link_ids = fetch_data_link_ids( cursor, species_id, population_id, dataset_id, xref_ids) resource_id = fetch_resource_id(cursor, data_link_ids) if not (authorised_for_spec( auth_conn, _token.user.user_id, resource_id, "(OR group:resource:delete-resource system:resource:delete)") or authorised_for_spec( auth_conn, _token.user.user_id, system_resource(auth_conn).resource_id, "(AND system:system-wide:data:delete)")): raise AuthorisationError( "You are not allowed to delete this resource's data.") _resources_ids = unlink_from_resources(cursor, data_link_ids) delete_resources(cursor, _resources_ids) _deleted = delete_linked_data(cursor, data_link_ids) return jsonify({ # TODO: "status": "sent-to-background"/"completed"/"failed" # TODO: "status-url": "requested": len(xref_ids), "deleted": _deleted })