diff options
-rw-r--r-- | gn3/auth/authorisation/data/views.py | 189 | ||||
-rw-r--r-- | gn3/auth/authorisation/groups/data.py | 99 | ||||
-rw-r--r-- | gn3/auth/authorisation/groups/views.py | 16 |
3 files changed, 1 insertions, 303 deletions
diff --git a/gn3/auth/authorisation/data/views.py b/gn3/auth/authorisation/data/views.py index e00df66..33ba262 100644 --- a/gn3/auth/authorisation/data/views.py +++ b/gn3/auth/authorisation/data/views.py @@ -26,7 +26,6 @@ from gn3.auth.authorisation.errors import InvalidData, NotFoundError from gn3.auth.authorisation.roles.models import( revoke_user_role_by_name, assign_user_role_by_name) -from gn3.auth.authorisation.groups.data import retrieve_ungrouped_data from gn3.auth.authorisation.groups.models import ( Group, user_group, group_by_id, add_user_to_group) @@ -123,194 +122,6 @@ def authorisation() -> Response: (build_trait_name(trait_fullname) for trait_fullname in traits_names))) -def migrate_user_group(conn: db.DbConnection, user: User) -> Group: - """Create a group for the user if they don't already have a group.""" - group = user_group(conn, user).maybe(# type: ignore[misc] - False, lambda grp: grp) # type: ignore[arg-type] - if not bool(group): - now = datetime.datetime.now().isoformat() - group = Group(uuid.uuid4(), f"{user.name}'s Group ({now})", { - "created": now, - "notes": "Imported from redis" - }) - with db.cursor(conn) as cursor: - cursor.execute( - "INSERT INTO groups(group_id, group_name, group_metadata) " - "VALUES(?, ?, ?)", - (str(group.group_id), group.group_name, json.dumps( - group.group_metadata))) - add_user_to_group(cursor, group, user) - revoke_user_role_by_name(cursor, user, "group-creator") - assign_user_role_by_name(cursor, user, "group-leader") - - return group - -def __redis_datasets_by_type__(acc, item): - if item["type"] == "dataset-probeset": - return (acc[0] + (item["name"],), acc[1], acc[2]) - if item["type"] == "dataset-geno": - return (acc[0], acc[1] + (item["name"],), acc[2]) - if item["type"] == "dataset-publish": - return (acc[0], acc[1], acc[2] + (item["name"],)) - return acc - -def __unmigrated_data__(ungrouped, redis_datasets): - return (dataset for dataset in ungrouped - if dataset["Name"] in redis_datasets) - -def __parametrise__(group: Group, datasets: Sequence[dict], - dataset_type: str) -> tuple[dict[str, str], ...]: - return tuple( - { - "group_id": str(group.group_id), - "dataset_type": dataset_type, - "dataset_or_trait_id": dataset["Id"], - "dataset_name": dataset["Name"], - "dataset_fullname": dataset["FullName"], - "accession_id": dataset["accession_id"] - } for dataset in datasets) - -def __org_by_user_id__(acc, resource): - try: - user_id = uuid.UUID(resource["owner_id"]) - return { - **acc, - user_id: acc.get(user_id, tuple()) + (resource,) - } - except ValueError as _verr: - return acc - -def redis_resources(rconn: redis.Redis) -> Iterable[dict[str, str]]: - """Retrieve ALL defined resources from Redis""" - return ( - json.loads(resource) - for resource in rconn.hgetall("resources").values()) - -def system_admin_user(conn: db.DbConnection) -> User: - """Return a system admin user.""" - with db.cursor(conn) as cursor: - cursor.execute( - "SELECT * FROM users AS u INNER JOIN user_roles AS ur " - "ON u.user_id=ur.user_id INNER JOIN roles AS r " - "ON ur.role_id=r.role_id WHERE r.role_name='system-administrator'") - rows = cursor.fetchall() - if len(rows) > 0: - return User(uuid.UUID(rows[0]["user_id"]), rows[0]["email"], - rows[0]["name"]) - raise NotFoundError("Could not find a system administration user.") - -def migrate_user( - conn: db.DbConnection, email: str, username: str, password: str) -> User: - """Migrate the user, if not already migrated.""" - try: - return user_by_email(conn, email) - except NotFoundError as _nfe: - user = User(uuid.uuid4(), email, username) - with db.cursor(conn) as cursor: - cursor.execute( - "INSERT INTO users(user_id, email, name) " - "VALUES (?, ?, ?)", - (str(user.user_id), user.email, user.name)) - set_user_password(cursor, user, password) - return user - -def __generate_random_password__(length: int = 25): - """Generate a random password string""" - return "".join(random.choices( - string.ascii_letters + string.punctuation + string.digits, - k=length)) - -def migrate_data(# pylint: disable=[too-many-locals] - authconn: db.DbConnection, - gn3conn: gn3db.Connection, - rconn: redis.Redis, - redis_user_id: uuid.UUID, - redisresources: tuple[dict[str, str], ...]) -> tuple[ - User, Group, tuple[dict[str, str], ...]]: - """Migrate data attached to the user to the user's group.""" - try: - user_details = json.loads(rconn.hget("users", str(redis_user_id))) - email = validate_email(user_details["email_address"]) - user = migrate_user(authconn, email["email"], - user_details.get("full_name") or "NOT SET", - __generate_random_password__()) - group = migrate_user_group(authconn, user) - redis_mrna, redis_geno, redis_pheno = reduce(#type: ignore[var-annotated] - __redis_datasets_by_type__, redisresources, - (tuple(), tuple(), tuple())) - mrna_datasets = __unmigrated_data__( - retrieve_ungrouped_data(authconn, gn3conn, "mrna"), redis_mrna) - geno_datasets = __unmigrated_data__( - retrieve_ungrouped_data(authconn, gn3conn, "genotype"), redis_geno) - pheno_datasets = __unmigrated_data__( - retrieve_ungrouped_data(authconn, gn3conn, "phenotype"), redis_pheno) - - params = ( - __parametrise__(group, mrna_datasets, "mRNA") + - __parametrise__(group, geno_datasets, "Genotype") + - __parametrise__(group, pheno_datasets, "Phenotype")) - if len(params) > 0: - with db.cursor(authconn) as cursor: - cursor.executemany( - "INSERT INTO linked_group_data VALUES" - "(:group_id, :dataset_type, :dataset_or_trait_id, " - ":dataset_name, :dataset_fullname, :accession_id)", - params) - - return user, group, params - except EmailNotValidError as _enve: - pass - - return tuple() # type: ignore[return-value] - -@data.route("/users/migrate", methods=["POST"]) -@require_oauth("migrate-data") -def migrate_users_data() -> Response: - """ - Special, protected endpoint to enable the migration of data from the older - system to the newer system with groups, resources and privileges. - - This is a temporary endpoint and should be removed after all the data has - been migrated. - """ - db_uri = app.config.get("AUTH_DB", "").strip() - if bool(db_uri) and os.path.exists(db_uri): - authorised_clients = app.config.get( - "OAUTH2_CLIENTS_WITH_DATA_MIGRATION_PRIVILEGE", []) - with (require_oauth.acquire("migrate-data") as the_token, - db.connection(db_uri) as authconn, - redis.Redis(decode_responses=True) as rconn, - gn3db.database_connection(app.config["SQL_URI"]) as gn3conn): - if the_token.client.client_id in authorised_clients: - by_user: dict[uuid.UUID, tuple[dict[str, str], ...]] = reduce( - __org_by_user_id__, redis_resources(rconn), {}) - users, groups, resource_data_params = reduce(# type: ignore[var-annotated, arg-type] - lambda acc, ugp: (acc[0] + (ugp[0],),# type: ignore[return-value, arg-type] - acc[1] + (ugp[1],), - acc[2] + ugp[2]), - ( - migrate_data( - authconn, gn3conn, rconn, user_id, user_resources) - for user_id, user_resources in by_user.items()), - (tuple(), tuple(), tuple())) - return jsonify({ - "description": ( - f"Migrated {len(resource_data_params)} resource data " - f"items for {len(users)} users into {len(groups)} " - "groups."), - "users": tuple(dictify(user) for user in users), - "groups": tuple(dictify(group) for group in groups) - }) - raise ForbiddenAccess("You cannot access this endpoint.") - - return app.response_class( - response=json.dumps({ - "error": "Unavailable", - "error_description": ( - "The data migration service is currently unavailable.") - }), - status=500, mimetype="application/json") - def __search_mrna__(): query = __request_key__("query", "") limit = int(__request_key__("limit", 10000)) diff --git a/gn3/auth/authorisation/groups/data.py b/gn3/auth/authorisation/groups/data.py index 0c821d3..453cc71 100644 --- a/gn3/auth/authorisation/groups/data.py +++ b/gn3/auth/authorisation/groups/data.py @@ -9,105 +9,6 @@ from gn3.auth.authorisation.groups import Group from gn3.auth.authorisation.checks import authorised_p from gn3.auth.authorisation.errors import InvalidData, NotFoundError -def __fetch_grouped_data__( - conn: authdb.DbConnection, dataset_type: str) -> Sequence[dict[str, Any]]: - """Retrieve ids for all data that are linked to groups in the auth db.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT dataset_type, dataset_or_trait_id FROM linked_group_data " - "WHERE LOWER(dataset_type)=?", - (dataset_type,)) - return tuple(dict(row) for row in cursor.fetchall()) - -def __fetch_ungrouped_mrna_data__( - conn: gn3db.Connection, grouped_data, offset: int) -> Sequence[dict]: - """Fetch ungrouped mRNA Assay data.""" - query = ("SELECT psf.Id, psf.Name AS dataset_name, " - "psf.FullName AS dataset_fullname, " - "ifiles.GN_AccesionId AS accession_id FROM ProbeSetFreeze AS psf " - "INNER JOIN InfoFiles AS ifiles ON psf.Name=ifiles.InfoPageName") - params: tuple[Any, ...] = tuple() - if bool(grouped_data): - clause = ", ".join(["%s"] * len(grouped_data)) - query = f"{query} WHERE psf.Id NOT IN ({clause})" - params = tuple(item["dataset_or_trait_id"] for item in grouped_data) - - query = f"{query} LIMIT 100 OFFSET %s" - with conn.cursor(DictCursor) as cursor: - cursor.execute(query, (params + (offset,))) - return tuple(dict(row) for row in cursor.fetchall()) - -def __fetch_ungrouped_geno_data__( - conn: gn3db.Connection, grouped_data, offset: int) -> Sequence[dict]: - """Fetch ungrouped Genotype data.""" - query = ("SELECT gf.Id, gf.Name AS dataset_name, " - "gf.FullName AS dataset_fullname, " - "ifiles.GN_AccesionId AS accession_id FROM GenoFreeze AS gf " - "INNER JOIN InfoFiles AS ifiles ON gf.Name=ifiles.InfoPageName") - params: tuple[Any, ...] = tuple() - if bool(grouped_data): - clause = ", ".join(["%s"] * len(grouped_data)) - query = f"{query} WHERE gf.Id NOT IN ({clause})" - params = tuple(item["dataset_or_trait_id"] for item in grouped_data) - - query = f"{query} LIMIT 100 OFFSET %s" - with conn.cursor(DictCursor) as cursor: - cursor.execute(query, (params + (offset,))) - return tuple(dict(row) for row in cursor.fetchall()) - -def __fetch_ungrouped_pheno_data__( - conn: gn3db.Connection, grouped_data, offset: int) -> Sequence[dict]: - """Fetch ungrouped Phenotype data.""" - query = ("SELECT " - "pxf.Id, iset.InbredSetName, pf.Name AS dataset_name, " - "pf.FullName AS dataset_fullname, " - "pf.ShortName AS dataset_shortname " - "FROM PublishXRef AS pxf " - "INNER JOIN InbredSet AS iset " - "ON pxf.InbredSetId=iset.InbredSetId " - "LEFT JOIN PublishFreeze AS pf " - "ON iset.InbredSetId=pf.InbredSetId") - params: tuple[Any, ...] = tuple() - if bool(grouped_data): - clause = ", ".join(["%s"] * len(grouped_data)) - query = f"{query} WHERE pxf.Id NOT IN ({clause})" - params = tuple(item["dataset_or_trait_id"] for item in grouped_data) - - query = f"{query} LIMIT 100 OFFSET %s" - with conn.cursor(DictCursor) as cursor: - cursor.execute(query, (params + (offset,))) - return tuple(dict(row) for row in cursor.fetchall()) - -def __fetch_ungrouped_data__( - conn: gn3db.Connection, dataset_type: str, - ungrouped: Sequence[dict[str, Any]], - offset) -> Sequence[dict[str, Any]]: - """Fetch any ungrouped data.""" - fetch_fns = { - "mrna": __fetch_ungrouped_mrna_data__, - "genotype": __fetch_ungrouped_geno_data__, - "phenotype": __fetch_ungrouped_pheno_data__ - } - return fetch_fns[dataset_type](conn, ungrouped, offset) - -@authorised_p(("system:data:link-to-group",), - error_description=( - "You do not have sufficient privileges to link data to (a) " - "group(s)."), - oauth2_scope="profile group resource") -def retrieve_ungrouped_data( - authconn: authdb.DbConnection, - gn3conn: gn3db.Connection, - dataset_type: str, - offset: int = 0) -> Sequence[dict]: - """Retrieve any data not linked to any group.""" - if dataset_type not in ("mrna", "genotype", "phenotype"): - raise InvalidData( - "Requested dataset type is invalid. Expected one of " - "'mrna', 'genotype' or 'phenotype'.") - grouped_data = __fetch_grouped_data__(authconn, dataset_type) - return __fetch_ungrouped_data__(gn3conn, dataset_type, grouped_data, offset) - def __fetch_mrna_data_by_ids__( conn: gn3db.Connection, dataset_ids: tuple[str, ...]) -> tuple[ dict, ...]: diff --git a/gn3/auth/authorisation/groups/views.py b/gn3/auth/authorisation/groups/views.py index e933bcd..0ff2903 100644 --- a/gn3/auth/authorisation/groups/views.py +++ b/gn3/auth/authorisation/groups/views.py @@ -12,7 +12,7 @@ from gn3 import db_utils as gn3dbutils from gn3.auth.dictify import dictify from gn3.auth.db_utils import with_db_connection -from .data import link_data_to_group, retrieve_ungrouped_data +from .data import link_data_to_group from .models import ( Group, user_group, all_groups, DUMMY_GROUP, GroupRole, group_by_id, join_requests, group_role_by_id, GroupCreationError, @@ -199,20 +199,6 @@ def unlinked_data(resource_type: str) -> Response: return jsonify(tuple()) -@groups.route("/<string:dataset_type>/ungrouped-data", methods=["GET"]) -@require_oauth("profile group resource") -def ungrouped_data(dataset_type: str) -> Response: - """View data not linked to any group.""" - if dataset_type not in ("all", "mrna", "genotype", "phenotype"): - raise AuthorisationError(f"Invalid dataset type {dataset_type}") - - with require_oauth.acquire("profile group resource") as _the_token: - with gn3dbutils.database_connection(current_app.config["SQL_URI"]) as gn3conn: - return jsonify(with_db_connection(partial( - retrieve_ungrouped_data, gn3conn=gn3conn, - dataset_type=dataset_type, - offset = int(request.args.get("offset", 0))))) - @groups.route("/data/link", methods=["POST"]) @require_oauth("profile group resource") def link_data() -> Response: |