From 86b7a2589fb0956f6ef6303d64d53b5d5f6b9d43 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 22 Mar 2023 11:25:10 +0300 Subject: auth: Enable linking multiple datasets to the group at once. --- gn3/auth/authorisation/groups/data.py | 86 ++++++++++++++++++++-------------- gn3/auth/authorisation/groups/views.py | 4 +- 2 files changed, 54 insertions(+), 36 deletions(-) diff --git a/gn3/auth/authorisation/groups/data.py b/gn3/auth/authorisation/groups/data.py index fd76648..0c821d3 100644 --- a/gn3/auth/authorisation/groups/data.py +++ b/gn3/auth/authorisation/groups/data.py @@ -108,58 +108,74 @@ def retrieve_ungrouped_data( grouped_data = __fetch_grouped_data__(authconn, dataset_type) return __fetch_ungrouped_data__(gn3conn, dataset_type, grouped_data, offset) -def __fetch_mrna_data_by_id__(conn: gn3db.Connection, dataset_id: str) -> dict: +def __fetch_mrna_data_by_ids__( + conn: gn3db.Connection, dataset_ids: tuple[str, ...]) -> tuple[ + dict, ...]: """Fetch mRNA Assay data by ID.""" with conn.cursor(DictCursor) as cursor: + paramstr = ", ".join(["%s"] * len(dataset_ids)) cursor.execute( - "SELECT psf.Id, psf.Name, psf.FullName, " + "SELECT psf.Id, psf.Name AS dataset_name, " + "psf.FullName AS dataset_fullname, " "ifiles.GN_AccesionId AS accession_id FROM ProbeSetFreeze AS psf " "INNER JOIN InfoFiles AS ifiles ON psf.Name=ifiles.InfoPageName " - "WHERE psf.Id=%s", - (dataset_id,)) - res = cursor.fetchone() + f"WHERE psf.Id IN ({paramstr})", + dataset_ids) + res = cursor.fetchall() if res: - return dict(res) + return tuple(dict(row) for row in res) raise NotFoundError("Could not find mRNA Assay data with the given ID.") -def __fetch_geno_data_by_id__(conn: gn3db.Connection, dataset_id: str) -> dict: +def __fetch_geno_data_by_ids__( + conn: gn3db.Connection, dataset_ids: tuple[str, ...]) -> tuple[ + dict, ...]: """Fetch genotype data by ID.""" with conn.cursor(DictCursor) as cursor: + paramstr = ", ".join(["%s"] * len(dataset_ids)) cursor.execute( - "SELECT gf.Id, gf.Name, gf.FullName, " + "SELECT gf.Id, gf.Name AS dataset_name, " + "gf.FullName AS dataset_fullname, " "ifiles.GN_AccesionId AS accession_id FROM GenoFreeze AS gf " "INNER JOIN InfoFiles AS ifiles ON gf.Name=ifiles.InfoPageName " - "WHERE gf.Id=%s", - (dataset_id,)) - res = cursor.fetchone() + f"WHERE gf.Id IN ({paramstr})", + dataset_ids) + res = cursor.fetchall() if res: - return dict(res) + return tuple(dict(row) for row in res) raise NotFoundError("Could not find Genotype data with the given ID.") -def __fetch_pheno_data_by_id__(conn: gn3db.Connection, dataset_id: str) -> dict: +def __fetch_pheno_data_by_ids__( + conn: gn3db.Connection, dataset_ids: tuple[str, ...]) -> tuple[ + dict, ...]: """Fetch phenotype data by ID.""" with conn.cursor(DictCursor) as cursor: + paramstr = ", ".join(["%s"] * len(dataset_ids)) cursor.execute( - "SELECT pf.Id, pf.Name, pf.FullName, " - "ifiles.GN_AccesionId AS accession_id FROM PublishFreeze AS pf " + "SELECT pxf.Id, iset.InbredSetName, pf.Id AS dataset_id, " + "pf.Name AS dataset_name, pf.FullName AS dataset_fullname, " + "ifiles.GN_AccesionId AS accession_id " + "FROM PublishXRef AS pxf " + "INNER JOIN InbredSet AS iset ON pxf.InbredSetId=iset.InbredSetId " + "INNER JOIN PublishFreeze AS pf ON iset.InbredSetId=pf.InbredSetId " "INNER JOIN InfoFiles AS ifiles ON pf.Name=ifiles.InfoPageName " - "WHERE pf.Id=%s", - (dataset_id,)) - res = cursor.fetchone() + f"WHERE pxf.Id IN ({paramstr})", + dataset_ids) + res = cursor.fetchall() if res: - return dict(res) + return tuple(dict(row) for row in res) raise NotFoundError( - "Could not find Phenotype/Publish data with the given ID.") + "Could not find Phenotype/Publish data with the given IDs.") def __fetch_data_by_id( - conn: gn3db.Connection, dataset_type: str, dataset_id: str) -> dict: - """Fetch data from MySQL by ID.""" + conn: gn3db.Connection, dataset_type: str, + dataset_ids: tuple[str, ...]) -> tuple[dict, ...]: + """Fetch data from MySQL by IDs.""" fetch_fns = { - "mrna": __fetch_mrna_data_by_id__, - "genotype": __fetch_geno_data_by_id__, - "phenotype": __fetch_pheno_data_by_id__ + "mrna": __fetch_mrna_data_by_ids__, + "genotype": __fetch_geno_data_by_ids__, + "phenotype": __fetch_pheno_data_by_ids__ } - return fetch_fns[dataset_type](conn, dataset_id) + return fetch_fns[dataset_type](conn, dataset_ids) @authorised_p(("system:data:link-to-group",), error_description=( @@ -168,20 +184,22 @@ def __fetch_data_by_id( oauth2_scope="profile group resource") def link_data_to_group( authconn: authdb.DbConnection, gn3conn: gn3db.Connection, - dataset_type: str, dataset_id: str, group: Group) -> dict: + dataset_type: str, dataset_ids: tuple[str, ...], group: Group) -> tuple[ + dict, ...]: """Link the given data to the specified group.""" - the_data = __fetch_data_by_id(gn3conn, dataset_type, dataset_id) + the_data = __fetch_data_by_id(gn3conn, dataset_type, dataset_ids) with authdb.cursor(authconn) as cursor: - params = { + params = tuple({ "group_id": str(group.group_id), "dataset_type": { "mrna": "mRNA", "genotype": "Genotype", "phenotype": "Phenotype" }[dataset_type], - "dataset_or_trait_id": dataset_id, "dataset_name": the_data["Name"], - "dataset_fullname": the_data["FullName"], - "accession_id": the_data["accession_id"] - } - cursor.execute( + "dataset_or_trait_id": item["Id"], + "dataset_name": item["dataset_name"], + "dataset_fullname": item["dataset_fullname"], + "accession_id": item["accession_id"] + } for item in the_data) + cursor.executemany( "INSERT INTO linked_group_data VALUES" "(:group_id, :dataset_type, :dataset_or_trait_id, :dataset_name, " ":dataset_fullname, :accession_id)", diff --git a/gn3/auth/authorisation/groups/views.py b/gn3/auth/authorisation/groups/views.py index 0b21800..6f1a6df 100644 --- a/gn3/auth/authorisation/groups/views.py +++ b/gn3/auth/authorisation/groups/views.py @@ -213,7 +213,7 @@ def link_data() -> Response: with require_oauth.acquire("profile group resource") as _the_token: form = request.form group_id = uuid.UUID(form["group_id"]) - dataset_id = form["dataset_id"] + dataset_ids = form.getlist("dataset_ids") dataset_type = form.get("dataset_type") if dataset_type not in ("mrna", "genotype", "phenotype"): raise InvalidData("Unexpected dataset type requested!") @@ -221,7 +221,7 @@ def link_data() -> Response: group = group_by_id(conn, group_id) with gn3dbutils.database_connection() as gn3conn: return link_data_to_group( - conn, gn3conn, dataset_type, dataset_id, group) + conn, gn3conn, dataset_type, dataset_ids, group) return jsonify(with_db_connection(__link__)) -- cgit v1.2.3