4 files changed, 362 insertions, 95 deletions
diff --git a/gn_auth/auth/authorisation/data/genotypes.py b/gn_auth/auth/authorisation/data/genotypes.py
index ddb0add..d44cbfb 100644
--- a/gn_auth/auth/authorisation/data/genotypes.py
+++ b/gn_auth/auth/authorisation/data/genotypes.py
@@ -1,7 +1,9 @@
 """Handle linking of Genotype data to the Auth(entic|oris)ation system."""
 import uuid
-from dataclasses import asdict
+import logging
 from typing import Iterable
+from functools import reduce
+from dataclasses import asdict
 
 from gn_libs import mysqldb as gn3db
 from MySQLdb.cursors import DictCursor
@@ -11,6 +13,9 @@ from gn_auth.auth.db import sqlite3 as authdb
 from gn_auth.auth.authorisation.checks import authorised_p
 from gn_auth.auth.authorisation.resources.groups.models import Group
 
+
+logger = logging.getLogger(__name__)
+
 def linked_genotype_data(conn: authdb.DbConnection) -> Iterable[dict]:
     """Retrieve genotype data that is linked to user groups."""
     with authdb.cursor(conn) as cursor:
@@ -95,3 +100,37 @@ def link_genotype_data(
             "group": asdict(group),
             "datasets": datasets
         }
+
+
+def resources_by_datasets_and_traits(
+        authconn: authdb.DbConnection,
+        dsets_traits: tuple[tuple[str, str], ...]
+) -> tuple[dict, ...]:
+    """Fetch resources by their attached datasets and traits."""
+    traits_by_datasets: dict[str, tuple[str, ...]] = reduce(
+        lambda acc, curr: {
+            **acc,
+            curr[0]: acc.get(curr[0], tuple()) + (curr[1],)
+        },
+        dsets_traits,
+        {})
+    paramstr = ", ".join(["?"] * len(dsets_traits))
+    query = (
+        "SELECT r.*, rc.*, lgd.dataset_name FROM linked_genotype_data AS lgd "
+        "INNER JOIN genotype_resources AS mr ON lgd.data_link_id=mr.data_link_id "
+        "INNER JOIN resources AS r ON mr.resource_id=r.resource_id "
+        "INNER JOIN resource_categories AS rc "
+        "ON r.resource_category_id=rc.resource_category_id "
+        "WHERE lgd.dataset_name "
+        f"IN ({paramstr})")
+    logger.debug("QUERY: %s", query)
+    with authdb.cursor(authconn) as cursor:
+        params = tuple(traits_by_datasets.keys())
+        logger.debug("QUERY PARAMS: %s", params)
+        cursor.execute(query, tuple(traits_by_datasets.keys()))
+        return tuple({
+            "resource_id": row["resource_id"],
+            "resource_data": tuple(
+                f'{row["dataset_name"]}::{trait_id}'
+                for trait_id in traits_by_datasets[row["dataset_name"]])
+        } for row in cursor.fetchall())
diff --git a/gn_auth/auth/authorisation/data/mrna.py b/gn_auth/auth/authorisation/data/mrna.py
index 0cc644e..fcf6ea3 100644
--- a/gn_auth/auth/authorisation/data/mrna.py
+++ b/gn_auth/auth/authorisation/data/mrna.py
@@ -1,7 +1,9 @@
 """Handle linking of mRNA Assay data to the Auth(entic|oris)ation system."""
 import uuid
-from dataclasses import asdict
+import logging
 from typing import Iterable
+from functools import reduce
+from dataclasses import asdict
 
 from gn_libs import mysqldb as gn3db
 from MySQLdb.cursors import DictCursor
@@ -11,6 +13,10 @@ from gn_auth.auth.db import sqlite3 as authdb
 from gn_auth.auth.authorisation.checks import authorised_p
 from gn_auth.auth.authorisation.resources.groups.models import Group
 
+
+logger = logging.getLogger(__name__)
+
+
 def linked_mrna_data(conn: authdb.DbConnection) -> Iterable[dict]:
     """Retrieve mRNA Assay data that is linked to user groups."""
     with authdb.cursor(conn) as cursor:
@@ -100,3 +106,35 @@ def link_mrna_data(
             "group": asdict(group),
             "datasets": datasets
         }
+
+
+def resources_by_datasets_and_traits(
+        authconn: authdb.DbConnection,
+        dsets_traits: tuple[tuple[str, str], ...]
+) -> tuple[dict, ...]:
+    """Fetch resources by their attached datasets and traits."""
+    traits_by_datasets: dict[str, tuple[str, ...]] = reduce(
+        lambda acc, curr: {
+            **acc,
+            curr[0]: acc.get(curr[0], tuple()) + (curr[1],)
+        },
+        dsets_traits,
+        {})
+    paramstr = ", ".join(["?"] * len(dsets_traits))
+    query = (
+        "SELECT r.*, rc.*, lmd.dataset_name FROM linked_mrna_data AS lmd "
+        "INNER JOIN mrna_resources AS mr ON lmd.data_link_id=mr.data_link_id "
+        "INNER JOIN resources AS r ON mr.resource_id=r.resource_id "
+        "INNER JOIN resource_categories AS rc "
+        "ON r.resource_category_id=rc.resource_category_id "
+        "WHERE lmd.dataset_name "
+        f"IN ({paramstr})")
+    logger.debug("QUERY: %s", query)
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(query, tuple(traits_by_datasets.keys()))
+        return tuple({
+            "resource_id": row["resource_id"],
+            "resource_data": tuple(
+                f'{row["dataset_name"]}::{trait_id}'
+                for trait_id in traits_by_datasets[row["dataset_name"]])
+        } for row in cursor.fetchall())
diff --git a/gn_auth/auth/authorisation/data/phenotypes.py b/gn_auth/auth/authorisation/data/phenotypes.py
index 3e45af3..dddd5c9 100644
--- a/gn_auth/auth/authorisation/data/phenotypes.py
+++ b/gn_auth/auth/authorisation/data/phenotypes.py
@@ -1,20 +1,31 @@
 """Handle linking of Phenotype data to the Auth(entic|oris)ation system."""
 import uuid
+import logging
+from functools import reduce
 from dataclasses import asdict
 from typing import Any, Iterable
 
 from gn_libs import mysqldb as gn3db
+from gn_libs import sqlite3 as authdb
 from MySQLdb.cursors import DictCursor
+from flask import request, jsonify, Response, Blueprint, current_app as app
 
-from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.oauth2.resource_server import require_oauth
 
 from gn_auth.auth.errors import AuthorisationError
 from gn_auth.auth.authorisation.checks import authorised_p
+from gn_auth.auth.authorisation.resources.checks import can_delete
 from gn_auth.auth.authorisation.resources.system.models import system_resource
 from gn_auth.auth.authorisation.resources.groups.models import Group, group_resource
 
+
+from gn_auth.auth.authorisation.checks import require_json
 from gn_auth.auth.authorisation.resources.checks import authorised_for2
 
+logger = logging.getLogger(__name__)
+phenosbp = Blueprint("phenotypes", __name__)
+
+
 def linked_phenotype_data(
         authconn: authdb.DbConnection, gn3conn: gn3db.Connection,
         species: str = "") -> Iterable[dict[str, Any]]:
@@ -155,3 +166,156 @@ def link_phenotype_data(
             "group": asdict(group),
             "traits": params
         }
+
+
+def unlink_from_resources(
+        cursor: authdb.DbCursor,
+        data_link_ids: tuple[uuid.UUID, ...]
+) -> tuple[uuid.UUID, ...]:
+    """Unlink phenotypes from resources."""
+    # TODO: Delete in batches
+    cursor.executemany("DELETE FROM phenotype_resources "
+                       "WHERE data_link_id=? RETURNING resource_id",
+                       tuple((str(_id),) for _id in data_link_ids))
+    return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall())
+
+
+def delete_resources(
+        cursor: authdb.DbCursor,
+        resource_ids: tuple[uuid.UUID, ...]
+) -> tuple[uuid.UUID, ...]:
+    """Delete the specified phenotype resources."""
+    # TODO: Delete in batches
+    cursor.executemany("DELETE FROM resources "
+                       "WHERE resource_id=? RETURNING resource_id",
+                       tuple((str(_id),) for _id in resource_ids))
+    return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall())
+
+
+def fetch_data_link_ids(
+        cursor: authdb.DbCursor,
+        species_id: int,
+        population_id: int,
+        dataset_id: int,
+        xref_ids: tuple[int, ...]
+) -> tuple[uuid.UUID, ...]:
+    """Fetch `data_link_id` values for phenotypes."""
+    paramstr = ", ".join(["(?, ?, ?, ?)"] * len(xref_ids))
+    cursor.execute(
+        "SELECT data_link_id FROM linked_phenotype_data "
+        "WHERE (SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId) IN "
+        f"({paramstr})",
+        tuple(str(field) for arow in
+              ((species_id, population_id, dataset_id, xref_id)
+             for xref_id in xref_ids)
+            for field in arow))
+    return tuple(uuid.UUID(row["data_link_id"]) for row in cursor.fetchall())
+
+
+def fetch_resource_id(cursor: authdb.DbCursor,
+                      data_link_ids: tuple[uuid.UUID, ...]) -> uuid.UUID:
+    """Retrieve the ID of the resource where the data is linked to.
+
+    RAISES: InvalidResourceError in the case where more the data_link_ids belong
+      to more than one resource."""
+    _paramstr = ", ".join(["?"] * len(data_link_ids))
+    cursor.execute(
+        "SELECT DISTINCT(resource_id) FROM phenotype_resources "
+        f"WHERE data_link_id IN ({_paramstr})",
+        tuple(str(_id) for _id in data_link_ids))
+    _ids = tuple(uuid.UUID(row['resource_id']) for row in cursor.fetchall())
+    if len(_ids) != 1:
+        raise AuthorisationError(
+            f"Expected data from 1 resource, got {len(_ids)} resources.")
+    return _ids[0]
+
+
+def delete_linked_data(
+        cursor: authdb.DbCursor,
+        data_link_ids: tuple[uuid.UUID, ...]
+) -> int:
+    """Delete the actual linked data."""
+    # TODO: Delete in batches
+    cursor.executemany("DELETE FROM linked_phenotype_data "
+                       "WHERE data_link_id=?",
+                       tuple((str(_id),) for _id in data_link_ids))
+    return cursor.rowcount
+
+
+@phenosbp.route("/<int:species_id>/<int:population_id>/<int:dataset_id>/delete",
+                methods=["POST"])
+@require_json
+def delete_linked_phenotypes_data(
+        species_id: int,
+        population_id: int,
+        dataset_id: int
+) -> Response:
+    """Delete the linked phenotypes data from the database."""
+    db_uri = app.config["AUTH_DB"]
+    with (require_oauth.acquire("profile group resource") as _token,
+          authdb.connection(db_uri) as auth_conn,
+          authdb.cursor(auth_conn) as cursor):
+        _deleted = 0
+        xref_ids = tuple(request.json.get("xref_ids", []))#type: ignore[union-attr]
+        if len(xref_ids) > 0:
+            # TODO: Use background job, for huge number of xref_ids
+            data_link_ids = fetch_data_link_ids(
+                cursor, species_id, population_id, dataset_id, xref_ids)
+            resource_id = fetch_resource_id(cursor, data_link_ids)
+            # - Does user have DELETE privilege on the data
+            if not can_delete(auth_conn, _token.user.user_id, resource_id):
+                # - No: Raise `AuthorisationError` and bail!
+                raise AuthorisationError(
+                    "You are not allowed to delete this resource's data.")
+            # - YES: go ahead and delete data as below.
+            _resources_ids = unlink_from_resources(cursor, data_link_ids)
+            delete_resources(cursor, _resources_ids)
+            _deleted = delete_linked_data(cursor, data_link_ids)
+
+        return jsonify({
+            # TODO: "status": "sent-to-background"/"completed"/"failed"
+            # TODO: "status-url": <status-check-uri>
+            "requested": len(xref_ids),
+            "deleted": _deleted
+        })
+
+
+def __organise_resources_data__(acc, curr) -> dict:
+    logger.debug("ORGANISING... %s", dict(curr))
+    resource_row = acc.get(curr["resource_id"], {
+        "resource_id": curr["resource_id"],
+        "resource_data": tuple(),
+    })
+    return {
+        **acc,
+        curr["resource_id"]: {
+            **resource_row,
+            "resource_data": resource_row["resource_data"] + (
+                f'{curr["dataset_name"]}::{curr["trait_id"]}',)
+        }
+    }
+
+
+def resources_by_datasets_and_traits(
+        authconn: authdb.DbConnection,
+        dsets_traits: tuple[tuple[str, str], ...]
+) -> tuple[dict, ...]:
+    """Fetch resources by their attached datasets and traits."""
+    paramstr = ", ".join(["(?, ?)"] * len(dsets_traits))
+    query = (
+        "SELECT r.*, rc.*, lpd.dataset_name, lpd.PublishXRefId AS trait_id "
+        "FROM linked_phenotype_data AS lpd "
+        "INNER JOIN phenotype_resources AS pr "
+        "ON lpd.data_link_id=pr.data_link_id "
+        "INNER JOIN resources AS r ON pr.resource_id=r.resource_id "
+        "INNER JOIN resource_categories AS rc "
+        "ON r.resource_category_id=rc.resource_category_id "
+        "WHERE (lpd.dataset_name, lpd.PublishXRefId) "
+        f"IN ({paramstr})")
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            query, tuple(item for row in dsets_traits for item in row))
+        return tuple(reduce(
+            __organise_resources_data__,
+            cursor.fetchall(),
+            {}).values())
diff --git a/gn_auth/auth/authorisation/data/views.py b/gn_auth/auth/authorisation/data/views.py
index 9123949..ef3d119 100644
--- a/gn_auth/auth/authorisation/data/views.py
+++ b/gn_auth/auth/authorisation/data/views.py
@@ -2,9 +2,9 @@
 import sys
 import uuid
 import json
-from dataclasses import asdict
+import logging
 from typing import Any
-from functools import partial
+from functools import reduce, partial
 
 import redis
 from MySQLdb.cursors import DictCursor
@@ -13,6 +13,7 @@ from flask import request, jsonify, Response, Blueprint, current_app as app
 
 
 from gn_libs import mysqldb as gn3db
+from gn_libs import sqlite3 as db
 
 from gn_auth import jobs
 from gn_auth.commands import run_async_cmd
@@ -21,25 +22,32 @@ from gn_auth.auth.requests import request_json
 from gn_auth.auth.errors import InvalidData, NotFoundError
 from gn_auth.auth.authorisation.resources.groups.models import group_by_id
 
-from ...db import sqlite3 as db
-from ...db.sqlite3 import with_db_connection
+from gn_auth.auth.db.sqlite3 import with_db_connection # Replace this with gn_libs alternative
 
 from ..checks import require_json
 
-from ..users.models import user_resource_roles
-
-from ..resources.checks import authorised_for
-from ..resources.models import (
-    user_resources, public_resources, attach_resources_data)
-
 from ...authentication.users import User
 from ...authentication.oauth2.resource_server import require_oauth
 
-from ..data.mrna import link_mrna_data, ungrouped_mrna_data
-from ..data.phenotypes import link_phenotype_data, pheno_traits_from_db
-from ..data.genotypes import link_genotype_data, ungrouped_genotype_data
-
+from .mrna import (
+    link_mrna_data,
+    ungrouped_mrna_data,
+    resources_by_datasets_and_traits as mrna_resources_by_datasets_and_traits)
+from .genotypes import (
+    link_genotype_data,
+    ungrouped_genotype_data,
+    resources_by_datasets_and_traits as geno_resources_by_datasets_and_traits)
+from .phenotypes import (
+    phenosbp,
+    link_phenotype_data,
+    pheno_traits_from_db,
+    resources_by_datasets_and_traits as pheno_resources_by_datasets_and_traits)
+
+
+logger = logging.getLogger(__name__)
 data = Blueprint("data", __name__)
+data.register_blueprint(phenosbp, url_prefix="/phenotypes")
+
 
 def build_trait_name(trait_fullname):
     """
@@ -82,98 +90,116 @@ def list_species() -> Response:
 def authorisation() -> Response:
     """Retrieve the authorisation level for datasets/traits for the user."""
     # Access endpoint with something like:
-    # curl -X POST http://127.0.0.1:8080/api/oauth2/data/authorisation \
+    # curl -X POST http://127.0.0.1:8081/auth/data/authorisation \
     #    -H "Content-Type: application/json" \
     #    -d '{"traits": ["HC_M2_0606_P::1442370_at", "BXDGeno::01.001.695",
     #        "BXDPublish::10001"]}'
+    def __organise_traits__(acc, curr):
+        dset, _trt = curr
+        key = "ProbeSet"
+        if dset.endswith("Publish"):
+            key = "Publish"
+        elif dset.endswith("Geno"):
+            key="Geno"
+        elif dset.endswith("Temp"):
+            key = "Temp"
+        else:
+            key = "ProbeSet"
+
+        return {
+            **acc,
+            key: acc.get(key, tuple()) + (curr,)
+        }
+    _dset_traits: dict[str, tuple[tuple[str, str], ...]] = reduce(
+        __organise_traits__,
+        (
+            (dset.strip(), trt.strip()) for dset, trt in
+            (trtstr.split("::") for trtstr in
+             request_json().get("traits", []))),
+        {key: tuple() for key in ("Publish", "ProbeSet", "Geno", "Temp")})
+
     db_uri = app.config["AUTH_DB"]
-    privileges = {}
     user = User(uuid.uuid4(), "anon@ymous.user", "Anonymous User")
-    with db.connection(db_uri) as auth_conn:
+    with (db.connection(db_uri) as authconn, db.cursor(authconn) as cursor):
+        _all_resources = {
+            _rrow["resource_id"]: _rrow
+            for _rtypes in (
+                    pheno_resources_by_datasets_and_traits(
+                        authconn, _dset_traits["Publish"]),
+                    geno_resources_by_datasets_and_traits(
+                        authconn, _dset_traits["Geno"]),
+                    mrna_resources_by_datasets_and_traits(
+                        authconn, _dset_traits["ProbeSet"]))
+            for _rrow in _rtypes
+        }
+        if len(_all_resources.keys()) == 0:
+            raise NotFoundError(
+                "No resource(s) found for specified trait(s). Do(es) the "
+                "trait(s) actually exist?")
+        _resource_ids = tuple(_all_resources.keys())
+
+
+        def __explode_resource_data__(trait_fullname):
+            _dset, _trt = trait_fullname.split("::")
+            return {
+                "dataset_name": _dset,
+                "dataset_type": (
+                    "Phenotype" if _dset.endswith("Publish")
+                    else ("Genotype" if _dset.endswith("Geno")
+                          else ("Temporary" if _dset.endswith("Temp")
+                                else "mRNA"))),
+                "trait_name": _trt,
+                "trait_fullname": trait_fullname
+            }
+
+        _paramstr = ", ".join(["?"] * len(_resource_ids))
         try:
             with require_oauth.acquire("profile group resource") as _token:
                 user = _token.user
-                resources = attach_resources_data(
-                    auth_conn, user_resources(auth_conn, _token.user))
-                resources_roles = user_resource_roles(auth_conn, _token.user)
-                privileges = {
-                    resource_id: tuple(
-                        privilege.privilege_id
-                        for roles in resources_roles[resource_id]
-                        for privilege in roles.privileges)#("group:resource:view-resource",)
-                    for resource_id, is_authorised
-                    in authorised_for(
-                        auth_conn, _token.user,
-                        ("group:resource:view-resource",), tuple(
-                            resource.resource_id for resource in resources)).items()
-                    if is_authorised
-                }
+                cursor.execute(
+                    "SELECT ur.resource_id, r.role_id, rp.privilege_id "
+                    "FROM user_roles AS ur "
+                    "INNER JOIN roles AS r ON ur.role_id=r.role_id "
+                    "INNER JOIN role_privileges AS rp ON r.role_id=rp.role_id "
+                    "WHERE ur.user_id = ? "
+                    f"AND ur.resource_id IN ({_paramstr})",
+                    (str(user.user_id),) + _resource_ids
+                )
+                _privileges_by_resource: dict[str, tuple[str, ...]] = reduce(
+                    lambda acc, curr: {
+                        **acc,
+                        curr["resource_id"]: (
+                            acc.get(curr["resource_id"], tuple())
+                            + (curr["privilege_id"],))
+                    },
+                    cursor.fetchall(),
+                    {})
         except _HTTPException as exc:
             err_msg = json.loads(exc.body)
             if err_msg["error"] == "missing_authorization":
-                resources = attach_resources_data(
-                    auth_conn, public_resources(auth_conn))
+                cursor.execute(
+                    "SELECT rsc.resource_id "
+                    "FROM resources AS rsc "
+                    "WHERE rsc.public = '1' "
+                    f"AND rsc.resource_id IN ({_paramstr}) ",
+                    _resource_ids)
+                _privileges_by_resource = {
+                    row["resource_id"]: ('group:resource:view-resource',)
+                    for row in cursor.fetchall()
+                }
             else:
                 raise exc from None
 
-        def __gen_key__(resource, data_item):
-            if resource.resource_category.resource_category_key.lower() == "phenotype":
-                return (
-                    f"{resource.resource_category.resource_category_key.lower()}::"
-                    f"{data_item['dataset_name']}::{data_item['PublishXRefId']}")
-            return (
-                f"{resource.resource_category.resource_category_key.lower()}::"
-                f"{data_item['dataset_name']}")
-
-        data_to_resource_map = {
-            __gen_key__(resource, data_item): resource.resource_id
-            for resource in resources
-            for data_item in resource.resource_data
-        }
-        privileges = {
-            **{
-                resource.resource_id: ("system:resource:public-read",)
-                for resource in resources if resource.public
-            },
-            **privileges}
-
-        args = request.get_json()
-        traits_names = args["traits"] # type: ignore[index]
-        def __translate__(val):
-            return {
-                "Temp": "Temp",
-                "ProbeSet": "mRNA",
-                "Geno": "Genotype",
-                "Publish": "Phenotype"
-            }[val]
-
-        def __trait_key__(trait):
-            dataset_type = __translate__(trait['db']['dataset_type']).lower()
-            dataset_name = trait["db"]["dataset_name"]
-            if dataset_type == "phenotype":
-                return f"{dataset_type}::{dataset_name}::{trait['trait_name']}"
-            return f"{dataset_type}::{dataset_name}"
-
-        return jsonify(tuple(
-            {
-                "user": asdict(user),
-                **{key:trait[key] for key in ("trait_fullname", "trait_name")},
-                "dataset_name": trait["db"]["dataset_name"],
-                "dataset_type": __translate__(trait["db"]["dataset_type"]),
-                "resource_id": data_to_resource_map.get(__trait_key__(trait)),
-                "privileges": privileges.get(
-                    data_to_resource_map.get(
-                        __trait_key__(trait),
-                        uuid.UUID("4afa415e-94cb-4189-b2c6-f9ce2b6a878d")),
-                    tuple()) + (
-                        # Temporary traits do not exist in db: Set them
-                        # as public-read
-                        ("system:resource:public-read",)
-                        if trait["db"]["dataset_type"] == "Temp"
-                        else tuple())
-            } for trait in
-            (build_trait_name(trait_fullname)
-             for trait_fullname in traits_names)))
+        return jsonify({
+            "authorisation": [{
+                **resource,
+                "resource_data": [
+                    __explode_resource_data__(item)
+                            for item in resource["resource_data"]],
+                "privileges": _privileges_by_resource.get(resource["resource_id"], tuple())
+            } for resource in _all_resources.values()]
+        })
+
 
 def __search_mrna__():
     query = __request_key__("query", "")