1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
|
"""Handle linking of Phenotype data to the Auth(entic|oris)ation system."""
import uuid
from dataclasses import asdict
from typing import Any, Iterable
from gn_libs import mysqldb as gn3db
from gn_libs import sqlite3 as authdb
from MySQLdb.cursors import DictCursor
from flask import request, jsonify, Response, Blueprint, current_app as app
from gn_auth.auth.authentication.oauth2.resource_server import require_oauth
from gn_auth.auth.errors import AuthorisationError
from gn_auth.auth.authorisation.checks import authorised_p
from gn_auth.auth.authorisation.resources.system.models import system_resource
from gn_auth.auth.authorisation.resources.checks import authorised_for_spec
from gn_auth.auth.authorisation.resources.groups.models import Group, group_resource
from gn_auth.auth.authorisation.checks import require_json
from gn_auth.auth.authorisation.resources.checks import authorised_for2
phenosbp = Blueprint("phenotypes", __name__)
def linked_phenotype_data(
authconn: authdb.DbConnection, gn3conn: gn3db.Connection,
species: str = "") -> Iterable[dict[str, Any]]:
"""Retrieve phenotype data linked to user groups."""
authkeys = ("SpeciesId", "InbredSetId", "PublishFreezeId", "PublishXRefId")
with (authdb.cursor(authconn) as authcursor,
gn3conn.cursor(DictCursor) as gn3cursor):
authcursor.execute("SELECT * FROM linked_phenotype_data")
linked = tuple(tuple(row[key] for key in authkeys)
for row in authcursor.fetchall())
if len(linked) <= 0:
return iter(())
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(linked))
query = (
"SELECT spc.SpeciesId, spc.Name AS SpeciesName, iset.InbredSetId, "
"iset.InbredSetName, pf.Id AS PublishFreezeId, "
"pf.Name AS dataset_name, pf.FullName AS dataset_fullname, "
"pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
"FROM "
"Species AS spc "
"INNER JOIN InbredSet AS iset "
"ON spc.SpeciesId=iset.SpeciesId "
"INNER JOIN PublishFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId "
"INNER JOIN PublishXRef AS pxr "
"ON pf.InbredSetId=pxr.InbredSetId") + (
" WHERE" if (len(linked) > 0 or bool(species)) else "") + (
(" (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
f"IN ({paramstr})") if len(linked) > 0 else "") + (
" AND"if len(linked) > 0 else "") + (
" spc.SpeciesName=%s" if bool(species) else "")
params = tuple(item for sublist in linked for item in sublist) + (
(species,) if bool(species) else tuple())
gn3cursor.execute(query, params)
return (item for item in gn3cursor.fetchall())
@authorised_p(("system:data:link-to-group",),
error_description=(
"You do not have sufficient privileges to link data to (a) "
"group(s)."),
oauth2_scope="profile group resource")
def ungrouped_phenotype_data(
authconn: authdb.DbConnection, gn3conn: gn3db.Connection):
"""Retrieve phenotype data that is not linked to any user group."""
with gn3conn.cursor() as cursor:
params = tuple(
(row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
row["PublishXRefId"])
for row in linked_phenotype_data(authconn, gn3conn))
paramstr = ", ".join(["(?, ?, ?, ?)"] * len(params))
query = (
"SELECT spc.SpeciesId, spc.SpeciesName, iset.InbredSetId, "
"iset.InbredSetName, pf.Id AS PublishFreezeId, "
"pf.Name AS dataset_name, pf.FullName AS dataset_fullname, "
"pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
"FROM "
"Species AS spc "
"INNER JOIN InbredSet AS iset "
"ON spc.SpeciesId=iset.SpeciesId "
"INNER JOIN PublishFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId "
"INNER JOIN PublishXRef AS pxr "
"ON pf.InbredSetId=pxr.InbredSetId")
if len(params) > 0:
query = query + (
f" WHERE (iset.InbredSetId, pf.Id, pxr.Id) NOT IN ({paramstr})")
cursor.execute(query, params)
return tuple(dict(row) for row in cursor.fetchall())
return tuple()
def pheno_traits_from_db(gn3conn: gn3db.Connection, params: tuple[dict, ...]) -> tuple[dict, ...]:
"""An internal utility function. Don't use outside of this module."""
if len(params) < 1:
return tuple()
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(params))
with gn3conn.cursor(DictCursor) as cursor:
cursor.execute(
"SELECT spc.SpeciesId, iset.InbredSetId, pf.Id AS PublishFreezeId, "
"pf.Name AS dataset_name, pf.FullName AS dataset_fullname, "
"pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
"FROM "
"Species AS spc "
"INNER JOIN InbredSet AS iset "
"ON spc.SpeciesId=iset.SpeciesId "
"INNER JOIN PublishFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId "
"INNER JOIN PublishXRef AS pxr "
"ON pf.InbredSetId=pxr.InbredSetId "
"WHERE (spc.SpeciesName, iset.InbredSetName, pf.Name, pxr.Id) "
f"IN ({paramstr})",
tuple(
itm for sublist in (
(item["species"], item["group"], item["dataset"], item["name"])
for item in params)
for itm in sublist))
return cursor.fetchall()
def link_phenotype_data(
authconn: authdb.DbConnection,
user,
group: Group,
traits: tuple[dict, ...]
) -> dict:
"""Link phenotype traits to a user group."""
if not (authorised_for2(authconn,
user,
system_resource(authconn),
("system:data:link-to-group",))
or
authorised_for2(authconn,
user,
group_resource(authconn, group.group_id),
("group:data:link-to-group",))
):
raise AuthorisationError(
"You do not have sufficient privileges to link data to group "
f"'{group.group_name}'.")
with authdb.cursor(authconn) as cursor:
params = tuple({
"data_link_id": str(uuid.uuid4()),
"group_id": str(group.group_id),
**item
} for item in traits)
cursor.executemany(
"INSERT INTO linked_phenotype_data "
"VALUES ("
":data_link_id, :group_id, :SpeciesId, :InbredSetId, "
":PublishFreezeId, :dataset_name, :dataset_fullname, "
":dataset_shortname, :PublishXRefId"
")",
params)
return {
"description": (
f"Successfully linked {len(traits)} traits to group."),
"group": asdict(group),
"traits": params
}
def unlink_from_resources(
cursor: authdb.DbCursor,
data_link_ids: tuple[uuid.UUID, ...]
) -> tuple[uuid.UUID, ...]:
"""Unlink phenotypes from resources."""
# TODO: Delete in batches
cursor.executemany("DELETE FROM phenotype_resources "
"WHERE data_link_id=? RETURNING resource_id",
tuple((str(_id),) for _id in data_link_ids))
return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall())
def delete_resources(
cursor: authdb.DbCursor,
resource_ids: tuple[uuid.UUID, ...]
) -> tuple[uuid.UUID, ...]:
"""Delete the specified phenotype resources."""
# TODO: Delete in batches
cursor.executemany("DELETE FROM resources "
"WHERE resource_id=? RETURNING resource_id",
tuple((str(_id),) for _id in resource_ids))
return tuple(uuid.UUID(row["resource_id"]) for row in cursor.fetchall())
def fetch_data_link_ids(
cursor: authdb.DbCursor,
species_id: int,
population_id: int,
dataset_id: int,
xref_ids: tuple[int, ...]
) -> tuple[uuid.UUID, ...]:
"""Fetch `data_link_id` values for phenotypes."""
paramstr = ", ".join(["(?, ?, ?, ?)"] * len(xref_ids))
cursor.execute(
"SELECT data_link_id FROM linked_phenotype_data "
"WHERE (SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId) IN "
f"({paramstr})",
tuple(str(field) for arow in
((species_id, population_id, dataset_id, xref_id)
for xref_id in xref_ids)
for field in arow))
return tuple(uuid.UUID(row["data_link_id"]) for row in cursor.fetchall())
def fetch_resource_id(cursor: authdb.DbCursor,
data_link_ids: tuple[uuid.UUID, ...]) -> uuid.UUID:
"""Retrieve the ID of the resource where the data is linked to.
RAISES: InvalidResourceError in the case where more the data_link_ids belong
to more than one resource."""
_paramstr = ", ".join(["?"] * len(data_link_ids))
cursor.execute(
"SELECT DISTINCT(resource_id) FROM phenotype_resources "
f"WHERE data_link_id IN ({_paramstr})",
tuple(str(_id) for _id in data_link_ids))
_ids = tuple(uuid.UUID(row['resource_id']) for row in cursor.fetchall())
if len(_ids) != 1:
raise AuthorisationError(
f"Expected data from 1 resource, got {len(_ids)} resources.")
return _ids[0]
def delete_linked_data(
cursor: authdb.DbCursor,
data_link_ids: tuple[uuid.UUID, ...]
) -> int:
"""Delete the actual linked data."""
# TODO: Delete in batches
cursor.executemany("DELETE FROM linked_phenotype_data "
"WHERE data_link_id=?",
tuple((str(_id),) for _id in data_link_ids))
return cursor.rowcount
@phenosbp.route("/<int:species_id>/<int:population_id>/<int:dataset_id>/delete",
methods=["POST"])
@require_json
def delete_linked_phenotypes_data(
species_id: int,
population_id: int,
dataset_id: int
) -> Response:
"""Delete the linked phenotypes data from the database."""
db_uri = app.config["AUTH_DB"]
with (require_oauth.acquire("profile group resource") as _token,
authdb.connection(db_uri) as auth_conn,
authdb.cursor(auth_conn) as cursor):
# - Does user have DELETE privilege on system (i.e. is data curator)?
# YES: go ahead and delete data as below.
# - Does user have DELETE privilege on resource(s)?
# YES: Delete phenotypes by resource, checking privileges for each
# resource.
# - Neither: Raise `AuthorisationError` and bail!
_deleted = 0
xref_ids = tuple(request.json.get("xref_ids", []))#type: ignore[union-attr]
if len(xref_ids) > 0:
# TODO: Use background job, for huge number of xref_ids
data_link_ids = fetch_data_link_ids(
cursor, species_id, population_id, dataset_id, xref_ids)
resource_id = fetch_resource_id(cursor, data_link_ids)
if not (authorised_for_spec(
auth_conn,
_token.user.user_id,
resource_id,
"(OR group:resource:delete-resource system:resource:delete)")
or
authorised_for_spec(
auth_conn,
_token.user.user_id,
system_resource(auth_conn).resource_id,
"(AND system:system-wide:data:delete)")):
raise AuthorisationError(
"You are not allowed to delete this resource's data.")
_resources_ids = unlink_from_resources(cursor, data_link_ids)
delete_resources(cursor, _resources_ids)
_deleted = delete_linked_data(cursor, data_link_ids)
return jsonify({
# TODO: "status": "sent-to-background"/"completed"/"failed"
# TODO: "status-url": <status-check-uri>
"requested": len(xref_ids),
"deleted": _deleted
})
|