aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/assign_data_to_default_admin.py (renamed from scripts/migrate_existing_data.py)69
-rw-r--r--scripts/batch_assign_data_to_default_admin.py87
-rw-r--r--scripts/link_inbredsets.py3
3 files changed, 138 insertions, 21 deletions
diff --git a/scripts/migrate_existing_data.py b/scripts/assign_data_to_default_admin.py
index 336ce72..0ae209a 100644
--- a/scripts/migrate_existing_data.py
+++ b/scripts/assign_data_to_default_admin.py
@@ -1,11 +1,12 @@
"""
-Migrate existing data that is not assigned to any group to the default sys-admin
-group for accessibility purposes.
+Assign any existing data (that is not currently assigned to any group) to the
+default sys-admin group for accessibility purposes.
"""
import sys
import json
import time
import random
+import logging
from pathlib import Path
from uuid import UUID, uuid4
@@ -24,9 +25,11 @@ from gn_auth.auth.authorisation.resources.groups.models import (
from gn_auth.auth.authorisation.resources.models import (
Resource, ResourceCategory, __assign_resource_owner_role__)
+
class DataNotFound(Exception):
"""Raise if no admin user exists."""
+
def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
"""Retrieve all the existing system admins."""
with authdb.cursor(conn) as cursor:
@@ -38,6 +41,7 @@ def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall())
return tuple()
+
def choose_admin(enum_admins: dict[int, User]) -> int:
"""Prompt and read user choice."""
while True:
@@ -54,6 +58,7 @@ def choose_admin(enum_admins: dict[int, User]) -> int:
sys.exit(0)
print(f"\nERROR: Invalid choice '{choice}'!")
+
def select_sys_admin(admins: tuple[User, ...]) -> User:
"""Pick one admin out of list."""
if len(admins) > 0:
@@ -67,6 +72,7 @@ def select_sys_admin(admins: tuple[User, ...]) -> User:
raise DataNotFound(
"No administrator user found. Create an administrator user first.")
+
def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
"""Retrieve the admin's user group. If none exist, create one."""
with authdb.cursor(conn) as cursor:
@@ -114,6 +120,7 @@ def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
cursor, admin, UUID(grp_res["resource_id"]), "group-leader")
return new_group
+
def __resource_category_by_key__(
cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
"""Retrieve a resource category by its ID."""
@@ -128,6 +135,7 @@ def __resource_category_by_key__(
row["resource_category_key"],
row["resource_category_description"])
+
def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
"""Create default resources."""
resources = tuple(Resource(
@@ -147,6 +155,7 @@ def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
} for res in resources))
return resources
+
def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
Resource, ...]:
"""Create default resources, or return them if they exist."""
@@ -175,10 +184,12 @@ def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
tuple()
) for row in rows)
+
def delay():
"""Delay a while: anything from 2 seconds to 15 seconds."""
time.sleep(random.choice(range(2,16)))
+
def __assigned_mrna__(authconn):
"""Retrieve assigned mRNA items."""
with authdb.cursor(authconn) as cursor:
@@ -189,6 +200,7 @@ def __assigned_mrna__(authconn):
(row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
row["ProbeSetFreezeId"]) for row in cursor.fetchall())
+
def __unassigned_mrna__(bioconn, assigned):
"""Retrieve unassigned mRNA data items."""
query = (
@@ -198,11 +210,12 @@ def __unassigned_mrna__(bioconn, assigned):
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
- "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+ "ON pf.ProbeFreezeId=psf.ProbeFreezeId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -210,6 +223,7 @@ def __unassigned_mrna__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_mrna__(authconn, bioconn, resource, group):
"Assign any unassigned mRNA data to resource."
while True:
@@ -238,6 +252,7 @@ def __assign_mrna__(authconn, bioconn, resource, group):
print(f"-> mRNA: Linked {len(unassigned)}")
delay()
+
def __assigned_geno__(authconn):
"""Retrieve assigned genotype data."""
with authdb.cursor(authconn) as cursor:
@@ -256,11 +271,12 @@ def __unassigned_geno__(bioconn, assigned):
"gf.ShortName AS dataset_shortname "
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
- "ON iset.InbredSetId=gf.InbredSetId ")
+ "ON iset.InbredSetId=gf.InbredSetId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, gf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -268,6 +284,7 @@ def __unassigned_geno__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_geno__(authconn, bioconn, resource, group):
"Assign any unassigned Genotype data to resource."
while True:
@@ -296,6 +313,7 @@ def __assign_geno__(authconn, bioconn, resource, group):
print(f"-> Genotype: Linked {len(unassigned)}")
delay()
+
def __assigned_pheno__(authconn):
"""Retrieve assigned phenotype data."""
with authdb.cursor(authconn) as cursor:
@@ -306,25 +324,27 @@ def __assigned_pheno__(authconn):
row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
row["PublishXRefId"]) for row in cursor.fetchall())
+
def __unassigned_pheno__(bioconn, assigned):
"""Retrieve all unassigned Phenotype data."""
query = (
- "SELECT spc.SpeciesId, iset.InbredSetId, "
- "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
- "pf.FullName AS dataset_fullname, "
- "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
- "FROM "
- "Species AS spc "
- "INNER JOIN InbredSet AS iset "
- "ON spc.SpeciesId=iset.SpeciesId "
- "INNER JOIN PublishFreeze AS pf "
- "ON iset.InbredSetId=pf.InbredSetId "
- "INNER JOIN PublishXRef AS pxr "
- "ON pf.InbredSetId=pxr.InbredSetId ")
+ "SELECT spc.SpeciesId, iset.InbredSetId, "
+ "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+ "pf.FullName AS dataset_fullname, "
+ "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+ "FROM "
+ "Species AS spc "
+ "INNER JOIN InbredSet AS iset "
+ "ON spc.SpeciesId=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON pf.InbredSetId=pxr.InbredSetId "
+ "WHERE spc.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+ "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -332,6 +352,7 @@ def __unassigned_pheno__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_pheno__(authconn, bioconn, resource, group):
"""Assign any unassigned Phenotype data to resource."""
while True:
@@ -360,6 +381,7 @@ def __assign_pheno__(authconn, bioconn, resource, group):
print(f"-> Phenotype: Linked {len(unassigned)}")
delay()
+
def assign_data_to_resource(
authconn, bioconn, resource: Resource, group: Group):
"""Assign existing data, not linked to any group to the resource."""
@@ -371,6 +393,7 @@ def assign_data_to_resource(
return assigner_fns[resource.resource_category.resource_category_key](
authconn, bioconn, resource, group)
+
def entry(authdbpath, mysqldburi):
"""Entry-point for data migration."""
if not Path(authdbpath).exists():
@@ -394,12 +417,18 @@ def entry(authdbpath, mysqldburi):
print(dnf.args[0], file=sys.stderr)
sys.exit(1)
+
@click.command()
@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
-def run(authdbpath, mysqldburi):
+@click.option("--loglevel", default="WARNING", show_default=True,
+ type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+def run(authdbpath, mysqldburi, loglevel):
"""Setup command-line arguments."""
+ globallogger = logging.getLogger()
+ globallogger.setLevel(loglevel)
entry(authdbpath, mysqldburi)
+
if __name__ == "__main__":
run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/batch_assign_data_to_default_admin.py b/scripts/batch_assign_data_to_default_admin.py
new file mode 100644
index 0000000..3df123d
--- /dev/null
+++ b/scripts/batch_assign_data_to_default_admin.py
@@ -0,0 +1,87 @@
+"""
+Similar to the 'assign_data_to_default_admin' script but without user
+interaction.
+"""
+import sys
+import logging
+from pathlib import Path
+
+import click
+from pymonad.maybe import Just, Maybe, Nothing
+from pymonad.tools import monad_from_none_or_value
+
+from gn_auth.auth.db import mariadb as biodb
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.resources.groups.models import (
+ Group, db_row_to_group)
+
+from scripts.assign_data_to_default_admin import (
+ default_resources, assign_data_to_resource)
+
+
+def resources_group(conn: authdb.DbConnection) -> Maybe:
+    """
+    Look up the group that owns the 'mRNA-euhrin' resource.
+
+    The result is a `Maybe`: `Nothing` when the query yields no row,
+    otherwise the first row handed on to `db_row_to_group`.
+    """
+    query = (
+        "SELECT g.* FROM resources AS r "
+        "INNER JOIN resource_ownership AS ro "
+        "ON r.resource_id=ro.resource_id "
+        "INNER JOIN groups AS g ON ro.group_id=g.group_id "
+        "WHERE resource_name='mRNA-euhrin'")
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(query)
+        # Only the first matching row is considered.
+        group_row = cursor.fetchone()
+        maybe_row = monad_from_none_or_value(Nothing, Just, group_row)
+        return maybe_row.then(db_row_to_group)
+
+
+def resource_owner(conn: authdb.DbConnection) -> Maybe:
+    """
+    Look up a user holding the 'resource-owner' role on 'mRNA-euhrin'.
+
+    The result is a `Maybe`: `Nothing` when the query yields no row,
+    otherwise the first row handed on to `User.from_sqlite3_row`.
+    """
+    query = (
+        "SELECT u.* FROM users AS u WHERE u.user_id IN "
+        "(SELECT ur.user_id FROM resources AS rsc "
+        "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id "
+        "INNER JOIN roles AS r on ur.role_id=r.role_id "
+        "WHERE resource_name='mRNA-euhrin' "
+        "AND r.role_name='resource-owner')")
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(query)
+        # Only the first matching row is considered.
+        owner_row = cursor.fetchone()
+        maybe_row = monad_from_none_or_value(Nothing, Just, owner_row)
+        return maybe_row.then(User.from_sqlite3_row)
+
+
+def assign_data(authconn: authdb.DbConnection, bioconn, group: Group) -> int:
+    """
+    Assign unassigned data to each of the default resources of `group`.
+
+    Iterates over `default_resources(authconn, group)` and hands every
+    resource to `assign_data_to_resource`.
+
+    Returns an exit status: 0 when every resource was processed without an
+    exception, 1 when any step raised (the traceback is logged).
+    """
+    try:
+        for resource in default_resources(authconn, group):
+            assign_data_to_resource(authconn, bioconn, resource, group)
+    except Exception:# pylint: disable=[broad-except]
+        # Script boundary: log the traceback and report the failure through
+        # the exit status instead of crashing.
+        logging.error("Failed to assign some data!", exc_info=True)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    @click.command()
+    @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+    @click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+    @click.option("--loglevel",
+                  default="WARNING",
+                  show_default=True,
+                  type=click.Choice([
+                      "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+    def run(authdbpath, mysqldburi, loglevel):
+        """Script entry point."""
+        _logger = logging.getLogger()
+        _logger.setLevel(loglevel)
+        if not Path(authdbpath).exists():
+            logging.error("There is no such SQLite3 database file.")
+            sys.exit(1)
+
+        with (authdb.connection(authdbpath) as authconn,
+              biodb.database_connection(mysqldburi) as bioconn):
+            # When no owning group is found (`Nothing`) the status is 1.
+            status = resources_group(authconn).maybe(
+                1,
+                lambda group: assign_data(authconn, bioconn, group))
+
+        # Exit explicitly, after the connections are released: in click's
+        # default standalone mode the value returned by a command callback
+        # is discarded and the process exits with status 0, so merely
+        # returning `status` would report every failure as success.
+        sys.exit(status)
+
+    run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/link_inbredsets.py b/scripts/link_inbredsets.py
index ac9fa2b..5db7ea8 100644
--- a/scripts/link_inbredsets.py
+++ b/scripts/link_inbredsets.py
@@ -11,7 +11,8 @@ import gn_auth.auth.db.sqlite3 as authdb
from gn_auth.auth.db import mariadb as biodb
-from scripts.migrate_existing_data import sys_admins, admin_group, select_sys_admin
+from scripts.assign_data_to_default_admin import (
+ sys_admins, admin_group, select_sys_admin)
def linked_inbredsets(conn):
"""Fetch all inbredset groups that are linked to the auth system."""