aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2023-08-07 08:40:50 +0300
committerFrederick Muriuki Muriithi2023-08-07 09:26:13 +0300
commita112c99cae0c5422a38e3a35e843a82db764316a (patch)
treef0823fd1fc4d8ec52dc0a12b8d987dd4325b043b /scripts
parent6d9c61dc0072b96b12153e64940b465306f25bfb (diff)
downloadgn-auth-a112c99cae0c5422a38e3a35e843a82db764316a.tar.gz
Add missing modules
Copy over missing modules and functions to completely disconnect gn-auth from GN3.
Diffstat (limited to 'scripts')
-rw-r--r--scripts/__init__.py0
-rw-r--r--scripts/migrate_existing_data.py381
-rw-r--r--scripts/register_sys_admin.py81
3 files changed, 462 insertions, 0 deletions
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/scripts/__init__.py
diff --git a/scripts/migrate_existing_data.py b/scripts/migrate_existing_data.py
new file mode 100644
index 0000000..4452f41
--- /dev/null
+++ b/scripts/migrate_existing_data.py
@@ -0,0 +1,381 @@
+"""
+Migrate existing data that is not assigned to any group to the default sys-admin
+group for accessibility purposes.
+"""
+import sys
+import json
+import time
+import random
+from pathlib import Path
+from uuid import UUID, uuid4
+
+import click
+from MySQLdb.cursors import DictCursor
+
+from gn_auth.auth.db import mariadb as biodb
+
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.groups.models import Group, save_group
+from gn_auth.auth.authorisation.roles.models import (
+ revoke_user_role_by_name, assign_user_role_by_name)
+from gn_auth.auth.authorisation.resources.models import (
+ Resource, ResourceCategory, __assign_resource_owner_role__)
+
+class DataNotFound(Exception):
+ """Raise if no admin user exists."""
+
+def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
+ """Retrieve all the existing system admins."""
+ with authdb.cursor(conn) as cursor:
+ cursor.execute(
+ "SELECT u.* FROM users AS u "
+ "INNER JOIN user_roles AS ur ON u.user_id=ur.user_id "
+ "INNER JOIN roles AS r ON ur.role_id=r.role_id "
+ "WHERE r.role_name='system-administrator'")
+ return tuple(User(UUID(row["user_id"]), row["email"], row["name"])
+ for row in cursor.fetchall())
+ return tuple()
+
+def choose_admin(enum_admins: dict[int, User]) -> int:
+ """Prompt and read user choice."""
+ while True:
+ try:
+ print("\n===========================\n")
+ print("We found the following system administrators:")
+ for idx, admin in enum_admins.items():
+ print(f"\t{idx}: {admin.name} ({admin.email})")
+ choice = input(f"Choose [1 .. {len(enum_admins)}]: ")
+ return int(choice)
+ except ValueError as _verr:
+ if choice.lower() == "quit":
+ print("Goodbye!")
+ sys.exit(0)
+ print(f"\nERROR: Invalid choice '{choice}'!")
+
+def select_sys_admin(admins: tuple[User, ...]) -> User:
+ """Pick one admin out of list."""
+ if len(admins) > 0:
+ if len(admins) == 1:
+ print(f"-> Found Admin: {admins[0].name} ({admins[0].email})")
+ return admins[0]
+ enum_admins = dict(enumerate(admins, start=1))
+ chosen = enum_admins[choose_admin(enum_admins)]
+ print(f"-> Chosen Admin: {chosen.name} ({chosen.email})")
+ return chosen
+ raise DataNotFound(
+ "No administrator user found. Create an administrator user first.")
+
+def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
+ """Retrieve the admin's user group. If none exist, create one."""
+ with authdb.cursor(conn) as cursor:
+ cursor.execute(
+ "SELECT g.* FROM users AS u "
+ "INNER JOIN group_users AS gu ON u.user_id=gu.user_id "
+ "INNER JOIN groups AS g on gu.group_id=g.group_id "
+ "WHERE u.user_id = ?",
+ (str(admin.user_id),))
+ row = cursor.fetchone()
+ if row:
+ return Group(UUID(row["group_id"]),
+ row["group_name"],
+ json.loads(row["group_metadata"]))
+ new_group = save_group(cursor, "AutoAdminGroup", {
+ "group_description": (
+ "Created by script for existing data visibility. "
+ "Existing data was migrated into this group and assigned "
+ "to publicly visible resources according to type.")
+ })
+ cursor.execute("INSERT INTO group_users VALUES (?, ?)",
+ (str(new_group.group_id), str(admin.user_id)))
+ revoke_user_role_by_name(cursor, admin, "group-creator")
+ assign_user_role_by_name(cursor, admin, "group-leader")
+ return new_group
+
+def __resource_category_by_key__(
+ cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
+ """Retrieve a resource category by its ID."""
+ cursor.execute(
+ "SELECT * FROM resource_categories WHERE resource_category_key = ?",
+ (category_key,))
+ row = cursor.fetchone()
+ if not bool(row):
+ raise DataNotFound(
+ f"Could not find resource category with key {category_key}")
+ return ResourceCategory(UUID(row["resource_category_id"]),
+ row["resource_category_key"],
+ row["resource_category_description"])
+
+def __create_resources__(cursor: authdb.DbCursor, group: Group) -> tuple[
+ Resource, ...]:
+ """Create default resources."""
+ resources = tuple(Resource(
+ group, uuid4(), name, __resource_category_by_key__(cursor, catkey),
+ True, tuple()
+ ) for name, catkey in (
+ ("mRNA-euhrin", "mrna"),
+ ("pheno-xboecp", "phenotype"),
+ ("geno-welphd", "genotype")))
+ cursor.executemany(
+ "INSERT INTO resources VALUES (:gid, :rid, :rname, :rcid, :pub)",
+ tuple({
+ "gid": str(group.group_id),
+ "rid": str(res.resource_id),
+ "rname": res.resource_name,
+ "rcid": str(res.resource_category.resource_category_id),
+ "pub": 1
+ } for res in resources))
+ return resources
+
+def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
+ Resource, ...]:
+ """Create default resources, or return them if they exist."""
+ with authdb.cursor(conn) as cursor:
+ cursor.execute(
+ "SELECT r.resource_id, r.resource_name, r.public, rc.* "
+ "FROM resources AS r INNER JOIN resource_categories AS rc "
+ "ON r.resource_category_id=rc.resource_category_id "
+ "WHERE r.group_id=? AND r.resource_name IN "
+ "('mRNA-euhrin', 'pheno-xboecp', 'geno-welphd')",
+ (str(group.group_id),))
+ rows = cursor.fetchall()
+ if len(rows) == 0:
+ return __create_resources__(cursor, group)
+
+ return tuple(Resource(
+ group,
+ UUID(row["resource_id"]),
+ row["resource_name"],
+ ResourceCategory(
+ UUID(row["resource_category_id"]),
+ row["resource_category_key"],
+ row["resource_category_description"]),
+ bool(row["public"]),
+ tuple()
+ ) for row in rows)
+
+def delay():
+ """Delay a while: anything from 2 seconds to 15 seconds."""
+ time.sleep(random.choice(range(2,16)))
+
+def __assigned_mrna__(authconn):
+ """Retrieve assigned mRNA items."""
+ with authdb.cursor(authconn) as cursor:
+ cursor.execute(
+ "SELECT SpeciesId, InbredSetId, ProbeFreezeId, ProbeSetFreezeId "
+ "FROM linked_mrna_data")
+ return tuple(
+ (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
+ row["ProbeSetFreezeId"]) for row in cursor.fetchall())
+
+def __unassigned_mrna__(bioconn, assigned):
+ """Retrieve unassigned mRNA data items."""
+ query = (
+ "SELECT s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, "
+ "psf.Id AS ProbeSetFreezeId, psf.Name AS dataset_name, "
+ "psf.FullName AS dataset_fullname, psf.ShortName AS dataset_shortname "
+ "FROM Species AS s INNER JOIN InbredSet AS iset "
+ "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
+ "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+ if len(assigned) > 0:
+ paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+ query = query + (
+ "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+ f"NOT IN ({paramstr}) ")
+
+ query = query + "LIMIT 100000"
+ with bioconn.cursor(DictCursor) as cursor:
+ cursor.execute(query, tuple(item for row in assigned for item in row))
+ return (row for row in cursor.fetchall())
+
+def __assign_mrna__(authconn, bioconn, resource):
+ "Assign any unassigned mRNA data to resource."
+ while True:
+ unassigned = tuple({
+ "data_link_id": str(uuid4()),
+ "group_id": str(resource.group.group_id),
+ "resource_id": str(resource.resource_id),
+ **row
+ } for row in __unassigned_mrna__(
+ bioconn, __assigned_mrna__(authconn)))
+
+ if len(unassigned) <= 0:
+ print("-> mRNA: Completed!")
+ break
+ with authdb.cursor(authconn) as cursor:
+ cursor.executemany(
+ "INSERT INTO linked_mrna_data VALUES "
+ "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+ ":ProbeFreezeId, :ProbeSetFreezeId, :dataset_name, "
+ ":dataset_fullname, :dataset_shortname)",
+ unassigned)
+ cursor.executemany(
+ "INSERT INTO mrna_resources VALUES "
+ "(:group_id, :resource_id, :data_link_id)",
+ unassigned)
+ print(f"-> mRNA: Linked {len(unassigned)}")
+ delay()
+
+def __assigned_geno__(authconn):
+ """Retrieve assigned genotype data."""
+ with authdb.cursor(authconn) as cursor:
+ cursor.execute(
+ "SELECT SpeciesId, InbredSetId, GenoFreezeId "
+ "FROM linked_genotype_data")
+ return tuple((row["SpeciesId"], row["InbredSetId"], row["GenoFreezeId"])
+ for row in cursor.fetchall())
+
+def __unassigned_geno__(bioconn, assigned):
+ """Fetch unassigned genotype data."""
+ query = (
+ "SELECT s.SpeciesId, iset.InbredSetId, iset.InbredSetName, "
+ "gf.Id AS GenoFreezeId, gf.Name AS dataset_name, "
+ "gf.FullName AS dataset_fullname, "
+ "gf.ShortName AS dataset_shortname "
+ "FROM Species AS s INNER JOIN InbredSet AS iset "
+ "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
+ "ON iset.InbredSetId=gf.InbredSetId ")
+ if len(assigned) > 0:
+ paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
+ query = query + (
+ "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+ f"NOT IN ({paramstr}) ")
+
+ query = query + "LIMIT 100000"
+ with bioconn.cursor(DictCursor) as cursor:
+ cursor.execute(query, tuple(item for row in assigned for item in row))
+ return (row for row in cursor.fetchall())
+
+def __assign_geno__(authconn, bioconn, resource):
+ "Assign any unassigned Genotype data to resource."
+ while True:
+ unassigned = tuple({
+ "data_link_id": str(uuid4()),
+ "group_id": str(resource.group.group_id),
+ "resource_id": str(resource.resource_id),
+ **row
+ } for row in __unassigned_geno__(
+ bioconn, __assigned_geno__(authconn)))
+
+ if len(unassigned) <= 0:
+ print("-> Genotype: Completed!")
+ break
+ with authdb.cursor(authconn) as cursor:
+ cursor.executemany(
+ "INSERT INTO linked_genotype_data VALUES "
+ "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+ ":GenoFreezeId, :dataset_name, :dataset_fullname, "
+ ":dataset_shortname)",
+ unassigned)
+ cursor.executemany(
+ "INSERT INTO genotype_resources VALUES "
+ "(:group_id, :resource_id, :data_link_id)",
+ unassigned)
+ print(f"-> Genotype: Linked {len(unassigned)}")
+ delay()
+
+def __assigned_pheno__(authconn):
+ """Retrieve assigned phenotype data."""
+ with authdb.cursor(authconn) as cursor:
+ cursor.execute(
+ "SELECT SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId "
+ "FROM linked_phenotype_data")
+ return tuple((
+ row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
+ row["PublishXRefId"]) for row in cursor.fetchall())
+
+def __unassigned_pheno__(bioconn, assigned):
+ """Retrieve all unassigned Phenotype data."""
+ query = (
+ "SELECT spc.SpeciesId, iset.InbredSetId, "
+ "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+ "pf.FullName AS dataset_fullname, "
+ "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+ "FROM "
+ "Species AS spc "
+ "INNER JOIN InbredSet AS iset "
+ "ON spc.SpeciesId=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON pf.InbredSetId=pxr.InbredSetId ")
+ if len(assigned) > 0:
+ paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+ query = query + (
+ "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+ f"NOT IN ({paramstr}) ")
+
+ query = query + "LIMIT 100000"
+ with bioconn.cursor(DictCursor) as cursor:
+ cursor.execute(query, tuple(item for row in assigned for item in row))
+ return (row for row in cursor.fetchall())
+
+def __assign_pheno__(authconn, bioconn, resource):
+ """Assign any unassigned Phenotype data to resource."""
+ while True:
+ unassigned = tuple({
+ "data_link_id": str(uuid4()),
+ "group_id": str(resource.group.group_id),
+ "resource_id": str(resource.resource_id),
+ **row
+ } for row in __unassigned_pheno__(
+ bioconn, __assigned_pheno__(authconn)))
+
+ if len(unassigned) <= 0:
+ print("-> Phenotype: Completed!")
+ break
+ with authdb.cursor(authconn) as cursor:
+ cursor.executemany(
+ "INSERT INTO linked_phenotype_data VALUES "
+ "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+ ":PublishFreezeId, :dataset_name, :dataset_fullname, "
+ ":dataset_shortname, :PublishXRefId)",
+ unassigned)
+ cursor.executemany(
+ "INSERT INTO phenotype_resources VALUES "
+ "(:group_id, :resource_id, :data_link_id)",
+ unassigned)
+ print(f"-> Phenotype: Linked {len(unassigned)}")
+ delay()
+
+def assign_data_to_resource(authconn, bioconn, resource: Resource):
+ """Assign existing data, not linked to any group to the resource."""
+ assigner_fns = {
+ "mrna": __assign_mrna__,
+ "genotype": __assign_geno__,
+ "phenotype": __assign_pheno__
+ }
+ return assigner_fns[resource.resource_category.resource_category_key](
+ authconn, bioconn, resource)
+
+def entry(authdbpath, mysqldburi):
+ """Entry-point for data migration."""
+ if not Path(authdbpath).exists():
+ print(
+ f"ERROR: Auth db file `{authdbpath}` does not exist.",
+ file=sys.stderr)
+ sys.exit(2)
+ try:
+ with (authdb.connection(authdbpath) as authconn,
+ biodb.database_connection(mysqldburi) as bioconn):
+ admin = select_sys_admin(sys_admins(authconn))
+ resources = default_resources(
+ authconn, admin_group(authconn, admin))
+ for resource in resources:
+ assign_data_to_resource(authconn, bioconn, resource)
+ with authdb.cursor(authconn) as cursor:
+ __assign_resource_owner_role__(cursor, resource, admin)
+ except DataNotFound as dnf:
+ print(dnf.args[0], file=sys.stderr)
+ sys.exit(1)
+
+@click.command()
+@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+def run(authdbpath, mysqldburi):
+ """Setup command-line arguments."""
+ entry(authdbpath, mysqldburi)
+
+if __name__ == "__main__":
+ run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py
new file mode 100644
index 0000000..7d4d667
--- /dev/null
+++ b/scripts/register_sys_admin.py
@@ -0,0 +1,81 @@
+"""Script to register and mark a user account as sysadmin."""
+import sys
+import uuid
+import getpass
+from pathlib import Path
+
+import click
+from email_validator import validate_email, EmailNotValidError
+
+from gn_auth.auth.db import sqlite3 as db
+from gn_auth.auth.authentication.users import hash_password
+
+def fetch_email() -> str:
+ """Prompt user for email."""
+ while True:
+ try:
+ user_input = input("Enter the administrator's email: ")
+ email = validate_email(user_input.strip(), check_deliverability=True)
+ return email["email"]
+ except EmailNotValidError as _enve:
+ print("You did not provide a valid email address. Try again...",
+ file=sys.stderr)
+
+def fetch_password() -> str:
+ """Prompt user for password."""
+ while True:
+ passwd = getpass.getpass(prompt="Enter password: ").strip()
+ passwd2 = getpass.getpass(prompt="Confirm password: ").strip()
+ if passwd != "" and passwd == passwd2:
+ return passwd
+ if passwd == "":
+ print("Empty password not accepted", file=sys.stderr)
+ continue
+ if passwd != passwd2:
+ print("Passwords *MUST* match", file=sys.stderr)
+ continue
+
+def fetch_name() -> str:
+ """Prompt user for name"""
+ while True:
+ name = input("Enter the user's name: ").strip()
+ if name == "":
+ print("Invalid name.")
+ continue
+ return name
+
+def save_admin(conn: db.DbConnection, name: str, email: str, passwd: str):
+ """Save the details to the database and assign the new user as admin."""
+ admin_id = uuid.uuid4()
+ admin = {
+ "user_id": str(admin_id),
+ "email": email,
+ "name": name,
+ "hash": hash_password(passwd)
+ }
+ with db.cursor(conn) as cursor:
+ cursor.execute("INSERT INTO users VALUES (:user_id, :email, :name)",
+ admin)
+ cursor.execute("INSERT INTO user_credentials VALUES (:user_id, :hash)",
+ admin)
+ cursor.execute(
+ "SELECT * FROM roles WHERE role_name='system-administrator'")
+ admin_role = cursor.fetchone()
+ cursor.execute("INSERT INTO user_roles VALUES (:user_id, :role_id)",
+ {**admin, "role_id": admin_role["role_id"]})
+ return 0
+
+def register_admin(authdbpath: Path):
+ """Register a user as a system admin."""
+ assert authdbpath.exists(), "Could not find database file."
+ with db.connection(str(authdbpath)) as conn:
+ return save_admin(conn, fetch_name(), fetch_email(), fetch_password())
+
+if __name__ == "__main__":
+ @click.command()
+ @click.argument("authdbpath")
+ def run(authdbpath):
+ """Entry-point for when script is run directly"""
+ return register_admin(Path(authdbpath).absolute())
+
+ run()# pylint: disable=[no-value-for-parameter]