From 4324f2432390392c8022beab480d8bc911682d1f Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 21 May 2026 13:43:51 -0500 Subject: Move scripts to top-level gn_auth package. In preparation for migrating to pyproject.toml (from setup.py and friends) we need to have only one top-level package. This will also help in improving testing and checks down the line, since everything will be relative to one single top-level directory. --- README.md | 2 +- gn_auth/auth/authorisation/data/views.py | 2 +- gn_auth/scripts/__init__.py | 1 + gn_auth/scripts/assign_data_to_default_admin.py | 434 +++++++++++++++++++++ .../scripts/batch_assign_data_to_default_admin.py | 86 ++++ gn_auth/scripts/link_inbredsets.py | 122 ++++++ gn_auth/scripts/register_sys_admin.py | 68 ++++ gn_auth/scripts/search_phenotypes.py | 125 ++++++ gn_auth/scripts/worker.py | 83 ++++ gn_auth/wsgi.py | 3 +- scripts/__init__.py | 0 scripts/assign_data_to_default_admin.py | 434 --------------------- scripts/batch_assign_data_to_default_admin.py | 87 ----- scripts/link_inbredsets.py | 122 ------ scripts/register_sys_admin.py | 68 ---- scripts/search_phenotypes.py | 125 ------ scripts/worker.py | 83 ---- 17 files changed, 922 insertions(+), 923 deletions(-) create mode 100644 gn_auth/scripts/__init__.py create mode 100644 gn_auth/scripts/assign_data_to_default_admin.py create mode 100644 gn_auth/scripts/batch_assign_data_to_default_admin.py create mode 100644 gn_auth/scripts/link_inbredsets.py create mode 100644 gn_auth/scripts/register_sys_admin.py create mode 100644 gn_auth/scripts/search_phenotypes.py create mode 100644 gn_auth/scripts/worker.py delete mode 100644 scripts/__init__.py delete mode 100644 scripts/assign_data_to_default_admin.py delete mode 100644 scripts/batch_assign_data_to_default_admin.py delete mode 100644 scripts/link_inbredsets.py delete mode 100644 scripts/register_sys_admin.py delete mode 100644 scripts/search_phenotypes.py delete mode 100644 scripts/worker.py diff --git a/README.md b/README.md index 963b5c5..07937eb 100644 --- a/README.md +++ b/README.md @@ -268,7 +268,7 @@ The checks we do are ### Linting ```bash -pylint *py tests gn_auth scripts +pylint *py tests gn_auth ``` ### Type-Checking diff --git a/gn_auth/auth/authorisation/data/views.py b/gn_auth/auth/authorisation/data/views.py index ef3d119..fc1849f 100644 --- a/gn_auth/auth/authorisation/data/views.py +++ b/gn_auth/auth/authorisation/data/views.py @@ -244,7 +244,7 @@ def __search_phenotypes__(): job_id = uuid.uuid4() selected = __request_key__("selected_traits", []) command =[ - sys.executable, "-m", "scripts.search_phenotypes", + sys.executable, "-m", "gn_auth.scripts.search_phenotypes", __request_key__("species_name"), __request_key__("query"), str(job_id), diff --git a/gn_auth/scripts/__init__.py b/gn_auth/scripts/__init__.py new file mode 100644 index 0000000..5be56d8 --- /dev/null +++ b/gn_auth/scripts/__init__.py @@ -0,0 +1 @@ +"""These are command-line scripts to be run manually or in the background.""" diff --git a/gn_auth/scripts/assign_data_to_default_admin.py b/gn_auth/scripts/assign_data_to_default_admin.py new file mode 100644 index 0000000..69fc50c --- /dev/null +++ b/gn_auth/scripts/assign_data_to_default_admin.py @@ -0,0 +1,434 @@ +""" +Assign any existing data (that is not currently assigned to any group) to the +default sys-admin group for accessibility purposes. +""" +import sys +import json +import time +import random +import logging +from pathlib import Path +from uuid import UUID, uuid4 + +import click +from gn_libs import mysqldb as biodb +from MySQLdb.cursors import DictCursor + +import gn_auth.auth.db.sqlite3 as authdb +from gn_auth.auth.authentication.users import User +from gn_auth.auth.authorisation.roles.models import ( + revoke_user_role_by_name, assign_user_role_by_name) + +from gn_auth.auth.authorisation.resources.groups.models import ( + Group, save_group, add_resources_to_group) +from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role +from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory + + +class DataNotFound(Exception): + """Raise if no admin user exists.""" + + +def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]: + """Retrieve all the existing system admins.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT u.* FROM users AS u " + "INNER JOIN user_roles AS ur ON u.user_id=ur.user_id " + "INNER JOIN roles AS r ON ur.role_id=r.role_id " + "WHERE r.role_name='system-administrator'") + return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall()) + return tuple() + + +def choose_admin(enum_admins: dict[int, User]) -> int: + """Prompt and read user choice.""" + while True: + try: + print("\n===========================\n") + print("We found the following system administrators:") + for idx, admin in enum_admins.items(): + print(f"\t{idx}: {admin.name} ({admin.email})") + choice = input(f"Choose [1 .. {len(enum_admins)}]: ") + return int(choice) + except ValueError as _verr: + if choice.lower() == "quit": + print("Goodbye!") + sys.exit(0) + print(f"\nERROR: Invalid choice '{choice}'!") + + +def select_sys_admin(admins: tuple[User, ...]) -> User: + """Pick one admin out of list.""" + if len(admins) > 0: + if len(admins) == 1: + print(f"-> Found Admin: {admins[0].name} ({admins[0].email})") + return admins[0] + enum_admins = dict(enumerate(admins, start=1)) + chosen = enum_admins[choose_admin(enum_admins)] + print(f"-> Chosen Admin: {chosen.name} ({chosen.email})") + return chosen + raise DataNotFound( + "No administrator user found. Create an administrator user first.") + + +def admin_group(conn: authdb.DbConnection, admin: User) -> Group: + """Retrieve the admin's user group. If none exist, create one.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT g.* FROM users AS u " + "INNER JOIN group_users AS gu ON u.user_id=gu.user_id " + "INNER JOIN groups AS g on gu.group_id=g.group_id " + "WHERE u.user_id = ?", + (str(admin.user_id),)) + row = cursor.fetchone() + if row: + return Group(UUID(row["group_id"]), + row["group_name"], + json.loads(row["group_metadata"])) + new_group = save_group(cursor, "AutoAdminGroup", { + "group_description": ( + "Created by script for existing data visibility. " + "Existing data was migrated into this group and assigned " + "to publicly visible resources according to type.") + }) + + cursor.execute( + "SELECT * FROM resource_categories WHERE " + "resource_category_key='group'") + res_cat_id = cursor.fetchone()["resource_category_id"] + grp_res = { + "group_id": str(new_group.group_id), + "resource_id": str(uuid4()), + "resource_name": new_group.group_name, + "resource_category_id": res_cat_id, + "public": 0 + } + cursor.execute( + "INSERT INTO resources VALUES " + "(:resource_id, :resource_name, :resource_category_id, :public)", + grp_res) + cursor.execute( + "INSERT INTO group_resources(resource_id, group_id) " + "VALUES(:resource_id, :group_id)", + grp_res) + cursor.execute("INSERT INTO group_users VALUES (?, ?)", + (str(new_group.group_id), str(admin.user_id))) + revoke_user_role_by_name(cursor, admin, "group-creator") + assign_user_role_by_name( + cursor, admin, UUID(grp_res["resource_id"]), "group-leader") + return new_group + + +def __resource_category_by_key__( + cursor: authdb.DbCursor, category_key: str) -> ResourceCategory: + """Retrieve a resource category by its ID.""" + cursor.execute( + "SELECT * FROM resource_categories WHERE resource_category_key = ?", + (category_key,)) + row = cursor.fetchone() + if not bool(row): + raise DataNotFound( + f"Could not find resource category with key {category_key}") + return ResourceCategory(UUID(row["resource_category_id"]), + row["resource_category_key"], + row["resource_category_description"]) + + +def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]: + """Create default resources.""" + resources = tuple(Resource( + uuid4(), name, __resource_category_by_key__(cursor, catkey), + True, tuple() + ) for name, catkey in ( + ("mRNA-euhrin", "mrna"), + ("pheno-xboecp", "phenotype"), + ("geno-welphd", "genotype"))) + cursor.executemany( + "INSERT INTO resources VALUES (:rid, :rname, :rcid, :pub)", + tuple({ + "rid": str(res.resource_id), + "rname": res.resource_name, + "rcid": str(res.resource_category.resource_category_id), + "pub": 1 + } for res in resources)) + return resources + + +def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[ + Resource, ...]: + """Create default resources, or return them if they exist.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT r.resource_id, r.resource_name, r.public, rc.* " + "FROM resource_ownership AS ro INNER JOIN resources AS r " + "ON ro.resource_id=r.resource_id " + "INNER JOIN resource_categories AS rc " + "ON r.resource_category_id=rc.resource_category_id " + "WHERE ro.group_id=? AND r.resource_name IN " + "('mRNA-euhrin', 'pheno-xboecp', 'geno-welphd')", + (str(group.group_id),)) + rows = cursor.fetchall() + if len(rows) == 0: + return __create_resources__(cursor) + + return tuple(Resource( + UUID(row["resource_id"]), + row["resource_name"], + ResourceCategory( + UUID(row["resource_category_id"]), + row["resource_category_key"], + row["resource_category_description"]), + bool(row["public"]), + tuple() + ) for row in rows) + + +def delay(): + """Delay a while: anything from 2 seconds to 15 seconds.""" + time.sleep(random.choice(range(2,16))) + + +def __assigned_mrna__(authconn): + """Retrieve assigned mRNA items.""" + with authdb.cursor(authconn) as cursor: + cursor.execute( + "SELECT SpeciesId, InbredSetId, ProbeFreezeId, ProbeSetFreezeId " + "FROM linked_mrna_data") + return tuple( + (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"], + row["ProbeSetFreezeId"]) for row in cursor.fetchall()) + + +def __unassigned_mrna__(bioconn, assigned): + """Retrieve unassigned mRNA data items.""" + query = ( + "SELECT s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, " + "psf.Id AS ProbeSetFreezeId, psf.Name AS dataset_name, " + "psf.FullName AS dataset_fullname, psf.ShortName AS dataset_shortname " + "FROM Species AS s INNER JOIN InbredSet AS iset " + "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf " + "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf " + "ON pf.ProbeFreezeId=psf.ProbeFreezeId " + "WHERE s.Name != 'human' ") + if len(assigned) > 0: + paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) + query = query + ( + "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) " + f"NOT IN ({paramstr}) ") + + query = query + "LIMIT 100000" + with bioconn.cursor(DictCursor) as cursor: + cursor.execute(query, tuple(item for row in assigned for item in row)) + return (row for row in cursor.fetchall()) + + +def __assign_mrna__(authconn, bioconn, resource, group): + "Assign any unassigned mRNA data to resource." + while True: + unassigned = tuple({ + "data_link_id": str(uuid4()), + "group_id": str(group.group_id), + "resource_id": str(resource.resource_id), + **row + } for row in __unassigned_mrna__( + bioconn, __assigned_mrna__(authconn))) + + if len(unassigned) <= 0: + print("-> mRNA: Completed!") + break + with authdb.cursor(authconn) as cursor: + cursor.executemany( + "INSERT INTO linked_mrna_data VALUES " + "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " + ":ProbeFreezeId, :ProbeSetFreezeId, :dataset_name, " + ":dataset_fullname, :dataset_shortname)", + unassigned) + cursor.executemany( + "INSERT INTO mrna_resources VALUES " + "(:resource_id, :data_link_id)", + unassigned) + print(f"-> mRNA: Linked {len(unassigned)}") + delay() + + +def __assigned_geno__(authconn): + """Retrieve assigned genotype data.""" + with authdb.cursor(authconn) as cursor: + cursor.execute( + "SELECT SpeciesId, InbredSetId, GenoFreezeId " + "FROM linked_genotype_data") + return tuple((row["SpeciesId"], row["InbredSetId"], row["GenoFreezeId"]) + for row in cursor.fetchall()) + +def __unassigned_geno__(bioconn, assigned): + """Fetch unassigned genotype data.""" + query = ( + "SELECT s.SpeciesId, iset.InbredSetId, iset.InbredSetName, " + "gf.Id AS GenoFreezeId, gf.Name AS dataset_name, " + "gf.FullName AS dataset_fullname, " + "gf.ShortName AS dataset_shortname " + "FROM Species AS s INNER JOIN InbredSet AS iset " + "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf " + "ON iset.InbredSetId=gf.InbredSetId " + "WHERE s.Name != 'human' ") + if len(assigned) > 0: + paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned)) + query = query + ( + "AND (s.SpeciesId, iset.InbredSetId, gf.Id) " + f"NOT IN ({paramstr}) ") + + query = query + "LIMIT 100000" + with bioconn.cursor(DictCursor) as cursor: + cursor.execute(query, tuple(item for row in assigned for item in row)) + return (row for row in cursor.fetchall()) + + +def __assign_geno__(authconn, bioconn, resource, group): + "Assign any unassigned Genotype data to resource." + while True: + unassigned = tuple({ + "data_link_id": str(uuid4()), + "group_id": str(group.group_id), + "resource_id": str(resource.resource_id), + **row + } for row in __unassigned_geno__( + bioconn, __assigned_geno__(authconn))) + + if len(unassigned) <= 0: + print("-> Genotype: Completed!") + break + with authdb.cursor(authconn) as cursor: + cursor.executemany( + "INSERT INTO linked_genotype_data VALUES " + "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " + ":GenoFreezeId, :dataset_name, :dataset_fullname, " + ":dataset_shortname)", + unassigned) + cursor.executemany( + "INSERT INTO genotype_resources VALUES " + "(:resource_id, :data_link_id)", + unassigned) + print(f"-> Genotype: Linked {len(unassigned)}") + delay() + + +def __assigned_pheno__(authconn): + """Retrieve assigned phenotype data.""" + with authdb.cursor(authconn) as cursor: + cursor.execute( + "SELECT SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId " + "FROM linked_phenotype_data") + return tuple(( + row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"], + row["PublishXRefId"]) for row in cursor.fetchall()) + + +def __unassigned_pheno__(bioconn, assigned): + """Retrieve all unassigned Phenotype data.""" + query = ( + "SELECT spc.SpeciesId, iset.InbredSetId, " + "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, " + "pf.FullName AS dataset_fullname, " + "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " + "FROM " + "Species AS spc " + "INNER JOIN InbredSet AS iset " + "ON spc.SpeciesId=iset.SpeciesId " + "INNER JOIN PublishFreeze AS pf " + "ON iset.InbredSetId=pf.InbredSetId " + "INNER JOIN PublishXRef AS pxr " + "ON pf.InbredSetId=pxr.InbredSetId " + "WHERE spc.Name != 'human' ") + if len(assigned) > 0: + paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) + query = query + ( + "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) " + f"NOT IN ({paramstr}) ") + + query = query + "LIMIT 100000" + with bioconn.cursor(DictCursor) as cursor: + cursor.execute(query, tuple(item for row in assigned for item in row)) + return (row for row in cursor.fetchall()) + + +def __assign_pheno__(authconn, bioconn, resource, group): + """Assign any unassigned Phenotype data to resource.""" + while True: + unassigned = tuple({ + "data_link_id": str(uuid4()), + "group_id": str(group.group_id), + "resource_id": str(resource.resource_id), + **row + } for row in __unassigned_pheno__( + bioconn, __assigned_pheno__(authconn))) + + if len(unassigned) <= 0: + print("-> Phenotype: Completed!") + break + with authdb.cursor(authconn) as cursor: + cursor.executemany( + "INSERT INTO linked_phenotype_data VALUES " + "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " + ":PublishFreezeId, :dataset_name, :dataset_fullname, " + ":dataset_shortname, :PublishXRefId)", + unassigned) + cursor.executemany( + "INSERT INTO phenotype_resources VALUES " + "(:resource_id, :data_link_id)", + unassigned) + print(f"-> Phenotype: Linked {len(unassigned)}") + delay() + + +def assign_data_to_resource( + authconn, bioconn, resource: Resource, group: Group): + """Assign existing data, not linked to any group to the resource.""" + assigner_fns = { + "mrna": __assign_mrna__, + "genotype": __assign_geno__, + "phenotype": __assign_pheno__ + } + return assigner_fns[resource.resource_category.resource_category_key]( + authconn, bioconn, resource, group) + + +def entry(authdbpath, mysqldburi): + """Entry-point for data migration.""" + if not Path(authdbpath).exists(): + print( + f"ERROR: Auth db file `{authdbpath}` does not exist.", + file=sys.stderr) + sys.exit(2) + try: + with (authdb.connection(authdbpath) as authconn, + biodb.database_connection(mysqldburi) as bioconn): + admin = select_sys_admin(sys_admins(authconn)) + the_admin_group = admin_group(authconn, admin) + resources = default_resources(authconn, the_admin_group) + add_resources_to_group(authconn, resources, the_admin_group) + for resource in resources: + assign_data_to_resource( + authconn, bioconn, resource, the_admin_group) + with authdb.cursor(authconn) as cursor: + assign_resource_owner_role( + cursor, resource.resource_id, admin.user_id) + except DataNotFound as dnf: + print(dnf.args[0], file=sys.stderr) + sys.exit(1) + + +@click.command() +@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" +@click.argument("mysqldburi") # "URI to the MySQL database with the biology data" +@click.option("--loglevel", default="WARNING", show_default=True, + type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) +def run(authdbpath, mysqldburi, loglevel): + """Setup command-line arguments.""" + globallogger = logging.getLogger() + globallogger.setLevel(loglevel) + entry(authdbpath, mysqldburi) + + +if __name__ == "__main__": + run() # pylint: disable=[no-value-for-parameter] diff --git a/gn_auth/scripts/batch_assign_data_to_default_admin.py b/gn_auth/scripts/batch_assign_data_to_default_admin.py new file mode 100644 index 0000000..95d9794 --- /dev/null +++ b/gn_auth/scripts/batch_assign_data_to_default_admin.py @@ -0,0 +1,86 @@ +""" +Similar to the 'assign_data_to_default_admin' script but without user +interaction. +""" +import sys +import logging +from pathlib import Path + +import click +from gn_libs import mysqldb as biodb +from pymonad.maybe import Just, Maybe, Nothing +from pymonad.tools import monad_from_none_or_value + +from gn_auth.auth.db import sqlite3 as authdb +from gn_auth.auth.authentication.users import User +from gn_auth.auth.authorisation.resources.groups.models import ( + Group, db_row_to_group) +from gn_auth.scripts.assign_data_to_default_admin import ( + default_resources, assign_data_to_resource) + + +def resources_group(conn: authdb.DbConnection) -> Maybe: + """Retrieve resources' group""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT g.* FROM resources AS r " + "INNER JOIN resource_ownership AS ro " + "ON r.resource_id=ro.resource_id " + "INNER JOIN groups AS g ON ro.group_id=g.group_id " + "WHERE resource_name='mRNA-euhrin'") + return monad_from_none_or_value( + Nothing, Just, cursor.fetchone()).then( + db_row_to_group) + + +def resource_owner(conn: authdb.DbConnection) -> Maybe: + """Retrieve the resource owner.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT u.* FROM users AS u WHERE u.user_id IN " + "(SELECT ur.user_id FROM resources AS rsc " + "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id " + "INNER JOIN roles AS r on ur.role_id=r.role_id " + "WHERE resource_name='mRNA-euhrin' " + "AND r.role_name='resource-owner')") + return monad_from_none_or_value( + Nothing, Just, cursor.fetchone()).then( + User.from_sqlite3_row) + + +def assign_data(authconn: authdb.DbConnection, bioconn, group: Group): + """Do actual data assignments.""" + try: + for resource in default_resources(authconn, group): + assign_data_to_resource(authconn, bioconn, resource, group) + + return 1 + except Exception as _exc:# pylint: disable=[broad-except] + logging.error("Failed to assign some data!", exc_info=True) + return 1 + + +if __name__ == "__main__": + @click.command() + @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" + @click.argument("mysqldburi") # "URI to the MySQL database with the biology data" + @click.option("--loglevel", + default="WARNING", + show_default=True, + type=click.Choice([ + "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) + def run(authdbpath, mysqldburi, loglevel): + """Script entry point.""" + _logger = logging.getLogger() + _logger.setLevel(loglevel) + if Path(authdbpath).exists(): + with (authdb.connection(authdbpath) as authconn, + biodb.database_connection(mysqldburi) as bioconn): + return resources_group(authconn).maybe( + 1, + lambda group: assign_data(authconn, bioconn, group)) + + logging.error("There is no such SQLite3 database file.") + return 1 + + sys.exit(run()) # pylint: disable=[no-value-for-parameter] diff --git a/gn_auth/scripts/link_inbredsets.py b/gn_auth/scripts/link_inbredsets.py new file mode 100644 index 0000000..ad743f5 --- /dev/null +++ b/gn_auth/scripts/link_inbredsets.py @@ -0,0 +1,122 @@ +""" +Link any unlinked InbredSet groups. +""" +import sys +import uuid +from pathlib import Path + +import click +from gn_libs import mysqldb as biodb + +import gn_auth.auth.db.sqlite3 as authdb + +from gn_auth.scripts.assign_data_to_default_admin import ( + sys_admins, admin_group, select_sys_admin) + +def linked_inbredsets(conn): + """Fetch all inbredset groups that are linked to the auth system.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT SpeciesId, InbredSetId FROM linked_inbredset_groups") + return tuple((row["SpeciesId"], row["InbredSetId"]) + for row in cursor.fetchall()) + +def unlinked_inbredsets(conn, linked): + """Fetch any inbredset groups that are not linked to the auth system.""" + with conn.cursor() as cursor: + where_clause = "" + query = "SELECT SpeciesId, InbredSetId, InbredSetName, FullName FROM InbredSet" + if len(linked) > 0: + pholders = ["(%s, %s)"] * len(linked) + where_clause = (f" WHERE (SpeciesId, InbredSetId) " + f"NOT IN ({pholders})") + cursor.execute(query + where_clause, + tuple(arg for sublist in linked for arg in sublist)) + return cursor.fetchall() + + cursor.execute(query) + return cursor.fetchall() + +def link_unlinked(conn, unlinked): + """Link the unlinked inbredset groups to the auth system.""" + params = tuple((str(uuid.uuid4()),) + row for row in unlinked) + with authdb.cursor(conn) as cursor: + cursor.executemany( + "INSERT INTO linked_inbredset_groups VALUES (?, ?, ?, ?, ?)", + params) + + return params + +def build_resources(conn, new_linked): + """Build resources for newly linked inbredsets.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT resource_category_id FROM resource_categories " + "WHERE resource_category_key='inbredset-group'") + category_id = cursor.fetchone()["resource_category_id"] + resources = tuple({ + "resource_id": str(uuid.uuid4()), + "resource_name": f"InbredSet: {name}", + "resource_category_id": category_id, + "public": 1, + "data_link_id": datalinkid + } for datalinkid, _sid, _isetid, name, _name in new_linked) + cursor.executemany( + "INSERT INTO resources VALUES " + "(:resource_id, :resource_name, :resource_category_id, :public)", + resources) + cursor.executemany( + "INSERT INTO inbredset_group_resources VALUES " + "(:resource_id, :data_link_id)", + resources) + return resources + +def own_resources(conn, group, resources): + """Link new resources to admin group.""" + with authdb.cursor(conn) as cursor: + params = tuple({ + "group_id": str(group.group_id), + **resource + } for resource in resources) + cursor.executemany( + "INSERT INTO resource_ownership VALUES " + "(:group_id, :resource_id)", + params) + return params + +def assign_role_for_admin(conn, user, resources): + """Assign basic role to admin on the inbredset-group resources.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT * FROM roles WHERE role_name='inbredset-group-owner'") + role_id = cursor.fetchone()["role_id"] + cursor.executemany( + "INSERT INTO user_roles(user_id, role_id, resource_id) " + "VALUES (:user_id, :role_id, :resource_id)", + tuple({**rsc, "user_id": str(user.user_id), "role_id": role_id} + for rsc in resources)) + +@click.command() +@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" +@click.argument("mysqldburi") # "URI to the MySQL database with the biology data" +def run(authdbpath, mysqldburi): + """Setup command-line arguments.""" + if not Path(authdbpath).exists(): + print( + f"ERROR: Auth db file `{authdbpath}` does not exist.", + file=sys.stderr) + sys.exit(2) + + with (authdb.connection(authdbpath) as authconn, + biodb.database_connection(mysqldburi) as bioconn): + admin = select_sys_admin(sys_admins(authconn)) + assign_role_for_admin(authconn, admin, own_resources( + authconn, + admin_group(authconn, admin), + build_resources( + authconn, link_unlinked( + authconn, + unlinked_inbredsets(bioconn, linked_inbredsets(authconn)))))) + +if __name__ == "__main__": + run() # pylint: disable=[no-value-for-parameter] diff --git a/gn_auth/scripts/register_sys_admin.py b/gn_auth/scripts/register_sys_admin.py new file mode 100644 index 0000000..06aa845 --- /dev/null +++ b/gn_auth/scripts/register_sys_admin.py @@ -0,0 +1,68 @@ +"""Script to register and mark a user account as sysadmin.""" +import sys +import getpass +from pathlib import Path + +import click +from email_validator import validate_email, EmailNotValidError + +from gn_auth.auth.db import sqlite3 as db +from gn_auth.auth.authorisation.users.admin.models import make_sys_admin +from gn_auth.auth.authentication.users import save_user, set_user_password + +def fetch_email() -> str: + """Prompt user for email.""" + while True: + try: + user_input = input("Enter the administrator's email: ") + email = validate_email(user_input.strip(), check_deliverability=True) + return email["email"] # type: ignore + except EmailNotValidError as _enve: + print("You did not provide a valid email address. Try again...", + file=sys.stderr) + +def fetch_password() -> str: + """Prompt user for password.""" + while True: + passwd = getpass.getpass(prompt="Enter password: ").strip() + passwd2 = getpass.getpass(prompt="Confirm password: ").strip() + if passwd != "" and passwd == passwd2: + return passwd + if passwd == "": + print("Empty password not accepted", file=sys.stderr) + continue + if passwd != passwd2: + print("Passwords *MUST* match", file=sys.stderr) + continue + +def fetch_name() -> str: + """Prompt user for name""" + while True: + name = input("Enter the user's name: ").strip() + if name == "": + print("Invalid name.") + continue + return name + +def save_admin(conn: db.DbConnection, name: str, email: str, passwd: str): + """Save the details to the database and assign the new user as admin.""" + with db.cursor(conn) as cursor: + usr, _hpasswd = set_user_password( + cursor, save_user(cursor, email, name), passwd) + make_sys_admin(cursor, usr) + return 0 + +def register_admin(authdbpath: Path): + """Register a user as a system admin.""" + assert authdbpath.exists(), "Could not find database file." + with db.connection(str(authdbpath)) as conn: + return save_admin(conn, fetch_name(), fetch_email(), fetch_password()) + +if __name__ == "__main__": + @click.command() + @click.argument("authdbpath") + def run(authdbpath): + """Entry-point for when script is run directly""" + return register_admin(Path(authdbpath).absolute()) + + run()# pylint: disable=[no-value-for-parameter] diff --git a/gn_auth/scripts/search_phenotypes.py b/gn_auth/scripts/search_phenotypes.py new file mode 100644 index 0000000..eee112d --- /dev/null +++ b/gn_auth/scripts/search_phenotypes.py @@ -0,0 +1,125 @@ +""" +A script to do search for phenotype traits using the Xapian Search endpoint. +""" +import uuid +import json +import traceback +from urllib.parse import urljoin +from typing import Any, Iterable +from datetime import datetime, timedelta + +import click +import redis +import requests +from gn_libs import mysqldb as gn3db + +from gn_auth import jobs +from gn_auth.auth.db import sqlite3 as authdb +from gn_auth.settings import SQL_URI, AUTH_DB +from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data + +class NoSearchResults(Exception): + """Raise when there are no results for a search.""" + +def do_search( + host: str, query: str, per_page: int, page: int = 1) -> Iterable[dict[str, Any]]: + """Do the search and return the results""" + search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}" + f"&type=phenotype&query={query}")) + response = requests.get(search_uri, timeout=300) + results = response.json() + if len(results) > 0: + return (item for item in results) + raise NoSearchResults(f"No results for search '{query}'") + +def __filter_object__(search_item): + return (search_item["species"], search_item["group"], + search_item["dataset"], search_item["name"]) + +def remove_selected(search_results, selected: tuple): + """Remove any item that the user has selected.""" + return (item for item in search_results if __filter_object__(item) not in selected) + +def remove_linked(search_results, linked: tuple): + """Remove any item that has been already linked to a user group.""" + return (item for item in search_results if __filter_object__(item) not in linked) + +def update_status(redisconn: redis.Redis, redisname, status: str): + """Update the status of the search.""" + redisconn.hset(redisname, "status", json.dumps(status)) + +def update_search_results(redisconn: redis.Redis, redisname: str, + results: tuple[dict[str, Any], ...]): + """Save the results to redis db.""" + key = "search_results" + prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]")) # type: ignore + redisconn.hset(redisname, key, json.dumps(prev_results + results)) + +def expire_redis_results(redisconn: redis.Redis, redisname: str): + """Expire the results after a while to ensure they are cleaned up.""" + redisconn.expireat(redisname, datetime.now() + timedelta(minutes=30)) + +@click.command() +@click.argument("species") +@click.argument("query") +@click.argument("job-id", type=click.UUID) +@click.option( + "--host", default="http://localhost:8080/api/", help="The URI to GN3.") +@click.option("--per-page", default=10000, help="Number of results per page.") +@click.option("--selected", default="[]", help="Selected traits.") +@click.option( + "--auth-db-uri", default=AUTH_DB, help="The SQL URI to the auth database.") +@click.option( + "--gn3-db-uri", default=SQL_URI, + help="The SQL URI to the main GN3 database.") +@click.option( + "--redis-uri", default="redis://:@localhost:6379/0", + help="The URI to the redis server.") +def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals] + species: str, query: str, job_id: uuid.UUID, host: str, per_page: int, + selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str): + """ + Search for phenotype traits, filtering out any linked and selected traits, + loading more and more pages until the `per_page` quota is fulfilled or the + search runs out of pages. + """ + redisname = jobs.job_key(job_id) + with (authdb.connection(auth_db_uri) as authconn, + gn3db.database_connection(gn3_db_uri) as gn3conn, + redis.Redis.from_url(redis_uri, decode_responses=True) as redisconn): + update_status(redisconn, redisname, "started") + update_search_results(redisconn, redisname, tuple()) # init search results + try: + search_query = f"species:{species}" + ( + f" AND ({query})" if bool(query) else "") + selected_traits = tuple( + (item["species"], item["group"], item["dataset"], item["name"]) + for item in json.loads(selected)) + linked = tuple( + (row["SpeciesName"], row["InbredSetName"], row["dataset_name"], + str(row["PublishXRefId"])) + for row in linked_phenotype_data(authconn, gn3conn, species)) + page = 1 + count = 0 + while count < per_page: + results = tuple(remove_linked( + remove_selected( + do_search(host, search_query, per_page, page), + selected_traits), + linked))[0:per_page-count] + count = count + len(results) + page = page + 1 + update_search_results(redisconn, redisname, results) + except NoSearchResults as _nsr: + pass + except Exception as _exc: # pylint: disable=[broad-except] + update_status(redisconn, redisname, "failed") + redisconn.hset(redisname, "exception", json.dumps(traceback.format_exc())) + expire_redis_results(redisconn, redisname) + return 1 + update_status(redisconn, redisname, "completed") + expire_redis_results(redisconn, redisname) + return 0 + +if __name__ == "__main__": + search() # pylint: disable=[no-value-for-parameter] diff --git a/gn_auth/scripts/worker.py b/gn_auth/scripts/worker.py new file mode 100644 index 0000000..0a77d41 --- /dev/null +++ b/gn_auth/scripts/worker.py @@ -0,0 +1,83 @@ +"""Daemon that processes commands""" +import os +import sys +import time +import argparse + +import redis +import redis.connection + +from gn_auth.commands import run_cmd + +# Enable importing from one dir up: put as first to override any other globally +# accessible GN3 +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) + +def update_status(conn, cmd_id, status): + """Helper to update command status""" + conn.hset(name=f"{cmd_id}", key="status", value=f"{status}") + +def make_incremental_backoff(init_val: float=0.1, maximum: int=420): + """ + Returns a closure that can be used to increment the returned value up to + `maximum` or reset it to `init_val`. + """ + current = init_val + + def __increment_or_reset__(command: str, value: float=0.1): + nonlocal current + if command == "reset": + current = init_val + return current + + if command == "increment": + current = min(current + abs(value), maximum) + return current + + return current + + return __increment_or_reset__ + +def run_jobs(conn, queue_name: str): + """Process the redis using a redis connection, CONN""" + # pylint: disable=E0401, C0415 + cmd_id = (conn.lpop(queue_name) or b'').decode("utf-8") + if bool(cmd_id): + cmd = conn.hget(name=cmd_id, key="cmd") + if cmd and (conn.hget(cmd_id, "status") == b"queued"): + update_status(conn, cmd_id, "running") + result = run_cmd( + cmd.decode("utf-8"), env=conn.hget(name=cmd_id, key="env")) + conn.hset(name=cmd_id, key="result", value=result.get("output")) + if result.get("code") == 0: # Success + update_status(conn, cmd_id, "success") + else: + update_status(conn, cmd_id, "error") + conn.hset(cmd_id, "stderr", result.get("output")) + return cmd_id + return None + +def parse_cli_arguments(): + """Parse the command-line arguments.""" + parser = argparse.ArgumentParser( + description="Run asynchronous (service) commands.") + parser.add_argument("queue_name", help="Queue to check in redis") + parser.add_argument( + "--daemon", default=False, action="store_true", + help=( + "Run process as a daemon instead of the default 'one-shot' " + "process")) + return parser.parse_args() + +if __name__ == "__main__": + args = parse_cli_arguments() + with redis.Redis() as redis_conn: + if not args.daemon: + run_jobs(redis_conn, args.queue_name) + else: + sleep_time = make_incremental_backoff() + while True: # Daemon that keeps running forever: + if run_jobs(redis_conn, args.queue_name): + time.sleep(sleep_time("reset")) + continue + time.sleep(sleep_time("increment", sleep_time("return_current"))) diff --git a/gn_auth/wsgi.py b/gn_auth/wsgi.py index e05ef0d..f2f17f1 100644 --- a/gn_auth/wsgi.py +++ b/gn_auth/wsgi.py @@ -16,8 +16,7 @@ from gn_auth.auth.db import sqlite3 as db from gn_auth.auth.errors import NotFoundError from gn_auth.auth.authentication.users import user_by_id, hash_password from gn_auth.auth.authorisation.users.admin.models import make_sys_admin - -from scripts import register_sys_admin as rsysadm# type: ignore[import] +from gn_auth.scripts import register_sys_admin as rsysadm# type: ignore[import] app = create_app() diff --git a/scripts/__init__.py b/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/assign_data_to_default_admin.py b/scripts/assign_data_to_default_admin.py deleted file mode 100644 index 69fc50c..0000000 --- a/scripts/assign_data_to_default_admin.py +++ /dev/null @@ -1,434 +0,0 @@ -""" -Assign any existing data (that is not currently assigned to any group) to the -default sys-admin group for accessibility purposes. -""" -import sys -import json -import time -import random -import logging -from pathlib import Path -from uuid import UUID, uuid4 - -import click -from gn_libs import mysqldb as biodb -from MySQLdb.cursors import DictCursor - -import gn_auth.auth.db.sqlite3 as authdb -from gn_auth.auth.authentication.users import User -from gn_auth.auth.authorisation.roles.models import ( - revoke_user_role_by_name, assign_user_role_by_name) - -from gn_auth.auth.authorisation.resources.groups.models import ( - Group, save_group, add_resources_to_group) -from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role -from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory - - -class DataNotFound(Exception): - """Raise if no admin user exists.""" - - -def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]: - """Retrieve all the existing system admins.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT u.* FROM users AS u " - "INNER JOIN user_roles AS ur ON u.user_id=ur.user_id " - "INNER JOIN roles AS r ON ur.role_id=r.role_id " - "WHERE r.role_name='system-administrator'") - return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall()) - return tuple() - - -def choose_admin(enum_admins: dict[int, User]) -> int: - """Prompt and read user choice.""" - while True: - try: - print("\n===========================\n") - print("We found the following system administrators:") - for idx, admin in enum_admins.items(): - print(f"\t{idx}: {admin.name} ({admin.email})") - choice = input(f"Choose [1 .. {len(enum_admins)}]: ") - return int(choice) - except ValueError as _verr: - if choice.lower() == "quit": - print("Goodbye!") - sys.exit(0) - print(f"\nERROR: Invalid choice '{choice}'!") - - -def select_sys_admin(admins: tuple[User, ...]) -> User: - """Pick one admin out of list.""" - if len(admins) > 0: - if len(admins) == 1: - print(f"-> Found Admin: {admins[0].name} ({admins[0].email})") - return admins[0] - enum_admins = dict(enumerate(admins, start=1)) - chosen = enum_admins[choose_admin(enum_admins)] - print(f"-> Chosen Admin: {chosen.name} ({chosen.email})") - return chosen - raise DataNotFound( - "No administrator user found. Create an administrator user first.") - - -def admin_group(conn: authdb.DbConnection, admin: User) -> Group: - """Retrieve the admin's user group. If none exist, create one.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT g.* FROM users AS u " - "INNER JOIN group_users AS gu ON u.user_id=gu.user_id " - "INNER JOIN groups AS g on gu.group_id=g.group_id " - "WHERE u.user_id = ?", - (str(admin.user_id),)) - row = cursor.fetchone() - if row: - return Group(UUID(row["group_id"]), - row["group_name"], - json.loads(row["group_metadata"])) - new_group = save_group(cursor, "AutoAdminGroup", { - "group_description": ( - "Created by script for existing data visibility. " - "Existing data was migrated into this group and assigned " - "to publicly visible resources according to type.") - }) - - cursor.execute( - "SELECT * FROM resource_categories WHERE " - "resource_category_key='group'") - res_cat_id = cursor.fetchone()["resource_category_id"] - grp_res = { - "group_id": str(new_group.group_id), - "resource_id": str(uuid4()), - "resource_name": new_group.group_name, - "resource_category_id": res_cat_id, - "public": 0 - } - cursor.execute( - "INSERT INTO resources VALUES " - "(:resource_id, :resource_name, :resource_category_id, :public)", - grp_res) - cursor.execute( - "INSERT INTO group_resources(resource_id, group_id) " - "VALUES(:resource_id, :group_id)", - grp_res) - cursor.execute("INSERT INTO group_users VALUES (?, ?)", - (str(new_group.group_id), str(admin.user_id))) - revoke_user_role_by_name(cursor, admin, "group-creator") - assign_user_role_by_name( - cursor, admin, UUID(grp_res["resource_id"]), "group-leader") - return new_group - - -def __resource_category_by_key__( - cursor: authdb.DbCursor, category_key: str) -> ResourceCategory: - """Retrieve a resource category by its ID.""" - cursor.execute( - "SELECT * FROM resource_categories WHERE resource_category_key = ?", - (category_key,)) - row = cursor.fetchone() - if not bool(row): - raise DataNotFound( - f"Could not find resource category with key {category_key}") - return ResourceCategory(UUID(row["resource_category_id"]), - row["resource_category_key"], - row["resource_category_description"]) - - -def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]: - """Create default resources.""" - resources = tuple(Resource( - uuid4(), name, __resource_category_by_key__(cursor, catkey), - True, tuple() - ) for name, catkey in ( - ("mRNA-euhrin", "mrna"), - ("pheno-xboecp", "phenotype"), - ("geno-welphd", "genotype"))) - cursor.executemany( - "INSERT INTO resources VALUES (:rid, :rname, :rcid, :pub)", - tuple({ - "rid": str(res.resource_id), - "rname": res.resource_name, - "rcid": str(res.resource_category.resource_category_id), - "pub": 1 - } for res in resources)) - return resources - - -def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[ - Resource, ...]: - """Create default resources, or return them if they exist.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT r.resource_id, r.resource_name, r.public, rc.* " - "FROM resource_ownership AS ro INNER JOIN resources AS r " - "ON ro.resource_id=r.resource_id " - "INNER JOIN resource_categories AS rc " - "ON r.resource_category_id=rc.resource_category_id " - "WHERE ro.group_id=? AND r.resource_name IN " - "('mRNA-euhrin', 'pheno-xboecp', 'geno-welphd')", - (str(group.group_id),)) - rows = cursor.fetchall() - if len(rows) == 0: - return __create_resources__(cursor) - - return tuple(Resource( - UUID(row["resource_id"]), - row["resource_name"], - ResourceCategory( - UUID(row["resource_category_id"]), - row["resource_category_key"], - row["resource_category_description"]), - bool(row["public"]), - tuple() - ) for row in rows) - - -def delay(): - """Delay a while: anything from 2 seconds to 15 seconds.""" - time.sleep(random.choice(range(2,16))) - - -def __assigned_mrna__(authconn): - """Retrieve assigned mRNA items.""" - with authdb.cursor(authconn) as cursor: - cursor.execute( - "SELECT SpeciesId, InbredSetId, ProbeFreezeId, ProbeSetFreezeId " - "FROM linked_mrna_data") - return tuple( - (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"], - row["ProbeSetFreezeId"]) for row in cursor.fetchall()) - - -def __unassigned_mrna__(bioconn, assigned): - """Retrieve unassigned mRNA data items.""" - query = ( - "SELECT s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, " - "psf.Id AS ProbeSetFreezeId, psf.Name AS dataset_name, " - "psf.FullName AS dataset_fullname, psf.ShortName AS dataset_shortname " - "FROM Species AS s INNER JOIN InbredSet AS iset " - "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf " - "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf " - "ON pf.ProbeFreezeId=psf.ProbeFreezeId " - "WHERE s.Name != 'human' ") - if len(assigned) > 0: - paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) - query = query + ( - "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) " - f"NOT IN ({paramstr}) ") - - query = query + "LIMIT 100000" - with bioconn.cursor(DictCursor) as cursor: - cursor.execute(query, tuple(item for row in assigned for item in row)) - return (row for row in cursor.fetchall()) - - -def __assign_mrna__(authconn, bioconn, resource, group): - "Assign any unassigned mRNA data to resource." - while True: - unassigned = tuple({ - "data_link_id": str(uuid4()), - "group_id": str(group.group_id), - "resource_id": str(resource.resource_id), - **row - } for row in __unassigned_mrna__( - bioconn, __assigned_mrna__(authconn))) - - if len(unassigned) <= 0: - print("-> mRNA: Completed!") - break - with authdb.cursor(authconn) as cursor: - cursor.executemany( - "INSERT INTO linked_mrna_data VALUES " - "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " - ":ProbeFreezeId, :ProbeSetFreezeId, :dataset_name, " - ":dataset_fullname, :dataset_shortname)", - unassigned) - cursor.executemany( - "INSERT INTO mrna_resources VALUES " - "(:resource_id, :data_link_id)", - unassigned) - print(f"-> mRNA: Linked {len(unassigned)}") - delay() - - -def __assigned_geno__(authconn): - """Retrieve assigned genotype data.""" - with authdb.cursor(authconn) as cursor: - cursor.execute( - "SELECT SpeciesId, InbredSetId, GenoFreezeId " - "FROM linked_genotype_data") - return tuple((row["SpeciesId"], row["InbredSetId"], row["GenoFreezeId"]) - for row in cursor.fetchall()) - -def __unassigned_geno__(bioconn, assigned): - """Fetch unassigned genotype data.""" - query = ( - "SELECT s.SpeciesId, iset.InbredSetId, iset.InbredSetName, " - "gf.Id AS GenoFreezeId, gf.Name AS dataset_name, " - "gf.FullName AS dataset_fullname, " - "gf.ShortName AS dataset_shortname " - "FROM Species AS s INNER JOIN InbredSet AS iset " - "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf " - "ON iset.InbredSetId=gf.InbredSetId " - "WHERE s.Name != 'human' ") - if len(assigned) > 0: - paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned)) - query = query + ( - "AND (s.SpeciesId, iset.InbredSetId, gf.Id) " - f"NOT IN ({paramstr}) ") - - query = query + "LIMIT 100000" - with bioconn.cursor(DictCursor) as cursor: - cursor.execute(query, tuple(item for row in assigned for item in row)) - return (row for row in cursor.fetchall()) - - -def __assign_geno__(authconn, bioconn, resource, group): - "Assign any unassigned Genotype data to resource." - while True: - unassigned = tuple({ - "data_link_id": str(uuid4()), - "group_id": str(group.group_id), - "resource_id": str(resource.resource_id), - **row - } for row in __unassigned_geno__( - bioconn, __assigned_geno__(authconn))) - - if len(unassigned) <= 0: - print("-> Genotype: Completed!") - break - with authdb.cursor(authconn) as cursor: - cursor.executemany( - "INSERT INTO linked_genotype_data VALUES " - "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " - ":GenoFreezeId, :dataset_name, :dataset_fullname, " - ":dataset_shortname)", - unassigned) - cursor.executemany( - "INSERT INTO genotype_resources VALUES " - "(:resource_id, :data_link_id)", - unassigned) - print(f"-> Genotype: Linked {len(unassigned)}") - delay() - - -def __assigned_pheno__(authconn): - """Retrieve assigned phenotype data.""" - with authdb.cursor(authconn) as cursor: - cursor.execute( - "SELECT SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId " - "FROM linked_phenotype_data") - return tuple(( - row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"], - row["PublishXRefId"]) for row in cursor.fetchall()) - - -def __unassigned_pheno__(bioconn, assigned): - """Retrieve all unassigned Phenotype data.""" - query = ( - "SELECT spc.SpeciesId, iset.InbredSetId, " - "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, " - "pf.FullName AS dataset_fullname, " - "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " - "FROM " - "Species AS spc " - "INNER JOIN InbredSet AS iset " - "ON spc.SpeciesId=iset.SpeciesId " - "INNER JOIN PublishFreeze AS pf " - "ON iset.InbredSetId=pf.InbredSetId " - "INNER JOIN PublishXRef AS pxr " - "ON pf.InbredSetId=pxr.InbredSetId " - "WHERE spc.Name != 'human' ") - if len(assigned) > 0: - paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) - query = query + ( - "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) " - f"NOT IN ({paramstr}) ") - - query = query + "LIMIT 100000" - with bioconn.cursor(DictCursor) as cursor: - cursor.execute(query, tuple(item for row in assigned for item in row)) - return (row for row in cursor.fetchall()) - - -def __assign_pheno__(authconn, bioconn, resource, group): - """Assign any unassigned Phenotype data to resource.""" - while True: - unassigned = tuple({ - "data_link_id": str(uuid4()), - "group_id": str(group.group_id), - "resource_id": str(resource.resource_id), - **row - } for row in __unassigned_pheno__( - bioconn, __assigned_pheno__(authconn))) - - if len(unassigned) <= 0: - print("-> Phenotype: Completed!") - break - with authdb.cursor(authconn) as cursor: - cursor.executemany( - "INSERT INTO linked_phenotype_data VALUES " - "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, " - ":PublishFreezeId, :dataset_name, :dataset_fullname, " - ":dataset_shortname, :PublishXRefId)", - unassigned) - cursor.executemany( - "INSERT INTO phenotype_resources VALUES " - "(:resource_id, :data_link_id)", - unassigned) - print(f"-> Phenotype: Linked {len(unassigned)}") - delay() - - -def assign_data_to_resource( - authconn, bioconn, resource: Resource, group: Group): - """Assign existing data, not linked to any group to the resource.""" - assigner_fns = { - "mrna": __assign_mrna__, - "genotype": __assign_geno__, - "phenotype": __assign_pheno__ - } - return assigner_fns[resource.resource_category.resource_category_key]( - authconn, bioconn, resource, group) - - -def entry(authdbpath, mysqldburi): - """Entry-point for data migration.""" - if not Path(authdbpath).exists(): - print( - f"ERROR: Auth db file `{authdbpath}` does not exist.", - file=sys.stderr) - sys.exit(2) - try: - with (authdb.connection(authdbpath) as authconn, - biodb.database_connection(mysqldburi) as bioconn): - admin = select_sys_admin(sys_admins(authconn)) - the_admin_group = admin_group(authconn, admin) - resources = default_resources(authconn, the_admin_group) - add_resources_to_group(authconn, resources, the_admin_group) - for resource in resources: - assign_data_to_resource( - authconn, bioconn, resource, the_admin_group) - with authdb.cursor(authconn) as cursor: - assign_resource_owner_role( - cursor, resource.resource_id, admin.user_id) - except DataNotFound as dnf: - print(dnf.args[0], file=sys.stderr) - sys.exit(1) - - -@click.command() -@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" -@click.argument("mysqldburi") # "URI to the MySQL database with the biology data" -@click.option("--loglevel", default="WARNING", show_default=True, - type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) -def run(authdbpath, mysqldburi, loglevel): - """Setup command-line arguments.""" - globallogger = logging.getLogger() - globallogger.setLevel(loglevel) - entry(authdbpath, mysqldburi) - - -if __name__ == "__main__": - run() # pylint: disable=[no-value-for-parameter] diff --git a/scripts/batch_assign_data_to_default_admin.py b/scripts/batch_assign_data_to_default_admin.py deleted file mode 100644 index a468019..0000000 --- a/scripts/batch_assign_data_to_default_admin.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Similar to the 'assign_data_to_default_admin' script but without user -interaction. -""" -import sys -import logging -from pathlib import Path - -import click -from gn_libs import mysqldb as biodb -from pymonad.maybe import Just, Maybe, Nothing -from pymonad.tools import monad_from_none_or_value - -from gn_auth.auth.db import sqlite3 as authdb -from gn_auth.auth.authentication.users import User -from gn_auth.auth.authorisation.resources.groups.models import ( - Group, db_row_to_group) - -from scripts.assign_data_to_default_admin import ( - default_resources, assign_data_to_resource) - - -def resources_group(conn: authdb.DbConnection) -> Maybe: - """Retrieve resources' group""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT g.* FROM resources AS r " - "INNER JOIN resource_ownership AS ro " - "ON r.resource_id=ro.resource_id " - "INNER JOIN groups AS g ON ro.group_id=g.group_id " - "WHERE resource_name='mRNA-euhrin'") - return monad_from_none_or_value( - Nothing, Just, cursor.fetchone()).then( - db_row_to_group) - - -def resource_owner(conn: authdb.DbConnection) -> Maybe: - """Retrieve the resource owner.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT u.* FROM users AS u WHERE u.user_id IN " - "(SELECT ur.user_id FROM resources AS rsc " - "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id " - "INNER JOIN roles AS r on ur.role_id=r.role_id " - "WHERE resource_name='mRNA-euhrin' " - "AND r.role_name='resource-owner')") - return monad_from_none_or_value( - Nothing, Just, cursor.fetchone()).then( - User.from_sqlite3_row) - - -def assign_data(authconn: authdb.DbConnection, bioconn, group: Group): - """Do actual data assignments.""" - try: - for resource in default_resources(authconn, group): - assign_data_to_resource(authconn, bioconn, resource, group) - - return 1 - except Exception as _exc:# pylint: disable=[broad-except] - logging.error("Failed to assign some data!", exc_info=True) - return 1 - - -if __name__ == "__main__": - @click.command() - @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" - @click.argument("mysqldburi") # "URI to the MySQL database with the biology data" - @click.option("--loglevel", - default="WARNING", - show_default=True, - type=click.Choice([ - "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) - def run(authdbpath, mysqldburi, loglevel): - """Script entry point.""" - _logger = logging.getLogger() - _logger.setLevel(loglevel) - if Path(authdbpath).exists(): - with (authdb.connection(authdbpath) as authconn, - biodb.database_connection(mysqldburi) as bioconn): - return resources_group(authconn).maybe( - 1, - lambda group: assign_data(authconn, bioconn, group)) - - logging.error("There is no such SQLite3 database file.") - return 1 - - sys.exit(run()) # pylint: disable=[no-value-for-parameter] diff --git a/scripts/link_inbredsets.py b/scripts/link_inbredsets.py deleted file mode 100644 index c78a050..0000000 --- a/scripts/link_inbredsets.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -Link any unlinked InbredSet groups. -""" -import sys -import uuid -from pathlib import Path - -import click -from gn_libs import mysqldb as biodb - -import gn_auth.auth.db.sqlite3 as authdb - -from scripts.assign_data_to_default_admin import ( - sys_admins, admin_group, select_sys_admin) - -def linked_inbredsets(conn): - """Fetch all inbredset groups that are linked to the auth system.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT SpeciesId, InbredSetId FROM linked_inbredset_groups") - return tuple((row["SpeciesId"], row["InbredSetId"]) - for row in cursor.fetchall()) - -def unlinked_inbredsets(conn, linked): - """Fetch any inbredset groups that are not linked to the auth system.""" - with conn.cursor() as cursor: - where_clause = "" - query = "SELECT SpeciesId, InbredSetId, InbredSetName, FullName FROM InbredSet" - if len(linked) > 0: - pholders = ["(%s, %s)"] * len(linked) - where_clause = (f" WHERE (SpeciesId, InbredSetId) " - f"NOT IN ({pholders})") - cursor.execute(query + where_clause, - tuple(arg for sublist in linked for arg in sublist)) - return cursor.fetchall() - - cursor.execute(query) - return cursor.fetchall() - -def link_unlinked(conn, unlinked): - """Link the unlinked inbredset groups to the auth system.""" - params = tuple((str(uuid.uuid4()),) + row for row in unlinked) - with authdb.cursor(conn) as cursor: - cursor.executemany( - "INSERT INTO linked_inbredset_groups VALUES (?, ?, ?, ?, ?)", - params) - - return params - -def build_resources(conn, new_linked): - """Build resources for newly linked inbredsets.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT resource_category_id FROM resource_categories " - "WHERE resource_category_key='inbredset-group'") - category_id = cursor.fetchone()["resource_category_id"] - resources = tuple({ - "resource_id": str(uuid.uuid4()), - "resource_name": f"InbredSet: {name}", - "resource_category_id": category_id, - "public": 1, - "data_link_id": datalinkid - } for datalinkid, _sid, _isetid, name, _name in new_linked) - cursor.executemany( - "INSERT INTO resources VALUES " - "(:resource_id, :resource_name, :resource_category_id, :public)", - resources) - cursor.executemany( - "INSERT INTO inbredset_group_resources VALUES " - "(:resource_id, :data_link_id)", - resources) - return resources - -def own_resources(conn, group, resources): - """Link new resources to admin group.""" - with authdb.cursor(conn) as cursor: - params = tuple({ - "group_id": str(group.group_id), - **resource - } for resource in resources) - cursor.executemany( - "INSERT INTO resource_ownership VALUES " - "(:group_id, :resource_id)", - params) - return params - -def assign_role_for_admin(conn, user, resources): - """Assign basic role to admin on the inbredset-group resources.""" - with authdb.cursor(conn) as cursor: - cursor.execute( - "SELECT * FROM roles WHERE role_name='inbredset-group-owner'") - role_id = cursor.fetchone()["role_id"] - cursor.executemany( - "INSERT INTO user_roles(user_id, role_id, resource_id) " - "VALUES (:user_id, :role_id, :resource_id)", - tuple({**rsc, "user_id": str(user.user_id), "role_id": role_id} - for rsc in resources)) - -@click.command() -@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" -@click.argument("mysqldburi") # "URI to the MySQL database with the biology data" -def run(authdbpath, mysqldburi): - """Setup command-line arguments.""" - if not Path(authdbpath).exists(): - print( - f"ERROR: Auth db file `{authdbpath}` does not exist.", - file=sys.stderr) - sys.exit(2) - - with (authdb.connection(authdbpath) as authconn, - biodb.database_connection(mysqldburi) as bioconn): - admin = select_sys_admin(sys_admins(authconn)) - assign_role_for_admin(authconn, admin, own_resources( - authconn, - admin_group(authconn, admin), - build_resources( - authconn, link_unlinked( - authconn, - unlinked_inbredsets(bioconn, linked_inbredsets(authconn)))))) - -if __name__ == "__main__": - run() # pylint: disable=[no-value-for-parameter] diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py deleted file mode 100644 index 06aa845..0000000 --- a/scripts/register_sys_admin.py +++ /dev/null @@ -1,68 +0,0 @@ -"""Script to register and mark a user account as sysadmin.""" -import sys -import getpass -from pathlib import Path - -import click -from email_validator import validate_email, EmailNotValidError - -from gn_auth.auth.db import sqlite3 as db -from gn_auth.auth.authorisation.users.admin.models import make_sys_admin -from gn_auth.auth.authentication.users import save_user, set_user_password - -def fetch_email() -> str: - """Prompt user for email.""" - while True: - try: - user_input = input("Enter the administrator's email: ") - email = validate_email(user_input.strip(), check_deliverability=True) - return email["email"] # type: ignore - except EmailNotValidError as _enve: - print("You did not provide a valid email address. Try again...", - file=sys.stderr) - -def fetch_password() -> str: - """Prompt user for password.""" - while True: - passwd = getpass.getpass(prompt="Enter password: ").strip() - passwd2 = getpass.getpass(prompt="Confirm password: ").strip() - if passwd != "" and passwd == passwd2: - return passwd - if passwd == "": - print("Empty password not accepted", file=sys.stderr) - continue - if passwd != passwd2: - print("Passwords *MUST* match", file=sys.stderr) - continue - -def fetch_name() -> str: - """Prompt user for name""" - while True: - name = input("Enter the user's name: ").strip() - if name == "": - print("Invalid name.") - continue - return name - -def save_admin(conn: db.DbConnection, name: str, email: str, passwd: str): - """Save the details to the database and assign the new user as admin.""" - with db.cursor(conn) as cursor: - usr, _hpasswd = set_user_password( - cursor, save_user(cursor, email, name), passwd) - make_sys_admin(cursor, usr) - return 0 - -def register_admin(authdbpath: Path): - """Register a user as a system admin.""" - assert authdbpath.exists(), "Could not find database file." - with db.connection(str(authdbpath)) as conn: - return save_admin(conn, fetch_name(), fetch_email(), fetch_password()) - -if __name__ == "__main__": - @click.command() - @click.argument("authdbpath") - def run(authdbpath): - """Entry-point for when script is run directly""" - return register_admin(Path(authdbpath).absolute()) - - run()# pylint: disable=[no-value-for-parameter] diff --git a/scripts/search_phenotypes.py b/scripts/search_phenotypes.py deleted file mode 100644 index eee112d..0000000 --- a/scripts/search_phenotypes.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -A script to do search for phenotype traits using the Xapian Search endpoint. -""" -import uuid -import json -import traceback -from urllib.parse import urljoin -from typing import Any, Iterable -from datetime import datetime, timedelta - -import click -import redis -import requests -from gn_libs import mysqldb as gn3db - -from gn_auth import jobs -from gn_auth.auth.db import sqlite3 as authdb -from gn_auth.settings import SQL_URI, AUTH_DB -from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data - -class NoSearchResults(Exception): - """Raise when there are no results for a search.""" - -def do_search( - host: str, query: str, per_page: int, page: int = 1) -> Iterable[dict[str, Any]]: - """Do the search and return the results""" - search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}" - f"&type=phenotype&query={query}")) - response = requests.get(search_uri, timeout=300) - results = response.json() - if len(results) > 0: - return (item for item in results) - raise NoSearchResults(f"No results for search '{query}'") - -def __filter_object__(search_item): - return (search_item["species"], search_item["group"], - search_item["dataset"], search_item["name"]) - -def remove_selected(search_results, selected: tuple): - """Remove any item that the user has selected.""" - return (item for item in search_results if __filter_object__(item) not in selected) - -def remove_linked(search_results, linked: tuple): - """Remove any item that has been already linked to a user group.""" - return (item for item in search_results if __filter_object__(item) not in linked) - -def update_status(redisconn: redis.Redis, redisname, status: str): - """Update the status of the search.""" - redisconn.hset(redisname, "status", json.dumps(status)) - -def update_search_results(redisconn: redis.Redis, redisname: str, - results: tuple[dict[str, Any], ...]): - """Save the results to redis db.""" - key = "search_results" - prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]")) # type: ignore - redisconn.hset(redisname, key, json.dumps(prev_results + results)) - -def expire_redis_results(redisconn: redis.Redis, redisname: str): - """Expire the results after a while to ensure they are cleaned up.""" - redisconn.expireat(redisname, datetime.now() + timedelta(minutes=30)) - -@click.command() -@click.argument("species") -@click.argument("query") -@click.argument("job-id", type=click.UUID) -@click.option( - "--host", default="http://localhost:8080/api/", help="The URI to GN3.") -@click.option("--per-page", default=10000, help="Number of results per page.") -@click.option("--selected", default="[]", help="Selected traits.") -@click.option( - "--auth-db-uri", default=AUTH_DB, help="The SQL URI to the auth database.") -@click.option( - "--gn3-db-uri", default=SQL_URI, - help="The SQL URI to the main GN3 database.") -@click.option( - "--redis-uri", default="redis://:@localhost:6379/0", - help="The URI to the redis server.") -def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals] - species: str, query: str, job_id: uuid.UUID, host: str, per_page: int, - selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str): - """ - Search for phenotype traits, filtering out any linked and selected traits, - loading more and more pages until the `per_page` quota is fulfilled or the - search runs out of pages. - """ - redisname = jobs.job_key(job_id) - with (authdb.connection(auth_db_uri) as authconn, - gn3db.database_connection(gn3_db_uri) as gn3conn, - redis.Redis.from_url(redis_uri, decode_responses=True) as redisconn): - update_status(redisconn, redisname, "started") - update_search_results(redisconn, redisname, tuple()) # init search results - try: - search_query = f"species:{species}" + ( - f" AND ({query})" if bool(query) else "") - selected_traits = tuple( - (item["species"], item["group"], item["dataset"], item["name"]) - for item in json.loads(selected)) - linked = tuple( - (row["SpeciesName"], row["InbredSetName"], row["dataset_name"], - str(row["PublishXRefId"])) - for row in linked_phenotype_data(authconn, gn3conn, species)) - page = 1 - count = 0 - while count < per_page: - results = tuple(remove_linked( - remove_selected( - do_search(host, search_query, per_page, page), - selected_traits), - linked))[0:per_page-count] - count = count + len(results) - page = page + 1 - update_search_results(redisconn, redisname, results) - except NoSearchResults as _nsr: - pass - except Exception as _exc: # pylint: disable=[broad-except] - update_status(redisconn, redisname, "failed") - redisconn.hset(redisname, "exception", json.dumps(traceback.format_exc())) - expire_redis_results(redisconn, redisname) - return 1 - update_status(redisconn, redisname, "completed") - expire_redis_results(redisconn, redisname) - return 0 - -if __name__ == "__main__": - search() # pylint: disable=[no-value-for-parameter] diff --git a/scripts/worker.py b/scripts/worker.py deleted file mode 100644 index 0a77d41..0000000 --- a/scripts/worker.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Daemon that processes commands""" -import os -import sys -import time -import argparse - -import redis -import redis.connection - -from gn_auth.commands import run_cmd - -# Enable importing from one dir up: put as first to override any other globally -# accessible GN3 -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) - -def update_status(conn, cmd_id, status): - """Helper to update command status""" - conn.hset(name=f"{cmd_id}", key="status", value=f"{status}") - -def make_incremental_backoff(init_val: float=0.1, maximum: int=420): - """ - Returns a closure that can be used to increment the returned value up to - `maximum` or reset it to `init_val`. - """ - current = init_val - - def __increment_or_reset__(command: str, value: float=0.1): - nonlocal current - if command == "reset": - current = init_val - return current - - if command == "increment": - current = min(current + abs(value), maximum) - return current - - return current - - return __increment_or_reset__ - -def run_jobs(conn, queue_name: str): - """Process the redis using a redis connection, CONN""" - # pylint: disable=E0401, C0415 - cmd_id = (conn.lpop(queue_name) or b'').decode("utf-8") - if bool(cmd_id): - cmd = conn.hget(name=cmd_id, key="cmd") - if cmd and (conn.hget(cmd_id, "status") == b"queued"): - update_status(conn, cmd_id, "running") - result = run_cmd( - cmd.decode("utf-8"), env=conn.hget(name=cmd_id, key="env")) - conn.hset(name=cmd_id, key="result", value=result.get("output")) - if result.get("code") == 0: # Success - update_status(conn, cmd_id, "success") - else: - update_status(conn, cmd_id, "error") - conn.hset(cmd_id, "stderr", result.get("output")) - return cmd_id - return None - -def parse_cli_arguments(): - """Parse the command-line arguments.""" - parser = argparse.ArgumentParser( - description="Run asynchronous (service) commands.") - parser.add_argument("queue_name", help="Queue to check in redis") - parser.add_argument( - "--daemon", default=False, action="store_true", - help=( - "Run process as a daemon instead of the default 'one-shot' " - "process")) - return parser.parse_args() - -if __name__ == "__main__": - args = parse_cli_arguments() - with redis.Redis() as redis_conn: - if not args.daemon: - run_jobs(redis_conn, args.queue_name) - else: - sleep_time = make_incremental_backoff() - while True: # Daemon that keeps running forever: - if run_jobs(redis_conn, args.queue_name): - time.sleep(sleep_time("reset")) - continue - time.sleep(sleep_time("increment", sleep_time("return_current"))) -- cgit 1.4.1