diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/assign_data_to_default_admin.py (renamed from scripts/migrate_existing_data.py) | 79 | ||||
-rw-r--r-- | scripts/batch_assign_data_to_default_admin.py | 87 | ||||
-rw-r--r-- | scripts/link_inbredsets.py | 6 | ||||
-rw-r--r-- | scripts/register_sys_admin.py | 2 | ||||
-rw-r--r-- | scripts/search_phenotypes.py | 8 |
5 files changed, 149 insertions, 33 deletions
diff --git a/scripts/migrate_existing_data.py b/scripts/assign_data_to_default_admin.py index 336ce72..69fc50c 100644 --- a/scripts/migrate_existing_data.py +++ b/scripts/assign_data_to_default_admin.py @@ -1,19 +1,19 @@ """ -Migrate existing data that is not assigned to any group to the default sys-admin -group for accessibility purposes. +Assign any existing data (that is not currently assigned to any group) to the +default sys-admin group for accessibility purposes. """ import sys import json import time import random +import logging from pathlib import Path from uuid import UUID, uuid4 import click +from gn_libs import mysqldb as biodb from MySQLdb.cursors import DictCursor -from gn_auth.auth.db import mariadb as biodb - import gn_auth.auth.db.sqlite3 as authdb from gn_auth.auth.authentication.users import User from gn_auth.auth.authorisation.roles.models import ( @@ -21,12 +21,14 @@ from gn_auth.auth.authorisation.roles.models import ( from gn_auth.auth.authorisation.resources.groups.models import ( Group, save_group, add_resources_to_group) -from gn_auth.auth.authorisation.resources.models import ( - Resource, ResourceCategory, __assign_resource_owner_role__) +from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role +from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory + class DataNotFound(Exception): """Raise if no admin user exists.""" + def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]: """Retrieve all the existing system admins.""" with authdb.cursor(conn) as cursor: @@ -38,6 +40,7 @@ def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]: return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall()) return tuple() + def choose_admin(enum_admins: dict[int, User]) -> int: """Prompt and read user choice.""" while True: @@ -54,6 +57,7 @@ def choose_admin(enum_admins: dict[int, User]) -> int: sys.exit(0) print(f"\nERROR: Invalid choice '{choice}'!") + def select_sys_admin(admins: tuple[User, ...]) -> User: """Pick one admin out of list.""" if len(admins) > 0: @@ -67,6 +71,7 @@ def select_sys_admin(admins: tuple[User, ...]) -> User: raise DataNotFound( "No administrator user found. Create an administrator user first.") + def admin_group(conn: authdb.DbConnection, admin: User) -> Group: """Retrieve the admin's user group. If none exist, create one.""" with authdb.cursor(conn) as cursor: @@ -114,6 +119,7 @@ def admin_group(conn: authdb.DbConnection, admin: User) -> Group: cursor, admin, UUID(grp_res["resource_id"]), "group-leader") return new_group + def __resource_category_by_key__( cursor: authdb.DbCursor, category_key: str) -> ResourceCategory: """Retrieve a resource category by its ID.""" @@ -128,6 +134,7 @@ def __resource_category_by_key__( row["resource_category_key"], row["resource_category_description"]) + def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]: """Create default resources.""" resources = tuple(Resource( @@ -147,6 +154,7 @@ def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]: } for res in resources)) return resources + def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[ Resource, ...]: """Create default resources, or return them if they exist.""" @@ -175,10 +183,12 @@ def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[ tuple() ) for row in rows) + def delay(): """Delay a while: anything from 2 seconds to 15 seconds.""" time.sleep(random.choice(range(2,16))) + def __assigned_mrna__(authconn): """Retrieve assigned mRNA items.""" with authdb.cursor(authconn) as cursor: @@ -189,6 +199,7 @@ def __assigned_mrna__(authconn): (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"], row["ProbeSetFreezeId"]) for row in cursor.fetchall()) + def __unassigned_mrna__(bioconn, assigned): """Retrieve unassigned mRNA data items.""" query = ( @@ -198,11 +209,12 @@ def __unassigned_mrna__(bioconn, assigned): "FROM Species AS s INNER JOIN InbredSet AS iset " "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf " "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf " - "ON pf.ProbeFreezeId=psf.ProbeFreezeId ") + "ON pf.ProbeFreezeId=psf.ProbeFreezeId " + "WHERE s.Name != 'human' ") if len(assigned) > 0: paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) query = query + ( - "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) " + "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) " f"NOT IN ({paramstr}) ") query = query + "LIMIT 100000" @@ -210,6 +222,7 @@ def __unassigned_mrna__(bioconn, assigned): cursor.execute(query, tuple(item for row in assigned for item in row)) return (row for row in cursor.fetchall()) + def __assign_mrna__(authconn, bioconn, resource, group): "Assign any unassigned mRNA data to resource." while True: @@ -238,6 +251,7 @@ def __assign_mrna__(authconn, bioconn, resource, group): print(f"-> mRNA: Linked {len(unassigned)}") delay() + def __assigned_geno__(authconn): """Retrieve assigned genotype data.""" with authdb.cursor(authconn) as cursor: @@ -256,11 +270,12 @@ def __unassigned_geno__(bioconn, assigned): "gf.ShortName AS dataset_shortname " "FROM Species AS s INNER JOIN InbredSet AS iset " "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf " - "ON iset.InbredSetId=gf.InbredSetId ") + "ON iset.InbredSetId=gf.InbredSetId " + "WHERE s.Name != 'human' ") if len(assigned) > 0: paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned)) query = query + ( - "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) " + "AND (s.SpeciesId, iset.InbredSetId, gf.Id) " f"NOT IN ({paramstr}) ") query = query + "LIMIT 100000" @@ -268,6 +283,7 @@ def __unassigned_geno__(bioconn, assigned): cursor.execute(query, tuple(item for row in assigned for item in row)) return (row for row in cursor.fetchall()) + def __assign_geno__(authconn, bioconn, resource, group): "Assign any unassigned Genotype data to resource." while True: @@ -296,6 +312,7 @@ def __assign_geno__(authconn, bioconn, resource, group): print(f"-> Genotype: Linked {len(unassigned)}") delay() + def __assigned_pheno__(authconn): """Retrieve assigned phenotype data.""" with authdb.cursor(authconn) as cursor: @@ -306,25 +323,27 @@ def __assigned_pheno__(authconn): row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"], row["PublishXRefId"]) for row in cursor.fetchall()) + def __unassigned_pheno__(bioconn, assigned): """Retrieve all unassigned Phenotype data.""" query = ( - "SELECT spc.SpeciesId, iset.InbredSetId, " - "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, " - "pf.FullName AS dataset_fullname, " - "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " - "FROM " - "Species AS spc " - "INNER JOIN InbredSet AS iset " - "ON spc.SpeciesId=iset.SpeciesId " - "INNER JOIN PublishFreeze AS pf " - "ON iset.InbredSetId=pf.InbredSetId " - "INNER JOIN PublishXRef AS pxr " - "ON pf.InbredSetId=pxr.InbredSetId ") + "SELECT spc.SpeciesId, iset.InbredSetId, " + "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, " + "pf.FullName AS dataset_fullname, " + "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId " + "FROM " + "Species AS spc " + "INNER JOIN InbredSet AS iset " + "ON spc.SpeciesId=iset.SpeciesId " + "INNER JOIN PublishFreeze AS pf " + "ON iset.InbredSetId=pf.InbredSetId " + "INNER JOIN PublishXRef AS pxr " + "ON pf.InbredSetId=pxr.InbredSetId " + "WHERE spc.Name != 'human' ") if len(assigned) > 0: paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned)) query = query + ( - "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) " + "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) " f"NOT IN ({paramstr}) ") query = query + "LIMIT 100000" @@ -332,6 +351,7 @@ def __unassigned_pheno__(bioconn, assigned): cursor.execute(query, tuple(item for row in assigned for item in row)) return (row for row in cursor.fetchall()) + def __assign_pheno__(authconn, bioconn, resource, group): """Assign any unassigned Phenotype data to resource.""" while True: @@ -360,6 +380,7 @@ def __assign_pheno__(authconn, bioconn, resource, group): print(f"-> Phenotype: Linked {len(unassigned)}") delay() + def assign_data_to_resource( authconn, bioconn, resource: Resource, group: Group): """Assign existing data, not linked to any group to the resource.""" @@ -371,6 +392,7 @@ def assign_data_to_resource( return assigner_fns[resource.resource_category.resource_category_key]( authconn, bioconn, resource, group) + def entry(authdbpath, mysqldburi): """Entry-point for data migration.""" if not Path(authdbpath).exists(): @@ -389,17 +411,24 @@ def entry(authdbpath, mysqldburi): assign_data_to_resource( authconn, bioconn, resource, the_admin_group) with authdb.cursor(authconn) as cursor: - __assign_resource_owner_role__(cursor, resource, admin) + assign_resource_owner_role( + cursor, resource.resource_id, admin.user_id) except DataNotFound as dnf: print(dnf.args[0], file=sys.stderr) sys.exit(1) + @click.command() @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" @click.argument("mysqldburi") # "URI to the MySQL database with the biology data" -def run(authdbpath, mysqldburi): +@click.option("--loglevel", default="WARNING", show_default=True, + type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) +def run(authdbpath, mysqldburi, loglevel): """Setup command-line arguments.""" + globallogger = logging.getLogger() + globallogger.setLevel(loglevel) entry(authdbpath, mysqldburi) + if __name__ == "__main__": run() # pylint: disable=[no-value-for-parameter] diff --git a/scripts/batch_assign_data_to_default_admin.py b/scripts/batch_assign_data_to_default_admin.py new file mode 100644 index 0000000..a468019 --- /dev/null +++ b/scripts/batch_assign_data_to_default_admin.py @@ -0,0 +1,87 @@ +""" +Similar to the 'assign_data_to_default_admin' script but without user +interaction. +""" +import sys +import logging +from pathlib import Path + +import click +from gn_libs import mysqldb as biodb +from pymonad.maybe import Just, Maybe, Nothing +from pymonad.tools import monad_from_none_or_value + +from gn_auth.auth.db import sqlite3 as authdb +from gn_auth.auth.authentication.users import User +from gn_auth.auth.authorisation.resources.groups.models import ( + Group, db_row_to_group) + +from scripts.assign_data_to_default_admin import ( + default_resources, assign_data_to_resource) + + +def resources_group(conn: authdb.DbConnection) -> Maybe: + """Retrieve resources' group""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT g.* FROM resources AS r " + "INNER JOIN resource_ownership AS ro " + "ON r.resource_id=ro.resource_id " + "INNER JOIN groups AS g ON ro.group_id=g.group_id " + "WHERE resource_name='mRNA-euhrin'") + return monad_from_none_or_value( + Nothing, Just, cursor.fetchone()).then( + db_row_to_group) + + +def resource_owner(conn: authdb.DbConnection) -> Maybe: + """Retrieve the resource owner.""" + with authdb.cursor(conn) as cursor: + cursor.execute( + "SELECT u.* FROM users AS u WHERE u.user_id IN " + "(SELECT ur.user_id FROM resources AS rsc " + "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id " + "INNER JOIN roles AS r on ur.role_id=r.role_id " + "WHERE resource_name='mRNA-euhrin' " + "AND r.role_name='resource-owner')") + return monad_from_none_or_value( + Nothing, Just, cursor.fetchone()).then( + User.from_sqlite3_row) + + +def assign_data(authconn: authdb.DbConnection, bioconn, group: Group): + """Do actual data assignments.""" + try: + for resource in default_resources(authconn, group): + assign_data_to_resource(authconn, bioconn, resource, group) + + return 1 + except Exception as _exc:# pylint: disable=[broad-except] + logging.error("Failed to assign some data!", exc_info=True) + return 1 + + +if __name__ == "__main__": + @click.command() + @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database" + @click.argument("mysqldburi") # "URI to the MySQL database with the biology data" + @click.option("--loglevel", + default="WARNING", + show_default=True, + type=click.Choice([ + "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"])) + def run(authdbpath, mysqldburi, loglevel): + """Script entry point.""" + _logger = logging.getLogger() + _logger.setLevel(loglevel) + if Path(authdbpath).exists(): + with (authdb.connection(authdbpath) as authconn, + biodb.database_connection(mysqldburi) as bioconn): + return resources_group(authconn).maybe( + 1, + lambda group: assign_data(authconn, bioconn, group)) + + logging.error("There is no such SQLite3 database file.") + return 1 + + sys.exit(run()) # pylint: disable=[no-value-for-parameter] diff --git a/scripts/link_inbredsets.py b/scripts/link_inbredsets.py index ac9fa2b..c78a050 100644 --- a/scripts/link_inbredsets.py +++ b/scripts/link_inbredsets.py @@ -6,12 +6,12 @@ import uuid from pathlib import Path import click +from gn_libs import mysqldb as biodb import gn_auth.auth.db.sqlite3 as authdb -from gn_auth.auth.db import mariadb as biodb - -from scripts.migrate_existing_data import sys_admins, admin_group, select_sys_admin +from scripts.assign_data_to_default_admin import ( + sys_admins, admin_group, select_sys_admin) def linked_inbredsets(conn): """Fetch all inbredset groups that are linked to the auth system.""" diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py index dfd4d59..06aa845 100644 --- a/scripts/register_sys_admin.py +++ b/scripts/register_sys_admin.py @@ -16,7 +16,7 @@ def fetch_email() -> str: try: user_input = input("Enter the administrator's email: ") email = validate_email(user_input.strip(), check_deliverability=True) - return email["email"] + return email["email"] # type: ignore except EmailNotValidError as _enve: print("You did not provide a valid email address. Try again...", file=sys.stderr) diff --git a/scripts/search_phenotypes.py b/scripts/search_phenotypes.py index 20d91c9..eee112d 100644 --- a/scripts/search_phenotypes.py +++ b/scripts/search_phenotypes.py @@ -11,9 +11,9 @@ from datetime import datetime, timedelta import click import redis import requests +from gn_libs import mysqldb as gn3db from gn_auth import jobs -from gn_auth.auth.db import mariadb as gn3db from gn_auth.auth.db import sqlite3 as authdb from gn_auth.settings import SQL_URI, AUTH_DB from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data @@ -26,7 +26,7 @@ def do_search( """Do the search and return the results""" search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}" f"&type=phenotype&query={query}")) - response = requests.get(search_uri) + response = requests.get(search_uri, timeout=300) results = response.json() if len(results) > 0: return (item for item in results) @@ -52,7 +52,7 @@ def update_search_results(redisconn: redis.Redis, redisname: str, results: tuple[dict[str, Any], ...]): """Save the results to redis db.""" key = "search_results" - prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]")) + prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]")) # type: ignore redisconn.hset(redisname, key, json.dumps(prev_results + results)) def expire_redis_results(redisconn: redis.Redis, redisname: str): @@ -75,7 +75,7 @@ def expire_redis_results(redisconn: redis.Redis, redisname: str): @click.option( "--redis-uri", default="redis://:@localhost:6379/0", help="The URI to the redis server.") -def search(# pylint: disable=[too-many-arguments, too-many-locals] +def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals] species: str, query: str, job_id: uuid.UUID, host: str, per_page: int, selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str): """ |