about summary refs log tree commit diff
path: root/gn_auth/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'gn_auth/scripts')
-rw-r--r--gn_auth/scripts/__init__.py1
-rw-r--r--gn_auth/scripts/assign_data_to_default_admin.py434
-rw-r--r--gn_auth/scripts/batch_assign_data_to_default_admin.py86
-rw-r--r--gn_auth/scripts/link_inbredsets.py122
-rw-r--r--gn_auth/scripts/register_sys_admin.py68
-rw-r--r--gn_auth/scripts/search_phenotypes.py125
-rw-r--r--gn_auth/scripts/worker.py83
7 files changed, 919 insertions, 0 deletions
diff --git a/gn_auth/scripts/__init__.py b/gn_auth/scripts/__init__.py
new file mode 100644
index 0000000..5be56d8
--- /dev/null
+++ b/gn_auth/scripts/__init__.py
@@ -0,0 +1 @@
+"""These are command-line scripts to be run manually or in the background."""
diff --git a/gn_auth/scripts/assign_data_to_default_admin.py b/gn_auth/scripts/assign_data_to_default_admin.py
new file mode 100644
index 0000000..69fc50c
--- /dev/null
+++ b/gn_auth/scripts/assign_data_to_default_admin.py
@@ -0,0 +1,434 @@
+"""
+Assign any existing data (that is not currently assigned to any group) to the
+default sys-admin group for accessibility purposes.
+"""
+import sys
+import json
+import time
+import random
+import logging
+from pathlib import Path
+from uuid import UUID, uuid4
+
+import click
+from gn_libs import mysqldb as biodb
+from MySQLdb.cursors import DictCursor
+
+import gn_auth.auth.db.sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.roles.models import (
+    revoke_user_role_by_name, assign_user_role_by_name)
+
+from gn_auth.auth.authorisation.resources.groups.models import (
+    Group, save_group, add_resources_to_group)
+from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role
+from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory
+
+
+class DataNotFound(Exception):
+    """Raise if no admin user exists."""
+
+
+def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
+    """Retrieve all the existing system admins."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT u.* FROM users AS u "
+            "INNER JOIN user_roles AS ur ON u.user_id=ur.user_id "
+            "INNER JOIN roles AS r ON ur.role_id=r.role_id "
+            "WHERE r.role_name='system-administrator'")
+        return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall())
+    return tuple()
+
+
+def choose_admin(enum_admins: dict[int, User]) -> int:
+    """Prompt and read user choice."""
+    while True:
+        try:
+            print("\n===========================\n")
+            print("We found the following system administrators:")
+            for idx, admin in enum_admins.items():
+                print(f"\t{idx}: {admin.name} ({admin.email})")
+            choice = input(f"Choose [1 .. {len(enum_admins)}]: ")
+            return int(choice)
+        except ValueError as _verr:
+            if choice.lower() == "quit":
+                print("Goodbye!")
+                sys.exit(0)
+            print(f"\nERROR: Invalid choice '{choice}'!")
+
+
+def select_sys_admin(admins: tuple[User, ...]) -> User:
+    """Pick one admin out of list."""
+    if len(admins) > 0:
+        if len(admins) == 1:
+            print(f"-> Found Admin: {admins[0].name} ({admins[0].email})")
+            return admins[0]
+        enum_admins = dict(enumerate(admins, start=1))
+        chosen = enum_admins[choose_admin(enum_admins)]
+        print(f"-> Chosen Admin: {chosen.name} ({chosen.email})")
+        return chosen
+    raise DataNotFound(
+        "No administrator user found. Create an administrator user first.")
+
+
+def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
+    """Retrieve the admin's user group. If none exist, create one."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT g.* FROM users AS u "
+            "INNER JOIN group_users AS gu ON u.user_id=gu.user_id "
+            "INNER JOIN groups AS g on gu.group_id=g.group_id "
+            "WHERE u.user_id = ?",
+            (str(admin.user_id),))
+        row = cursor.fetchone()
+        if row:
+            return Group(UUID(row["group_id"]),
+                         row["group_name"],
+                         json.loads(row["group_metadata"]))
+        new_group = save_group(cursor, "AutoAdminGroup", {
+            "group_description": (
+                "Created by script for existing data visibility. "
+                "Existing data was migrated into this group and assigned "
+                "to publicly visible resources according to type.")
+        })
+
+        cursor.execute(
+            "SELECT * FROM resource_categories WHERE "
+            "resource_category_key='group'")
+        res_cat_id = cursor.fetchone()["resource_category_id"]
+        grp_res = {
+            "group_id": str(new_group.group_id),
+            "resource_id": str(uuid4()),
+            "resource_name": new_group.group_name,
+            "resource_category_id": res_cat_id,
+            "public": 0
+        }
+        cursor.execute(
+            "INSERT INTO resources VALUES "
+            "(:resource_id, :resource_name, :resource_category_id, :public)",
+            grp_res)
+        cursor.execute(
+            "INSERT INTO group_resources(resource_id, group_id) "
+            "VALUES(:resource_id, :group_id)",
+            grp_res)
+        cursor.execute("INSERT INTO group_users VALUES (?, ?)",
+                       (str(new_group.group_id), str(admin.user_id)))
+        revoke_user_role_by_name(cursor, admin, "group-creator")
+        assign_user_role_by_name(
+            cursor, admin, UUID(grp_res["resource_id"]), "group-leader")
+        return new_group
+
+
+def __resource_category_by_key__(
+        cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
+    """Retrieve a resource category by its ID."""
+    cursor.execute(
+        "SELECT * FROM resource_categories WHERE resource_category_key = ?",
+        (category_key,))
+    row = cursor.fetchone()
+    if not bool(row):
+        raise DataNotFound(
+            f"Could not find resource category with key {category_key}")
+    return ResourceCategory(UUID(row["resource_category_id"]),
+                            row["resource_category_key"],
+                            row["resource_category_description"])
+
+
+def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
+    """Create default resources."""
+    resources = tuple(Resource(
+        uuid4(), name, __resource_category_by_key__(cursor, catkey),
+        True, tuple()
+    ) for name, catkey in (
+        ("mRNA-euhrin", "mrna"),
+        ("pheno-xboecp", "phenotype"),
+        ("geno-welphd", "genotype")))
+    cursor.executemany(
+        "INSERT INTO resources VALUES (:rid, :rname, :rcid, :pub)",
+        tuple({
+            "rid": str(res.resource_id),
+            "rname": res.resource_name,
+            "rcid": str(res.resource_category.resource_category_id),
+            "pub": 1
+        } for res in resources))
+    return resources
+
+
+def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
+        Resource, ...]:
+    """Create default resources, or return them if they exist."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT r.resource_id, r.resource_name, r.public, rc.* "
+            "FROM resource_ownership AS ro INNER JOIN resources AS r "
+            "ON ro.resource_id=r.resource_id "
+            "INNER JOIN resource_categories AS rc "
+            "ON r.resource_category_id=rc.resource_category_id "
+            "WHERE ro.group_id=? AND r.resource_name IN "
+            "('mRNA-euhrin', 'pheno-xboecp', 'geno-welphd')",
+            (str(group.group_id),))
+        rows = cursor.fetchall()
+        if len(rows) == 0:
+            return __create_resources__(cursor)
+
+        return tuple(Resource(
+            UUID(row["resource_id"]),
+            row["resource_name"],
+            ResourceCategory(
+                UUID(row["resource_category_id"]),
+                row["resource_category_key"],
+                row["resource_category_description"]),
+            bool(row["public"]),
+            tuple()
+        ) for row in rows)
+
+
+def delay():
+    """Delay a while: anything from 2 seconds to 15 seconds."""
+    time.sleep(random.choice(range(2,16)))
+
+
+def __assigned_mrna__(authconn):
+    """Retrieve assigned mRNA items."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, ProbeFreezeId, ProbeSetFreezeId "
+            "FROM linked_mrna_data")
+        return tuple(
+            (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
+             row["ProbeSetFreezeId"]) for row in cursor.fetchall())
+
+
+def __unassigned_mrna__(bioconn, assigned):
+    """Retrieve unassigned mRNA data items."""
+    query = (
+        "SELECT s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, "
+        "psf.Id AS ProbeSetFreezeId, psf.Name AS dataset_name, "
+        "psf.FullName AS dataset_fullname, psf.ShortName AS dataset_shortname "
+        "FROM Species AS s INNER JOIN InbredSet AS iset "
+        "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
+        "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
+        "ON pf.ProbeFreezeId=psf.ProbeFreezeId "
+        "WHERE s.Name != 'human' ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+        query = query + (
+            "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+
+def __assign_mrna__(authconn, bioconn, resource, group):
+    "Assign any unassigned mRNA data to resource."
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_mrna__(
+            bioconn, __assigned_mrna__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> mRNA: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_mrna_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":ProbeFreezeId, :ProbeSetFreezeId, :dataset_name, "
+                ":dataset_fullname, :dataset_shortname)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO mrna_resources VALUES "
+                "(:resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> mRNA: Linked {len(unassigned)}")
+            delay()
+
+
+def __assigned_geno__(authconn):
+    """Retrieve assigned genotype data."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, GenoFreezeId "
+            "FROM linked_genotype_data")
+        return tuple((row["SpeciesId"], row["InbredSetId"], row["GenoFreezeId"])
+                     for row in cursor.fetchall())
+
+def __unassigned_geno__(bioconn, assigned):
+    """Fetch unassigned genotype data."""
+    query = (
+        "SELECT s.SpeciesId, iset.InbredSetId, iset.InbredSetName, "
+        "gf.Id AS GenoFreezeId, gf.Name AS dataset_name, "
+        "gf.FullName AS dataset_fullname, "
+        "gf.ShortName AS dataset_shortname "
+        "FROM Species AS s INNER JOIN InbredSet AS iset "
+        "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
+        "ON iset.InbredSetId=gf.InbredSetId "
+        "WHERE s.Name != 'human' ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
+        query = query + (
+            "AND (s.SpeciesId, iset.InbredSetId, gf.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+
+def __assign_geno__(authconn, bioconn, resource, group):
+    "Assign any unassigned Genotype data to resource."
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_geno__(
+            bioconn, __assigned_geno__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> Genotype: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_genotype_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":GenoFreezeId, :dataset_name, :dataset_fullname, "
+                ":dataset_shortname)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO genotype_resources VALUES "
+                "(:resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> Genotype: Linked {len(unassigned)}")
+            delay()
+
+
+def __assigned_pheno__(authconn):
+    """Retrieve assigned phenotype data."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId "
+            "FROM linked_phenotype_data")
+        return tuple((
+            row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
+            row["PublishXRefId"]) for row in cursor.fetchall())
+
+
+def __unassigned_pheno__(bioconn, assigned):
+    """Retrieve all unassigned Phenotype data."""
+    query = (
+        "SELECT spc.SpeciesId, iset.InbredSetId, "
+        "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+        "pf.FullName AS dataset_fullname, "
+        "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+        "FROM "
+        "Species AS spc "
+        "INNER JOIN InbredSet AS iset "
+        "ON spc.SpeciesId=iset.SpeciesId "
+        "INNER JOIN PublishFreeze AS pf "
+        "ON iset.InbredSetId=pf.InbredSetId "
+        "INNER JOIN PublishXRef AS pxr "
+        "ON pf.InbredSetId=pxr.InbredSetId "
+        "WHERE spc.Name != 'human' ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+        query = query + (
+            "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+
+def __assign_pheno__(authconn, bioconn, resource, group):
+    """Assign any unassigned Phenotype data to resource."""
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_pheno__(
+            bioconn, __assigned_pheno__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> Phenotype: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_phenotype_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":PublishFreezeId, :dataset_name, :dataset_fullname, "
+                ":dataset_shortname, :PublishXRefId)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO phenotype_resources VALUES "
+                "(:resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> Phenotype: Linked {len(unassigned)}")
+            delay()
+
+
+def assign_data_to_resource(
+        authconn, bioconn, resource: Resource, group: Group):
+    """Assign existing data, not linked to any group to the resource."""
+    assigner_fns = {
+        "mrna": __assign_mrna__,
+        "genotype": __assign_geno__,
+        "phenotype": __assign_pheno__
+    }
+    return assigner_fns[resource.resource_category.resource_category_key](
+        authconn, bioconn, resource, group)
+
+
+def entry(authdbpath, mysqldburi):
+    """Entry-point for data migration."""
+    if not Path(authdbpath).exists():
+        print(
+            f"ERROR: Auth db file `{authdbpath}` does not exist.",
+            file=sys.stderr)
+        sys.exit(2)
+    try:
+        with (authdb.connection(authdbpath) as authconn,
+              biodb.database_connection(mysqldburi) as bioconn):
+            admin = select_sys_admin(sys_admins(authconn))
+            the_admin_group = admin_group(authconn, admin)
+            resources = default_resources(authconn, the_admin_group)
+            add_resources_to_group(authconn, resources, the_admin_group)
+            for resource in resources:
+                assign_data_to_resource(
+                    authconn, bioconn, resource, the_admin_group)
+                with authdb.cursor(authconn) as cursor:
+                    assign_resource_owner_role(
+                        cursor, resource.resource_id, admin.user_id)
+    except DataNotFound as dnf:
+        print(dnf.args[0], file=sys.stderr)
+        sys.exit(1)
+
+
+@click.command()
+@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+@click.option("--loglevel", default="WARNING", show_default=True,
+              type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+def run(authdbpath, mysqldburi, loglevel):
+    """Setup command-line arguments."""
+    globallogger = logging.getLogger()
+    globallogger.setLevel(loglevel)
+    entry(authdbpath, mysqldburi)
+
+
+if __name__ == "__main__":
+    run() # pylint: disable=[no-value-for-parameter]
diff --git a/gn_auth/scripts/batch_assign_data_to_default_admin.py b/gn_auth/scripts/batch_assign_data_to_default_admin.py
new file mode 100644
index 0000000..95d9794
--- /dev/null
+++ b/gn_auth/scripts/batch_assign_data_to_default_admin.py
@@ -0,0 +1,86 @@
+"""
+Similar to the 'assign_data_to_default_admin' script but without user
+interaction.
+"""
+import sys
+import logging
+from pathlib import Path
+
+import click
+from gn_libs import mysqldb as biodb
+from pymonad.maybe import Just, Maybe, Nothing
+from pymonad.tools import monad_from_none_or_value
+
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.resources.groups.models import (
+    Group, db_row_to_group)
+from gn_auth.scripts.assign_data_to_default_admin import (
+    default_resources, assign_data_to_resource)
+
+
+def resources_group(conn: authdb.DbConnection) -> Maybe:
+    """Retrieve resources' group"""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT g.* FROM resources AS r "
+            "INNER JOIN resource_ownership AS ro "
+            "ON r.resource_id=ro.resource_id "
+            "INNER JOIN groups AS g ON ro.group_id=g.group_id "
+            "WHERE resource_name='mRNA-euhrin'")
+        return monad_from_none_or_value(
+            Nothing, Just, cursor.fetchone()).then(
+                db_row_to_group)
+
+
+def resource_owner(conn: authdb.DbConnection) -> Maybe:
+    """Retrieve the resource owner."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT u.* FROM users AS u WHERE u.user_id IN "
+            "(SELECT ur.user_id FROM resources AS rsc "
+            "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id "
+            "INNER JOIN roles AS r on ur.role_id=r.role_id "
+            "WHERE resource_name='mRNA-euhrin' "
+            "AND r.role_name='resource-owner')")
+        return monad_from_none_or_value(
+            Nothing, Just, cursor.fetchone()).then(
+                User.from_sqlite3_row)
+
+
+def assign_data(authconn: authdb.DbConnection, bioconn, group: Group):
+    """Do actual data assignments."""
+    try:
+        for resource in default_resources(authconn, group):
+            assign_data_to_resource(authconn, bioconn, resource, group)
+
+        return 1
+    except Exception as _exc:# pylint: disable=[broad-except]
+        logging.error("Failed to assign some data!", exc_info=True)
+        return 1
+
+
+if __name__ == "__main__":
+    @click.command()
+    @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+    @click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+    @click.option("--loglevel",
+                  default="WARNING",
+                  show_default=True,
+                  type=click.Choice([
+                      "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+    def run(authdbpath, mysqldburi, loglevel):
+        """Script entry point."""
+        _logger = logging.getLogger()
+        _logger.setLevel(loglevel)
+        if Path(authdbpath).exists():
+            with (authdb.connection(authdbpath) as authconn,
+                  biodb.database_connection(mysqldburi) as bioconn):
+                return resources_group(authconn).maybe(
+                    1,
+                    lambda group: assign_data(authconn, bioconn, group))
+
+        logging.error("There is no such SQLite3 database file.")
+        return 1
+
+    sys.exit(run()) # pylint: disable=[no-value-for-parameter]
diff --git a/gn_auth/scripts/link_inbredsets.py b/gn_auth/scripts/link_inbredsets.py
new file mode 100644
index 0000000..ad743f5
--- /dev/null
+++ b/gn_auth/scripts/link_inbredsets.py
@@ -0,0 +1,122 @@
+"""
+Link any unlinked InbredSet groups.
+"""
+import sys
+import uuid
+from pathlib import Path
+
+import click
+from gn_libs import mysqldb as biodb
+
+import gn_auth.auth.db.sqlite3 as authdb
+
+from gn_auth.scripts.assign_data_to_default_admin import (
+    sys_admins, admin_group, select_sys_admin)
+
+def linked_inbredsets(conn):
+    """Fetch all inbredset groups that are linked to the auth system."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId FROM linked_inbredset_groups")
+        return tuple((row["SpeciesId"], row["InbredSetId"])
+                     for row in cursor.fetchall())
+
+def unlinked_inbredsets(conn, linked):
+    """Fetch any inbredset groups that are not linked to the auth system."""
+    with conn.cursor() as cursor:
+        where_clause = ""
+        query = "SELECT SpeciesId, InbredSetId, InbredSetName, FullName FROM InbredSet"
+        if len(linked) > 0:
+            pholders = ["(%s, %s)"] * len(linked)
+            where_clause = (f" WHERE (SpeciesId, InbredSetId) "
+                            f"NOT IN ({pholders})")
+            cursor.execute(query + where_clause,
+                           tuple(arg for sublist in linked for arg in sublist))
+            return cursor.fetchall()
+
+        cursor.execute(query)
+        return cursor.fetchall()
+
+def link_unlinked(conn, unlinked):
+    """Link the unlinked inbredset groups to the auth system."""
+    params = tuple((str(uuid.uuid4()),) + row for row in unlinked)
+    with authdb.cursor(conn) as cursor:
+        cursor.executemany(
+            "INSERT INTO linked_inbredset_groups VALUES (?, ?, ?, ?, ?)",
+            params)
+
+    return params
+
+def build_resources(conn, new_linked):
+    """Build resources for newly linked inbredsets."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT resource_category_id FROM resource_categories "
+            "WHERE resource_category_key='inbredset-group'")
+        category_id = cursor.fetchone()["resource_category_id"]
+        resources = tuple({
+            "resource_id": str(uuid.uuid4()),
+            "resource_name": f"InbredSet: {name}",
+            "resource_category_id": category_id,
+            "public": 1,
+            "data_link_id": datalinkid
+        } for datalinkid, _sid, _isetid, name, _name in new_linked)
+        cursor.executemany(
+            "INSERT INTO resources VALUES "
+            "(:resource_id, :resource_name, :resource_category_id, :public)",
+            resources)
+        cursor.executemany(
+            "INSERT INTO inbredset_group_resources VALUES "
+            "(:resource_id, :data_link_id)",
+            resources)
+        return resources
+
+def own_resources(conn, group, resources):
+    """Link new resources to admin group."""
+    with authdb.cursor(conn) as cursor:
+        params = tuple({
+            "group_id": str(group.group_id),
+            **resource
+        } for resource in resources)
+        cursor.executemany(
+            "INSERT INTO resource_ownership VALUES "
+            "(:group_id, :resource_id)",
+            params)
+        return params
+
+def assign_role_for_admin(conn, user, resources):
+    """Assign basic role to admin on the inbredset-group resources."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT * FROM roles WHERE role_name='inbredset-group-owner'")
+        role_id = cursor.fetchone()["role_id"]
+        cursor.executemany(
+            "INSERT INTO user_roles(user_id, role_id, resource_id) "
+            "VALUES (:user_id, :role_id, :resource_id)",
+            tuple({**rsc, "user_id": str(user.user_id), "role_id": role_id}
+                  for rsc in resources))
+
+@click.command()
+@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+def run(authdbpath, mysqldburi):
+    """Setup command-line arguments."""
+    if not Path(authdbpath).exists():
+        print(
+            f"ERROR: Auth db file `{authdbpath}` does not exist.",
+            file=sys.stderr)
+        sys.exit(2)
+
+    with (authdb.connection(authdbpath) as authconn,
+          biodb.database_connection(mysqldburi) as bioconn):
+        admin = select_sys_admin(sys_admins(authconn))
+        assign_role_for_admin(authconn, admin, own_resources(
+            authconn,
+            admin_group(authconn, admin),
+            build_resources(
+                authconn, link_unlinked(
+                    authconn,
+                    unlinked_inbredsets(bioconn, linked_inbredsets(authconn))))))
+
+if __name__ == "__main__":
+    run() # pylint: disable=[no-value-for-parameter]
diff --git a/gn_auth/scripts/register_sys_admin.py b/gn_auth/scripts/register_sys_admin.py
new file mode 100644
index 0000000..06aa845
--- /dev/null
+++ b/gn_auth/scripts/register_sys_admin.py
@@ -0,0 +1,68 @@
+"""Script to register and mark a user account as sysadmin."""
+import sys
+import getpass
+from pathlib import Path
+
+import click
+from email_validator import validate_email, EmailNotValidError
+
+from gn_auth.auth.db import sqlite3 as db
+from gn_auth.auth.authorisation.users.admin.models import make_sys_admin
+from gn_auth.auth.authentication.users import save_user, set_user_password
+
+def fetch_email() -> str:
+    """Prompt user for email."""
+    while True:
+        try:
+            user_input = input("Enter the administrator's email: ")
+            email = validate_email(user_input.strip(), check_deliverability=True)
+            return email["email"]  # type: ignore
+        except EmailNotValidError as _enve:
+            print("You did not provide a valid email address. Try again...",
+                  file=sys.stderr)
+
+def fetch_password() -> str:
+    """Prompt user for password."""
+    while True:
+        passwd = getpass.getpass(prompt="Enter password: ").strip()
+        passwd2 = getpass.getpass(prompt="Confirm password: ").strip()
+        if passwd != "" and passwd == passwd2:
+            return passwd
+        if passwd == "":
+            print("Empty password not accepted", file=sys.stderr)
+            continue
+        if passwd != passwd2:
+            print("Passwords *MUST* match", file=sys.stderr)
+            continue
+
+def fetch_name() -> str:
+    """Prompt user for name"""
+    while True:
+        name = input("Enter the user's name: ").strip()
+        if name == "":
+            print("Invalid name.")
+            continue
+        return name
+
+def save_admin(conn: db.DbConnection, name: str, email: str, passwd: str):
+    """Save the details to the database and assign the new user as admin."""
+    with db.cursor(conn) as cursor:
+        usr, _hpasswd = set_user_password(
+            cursor, save_user(cursor, email, name), passwd)
+        make_sys_admin(cursor, usr)
+        return 0
+
+def register_admin(authdbpath: Path):
+    """Register a user as a system admin."""
+    assert authdbpath.exists(), "Could not find database file."
+    with db.connection(str(authdbpath)) as conn:
+        return save_admin(conn, fetch_name(), fetch_email(), fetch_password())
+
+if __name__ == "__main__":
+    @click.command()
+    @click.argument("authdbpath")
+    def run(authdbpath):
+        """Entry-point for when script is run directly"""
+        return register_admin(Path(authdbpath).absolute())
+
+    run()# pylint: disable=[no-value-for-parameter]
diff --git a/gn_auth/scripts/search_phenotypes.py b/gn_auth/scripts/search_phenotypes.py
new file mode 100644
index 0000000..eee112d
--- /dev/null
+++ b/gn_auth/scripts/search_phenotypes.py
@@ -0,0 +1,125 @@
+"""
+A script to do search for phenotype traits using the Xapian Search endpoint.
+"""
+import uuid
+import json
+import traceback
+from urllib.parse import urljoin
+from typing import Any, Iterable
+from datetime import datetime, timedelta
+
+import click
+import redis
+import requests
+from gn_libs import mysqldb as gn3db
+
+from gn_auth import jobs
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.settings import SQL_URI, AUTH_DB
+from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data
+
+class NoSearchResults(Exception):
+    """Raise when there are no results for a search."""
+
+def do_search(
+        host: str, query: str, per_page: int, page: int = 1) -> Iterable[dict[str, Any]]:
+    """Do the search and return the results"""
+    search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}"
+                                f"&type=phenotype&query={query}"))
+    response = requests.get(search_uri, timeout=300)
+    results = response.json()
+    if len(results) > 0:
+        return (item for item in results)
+    raise NoSearchResults(f"No results for search '{query}'")
+
+def __filter_object__(search_item):
+    return (search_item["species"], search_item["group"],
+            search_item["dataset"], search_item["name"])
+
+def remove_selected(search_results, selected: tuple):
+    """Remove any item that the user has selected."""
+    return (item for item in search_results if __filter_object__(item) not in selected)
+
+def remove_linked(search_results, linked: tuple):
+    """Remove any item that has been already linked to a user group."""
+    return (item for item in search_results if __filter_object__(item) not in linked)
+
+def update_status(redisconn: redis.Redis, redisname, status: str):
+    """Update the status of the search."""
+    redisconn.hset(redisname, "status", json.dumps(status))
+
+def update_search_results(redisconn: redis.Redis, redisname: str,
+                          results: tuple[dict[str, Any], ...]):
+    """Save the results to redis db."""
+    key = "search_results"
+    prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]"))  # type: ignore
+    redisconn.hset(redisname, key, json.dumps(prev_results + results))
+
+def expire_redis_results(redisconn: redis.Redis, redisname: str):
+    """Expire the results after a while to ensure they are cleaned up."""
+    redisconn.expireat(redisname, datetime.now() + timedelta(minutes=30))
+
+@click.command()
+@click.argument("species")
+@click.argument("query")
+@click.argument("job-id", type=click.UUID)
+@click.option(
+    "--host", default="http://localhost:8080/api/", help="The URI to GN3.")
+@click.option("--per-page", default=10000, help="Number of results per page.")
+@click.option("--selected", default="[]", help="Selected traits.")
+@click.option(
+    "--auth-db-uri", default=AUTH_DB, help="The SQL URI to the auth database.")
+@click.option(
+    "--gn3-db-uri", default=SQL_URI,
+    help="The SQL URI to the main GN3 database.")
+@click.option(
+    "--redis-uri", default="redis://:@localhost:6379/0",
+    help="The URI to the redis server.")
+def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals]
+        species: str, query: str, job_id: uuid.UUID, host: str, per_page: int,
+        selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str):
+    """
+    Search for phenotype traits, filtering out any linked and selected traits,
+    loading more and more pages until the `per_page` quota is fulfilled or the
+    search runs out of pages.
+    """
+    redisname = jobs.job_key(job_id)
+    with (authdb.connection(auth_db_uri) as authconn,
+          gn3db.database_connection(gn3_db_uri) as gn3conn,
+          redis.Redis.from_url(redis_uri, decode_responses=True) as redisconn):
+        update_status(redisconn, redisname, "started")
+        update_search_results(redisconn, redisname, tuple()) # init search results
+        try:
+            search_query = f"species:{species}" + (
+                f" AND ({query})" if bool(query) else "")
+            selected_traits = tuple(
+                (item["species"], item["group"], item["dataset"], item["name"])
+                for item in json.loads(selected))
+            linked = tuple(
+                (row["SpeciesName"], row["InbredSetName"], row["dataset_name"],
+                 str(row["PublishXRefId"]))
+                for row in linked_phenotype_data(authconn, gn3conn, species))
+            page = 1
+            count = 0
+            while count < per_page:
+                results = tuple(remove_linked(
+                    remove_selected(
+                        do_search(host, search_query, per_page, page),
+                        selected_traits),
+                    linked))[0:per_page-count]
+                count = count + len(results)
+                page = page + 1
+                update_search_results(redisconn, redisname, results)
+        except NoSearchResults as _nsr:
+            pass
+        except Exception as _exc: # pylint: disable=[broad-except]
+            update_status(redisconn, redisname, "failed")
+            redisconn.hset(redisname, "exception", json.dumps(traceback.format_exc()))
+            expire_redis_results(redisconn, redisname)
+            return 1
+        update_status(redisconn, redisname, "completed")
+        expire_redis_results(redisconn, redisname)
+        return 0
+
+if __name__ == "__main__":
+    search() # pylint: disable=[no-value-for-parameter]
diff --git a/gn_auth/scripts/worker.py b/gn_auth/scripts/worker.py
new file mode 100644
index 0000000..0a77d41
--- /dev/null
+++ b/gn_auth/scripts/worker.py
@@ -0,0 +1,83 @@
+"""Daemon that processes commands"""
+import os
+import sys
+import time
+import argparse
+
+import redis
+import redis.connection
+
+from gn_auth.commands import run_cmd
+
+# Enable importing from one dir up: put as first to override any other globally
+# accessible GN3
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+def update_status(conn, cmd_id, status):
+    """Helper to update command status"""
+    conn.hset(name=f"{cmd_id}", key="status", value=f"{status}")
+
+def make_incremental_backoff(init_val: float=0.1, maximum: int=420):
+    """
+    Returns a closure that can be used to increment the returned value up to
+    `maximum` or reset it to `init_val`.
+    """
+    current = init_val
+
+    def __increment_or_reset__(command: str, value: float=0.1):
+        nonlocal current
+        if command == "reset":
+            current = init_val
+            return current
+
+        if command == "increment":
+            current = min(current + abs(value), maximum)
+            return current
+
+        return current
+
+    return __increment_or_reset__
+
+def run_jobs(conn, queue_name: str):
+    """Process the redis using a redis connection, CONN"""
+    # pylint: disable=E0401, C0415
+    cmd_id = (conn.lpop(queue_name) or b'').decode("utf-8")
+    if bool(cmd_id):
+        cmd = conn.hget(name=cmd_id, key="cmd")
+        if cmd and (conn.hget(cmd_id, "status") == b"queued"):
+            update_status(conn, cmd_id, "running")
+            result = run_cmd(
+                cmd.decode("utf-8"), env=conn.hget(name=cmd_id, key="env"))
+            conn.hset(name=cmd_id, key="result", value=result.get("output"))
+            if result.get("code") == 0:  # Success
+                update_status(conn, cmd_id, "success")
+            else:
+                update_status(conn, cmd_id, "error")
+                conn.hset(cmd_id, "stderr", result.get("output"))
+        return cmd_id
+    return None
+
+def parse_cli_arguments():
+    """Parse the command-line arguments."""
+    parser = argparse.ArgumentParser(
+        description="Run asynchronous (service) commands.")
+    parser.add_argument("queue_name", help="Queue to check in redis")
+    parser.add_argument(
+        "--daemon", default=False, action="store_true",
+        help=(
+            "Run process as a daemon instead of the default 'one-shot' "
+            "process"))
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    args = parse_cli_arguments()
+    with redis.Redis() as redis_conn:
+        if not args.daemon:
+            run_jobs(redis_conn, args.queue_name)
+        else:
+            sleep_time = make_incremental_backoff()
+            while True:  # Daemon that keeps running forever:
+                if run_jobs(redis_conn, args.queue_name):
+                    time.sleep(sleep_time("reset"))
+                    continue
+                time.sleep(sleep_time("increment", sleep_time("return_current")))