From a112c99cae0c5422a38e3a35e843a82db764316a Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 7 Aug 2023 08:40:50 +0300
Subject: Add missing modules

Copy over missing modules and functions to completely disconnect
gn-auth from GN3.
---
 gn_auth/auth/authorisation/data/views.py |  30 ++-
 gn_auth/commands.py                      |  64 ++++++
 gn_auth/jobs.py                          |  61 +++++
 gn_auth/json_encoders_decoders.py        |  31 +++
 gn_auth/session.py                       |  60 +++++
 scripts/__init__.py                      |   0
 scripts/migrate_existing_data.py         | 381 +++++++++++++++++++++++++++++++
 scripts/register_sys_admin.py            |  81 +++++++
 8 files changed, 707 insertions(+), 1 deletion(-)
 create mode 100644 gn_auth/commands.py
 create mode 100644 gn_auth/jobs.py
 create mode 100644 gn_auth/json_encoders_decoders.py
 create mode 100644 gn_auth/session.py
 create mode 100644 scripts/__init__.py
 create mode 100644 scripts/migrate_existing_data.py
 create mode 100644 scripts/register_sys_admin.py

diff --git a/gn_auth/auth/authorisation/data/views.py b/gn_auth/auth/authorisation/data/views.py
index 9e55dd8..03b416f 100644
--- a/gn_auth/auth/authorisation/data/views.py
+++ b/gn_auth/auth/authorisation/data/views.py
@@ -11,9 +11,9 @@ from authlib.integrations.flask_oauth2.errors import _HTTPException
 from flask import request, jsonify, Response, Blueprint, current_app as app
 
 import gn_auth.auth.db.mariadb as gn3db
+
 from gn_auth import jobs
 from gn_auth.commands import run_async_cmd
-from gn_auth.db.traits import build_trait_name
 
 from gn_auth.auth.db import sqlite3 as db
 from gn_auth.auth.db.sqlite3 import with_db_connection
@@ -39,6 +39,34 @@ from gn_auth.auth.authorisation.data.genotypes import (
 
 data = Blueprint("data", __name__)
 
+def build_trait_name(trait_fullname):
+    """
+    Initialises the trait's name, and other values from the search data provided
+
+    This is a copy of `gn3.db.traits.build_trait_name` function.
+    """
+    def dataset_type(dset_name):
+        if dset_name.find('Temp') >= 0:
+            return "Temp"
+        if dset_name.find('Geno') >= 0:
+            return "Geno"
+        if dset_name.find('Publish') >= 0:
+            return "Publish"
+        return "ProbeSet"
+
+    name_parts = trait_fullname.split("::")
+    assert len(name_parts) >= 2, f"Name format error: '{trait_fullname}'"
+    dataset_name = name_parts[0]
+    dataset_type = dataset_type(dataset_name)
+    return {
+        "db": {
+            "dataset_name": dataset_name,
+            "dataset_type": dataset_type},
+        "trait_fullname": trait_fullname,
+        "trait_name": name_parts[1],
+        "cellid": name_parts[2] if len(name_parts) == 3 else ""
+    }
+
 @data.route("species")
 def list_species() -> Response:
     """List all available species information."""
diff --git a/gn_auth/commands.py b/gn_auth/commands.py
new file mode 100644
index 0000000..d6f6f56
--- /dev/null
+++ b/gn_auth/commands.py
@@ -0,0 +1,64 @@
+"""Procedures used to work with the various bio-informatics cli
+commands"""
+import sys
+import json
+import subprocess
+from uuid import uuid4
+from datetime import datetime
+from typing import Dict, Optional, Tuple, Union, Sequence
+
+
+from redis.client import Redis
+
+def queue_cmd(conn: Redis,
+              job_queue: str,
+              cmd: Union[str, Sequence[str]],
+              email: Optional[str] = None,
+              env: Optional[dict] = None) -> str:
+    """Given a command CMD; (optional) EMAIL; and a redis connection CONN, queue
+it in Redis with an initial status of 'queued'.  The following status codes
+are supported:
+
+    queued:  Unprocessed; Still in the queue
+    running: Still running
+    success: Successful completion
+    error:   Erroneous completion
+
+Returns the name of the specific redis hash for the specific task.
+
+    """
+    if not conn.ping():
+        raise RedisConnectionError
+    unique_id = ("cmd::"
+                 f"{datetime.now().strftime('%Y-%m-%d%H-%M%S-%M%S-')}"
+                 f"{str(uuid4())}")
+    conn.rpush(job_queue, unique_id)
+    for key, value in {
+            "cmd": json.dumps(cmd), "result": "", "status": "queued"}.items():
+        conn.hset(name=unique_id, key=key, value=value)
+    if email:
+        conn.hset(name=unique_id, key="email", value=email)
+    if env:
+        conn.hset(name=unique_id, key="env", value=json.dumps(env))
+    return unique_id
+
+def run_cmd(cmd: str, success_codes: Tuple = (0,), env: str = None) -> Dict:
+    """Run CMD and return the CMD's status code and output as a dict"""
+    parsed_cmd = json.loads(cmd)
+    parsed_env = (json.loads(env) if env is not None else None)
+    results = subprocess.run(
+        parsed_cmd, capture_output=True, shell=isinstance(parsed_cmd, str),
+        check=False, env=parsed_env)
+    out = str(results.stdout, 'utf-8')
+    if results.returncode not in success_codes:  # Error!
+        out = str(results.stderr, 'utf-8')
+    return {"code": results.returncode, "output": out}
+
+def run_async_cmd(
+        conn: Redis, job_queue: str, cmd: Union[str, Sequence[str]],
+        email: Optional[str] = None, env: Optional[dict] = None) -> str:
+    """A utility function to call `gn3.commands.queue_cmd` function and run the
+    worker in the `one-shot` mode."""
+    cmd_id = queue_cmd(conn, job_queue, cmd, email, env)
+    subprocess.Popen([f"{sys.executable}", "-m", "sheepdog.worker"]) # pylint: disable=[consider-using-with]
+    return cmd_id
diff --git a/gn_auth/jobs.py b/gn_auth/jobs.py
new file mode 100644
index 0000000..6fc9bba
--- /dev/null
+++ b/gn_auth/jobs.py
@@ -0,0 +1,61 @@
+"""Handle external processes in a consistent manner."""
+import json
+from typing import Any
+from uuid import UUID, uuid4
+from datetime import datetime
+
+from redis import Redis
+from pymonad.either import Left, Right, Either
+
+from gn_auth import json_encoders_decoders as jed
+
+JOBS_NAMESPACE = "GN3_AUTH::JOBS"
+
+class InvalidCommand(Exception):
+    """Raise if the command to run is invalid."""
+
+def job_key(job_id: UUID, namespace_prefix: str = JOBS_NAMESPACE):
+    """Build the namespace key for a specific job."""
+    return f"{namespace_prefix}::{job_id}"
+
+def job(redisconn: Redis, job_id: UUID) -> Either:
+    """Retrive the job details of a job identified by `job_id`."""
+    the_job = redisconn.hgetall(job_key(job_id))
+    if the_job:
+        return Right({
+            key: json.loads(value, object_hook=jed.custom_json_decoder)
+            for key, value in the_job.items()
+        })
+    return Left({
+        "error": "NotFound",
+        "error_description": f"Job '{job_id}' was not found."
+    })
+
+def __command_valid__(job_command: Any) -> Either:
+    if not isinstance(job_command, list):
+        return Left({
+            "error": "InvalidJobCommand",
+            "error_description": "The job command MUST be a list."
+        })
+    if not all((isinstance(val, str) for val in job_command)):
+        return Left({
+            "error": "InvalidJobCommand",
+            "error_description": "All parts of the command MUST be strings."
+        })
+    return Right(job_command)
+
+def create_job(redisconn: Redis, job_details: dict[str, Any]) -> UUID:
+    """Create a new job and put it on Redis."""
+    def __create__(_job_command):
+        job_id = job_details.get("job_id", uuid4())
+        redisconn.hset(job_key(job_id), mapping={
+            key: json.dumps(value, cls=jed.CustomJSONEncoder) for key, value in {
+                **job_details, "job_id": job_id, "created": datetime.now(),
+                "status": "queued"
+            }.items()
+        })
+        return job_id
+    def __raise__(err):
+        raise InvalidCommand(err["error_description"])
+    return __command_valid__(job_details.get("command")).either(
+        __raise__, __create__)
diff --git a/gn_auth/json_encoders_decoders.py b/gn_auth/json_encoders_decoders.py
new file mode 100644
index 0000000..be15b34
--- /dev/null
+++ b/gn_auth/json_encoders_decoders.py
@@ -0,0 +1,31 @@
+"""Custom json encoders for various purposes."""
+import json
+from uuid import UUID
+from datetime import datetime
+
+__ENCODERS__ = {
+    UUID: lambda obj: {"__type": "UUID", "__value": str(obj)},
+    datetime: lambda obj: {"__type": "DATETIME", "__value": obj.isoformat()}
+}
+
+class CustomJSONEncoder(json.JSONEncoder):
+    """
+    A custom JSON encoder to handle cases where the default encoder fails.
+    """
+    def default(self, obj):# pylint: disable=[arguments-renamed]
+        """Return a serializable object for `obj`."""
+        if type(obj) in __ENCODERS__:
+            return __ENCODERS__[type(obj)](obj)
+        return json.JSONEncoder.default(self, obj)
+
+
+__DECODERS__ = {
+    "UUID": UUID,
+    "DATETIME": datetime.fromisoformat
+}
+
+def custom_json_decoder(obj_dict):
+    """Decode custom types"""
+    if "__type" in obj_dict:
+        return __DECODERS__[obj_dict["__type"]](obj_dict["__value"])
+    return obj_dict
diff --git a/gn_auth/session.py b/gn_auth/session.py
new file mode 100644
index 0000000..7226ac5
--- /dev/null
+++ b/gn_auth/session.py
@@ -0,0 +1,60 @@
+"""Handle any GN3 sessions."""
+from functools import wraps
+from datetime import datetime, timezone, timedelta
+
+from flask import flash, request, session, url_for, redirect
+
+__SESSION_KEY__ = "GN::AUTH::session_details"
+
+def __session_expired__():
+    """Check whether the session has expired."""
+    return datetime.now(tz=timezone.utc) >= session[__SESSION_KEY__]["expires"]
+
+def logged_in(func):
+    """Verify the user is logged in."""
+    @wraps(func)
+    def __logged_in__(*args, **kwargs):
+        if bool(session.get(__SESSION_KEY__)) and not __session_expired__():
+            return func(*args, **kwargs)
+        flash("You need to be logged in to access that page.", "alert-danger")
+        return redirect(url_for(
+            "oauth2.admin.login", next=request.url_rule.endpoint))
+    return __logged_in__
+
+def session_info():
+    """Retrieve the session information."""
+    return session.get(__SESSION_KEY__, False)
+
+def session_user():
+    """Retrieve session user."""
+    info = session_info()
+    return info and info["user"]
+
+def clear_session_info():
+    """Clear any session info."""
+    try:
+        session.pop(__SESSION_KEY__)
+    except KeyError as _keyerr:
+        pass
+
+def session_expired() -> bool:
+    """
+    Check whether the session has expired. Will always return `True` if no
+    session currently exists.
+    """
+    if bool(session.get(__SESSION_KEY__)):
+        now = datetime.now(tz=timezone.utc)
+        return now >= session[__SESSION_KEY__]["expires"]
+    return True
+
+def update_expiry() -> bool:
+    """Update the session expiry and return a boolean indicating success."""
+    if not session_expired():
+        now = datetime.now(tz=timezone.utc)
+        session[__SESSION_KEY__]["expires"] = now + timedelta(minutes=10)
+        return True
+    return False
+
+def update_session_info(**info):
+    """Update the session information."""
+    session[__SESSION_KEY__] = info
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/migrate_existing_data.py b/scripts/migrate_existing_data.py
new file mode 100644
index 0000000..4452f41
--- /dev/null
+++ b/scripts/migrate_existing_data.py
@@ -0,0 +1,381 @@
+"""
+Migrate existing data that is not assigned to any group to the default sys-admin
+group for accessibility purposes.
+"""
+import sys
+import json
+import time
+import random
+from pathlib import Path
+from uuid import UUID, uuid4
+
+import click
+from MySQLdb.cursors import DictCursor
+
+from gn_auth.auth.db import mariadb as biodb
+
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.groups.models import Group, save_group
+from gn_auth.auth.authorisation.roles.models import (
+    revoke_user_role_by_name, assign_user_role_by_name)
+from gn_auth.auth.authorisation.resources.models import (
+    Resource, ResourceCategory, __assign_resource_owner_role__)
+
+class DataNotFound(Exception):
+    """Raise if no admin user exists."""
+
+def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
+    """Retrieve all the existing system admins."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT u.* FROM users AS u "
+            "INNER JOIN user_roles AS ur ON u.user_id=ur.user_id "
+            "INNER JOIN roles AS r ON ur.role_id=r.role_id "
+            "WHERE r.role_name='system-administrator'")
+        return tuple(User(UUID(row["user_id"]), row["email"], row["name"])
+                     for row in cursor.fetchall())
+    return tuple()
+
+def choose_admin(enum_admins: dict[int, User]) -> int:
+    """Prompt and read user choice."""
+    while True:
+        try:
+            print("\n===========================\n")
+            print("We found the following system administrators:")
+            for idx, admin in enum_admins.items():
+                print(f"\t{idx}: {admin.name} ({admin.email})")
+            choice = input(f"Choose [1 .. {len(enum_admins)}]: ")
+            return int(choice)
+        except ValueError as _verr:
+            if choice.lower() == "quit":
+                print("Goodbye!")
+                sys.exit(0)
+            print(f"\nERROR: Invalid choice '{choice}'!")
+
+def select_sys_admin(admins: tuple[User, ...]) -> User:
+    """Pick one admin out of list."""
+    if len(admins) > 0:
+        if len(admins) == 1:
+            print(f"-> Found Admin: {admins[0].name} ({admins[0].email})")
+            return admins[0]
+        enum_admins = dict(enumerate(admins, start=1))
+        chosen = enum_admins[choose_admin(enum_admins)]
+        print(f"-> Chosen Admin: {chosen.name} ({chosen.email})")
+        return chosen
+    raise DataNotFound(
+        "No administrator user found. Create an administrator user first.")
+
+def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
+    """Retrieve the admin's user group. If none exist, create one."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT g.* FROM users AS u "
+            "INNER JOIN group_users AS gu ON u.user_id=gu.user_id "
+            "INNER JOIN groups AS g on gu.group_id=g.group_id "
+            "WHERE u.user_id = ?",
+            (str(admin.user_id),))
+        row = cursor.fetchone()
+        if row:
+            return Group(UUID(row["group_id"]),
+                         row["group_name"],
+                         json.loads(row["group_metadata"]))
+        new_group = save_group(cursor, "AutoAdminGroup", {
+            "group_description": (
+                "Created by script for existing data visibility. "
+                "Existing data was migrated into this group and assigned "
+                "to publicly visible resources according to type.")
+        })
+        cursor.execute("INSERT INTO group_users VALUES (?, ?)",
+                       (str(new_group.group_id), str(admin.user_id)))
+        revoke_user_role_by_name(cursor, admin, "group-creator")
+        assign_user_role_by_name(cursor, admin, "group-leader")
+        return new_group
+
+def __resource_category_by_key__(
+        cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
+    """Retrieve a resource category by its ID."""
+    cursor.execute(
+        "SELECT * FROM resource_categories WHERE resource_category_key = ?",
+        (category_key,))
+    row = cursor.fetchone()
+    if not bool(row):
+        raise DataNotFound(
+            f"Could not find resource category with key {category_key}")
+    return ResourceCategory(UUID(row["resource_category_id"]),
+                            row["resource_category_key"],
+                            row["resource_category_description"])
+
+def __create_resources__(cursor: authdb.DbCursor, group: Group) -> tuple[
+        Resource, ...]:
+    """Create default resources."""
+    resources = tuple(Resource(
+        group, uuid4(), name, __resource_category_by_key__(cursor, catkey),
+        True, tuple()
+    ) for name, catkey in (
+        ("mRNA-euhrin", "mrna"),
+        ("pheno-xboecp", "phenotype"),
+        ("geno-welphd", "genotype")))
+    cursor.executemany(
+        "INSERT INTO resources VALUES (:gid, :rid, :rname, :rcid, :pub)",
+        tuple({
+            "gid": str(group.group_id),
+            "rid": str(res.resource_id),
+            "rname": res.resource_name,
+            "rcid": str(res.resource_category.resource_category_id),
+            "pub": 1
+        } for res in resources))
+    return resources
+
+def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
+        Resource, ...]:
+    """Create default resources, or return them if they exist."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT r.resource_id, r.resource_name, r.public, rc.* "
+            "FROM resources AS r INNER JOIN resource_categories AS rc "
+            "ON r.resource_category_id=rc.resource_category_id "
+            "WHERE r.group_id=? AND r.resource_name IN "
+            "('mRNA-euhrin', 'pheno-xboecp', 'geno-welphd')",
+            (str(group.group_id),))
+        rows = cursor.fetchall()
+        if len(rows) == 0:
+            return __create_resources__(cursor, group)
+
+        return tuple(Resource(
+            group,
+            UUID(row["resource_id"]),
+            row["resource_name"],
+            ResourceCategory(
+                UUID(row["resource_category_id"]),
+                row["resource_category_key"],
+                row["resource_category_description"]),
+            bool(row["public"]),
+            tuple()
+        ) for row in rows)
+
+def delay():
+    """Delay a while: anything from 2 seconds to 15 seconds."""
+    time.sleep(random.choice(range(2,16)))
+
+def __assigned_mrna__(authconn):
+    """Retrieve assigned mRNA items."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, ProbeFreezeId, ProbeSetFreezeId "
+            "FROM linked_mrna_data")
+        return tuple(
+            (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
+             row["ProbeSetFreezeId"]) for row in cursor.fetchall())
+
+def __unassigned_mrna__(bioconn, assigned):
+    """Retrieve unassigned mRNA data items."""
+    query = (
+        "SELECT s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, "
+        "psf.Id AS ProbeSetFreezeId, psf.Name AS dataset_name, "
+        "psf.FullName AS dataset_fullname, psf.ShortName AS dataset_shortname "
+        "FROM Species AS s INNER JOIN InbredSet AS iset "
+        "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
+        "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
+        "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+        query = query + (
+            "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+def __assign_mrna__(authconn, bioconn, resource):
+    "Assign any unassigned mRNA data to resource."
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(resource.group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_mrna__(
+            bioconn, __assigned_mrna__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> mRNA: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_mrna_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":ProbeFreezeId, :ProbeSetFreezeId, :dataset_name, "
+                ":dataset_fullname, :dataset_shortname)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO mrna_resources VALUES "
+                "(:group_id, :resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> mRNA: Linked {len(unassigned)}")
+            delay()
+
+def __assigned_geno__(authconn):
+    """Retrieve assigned genotype data."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, GenoFreezeId "
+            "FROM linked_genotype_data")
+        return tuple((row["SpeciesId"], row["InbredSetId"], row["GenoFreezeId"])
+                     for row in cursor.fetchall())
+
+def __unassigned_geno__(bioconn, assigned):
+    """Fetch unassigned genotype data."""
+    query = (
+        "SELECT s.SpeciesId, iset.InbredSetId, iset.InbredSetName, "
+        "gf.Id AS GenoFreezeId, gf.Name AS dataset_name, "
+        "gf.FullName AS dataset_fullname, "
+        "gf.ShortName AS dataset_shortname "
+        "FROM Species AS s INNER JOIN InbredSet AS iset "
+        "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
+        "ON iset.InbredSetId=gf.InbredSetId ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
+        query = query + (
+            "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+def __assign_geno__(authconn, bioconn, resource):
+    "Assign any unassigned Genotype data to resource."
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(resource.group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_geno__(
+            bioconn, __assigned_geno__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> Genotype: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_genotype_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":GenoFreezeId, :dataset_name, :dataset_fullname, "
+                ":dataset_shortname)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO genotype_resources VALUES "
+                "(:group_id, :resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> Genotype: Linked {len(unassigned)}")
+            delay()
+
+def __assigned_pheno__(authconn):
+    """Retrieve assigned phenotype data."""
+    with authdb.cursor(authconn) as cursor:
+        cursor.execute(
+            "SELECT SpeciesId, InbredSetId, PublishFreezeId, PublishXRefId "
+            "FROM linked_phenotype_data")
+        return tuple((
+            row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
+            row["PublishXRefId"]) for row in cursor.fetchall())
+
+def __unassigned_pheno__(bioconn, assigned):
+    """Retrieve all unassigned Phenotype data."""
+    query = (
+            "SELECT spc.SpeciesId, iset.InbredSetId, "
+            "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+            "pf.FullName AS dataset_fullname, "
+            "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+            "FROM "
+            "Species AS spc "
+            "INNER JOIN InbredSet AS iset "
+            "ON spc.SpeciesId=iset.SpeciesId "
+            "INNER JOIN PublishFreeze AS pf "
+            "ON iset.InbredSetId=pf.InbredSetId "
+            "INNER JOIN PublishXRef AS pxr "
+            "ON pf.InbredSetId=pxr.InbredSetId ")
+    if len(assigned) > 0:
+        paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
+        query = query + (
+            "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+            f"NOT IN ({paramstr}) ")
+
+    query = query + "LIMIT 100000"
+    with bioconn.cursor(DictCursor) as cursor:
+        cursor.execute(query, tuple(item for row in assigned for item in row))
+        return (row for row in cursor.fetchall())
+
+def __assign_pheno__(authconn, bioconn, resource):
+    """Assign any unassigned Phenotype data to resource."""
+    while True:
+        unassigned = tuple({
+            "data_link_id": str(uuid4()),
+            "group_id": str(resource.group.group_id),
+            "resource_id": str(resource.resource_id),
+            **row
+        } for row in __unassigned_pheno__(
+            bioconn, __assigned_pheno__(authconn)))
+
+        if len(unassigned) <= 0:
+            print("-> Phenotype: Completed!")
+            break
+        with authdb.cursor(authconn) as cursor:
+            cursor.executemany(
+                "INSERT INTO linked_phenotype_data VALUES "
+                "(:data_link_id, :group_id, :SpeciesId, :InbredSetId, "
+                ":PublishFreezeId, :dataset_name, :dataset_fullname, "
+                ":dataset_shortname, :PublishXRefId)",
+                unassigned)
+            cursor.executemany(
+                "INSERT INTO phenotype_resources VALUES "
+                "(:group_id, :resource_id, :data_link_id)",
+                unassigned)
+            print(f"-> Phenotype: Linked {len(unassigned)}")
+            delay()
+
+def assign_data_to_resource(authconn, bioconn, resource: Resource):
+    """Assign existing data, not linked to any group to the resource."""
+    assigner_fns = {
+        "mrna": __assign_mrna__,
+        "genotype": __assign_geno__,
+        "phenotype": __assign_pheno__
+    }
+    return assigner_fns[resource.resource_category.resource_category_key](
+        authconn, bioconn, resource)
+
+def entry(authdbpath, mysqldburi):
+    """Entry-point for data migration."""
+    if not Path(authdbpath).exists():
+        print(
+            f"ERROR: Auth db file `{authdbpath}` does not exist.",
+            file=sys.stderr)
+        sys.exit(2)
+    try:
+        with (authdb.connection(authdbpath) as authconn,
+              biodb.database_connection(mysqldburi) as bioconn):
+            admin = select_sys_admin(sys_admins(authconn))
+            resources = default_resources(
+                authconn, admin_group(authconn, admin))
+            for resource in resources:
+                assign_data_to_resource(authconn, bioconn, resource)
+                with authdb.cursor(authconn) as cursor:
+                    __assign_resource_owner_role__(cursor, resource, admin)
+    except DataNotFound as dnf:
+        print(dnf.args[0], file=sys.stderr)
+        sys.exit(1)
+
+@click.command()
+@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+def run(authdbpath, mysqldburi):
+    """Setup command-line arguments."""
+    entry(authdbpath, mysqldburi)
+
+if __name__ == "__main__":
+    run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py
new file mode 100644
index 0000000..7d4d667
--- /dev/null
+++ b/scripts/register_sys_admin.py
@@ -0,0 +1,81 @@
+"""Script to register and mark a user account as sysadmin."""
+import sys
+import uuid
+import getpass
+from pathlib import Path
+
+import click
+from email_validator import validate_email, EmailNotValidError
+
+from gn_auth.auth.db import sqlite3 as db
+from gn_auth.auth.authentication.users import hash_password
+
+def fetch_email() -> str:
+    """Prompt user for email."""
+    while True:
+        try:
+            user_input = input("Enter the administrator's email: ")
+            email = validate_email(user_input.strip(), check_deliverability=True)
+            return email["email"]
+        except EmailNotValidError as _enve:
+            print("You did not provide a valid email address. Try again...",
+                  file=sys.stderr)
+
+def fetch_password() -> str:
+    """Prompt user for password."""
+    while True:
+        passwd = getpass.getpass(prompt="Enter password: ").strip()
+        passwd2 = getpass.getpass(prompt="Confirm password: ").strip()
+        if passwd != "" and passwd == passwd2:
+            return passwd
+        if passwd == "":
+            print("Empty password not accepted", file=sys.stderr)
+            continue
+        if passwd != passwd2:
+            print("Passwords *MUST* match", file=sys.stderr)
+            continue
+
+def fetch_name() -> str:
+    """Prompt user for name"""
+    while True:
+        name = input("Enter the user's name: ").strip()
+        if name == "":
+            print("Invalid name.")
+            continue
+        return name
+
+def save_admin(conn: db.DbConnection, name: str, email: str, passwd: str):
+    """Save the details to the database and assign the new user as admin."""
+    admin_id = uuid.uuid4()
+    admin = {
+        "user_id": str(admin_id),
+        "email": email,
+        "name": name,
+        "hash": hash_password(passwd)
+    }
+    with db.cursor(conn) as cursor:
+        cursor.execute("INSERT INTO users VALUES (:user_id, :email, :name)",
+                       admin)
+        cursor.execute("INSERT INTO user_credentials VALUES (:user_id, :hash)",
+                       admin)
+        cursor.execute(
+            "SELECT * FROM roles WHERE role_name='system-administrator'")
+        admin_role = cursor.fetchone()
+        cursor.execute("INSERT INTO user_roles VALUES (:user_id, :role_id)",
+                       {**admin, "role_id": admin_role["role_id"]})
+        return 0
+
+def register_admin(authdbpath: Path):
+    """Register a user as a system admin."""
+    assert authdbpath.exists(), "Could not find database file."
+    with db.connection(str(authdbpath)) as conn:
+        return save_admin(conn, fetch_name(), fetch_email(), fetch_password())
+
+if __name__ == "__main__":
+    @click.command()
+    @click.argument("authdbpath")
+    def run(authdbpath):
+        """Entry-point for when script is run directly"""
+        return register_admin(Path(authdbpath).absolute())
+
+    run()# pylint: disable=[no-value-for-parameter]
-- 
cgit v1.2.3