aboutsummaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/assign_data_to_default_admin.py (renamed from scripts/migrate_existing_data.py)79
-rw-r--r--scripts/batch_assign_data_to_default_admin.py87
-rw-r--r--scripts/link_inbredsets.py6
-rw-r--r--scripts/register_sys_admin.py2
-rw-r--r--scripts/search_phenotypes.py8
5 files changed, 149 insertions, 33 deletions
diff --git a/scripts/migrate_existing_data.py b/scripts/assign_data_to_default_admin.py
index 336ce72..69fc50c 100644
--- a/scripts/migrate_existing_data.py
+++ b/scripts/assign_data_to_default_admin.py
@@ -1,19 +1,19 @@
"""
-Migrate existing data that is not assigned to any group to the default sys-admin
-group for accessibility purposes.
+Assign any existing data (that is not currently assigned to any group) to the
+default sys-admin group for accessibility purposes.
"""
import sys
import json
import time
import random
+import logging
from pathlib import Path
from uuid import UUID, uuid4
import click
+from gn_libs import mysqldb as biodb
from MySQLdb.cursors import DictCursor
-from gn_auth.auth.db import mariadb as biodb
-
import gn_auth.auth.db.sqlite3 as authdb
from gn_auth.auth.authentication.users import User
from gn_auth.auth.authorisation.roles.models import (
@@ -21,12 +21,14 @@ from gn_auth.auth.authorisation.roles.models import (
from gn_auth.auth.authorisation.resources.groups.models import (
Group, save_group, add_resources_to_group)
-from gn_auth.auth.authorisation.resources.models import (
- Resource, ResourceCategory, __assign_resource_owner_role__)
+from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role
+from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory
+
class DataNotFound(Exception):
"""Raise if no admin user exists."""
+
def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
"""Retrieve all the existing system admins."""
with authdb.cursor(conn) as cursor:
@@ -38,6 +40,7 @@ def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall())
return tuple()
+
def choose_admin(enum_admins: dict[int, User]) -> int:
"""Prompt and read user choice."""
while True:
@@ -54,6 +57,7 @@ def choose_admin(enum_admins: dict[int, User]) -> int:
sys.exit(0)
print(f"\nERROR: Invalid choice '{choice}'!")
+
def select_sys_admin(admins: tuple[User, ...]) -> User:
"""Pick one admin out of list."""
if len(admins) > 0:
@@ -67,6 +71,7 @@ def select_sys_admin(admins: tuple[User, ...]) -> User:
raise DataNotFound(
"No administrator user found. Create an administrator user first.")
+
def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
"""Retrieve the admin's user group. If none exist, create one."""
with authdb.cursor(conn) as cursor:
@@ -114,6 +119,7 @@ def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
cursor, admin, UUID(grp_res["resource_id"]), "group-leader")
return new_group
+
def __resource_category_by_key__(
cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
"""Retrieve a resource category by its ID."""
@@ -128,6 +134,7 @@ def __resource_category_by_key__(
row["resource_category_key"],
row["resource_category_description"])
+
def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
"""Create default resources."""
resources = tuple(Resource(
@@ -147,6 +154,7 @@ def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
} for res in resources))
return resources
+
def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
Resource, ...]:
"""Create default resources, or return them if they exist."""
@@ -175,10 +183,12 @@ def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
tuple()
) for row in rows)
+
def delay():
"""Delay a while: anything from 2 seconds to 15 seconds."""
time.sleep(random.choice(range(2,16)))
+
def __assigned_mrna__(authconn):
"""Retrieve assigned mRNA items."""
with authdb.cursor(authconn) as cursor:
@@ -189,6 +199,7 @@ def __assigned_mrna__(authconn):
(row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
row["ProbeSetFreezeId"]) for row in cursor.fetchall())
+
def __unassigned_mrna__(bioconn, assigned):
"""Retrieve unassigned mRNA data items."""
query = (
@@ -198,11 +209,12 @@ def __unassigned_mrna__(bioconn, assigned):
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
"ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
- "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+ "ON pf.ProbeFreezeId=psf.ProbeFreezeId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -210,6 +222,7 @@ def __unassigned_mrna__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_mrna__(authconn, bioconn, resource, group):
"Assign any unassigned mRNA data to resource."
while True:
@@ -238,6 +251,7 @@ def __assign_mrna__(authconn, bioconn, resource, group):
print(f"-> mRNA: Linked {len(unassigned)}")
delay()
+
def __assigned_geno__(authconn):
"""Retrieve assigned genotype data."""
with authdb.cursor(authconn) as cursor:
@@ -256,11 +270,12 @@ def __unassigned_geno__(bioconn, assigned):
"gf.ShortName AS dataset_shortname "
"FROM Species AS s INNER JOIN InbredSet AS iset "
"ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
- "ON iset.InbredSetId=gf.InbredSetId ")
+ "ON iset.InbredSetId=gf.InbredSetId "
+ "WHERE s.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+ "AND (s.SpeciesId, iset.InbredSetId, gf.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -268,6 +283,7 @@ def __unassigned_geno__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_geno__(authconn, bioconn, resource, group):
"Assign any unassigned Genotype data to resource."
while True:
@@ -296,6 +312,7 @@ def __assign_geno__(authconn, bioconn, resource, group):
print(f"-> Genotype: Linked {len(unassigned)}")
delay()
+
def __assigned_pheno__(authconn):
"""Retrieve assigned phenotype data."""
with authdb.cursor(authconn) as cursor:
@@ -306,25 +323,27 @@ def __assigned_pheno__(authconn):
row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
row["PublishXRefId"]) for row in cursor.fetchall())
+
def __unassigned_pheno__(bioconn, assigned):
"""Retrieve all unassigned Phenotype data."""
query = (
- "SELECT spc.SpeciesId, iset.InbredSetId, "
- "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
- "pf.FullName AS dataset_fullname, "
- "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
- "FROM "
- "Species AS spc "
- "INNER JOIN InbredSet AS iset "
- "ON spc.SpeciesId=iset.SpeciesId "
- "INNER JOIN PublishFreeze AS pf "
- "ON iset.InbredSetId=pf.InbredSetId "
- "INNER JOIN PublishXRef AS pxr "
- "ON pf.InbredSetId=pxr.InbredSetId ")
+ "SELECT spc.SpeciesId, iset.InbredSetId, "
+ "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+ "pf.FullName AS dataset_fullname, "
+ "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+ "FROM "
+ "Species AS spc "
+ "INNER JOIN InbredSet AS iset "
+ "ON spc.SpeciesId=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON pf.InbredSetId=pxr.InbredSetId "
+ "WHERE spc.Name != 'human' ")
if len(assigned) > 0:
paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
query = query + (
- "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+ "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
f"NOT IN ({paramstr}) ")
query = query + "LIMIT 100000"
@@ -332,6 +351,7 @@ def __unassigned_pheno__(bioconn, assigned):
cursor.execute(query, tuple(item for row in assigned for item in row))
return (row for row in cursor.fetchall())
+
def __assign_pheno__(authconn, bioconn, resource, group):
"""Assign any unassigned Phenotype data to resource."""
while True:
@@ -360,6 +380,7 @@ def __assign_pheno__(authconn, bioconn, resource, group):
print(f"-> Phenotype: Linked {len(unassigned)}")
delay()
+
def assign_data_to_resource(
authconn, bioconn, resource: Resource, group: Group):
"""Assign existing data, not linked to any group to the resource."""
@@ -371,6 +392,7 @@ def assign_data_to_resource(
return assigner_fns[resource.resource_category.resource_category_key](
authconn, bioconn, resource, group)
+
def entry(authdbpath, mysqldburi):
"""Entry-point for data migration."""
if not Path(authdbpath).exists():
@@ -389,17 +411,24 @@ def entry(authdbpath, mysqldburi):
assign_data_to_resource(
authconn, bioconn, resource, the_admin_group)
with authdb.cursor(authconn) as cursor:
- __assign_resource_owner_role__(cursor, resource, admin)
+ assign_resource_owner_role(
+ cursor, resource.resource_id, admin.user_id)
except DataNotFound as dnf:
print(dnf.args[0], file=sys.stderr)
sys.exit(1)
+
@click.command()
@click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
@click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
-def run(authdbpath, mysqldburi):
+@click.option("--loglevel", default="WARNING", show_default=True,
+ type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+def run(authdbpath, mysqldburi, loglevel):
"""Setup command-line arguments."""
+ globallogger = logging.getLogger()
+ globallogger.setLevel(loglevel)
entry(authdbpath, mysqldburi)
+
if __name__ == "__main__":
run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/batch_assign_data_to_default_admin.py b/scripts/batch_assign_data_to_default_admin.py
new file mode 100644
index 0000000..a468019
--- /dev/null
+++ b/scripts/batch_assign_data_to_default_admin.py
@@ -0,0 +1,87 @@
+"""
+Similar to the 'assign_data_to_default_admin' script but without user
+interaction.
+"""
+import sys
+import logging
+from pathlib import Path
+
+import click
+from gn_libs import mysqldb as biodb
+from pymonad.maybe import Just, Maybe, Nothing
+from pymonad.tools import monad_from_none_or_value
+
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.resources.groups.models import (
+ Group, db_row_to_group)
+
+from scripts.assign_data_to_default_admin import (
+ default_resources, assign_data_to_resource)
+
+
+def resources_group(conn: authdb.DbConnection) -> Maybe:
+ """Retrieve the group owning the 'mRNA-euhrin' resource as a Maybe; Nothing when fetchone() yields None."""
+ with authdb.cursor(conn) as cursor:
+ cursor.execute(
+ "SELECT g.* FROM resources AS r "
+ "INNER JOIN resource_ownership AS ro "
+ "ON r.resource_id=ro.resource_id "
+ "INNER JOIN groups AS g ON ro.group_id=g.group_id "
+ "WHERE resource_name='mRNA-euhrin'")
+ return monad_from_none_or_value(
+ Nothing, Just, cursor.fetchone()).then( # only the first fetched row is used
+ db_row_to_group)
+
+
+def resource_owner(conn: authdb.DbConnection) -> Maybe:
+ """Retrieve a user with the 'resource-owner' role on the 'mRNA-euhrin' resource, wrapped in a Maybe."""
+ with authdb.cursor(conn) as cursor:
+ cursor.execute(
+ "SELECT u.* FROM users AS u WHERE u.user_id IN "
+ "(SELECT ur.user_id FROM resources AS rsc "
+ "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id "
+ "INNER JOIN roles AS r on ur.role_id=r.role_id "
+ "WHERE resource_name='mRNA-euhrin' "
+ "AND r.role_name='resource-owner')")
+ return monad_from_none_or_value(
+ Nothing, Just, cursor.fetchone()).then( # only the first fetched row is used
+ User.from_sqlite3_row)
+
+
+def assign_data(authconn: authdb.DbConnection, bioconn, group: Group):
+    """Assign data to each default resource of `group`; return 0 on success, 1 on failure."""
+    try:
+        for resource in default_resources(authconn, group):
+            assign_data_to_resource(authconn, bioconn, resource, group)
+
+        return 0  # success: the caller passes this value on as the exit status
+    except Exception as _exc:# pylint: disable=[broad-except]
+        logging.error("Failed to assign some data!", exc_info=True)
+        return 1  # failure: the traceback was logged just above
+
+
+if __name__ == "__main__":
+ @click.command()
+ @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+ @click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+ @click.option("--loglevel",
+ default="WARNING",
+ show_default=True,
+ type=click.Choice([
+ "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+ def run(authdbpath, mysqldburi, loglevel):
+ """Script entry point."""
+ _logger = logging.getLogger() # no name given, so this is the root logger
+ _logger.setLevel(loglevel)
+ if Path(authdbpath).exists():
+ with (authdb.connection(authdbpath) as authconn,
+ biodb.database_connection(mysqldburi) as bioconn):
+ return resources_group(authconn).maybe(
+ 1, # result used when resources_group() gave Nothing
+ lambda group: assign_data(authconn, bioconn, group))
+
+ logging.error("There is no such SQLite3 database file.") # reached when the `if` above did not return
+ return 1 # NOTE(review): click's default standalone mode may exit on its own and drop this value before sys.exit() sees it - confirm
+
+ sys.exit(run()) # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/link_inbredsets.py b/scripts/link_inbredsets.py
index ac9fa2b..c78a050 100644
--- a/scripts/link_inbredsets.py
+++ b/scripts/link_inbredsets.py
@@ -6,12 +6,12 @@ import uuid
from pathlib import Path
import click
+from gn_libs import mysqldb as biodb
import gn_auth.auth.db.sqlite3 as authdb
-from gn_auth.auth.db import mariadb as biodb
-
-from scripts.migrate_existing_data import sys_admins, admin_group, select_sys_admin
+from scripts.assign_data_to_default_admin import (
+ sys_admins, admin_group, select_sys_admin)
def linked_inbredsets(conn):
"""Fetch all inbredset groups that are linked to the auth system."""
diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py
index dfd4d59..06aa845 100644
--- a/scripts/register_sys_admin.py
+++ b/scripts/register_sys_admin.py
@@ -16,7 +16,7 @@ def fetch_email() -> str:
try:
user_input = input("Enter the administrator's email: ")
email = validate_email(user_input.strip(), check_deliverability=True)
- return email["email"]
+ return email["email"] # type: ignore
except EmailNotValidError as _enve:
print("You did not provide a valid email address. Try again...",
file=sys.stderr)
diff --git a/scripts/search_phenotypes.py b/scripts/search_phenotypes.py
index 20d91c9..eee112d 100644
--- a/scripts/search_phenotypes.py
+++ b/scripts/search_phenotypes.py
@@ -11,9 +11,9 @@ from datetime import datetime, timedelta
import click
import redis
import requests
+from gn_libs import mysqldb as gn3db
from gn_auth import jobs
-from gn_auth.auth.db import mariadb as gn3db
from gn_auth.auth.db import sqlite3 as authdb
from gn_auth.settings import SQL_URI, AUTH_DB
from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data
@@ -26,7 +26,7 @@ def do_search(
"""Do the search and return the results"""
search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}"
f"&type=phenotype&query={query}"))
- response = requests.get(search_uri)
+ response = requests.get(search_uri, timeout=300)
results = response.json()
if len(results) > 0:
return (item for item in results)
@@ -52,7 +52,7 @@ def update_search_results(redisconn: redis.Redis, redisname: str,
results: tuple[dict[str, Any], ...]):
"""Save the results to redis db."""
key = "search_results"
- prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]"))
+ prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]")) # type: ignore
redisconn.hset(redisname, key, json.dumps(prev_results + results))
def expire_redis_results(redisconn: redis.Redis, redisname: str):
@@ -75,7 +75,7 @@ def expire_redis_results(redisconn: redis.Redis, redisname: str):
@click.option(
"--redis-uri", default="redis://:@localhost:6379/0",
help="The URI to the redis server.")
-def search(# pylint: disable=[too-many-arguments, too-many-locals]
+def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals]
species: str, query: str, job_id: uuid.UUID, host: str, per_page: int,
selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str):
"""