about summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/assign_data_to_default_admin.py (renamed from scripts/migrate_existing_data.py)79
-rw-r--r--scripts/batch_assign_data_to_default_admin.py87
-rw-r--r--scripts/link_inbredsets.py6
-rw-r--r--scripts/register_sys_admin.py2
-rw-r--r--scripts/search_phenotypes.py8
5 files changed, 149 insertions, 33 deletions
diff --git a/scripts/migrate_existing_data.py b/scripts/assign_data_to_default_admin.py
index 336ce72..69fc50c 100644
--- a/scripts/migrate_existing_data.py
+++ b/scripts/assign_data_to_default_admin.py
@@ -1,19 +1,19 @@
 """
-Migrate existing data that is not assigned to any group to the default sys-admin
-group for accessibility purposes.
+Assign any existing data (that is not currently assigned to any group) to the
+default sys-admin group for accessibility purposes.
 """
 import sys
 import json
 import time
 import random
+import logging
 from pathlib import Path
 from uuid import UUID, uuid4
 
 import click
+from gn_libs import mysqldb as biodb
 from MySQLdb.cursors import DictCursor
 
-from gn_auth.auth.db import mariadb as biodb
-
 import gn_auth.auth.db.sqlite3 as authdb
 from gn_auth.auth.authentication.users import User
 from gn_auth.auth.authorisation.roles.models import (
@@ -21,12 +21,14 @@ from gn_auth.auth.authorisation.roles.models import (
 
 from gn_auth.auth.authorisation.resources.groups.models import (
     Group, save_group, add_resources_to_group)
-from gn_auth.auth.authorisation.resources.models import (
-    Resource, ResourceCategory, __assign_resource_owner_role__)
+from gn_auth.auth.authorisation.resources.common import assign_resource_owner_role
+from gn_auth.auth.authorisation.resources.models import Resource, ResourceCategory
+
 
 class DataNotFound(Exception):
     """Raise if no admin user exists."""
 
+
 def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
     """Retrieve all the existing system admins."""
     with authdb.cursor(conn) as cursor:
@@ -38,6 +40,7 @@ def sys_admins(conn: authdb.DbConnection) -> tuple[User, ...]:
         return tuple(User.from_sqlite3_row(row) for row in cursor.fetchall())
     return tuple()
 
+
 def choose_admin(enum_admins: dict[int, User]) -> int:
     """Prompt and read user choice."""
     while True:
@@ -54,6 +57,7 @@ def choose_admin(enum_admins: dict[int, User]) -> int:
                 sys.exit(0)
             print(f"\nERROR: Invalid choice '{choice}'!")
 
+
 def select_sys_admin(admins: tuple[User, ...]) -> User:
     """Pick one admin out of list."""
     if len(admins) > 0:
@@ -67,6 +71,7 @@ def select_sys_admin(admins: tuple[User, ...]) -> User:
     raise DataNotFound(
         "No administrator user found. Create an administrator user first.")
 
+
 def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
     """Retrieve the admin's user group. If none exist, create one."""
     with authdb.cursor(conn) as cursor:
@@ -114,6 +119,7 @@ def admin_group(conn: authdb.DbConnection, admin: User) -> Group:
             cursor, admin, UUID(grp_res["resource_id"]), "group-leader")
         return new_group
 
+
 def __resource_category_by_key__(
         cursor: authdb.DbCursor, category_key: str) -> ResourceCategory:
     """Retrieve a resource category by its ID."""
@@ -128,6 +134,7 @@ def __resource_category_by_key__(
                             row["resource_category_key"],
                             row["resource_category_description"])
 
+
 def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
     """Create default resources."""
     resources = tuple(Resource(
@@ -147,6 +154,7 @@ def __create_resources__(cursor: authdb.DbCursor) -> tuple[Resource, ...]:
         } for res in resources))
     return resources
 
+
 def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
         Resource, ...]:
     """Create default resources, or return them if they exist."""
@@ -175,10 +183,12 @@ def default_resources(conn: authdb.DbConnection, group: Group) -> tuple[
             tuple()
         ) for row in rows)
 
+
 def delay():
     """Delay a while: anything from 2 seconds to 15 seconds."""
     time.sleep(random.choice(range(2,16)))
 
+
 def __assigned_mrna__(authconn):
     """Retrieve assigned mRNA items."""
     with authdb.cursor(authconn) as cursor:
@@ -189,6 +199,7 @@ def __assigned_mrna__(authconn):
             (row["SpeciesId"], row["InbredSetId"], row["ProbeFreezeId"],
              row["ProbeSetFreezeId"]) for row in cursor.fetchall())
 
+
 def __unassigned_mrna__(bioconn, assigned):
     """Retrieve unassigned mRNA data items."""
     query = (
@@ -198,11 +209,12 @@ def __unassigned_mrna__(bioconn, assigned):
         "FROM Species AS s INNER JOIN InbredSet AS iset "
         "ON s.SpeciesId=iset.SpeciesId INNER JOIN ProbeFreeze AS pf "
         "ON iset.InbredSetId=pf.InbredSetId INNER JOIN ProbeSetFreeze AS psf "
-        "ON pf.ProbeFreezeId=psf.ProbeFreezeId ")
+        "ON pf.ProbeFreezeId=psf.ProbeFreezeId "
+        "WHERE s.Name != 'human' ")
     if len(assigned) > 0:
         paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
         query = query + (
-            "WHERE (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
+            "AND (s.SpeciesId, iset.InbredSetId, pf.ProbeFreezeId, psf.Id) "
             f"NOT IN ({paramstr}) ")
 
     query = query + "LIMIT 100000"
@@ -210,6 +222,7 @@ def __unassigned_mrna__(bioconn, assigned):
         cursor.execute(query, tuple(item for row in assigned for item in row))
         return (row for row in cursor.fetchall())
 
+
 def __assign_mrna__(authconn, bioconn, resource, group):
     "Assign any unassigned mRNA data to resource."
     while True:
@@ -238,6 +251,7 @@ def __assign_mrna__(authconn, bioconn, resource, group):
             print(f"-> mRNA: Linked {len(unassigned)}")
             delay()
 
+
 def __assigned_geno__(authconn):
     """Retrieve assigned genotype data."""
     with authdb.cursor(authconn) as cursor:
@@ -256,11 +270,12 @@ def __unassigned_geno__(bioconn, assigned):
         "gf.ShortName AS dataset_shortname "
         "FROM Species AS s INNER JOIN InbredSet AS iset "
         "ON s.SpeciesId=iset.SpeciesId INNER JOIN GenoFreeze AS gf "
-        "ON iset.InbredSetId=gf.InbredSetId ")
+        "ON iset.InbredSetId=gf.InbredSetId "
+        "WHERE s.Name != 'human' ")
     if len(assigned) > 0:
         paramstr = ", ".join(["(%s, %s, %s)"] * len(assigned))
         query = query + (
-            "WHERE (s.SpeciesId, iset.InbredSetId, gf.Id) "
+            "AND (s.SpeciesId, iset.InbredSetId, gf.Id) "
             f"NOT IN ({paramstr}) ")
 
     query = query + "LIMIT 100000"
@@ -268,6 +283,7 @@ def __unassigned_geno__(bioconn, assigned):
         cursor.execute(query, tuple(item for row in assigned for item in row))
         return (row for row in cursor.fetchall())
 
+
 def __assign_geno__(authconn, bioconn, resource, group):
     "Assign any unassigned Genotype data to resource."
     while True:
@@ -296,6 +312,7 @@ def __assign_geno__(authconn, bioconn, resource, group):
             print(f"-> Genotype: Linked {len(unassigned)}")
             delay()
 
+
 def __assigned_pheno__(authconn):
     """Retrieve assigned phenotype data."""
     with authdb.cursor(authconn) as cursor:
@@ -306,25 +323,27 @@ def __assigned_pheno__(authconn):
             row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
             row["PublishXRefId"]) for row in cursor.fetchall())
 
+
 def __unassigned_pheno__(bioconn, assigned):
     """Retrieve all unassigned Phenotype data."""
     query = (
-            "SELECT spc.SpeciesId, iset.InbredSetId, "
-            "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
-            "pf.FullName AS dataset_fullname, "
-            "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
-            "FROM "
-            "Species AS spc "
-            "INNER JOIN InbredSet AS iset "
-            "ON spc.SpeciesId=iset.SpeciesId "
-            "INNER JOIN PublishFreeze AS pf "
-            "ON iset.InbredSetId=pf.InbredSetId "
-            "INNER JOIN PublishXRef AS pxr "
-            "ON pf.InbredSetId=pxr.InbredSetId ")
+        "SELECT spc.SpeciesId, iset.InbredSetId, "
+        "pf.Id AS PublishFreezeId, pf.Name AS dataset_name, "
+        "pf.FullName AS dataset_fullname, "
+        "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+        "FROM "
+        "Species AS spc "
+        "INNER JOIN InbredSet AS iset "
+        "ON spc.SpeciesId=iset.SpeciesId "
+        "INNER JOIN PublishFreeze AS pf "
+        "ON iset.InbredSetId=pf.InbredSetId "
+        "INNER JOIN PublishXRef AS pxr "
+        "ON pf.InbredSetId=pxr.InbredSetId "
+        "WHERE spc.Name != 'human' ")
     if len(assigned) > 0:
         paramstr = ", ".join(["(%s, %s, %s, %s)"] * len(assigned))
         query = query + (
-            "WHERE (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+            "AND (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
             f"NOT IN ({paramstr}) ")
 
     query = query + "LIMIT 100000"
@@ -332,6 +351,7 @@ def __unassigned_pheno__(bioconn, assigned):
         cursor.execute(query, tuple(item for row in assigned for item in row))
         return (row for row in cursor.fetchall())
 
+
 def __assign_pheno__(authconn, bioconn, resource, group):
     """Assign any unassigned Phenotype data to resource."""
     while True:
@@ -360,6 +380,7 @@ def __assign_pheno__(authconn, bioconn, resource, group):
             print(f"-> Phenotype: Linked {len(unassigned)}")
             delay()
 
+
 def assign_data_to_resource(
         authconn, bioconn, resource: Resource, group: Group):
     """Assign existing data, not linked to any group to the resource."""
@@ -371,6 +392,7 @@ def assign_data_to_resource(
     return assigner_fns[resource.resource_category.resource_category_key](
         authconn, bioconn, resource, group)
 
+
 def entry(authdbpath, mysqldburi):
     """Entry-point for data migration."""
     if not Path(authdbpath).exists():
@@ -389,17 +411,24 @@ def entry(authdbpath, mysqldburi):
                 assign_data_to_resource(
                     authconn, bioconn, resource, the_admin_group)
                 with authdb.cursor(authconn) as cursor:
-                    __assign_resource_owner_role__(cursor, resource, admin)
+                    assign_resource_owner_role(
+                        cursor, resource.resource_id, admin.user_id)
     except DataNotFound as dnf:
         print(dnf.args[0], file=sys.stderr)
         sys.exit(1)
 
+
 @click.command()
 @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
 @click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
-def run(authdbpath, mysqldburi):
+@click.option("--loglevel", default="WARNING", show_default=True,
+              type=click.Choice(["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+def run(authdbpath, mysqldburi, loglevel):
     """Setup command-line arguments."""
+    globallogger = logging.getLogger()
+    globallogger.setLevel(loglevel)
     entry(authdbpath, mysqldburi)
 
+
 if __name__ == "__main__":
     run() # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/batch_assign_data_to_default_admin.py b/scripts/batch_assign_data_to_default_admin.py
new file mode 100644
index 0000000..a468019
--- /dev/null
+++ b/scripts/batch_assign_data_to_default_admin.py
@@ -0,0 +1,87 @@
+"""
+Similar to the 'assign_data_to_default_admin' script but without user
+interaction.
+"""
+import sys
+import logging
+from pathlib import Path
+
+import click
+from gn_libs import mysqldb as biodb
+from pymonad.maybe import Just, Maybe, Nothing
+from pymonad.tools import monad_from_none_or_value
+
+from gn_auth.auth.db import sqlite3 as authdb
+from gn_auth.auth.authentication.users import User
+from gn_auth.auth.authorisation.resources.groups.models import (
+    Group, db_row_to_group)
+
+from scripts.assign_data_to_default_admin import (
+    default_resources, assign_data_to_resource)
+
+
+def resources_group(conn: authdb.DbConnection) -> Maybe:
+    """Retrieve the group owning the 'mRNA-euhrin' resource: Just(Group) or Nothing."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT g.* FROM resources AS r "
+            "INNER JOIN resource_ownership AS ro "
+            "ON r.resource_id=ro.resource_id "
+            "INNER JOIN groups AS g ON ro.group_id=g.group_id "
+            "WHERE resource_name='mRNA-euhrin'")
+        return monad_from_none_or_value(
+            Nothing, Just, cursor.fetchone()).then(
+                db_row_to_group)
+
+
+def resource_owner(conn: authdb.DbConnection) -> Maybe:
+    """Retrieve a user with the 'resource-owner' role on 'mRNA-euhrin', as a Maybe."""
+    with authdb.cursor(conn) as cursor:
+        cursor.execute(
+            "SELECT u.* FROM users AS u WHERE u.user_id IN "
+            "(SELECT ur.user_id FROM resources AS rsc "
+            "INNER JOIN user_roles AS ur ON rsc.resource_id=ur.resource_id "
+            "INNER JOIN roles AS r on ur.role_id=r.role_id "
+            "WHERE resource_name='mRNA-euhrin' "
+            "AND r.role_name='resource-owner')")
+        return monad_from_none_or_value(
+            Nothing, Just, cursor.fetchone()).then(
+                User.from_sqlite3_row)
+
+
+def assign_data(authconn: authdb.DbConnection, bioconn, group: Group) -> int:
+    """Assign unlinked data to the group's default resources: 0 on success, 1 on failure."""
+    try:
+        for resource in default_resources(authconn, group):
+            assign_data_to_resource(authconn, bioconn, resource, group)
+
+        return 0  # success must differ from the failure status returned below
+    except Exception as _exc:# pylint: disable=[broad-except]
+        logging.error("Failed to assign some data!", exc_info=True)
+        return 1
+
+
+if __name__ == "__main__":
+    @click.command()
+    @click.argument("authdbpath") # "Path to the Auth(entic|oris)ation database"
+    @click.argument("mysqldburi") # "URI to the MySQL database with the biology data"
+    @click.option("--loglevel",
+                  default="WARNING",
+                  show_default=True,
+                  type=click.Choice([
+                      "CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]))
+    def run(authdbpath, mysqldburi, loglevel):
+        """Run the batch assignment; return 1 if the DB file or the group is missing."""
+        _logger = logging.getLogger()
+        _logger.setLevel(loglevel)
+        if Path(authdbpath).exists():
+            with (authdb.connection(authdbpath) as authconn,
+                  biodb.database_connection(mysqldburi) as bioconn):
+                return resources_group(authconn).maybe(
+                    1,
+                    lambda group: assign_data(authconn, bioconn, group))
+
+        logging.error("There is no such SQLite3 database file.")
+        return 1
+
+    sys.exit(run()) # pylint: disable=[no-value-for-parameter]
diff --git a/scripts/link_inbredsets.py b/scripts/link_inbredsets.py
index ac9fa2b..c78a050 100644
--- a/scripts/link_inbredsets.py
+++ b/scripts/link_inbredsets.py
@@ -6,12 +6,12 @@ import uuid
 from pathlib import Path
 
 import click
+from gn_libs import mysqldb as biodb
 
 import gn_auth.auth.db.sqlite3 as authdb
 
-from gn_auth.auth.db import mariadb as biodb
-
-from scripts.migrate_existing_data import sys_admins, admin_group, select_sys_admin
+from scripts.assign_data_to_default_admin import (
+    sys_admins, admin_group, select_sys_admin)
 
 def linked_inbredsets(conn):
     """Fetch all inbredset groups that are linked to the auth system."""
diff --git a/scripts/register_sys_admin.py b/scripts/register_sys_admin.py
index dfd4d59..06aa845 100644
--- a/scripts/register_sys_admin.py
+++ b/scripts/register_sys_admin.py
@@ -16,7 +16,7 @@ def fetch_email() -> str:
         try:
             user_input = input("Enter the administrator's email: ")
             email = validate_email(user_input.strip(), check_deliverability=True)
-            return email["email"]
+            return email["email"]  # type: ignore
         except EmailNotValidError as _enve:
             print("You did not provide a valid email address. Try again...",
                   file=sys.stderr)
diff --git a/scripts/search_phenotypes.py b/scripts/search_phenotypes.py
index 20d91c9..eee112d 100644
--- a/scripts/search_phenotypes.py
+++ b/scripts/search_phenotypes.py
@@ -11,9 +11,9 @@ from datetime import datetime, timedelta
 import click
 import redis
 import requests
+from gn_libs import mysqldb as gn3db
 
 from gn_auth import jobs
-from gn_auth.auth.db import mariadb as gn3db
 from gn_auth.auth.db import sqlite3 as authdb
 from gn_auth.settings import SQL_URI, AUTH_DB
 from gn_auth.auth.authorisation.data.phenotypes import linked_phenotype_data
@@ -26,7 +26,7 @@ def do_search(
     """Do the search and return the results"""
     search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}"
                                 f"&type=phenotype&query={query}"))
-    response = requests.get(search_uri)
+    response = requests.get(search_uri, timeout=300)
     results = response.json()
     if len(results) > 0:
         return (item for item in results)
@@ -52,7 +52,7 @@ def update_search_results(redisconn: redis.Redis, redisname: str,
                           results: tuple[dict[str, Any], ...]):
     """Save the results to redis db."""
     key = "search_results"
-    prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]"))
+    prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]"))  # type: ignore
     redisconn.hset(redisname, key, json.dumps(prev_results + results))
 
 def expire_redis_results(redisconn: redis.Redis, redisname: str):
@@ -75,7 +75,7 @@ def expire_redis_results(redisconn: redis.Redis, redisname: str):
 @click.option(
     "--redis-uri", default="redis://:@localhost:6379/0",
     help="The URI to the redis server.")
-def search(# pylint: disable=[too-many-arguments, too-many-locals]
+def search(# pylint: disable=[too-many-arguments, too-many-positional-arguments, too-many-locals]
         species: str, query: str, job_id: uuid.UUID, host: str, per_page: int,
         selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str):
     """