Diffstat (limited to 'scripts')
-rw-r--r-- scripts/search_phenotypes.py | 125
1 file changed, 0 insertions(+), 125 deletions(-)
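The file removed below defined a Click command taking a species, a Xapian query and a job id. As a minimal sketch of how that command could be exercised in-process with Click's test runner (the species, query and per-page values here are illustrative, and a reachable GN3 API, auth database, main database and Redis server are assumed):

import uuid

from click.testing import CliRunner

from scripts.search_phenotypes import search  # import path as it existed before this deletion

runner = CliRunner()
result = runner.invoke(search, [
    "mouse",            # SPECIES argument (illustrative)
    "height",           # QUERY argument (illustrative)
    str(uuid.uuid4()),  # JOB-ID argument; any fresh UUID
    "--host", "http://localhost:8080/api/",
    "--per-page", "100",
])
print(result.exit_code, result.output)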
diff --git a/scripts/search_phenotypes.py b/scripts/search_phenotypes.py
deleted file mode 100644
index 38b992b..0000000
--- a/scripts/search_phenotypes.py
+++ /dev/null
@@ -1,125 +0,0 @@
-"""
-A script to search for phenotype traits using the Xapian Search endpoint.
-"""
-import uuid
-import json
-import traceback
-from urllib.parse import urljoin
-from typing import Any, Iterable
-from datetime import datetime, timedelta
-
-import click
-import redis
-import requests
-
-from gn3 import jobs
-from gn3.auth import db as authdb
-from gn3 import db_utils as gn3db
-from gn3.settings import SQL_URI, AUTH_DB
-from gn3.auth.authorisation.data.phenotypes import linked_phenotype_data
-
-class NoSearchResults(Exception):
- """Raise when there are no results for a search."""
-
-def do_search(
-        host: str, query: str, per_page: int, page: int = 1) -> Iterable[dict[str, Any]]:
-    """Do the search and return the results"""
-    search_uri = urljoin(host, (f"search/?page={page}&per_page={per_page}"
-                                f"&type=phenotype&query={query}"))
-    response = requests.get(search_uri)
-    results = response.json()
-    if len(results) > 0:
-        return (item for item in results)
-    raise NoSearchResults(f"No results for search '{query}'")
-
-def __filter_object__(search_item):
-    return (search_item["species"], search_item["group"],
-            search_item["dataset"], search_item["name"])
-
-def remove_selected(search_results, selected: tuple):
- """Remove any item that the user has selected."""
- return (item for item in search_results if __filter_object__(item) not in selected)
-
-def remove_linked(search_results, linked: tuple):
- """Remove any item that has been already linked to a user group."""
- return (item for item in search_results if __filter_object__(item) not in linked)
-
-def update_status(redisconn: redis.Redis, redisname, status: str):
- """Update the status of the search."""
- redisconn.hset(redisname, "status", json.dumps(status))
-
-def update_search_results(redisconn: redis.Redis, redisname: str,
-                          results: tuple[dict[str, Any], ...]):
-    """Save the results to redis db."""
-    key = "search_results"
-    prev_results = tuple(json.loads(redisconn.hget(redisname, key) or "[]"))
-    redisconn.hset(redisname, key, json.dumps(prev_results + results))
-
-def expire_redis_results(redisconn: redis.Redis, redisname: str):
- """Expire the results after a while to ensure they are cleaned up."""
- redisconn.expireat(redisname, datetime.now() + timedelta(minutes=30))
-
-@click.command()
-@click.argument("species")
-@click.argument("query")
-@click.argument("job-id", type=click.UUID)
-@click.option(
- "--host", default="http://localhost:8080/api/", help="The URI to GN3.")
-@click.option("--per-page", default=10000, help="Number of results per page.")
-@click.option("--selected", default="[]", help="Selected traits.")
-@click.option(
- "--auth-db-uri", default=AUTH_DB, help="The SQL URI to the auth database.")
-@click.option(
- "--gn3-db-uri", default=SQL_URI,
- help="The SQL URI to the main GN3 database.")
-@click.option(
- "--redis-uri", default="redis://:@localhost:6379/0",
- help="The URI to the redis server.")
-def search(# pylint: disable=[too-many-arguments, too-many-locals]
-        species: str, query: str, job_id: uuid.UUID, host: str, per_page: int,
-        selected: str, auth_db_uri: str, gn3_db_uri: str, redis_uri: str):
-    """
-    Search for phenotype traits, filtering out any linked and selected traits,
-    loading successive pages until the `per_page` quota is fulfilled or the
-    search runs out of pages.
-    """
-    redisname = jobs.job_key(job_id)
-    with (authdb.connection(auth_db_uri) as authconn,
-          gn3db.database_connection(gn3_db_uri) as gn3conn,
-          redis.Redis.from_url(redis_uri, decode_responses=True) as redisconn):
-        update_status(redisconn, redisname, "started")
-        update_search_results(redisconn, redisname, tuple()) # init search results
-        try:
-            search_query = f"species:{species}" + (
-                f" AND ({query})" if bool(query) else "")
-            selected_traits = tuple(
-                (item["species"], item["group"], item["dataset"], item["name"])
-                for item in json.loads(selected))
-            linked = tuple(
-                (row["SpeciesName"], row["InbredSetName"], row["dataset_name"],
-                 str(row["PublishXRefId"]))
-                for row in linked_phenotype_data(authconn, gn3conn, species))
-            page = 1
-            count = 0
-            while count < per_page:
-                results = tuple(remove_linked(
-                    remove_selected(
-                        do_search(host, search_query, per_page, page),
-                        selected_traits),
-                    linked))[0:per_page-count]
-                count = count + len(results)
-                page = page + 1
-                update_search_results(redisconn, redisname, results)
-        except NoSearchResults as _nsr:
-            pass  # the search ran out of pages; keep what has been gathered
-        except Exception as _exc: # pylint: disable=[broad-except]
-            update_status(redisconn, redisname, "failed")
-            redisconn.hset(redisname, "exception", json.dumps(traceback.format_exc()))
-            expire_redis_results(redisconn, redisname)
-            return 1
-        update_status(redisconn, redisname, "completed")
-        expire_redis_results(redisconn, redisname)
-        return 0
-
-if __name__ == "__main__":
-    search() # pylint: disable=[no-value-for-parameter]
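The script above reports progress through a single Redis hash (named by jobs.job_key(job_id)) holding the JSON-encoded fields "status", "search_results" and "exception", and expiring thirty minutes after completion or failure. A sketch of a consumer polling that hash; the helper name and polling interval are hypothetical, and a connection opened with decode_responses=True, as in the script, is assumed:

import json
import time

import redis

def poll_search_job(redisconn: redis.Redis, redisname: str, interval: float = 1.0):
    """Poll the job hash until the search job completes or fails."""
    while True:
        # update_status() stores the status as a JSON-encoded string.
        status = json.loads(redisconn.hget(redisname, "status") or '"unknown"')
        if status == "failed":
            # On failure, the script stores the formatted traceback under "exception".
            raise RuntimeError(json.loads(redisconn.hget(redisname, "exception") or '""'))
        if status == "completed":
            # update_search_results() accumulates results as a JSON list.
            return json.loads(redisconn.hget(redisname, "search_results") or "[]")
        time.sleep(interval)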