aboutsummaryrefslogtreecommitdiff
path: root/gn3
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2023-04-14 13:19:31 +0300
committerFrederick Muriuki Muriithi2023-04-14 14:14:20 +0300
commit2b7ecb52fa5f568d7d48fc324bae8231343543bd (patch)
tree4ccefd378ac5ea2d276985cf531e27690041d1ef /gn3
parent73863c8230df455dc433f8f10e2cee5edd8d8e58 (diff)
downloadgenenetwork3-2b7ecb52fa5f568d7d48fc324bae8231343543bd.tar.gz
auth: Add external script to search for phenotypes
We need a search through the available phenotype traits in the database when linking the traits to user groups. Unfortunately, the Xapian Search indexes do not (and should not) include the internal identifiers we use to disambiguate the traits. On the other hand, we do not want to present the user with traits that have already been linked to any user group within the search results. The script in this commit, together with the modified queries for fetching the phenotype data form a "hack" of sorts to wrap around the way the search works while ensuring we do not present the user with "non-actionable" (linked) traits in the search results.
Diffstat (limited to 'gn3')
-rw-r--r--gn3/auth/authorisation/data/phenotypes.py45
1 files changed, 38 insertions, 7 deletions
diff --git a/gn3/auth/authorisation/data/phenotypes.py b/gn3/auth/authorisation/data/phenotypes.py
index 0d018cf..3ba478a 100644
--- a/gn3/auth/authorisation/data/phenotypes.py
+++ b/gn3/auth/authorisation/data/phenotypes.py
@@ -1,14 +1,44 @@
"""Handle linking of Phenotype data to the Auth(entic|oris)ation system."""
+from typing import Any, Iterable
+
+from MySQLdb.cursors import DictCursor
+
import gn3.auth.db as authdb
import gn3.db_utils as gn3db
from gn3.auth.authorisation.checks import authorised_p
-def linked_phenotype_data(conn: authdb.DbConnection) -> tuple[dict, ...]:
+def linked_phenotype_data(
+ authconn: authdb.DbConnection, gn3conn: gn3db.Connection,
+ species: str = "") -> Iterable[dict[str, Any]]:
"""Retrieve phenotype data linked to user groups."""
- with authdb.cursor(conn) as cursor:
- cursor.execute("SELECT * FROM linked_phenotype_data")
- return tuple(dict(row) for row in cursor.fetchall())
- return tuple()
+ authkeys = ("SpeciesId", "InbredSetId", "PublishFreezeId", "PublishXRefId")
+ with (authdb.cursor(authconn) as authcursor,
+ gn3conn.cursor(DictCursor) as gn3cursor):
+ authcursor.execute("SELECT * FROM linked_phenotype_data")
+ linked = tuple(tuple(row[key] for key in authkeys)
+ for row in authcursor.fetchall())
+ paramstr = "".join(["(%s, %s, %s, %s)"] * len(linked))
+ query = (
+ "SELECT spc.SpeciesId, spc.SpeciesName, iset.InbredSetId, "
+ "iset.InbredSetName, pf.Id AS PublishFreezeId, "
+ "pf.Name AS dataset_name, pf.FullName AS dataset_fullname, "
+ "pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
+ "FROM "
+ "Species AS spc "
+ "INNER JOIN InbredSet AS iset "
+ "ON spc.SpeciesId=iset.SpeciesId "
+ "INNER JOIN PublishFreeze AS pf "
+ "ON iset.InbredSetId=pf.InbredSetId "
+ "INNER JOIN PublishXRef AS pxr "
+ "ON pf.InbredSetId=pxr.InbredSetId") + (
+ " WHERE" if (len(linked) > 0 or bool(species)) else "") + (
+ (" (spc.SpeciesId, iset.InbredSetId, pf.Id, pxr.Id) "
+ f"NOT IN ({paramstr})") if len(linked) > 0 else "") + (
+ " AND"if len(linked) > 0 else "") + (
+ " spc.SpeciesName=%s" if bool(species) else "")
+ params = linked + ((species,) if bool(species) else tuple())
+ gn3cursor.execute(query, params)
+ return (item for item in gn3cursor.fetchall())
@authorised_p(("system:data:link-to-group",),
error_description=(
@@ -22,10 +52,11 @@ def ungrouped_phenotype_data(
params = tuple(
(row["SpeciesId"], row["InbredSetId"], row["PublishFreezeId"],
row["PublishXRefId"])
- for row in linked_phenotype_data(authconn))
+ for row in linked_phenotype_data(authconn, gn3conn))
paramstr = ", ".join(["(?, ?, ?, ?)"] * len(params))
query = (
- "SELECT spc.SpeciesId, iset.InbredSetId, pf.Id AS PublishFreezeId, "
+ "SELECT spc.SpeciesId, spc.SpeciesName, iset.InbredSetId, "
+ "iset.InbredSetName, pf.Id AS PublishFreezeId, "
"pf.Name AS dataset_name, pf.FullName AS dataset_fullname, "
"pf.ShortName AS dataset_shortname, pxr.Id AS PublishXRefId "
"FROM "