diff options
Diffstat (limited to 'gn3')
| -rw-r--r-- | gn3/db/case_attributes.py | 188 |
1 files changed, 180 insertions, 8 deletions
diff --git a/gn3/db/case_attributes.py b/gn3/db/case_attributes.py index 5eddf98..c164b06 100644 --- a/gn3/db/case_attributes.py +++ b/gn3/db/case_attributes.py @@ -26,11 +26,12 @@ class CaseAttributeEdit: """Represents an edit operation for case attributes in the database. Attributes: - inbredset_id (int): The ID of the inbred set associated with + - inbredset_id (int): The ID of the inbred set associated with the edit. - user_id (str): The ID of the user performing the edit. - changes (dict): A dictionary containing the changes to be - applied to the case attributes. + - status: (EditStatus): The status of this edit. + - user_id (str): The ID of the user performing the edit. + - changes (dict): A dictionary containing the changes to be + applied to the case attributes. """ inbredset_id: int @@ -81,6 +82,40 @@ def queue_edit(cursor, directory: Path, edit: CaseAttributeEdit) -> Optional[int def __fetch_case_attrs_changes__(cursor, change_ids: tuple) -> list: + """Fetches case attribute change records from the audit table for + given change IDs. + + Retrieves records from the `caseattributes_audit` table for the + specified `change_ids`, including the editor, JSON diff data, and + timestamp. The JSON diff data is deserialized into a Python + dictionary for each record. Results are ordered by timestamp in + descending order (most recent first). + + Args: + cursor: A MySQLdb cursor for executing SQL queries. + change_ids (tuple): A tuple of integers representing the IDs + of changes to fetch. + + Returns: + list: A list of dictionaries, each containing the `editor`, + `json_diff_data` (as a deserialized dictionary), and `time_stamp` + for the matching change IDs. Returns an empty list if no records + are found. + + Notes: + - The function assumes `change_ids` is a non-empty tuple of valid integers. + - The `json_diff_data` column in `caseattributes_audit` is expected to contain valid + JSON strings, which are deserialized into dictionaries. + - The query uses parameterized placeholders to prevent SQL injection. + - This is an internal helper function (indicated by double underscores) used by + other functions like `get_changes`. + + Raises: + json.JSONDecodeError: If any `json_diff_data` value cannot be deserialized. + TypeError: If `change_ids` is empty or contains non-integer values, potentially + causing a database error. + + """ placeholders = ','.join(['%s'] * len(change_ids)) cursor.execute( "SELECT editor, json_diff_data, time_stamp " @@ -95,6 +130,38 @@ def __fetch_case_attrs_changes__(cursor, change_ids: tuple) -> list: def view_change(cursor, change_id: int) -> dict: + """Queries the `caseattributes_audit` table to fetch the + `json_diff_data` column for the given `change_id`. The JSON data + is deserialized into a Python dictionary and returned. If no + record is found or the `json_diff_data` is None, an empty + dictionary is returned. + + Args: + cursor: A MySQLdb cursor for executing SQL queries. + change_id (int): The ID of the change to retrieve from the + `caseattributes_audit` table. + + Returns: + dict: The deserialized JSON diff data as a dictionary if the + record exists and contains valid JSON; otherwise, an empty dictionary. + + Notes: + - The function assumes `change_id` is a valid integer + corresponding to a record in `caseattributes_audit`. + - The `json_diff_data` column is expected to contain a valid + JSON string or None. + - Uses parameterized queries to prevent SQL injection. + - The second column returned by `fetchone()` is + ignored (denoted by `_`). + + Raises: + json.JSONDecodeError: If the `json_diff_data` cannot be + deserialized due to invalid JSON. + TypeError: If `cursor.fetchone()` returns None (e.g., no + record found) and `json_diff_data` is accessed, though the + function handles this by returning an empty dictionary. + + """ cursor.execute( "SELECT json_diff_data " "FROM caseattributes_audit " @@ -109,6 +176,53 @@ def view_change(cursor, change_id: int) -> dict: def get_changes(cursor, inbredset_id: int, directory: Path) -> dict: + """Retrieves case attribute changes for a given inbred set, + categorized by review status. + + Fetches change IDs from an LMDB database for the specified + `inbredset_id`, categorized as review, approved, or + rejected. Queries the `caseattributes_audit` table to retrieve + details for these changes using the internal + `__fetch_case_attrs_changes__` function. Returns a dictionary + with change details grouped by status. + + Args: + - cursor: A MySQLdb cursor for executing SQL queries. + - inbredset_id (int): The ID of the inbred set to filter + changes. + - directory (Path): The base directory path for the LMDB + database. + + Returns: + dict: A dictionary with three keys: 'reviews', 'approvals', + and 'rejections'. Each key maps to a dictionary where keys + are change IDs (int) and values are change details + (dictionaries with 'editor', 'json_diff_data', and + 'time_stamp'). Empty dictionaries are returned for + categories with no changes. + + Notes: + - Creates an LMDB environment in a subdirectory + `case-attributes/{inbredset_id}` if it doesn't exist, with a + map size of 8 MB. + - Uses LMDB to store sets of change IDs under keys 'review', + 'approved', and 'rejected'. + - The `__fetch_case_attrs_changes__` function is called for + each non-empty set of change IDs to retrieve details from + the `caseattributes_audit` table. + - If no change IDs exist for a category, an empty dictionary + is returned for that category. + - Assumes `inbredset_id` is a valid integer and the LMDB + database is properly formatted. + + Raises: + json.JSONDecodeError: If any `json_diff_data` in the audit + table cannot be deserialized by + `__fetch_case_attrs_changes__`. + TypeError: If `inbredset_id` is not an integer or if LMDB data + cannot be deserialized. + + """ directory = f"{directory}/case-attributes/{inbredset_id}" if not os.path.exists(directory): os.makedirs(directory) @@ -138,7 +252,65 @@ def get_changes(cursor, inbredset_id: int, directory: Path) -> dict: } +# pylint: disable[too-many-locals] def apply_change(cursor, change_type: EditStatus, change_id: int, directory: Path) -> bool: + """Applies or rejects a case attribute change and updates its status in the audit table and LMDB. + + Processes a change identified by `change_id` based on the + specified `change_type` (approved or rejected). For approved + changes, applies modifications to the `CaseAttributeXRefNew` table + using bulk inserts and updates the audit status. For rejected + changes, updates the audit status only. Manages change IDs in + LMDB by moving them from the 'review' set to either 'approved' or + 'rejected' sets. Returns False if the `change_id` is not in the + review set. + + Args: + cursor: A MySQLdb cursor for executing SQL queries. + change_type (EditStatus): The action to perform, either + `EditStatus.approved` or `EditStatus.rejected`. + change_id (int): The ID of the change to process, + corresponding to a record in `caseattributes_audit`. + directory (Path): The base directory path for the LMDB + database. + + Returns: + bool: True if the change was successfully applied or rejected, + False if `change_id` is not found in the LMDB 'review' + set. + + Notes: + - Opens an LMDB environment in the specified `directory` with + a map size of 8 MB. + - For `EditStatus.approved`, fetches `json_diff_data` from + `caseattributes_audit`, extracts modifications, and performs + bulk inserts into `CaseAttributeXRefNew` with `ON DUPLICATE + KEY UPDATE`. + - For `EditStatus.rejected`, updates the + `caseattributes_audit` status without modifying case + attributes. + - Uses bulk `SELECT` queries to fetch `StrainId` and + `CaseAttributeId` values efficiently. + - Assumes `CaseAttributeXRefNew` has a unique key on + `(InbredSetId, StrainId, CaseAttributeId)` for `ON DUPLICATE + KEY UPDATE`. + - The `json_diff_data` is expected to contain valid JSON with + an `inbredset_id` and `Modifications.Current` structure. + - The second column from `fetchone()` is ignored (denoted by + `_`). + + Raises: + ValueError: If `change_type` is neither `EditStatus.approved` + nor `EditStatus.rejected`. + json.JSONDecodeError: If `json_diff_data` cannot be + deserialized for approved changes. + TypeError: If `cursor.fetchone()` returns None for + `json_diff_data` or if `strain_id` or `caseattr_id` are + missing during bulk insert preparation. + pickle.UnpicklingError: If LMDB data (e.g., 'review' or + 'approved' sets) cannot be deserialized. + + """ review_ids, approved_ids, rejected_ids = set(), set(), set() env = lmdb.open(directory, map_size=8_000_000) # 1 MB with env.begin(write=True) as txn: @@ -226,10 +398,10 @@ def apply_change(cursor, change_type: EditStatus, change_id: int, directory: Pat (str(change_type), change_id)) if approvals := txn.get(b"approved"): approved_ids = pickle.loads(approvals) - review_ids.discard(change_id) - approved_ids.add(change_id) - txn.put(b"review", pickle.dumps(review_ids)) - txn.put(b"approvals", pickle.dumps(approved_ids)) + review_ids.discard(change_id) + approved_ids.add(change_id) + txn.put(b"review", pickle.dumps(review_ids)) + txn.put(b"approvals", pickle.dumps(approved_ids)) return True case _: raise ValueError |
