about summary refs log tree commit diff
path: root/gn3/db/case_attributes.py
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/db/case_attributes.py')
-rw-r--r--gn3/db/case_attributes.py188
1 files changed, 180 insertions, 8 deletions
diff --git a/gn3/db/case_attributes.py b/gn3/db/case_attributes.py
index 5eddf98..c164b06 100644
--- a/gn3/db/case_attributes.py
+++ b/gn3/db/case_attributes.py
@@ -26,11 +26,12 @@ class CaseAttributeEdit:
     """Represents an edit operation for case attributes in the database.
 
     Attributes:
-        inbredset_id (int): The ID of the inbred set associated with
+        - inbredset_id (int): The ID of the inbred set associated with
         the edit.
-        user_id (str): The ID of the user performing the edit.
-        changes (dict): A dictionary containing the changes to be
-    applied to the case attributes.
+        - status: (EditStatus): The status of this edit.
+        - user_id (str): The ID of the user performing the edit.
+        - changes (dict): A dictionary containing the changes to be
+        applied to the case attributes.
 
     """
     inbredset_id: int
@@ -81,6 +82,40 @@ def queue_edit(cursor, directory: Path, edit: CaseAttributeEdit) -> Optional[int
 
 
 def __fetch_case_attrs_changes__(cursor, change_ids: tuple) -> list:
+    """Fetches case attribute change records from the audit table for
+    given change IDs.
+
+    Retrieves records from the `caseattributes_audit` table for the
+    specified `change_ids`, including the editor, JSON diff data, and
+    timestamp. The JSON diff data is deserialized into a Python
+    dictionary for each record. Results are ordered by timestamp in
+    descending order (most recent first).
+
+    Args:
+        cursor: A MySQLdb cursor for executing SQL queries.
+        change_ids (tuple): A tuple of integers representing the IDs
+        of changes to fetch.
+
+    Returns:
+        list: A list of dictionaries, each containing the `editor`,
+            `json_diff_data` (as a deserialized dictionary), and `time_stamp`
+            for the matching change IDs. Returns an empty list if no records
+            are found.
+
+    Notes:
+        - The function assumes `change_ids` is a non-empty tuple of valid integers.
+        - The `json_diff_data` column in `caseattributes_audit` is expected to contain valid
+          JSON strings, which are deserialized into dictionaries.
+        - The query uses parameterized placeholders to prevent SQL injection.
+        - This is an internal helper function (indicated by double underscores) used by
+          other functions like `get_changes`.
+
+    Raises:
+        json.JSONDecodeError: If any `json_diff_data` value cannot be deserialized.
+        TypeError: If `change_ids` is empty or contains non-integer values, potentially
+            causing a database error.
+
+    """
     placeholders = ','.join(['%s'] * len(change_ids))
     cursor.execute(
         "SELECT editor, json_diff_data, time_stamp "
@@ -95,6 +130,38 @@ def __fetch_case_attrs_changes__(cursor, change_ids: tuple) -> list:
 
 
 def view_change(cursor, change_id: int) -> dict:
+    """Queries the `caseattributes_audit` table to fetch the
+    `json_diff_data` column for the given `change_id`. The JSON data
+    is deserialized into a Python dictionary and returned.  If no
+    record is found or the `json_diff_data` is None, an empty
+    dictionary is returned.
+
+    Args:
+        cursor: A MySQLdb cursor for executing SQL queries.
+        change_id (int): The ID of the change to retrieve from the
+        `caseattributes_audit` table.
+
+    Returns:
+        dict: The deserialized JSON diff data as a dictionary if the
+              record exists and contains valid JSON; otherwise, an empty dictionary.
+
+    Notes:
+        - The function assumes `change_id` is a valid integer
+          corresponding to a record in `caseattributes_audit`.
+        - The `json_diff_data` column is expected to contain a valid
+          JSON string or None.
+        - Uses parameterized queries to prevent SQL injection.
+        - The second column returned by `fetchone()` is
+          ignored (denoted by `_`).
+
+    Raises:
+        json.JSONDecodeError: If the `json_diff_data` cannot be
+            deserialized due to invalid JSON.
+        TypeError: If `cursor.fetchone()` returns None (e.g., no
+            record found) and `json_diff_data` is accessed, though the
+            function handles this by returning an empty dictionary.
+
+    """
     cursor.execute(
         "SELECT json_diff_data "
         "FROM caseattributes_audit "
@@ -109,6 +176,53 @@ def view_change(cursor, change_id: int) -> dict:
 
 
 def get_changes(cursor, inbredset_id: int, directory: Path) -> dict:
+    """Retrieves case attribute changes for a given inbred set,
+    categorized by review status.
+
+    Fetches change IDs from an LMDB database for the specified
+    `inbredset_id`, categorized as review, approved, or
+    rejected. Queries the `caseattributes_audit` table to retrieve
+    details for these changes using the internal
+    `__fetch_case_attrs_changes__` function.  Returns a dictionary
+    with change details grouped by status.
+
+    Args:
+        - cursor: A MySQLdb cursor for executing SQL queries.
+        - inbredset_id (int): The ID of the inbred set to filter
+          changes.
+        - directory (Path): The base directory path for the LMDB
+          database.
+
+    Returns:
+        dict: A dictionary with three keys: 'reviews', 'approvals',
+            and 'rejections'. Each key maps to a dictionary where keys
+            are change IDs (int) and values are change details
+            (dictionaries with 'editor', 'json_diff_data', and
+            'time_stamp'). Empty dictionaries are returned for
+            categories with no changes.
+
+    Notes:
+        - Creates an LMDB environment in a subdirectory
+          `case-attributes/{inbredset_id}` if it doesn't exist, with a
+          map size of 8 MB.
+        - Uses LMDB to store sets of change IDs under keys 'review',
+          'approved', and 'rejected'.
+        - The `__fetch_case_attrs_changes__` function is called for
+          each non-empty set of change IDs to retrieve details from
+          the `caseattributes_audit` table.
+        - If no change IDs exist for a category, an empty dictionary
+          is returned for that category.
+        - Assumes `inbredset_id` is a valid integer and the LMDB
+          database is properly formatted.
+
+    Raises:
+        json.JSONDecodeError: If any `json_diff_data` in the audit
+            table cannot be deserialized by
+            `__fetch_case_attrs_changes__`.
+        TypeError: If `inbredset_id` is not an integer or if LMDB data
+            cannot be deserialized.
+
+    """
     directory = f"{directory}/case-attributes/{inbredset_id}"
     if not os.path.exists(directory):
         os.makedirs(directory)
@@ -138,7 +252,65 @@ def get_changes(cursor, inbredset_id: int, directory: Path) -> dict:
     }
 
 
+# pylint: disable[too-many-locals]
 def apply_change(cursor, change_type: EditStatus, change_id: int, directory: Path) -> bool:
+    """Applies or rejects a case attribute change and updates its status in the audit table and LMDB.
+
+    Processes a change identified by `change_id` based on the
+    specified `change_type` (approved or rejected). For approved
+    changes, applies modifications to the `CaseAttributeXRefNew` table
+    using bulk inserts and updates the audit status. For rejected
+    changes, updates the audit status only.  Manages change IDs in
+    LMDB by moving them from the 'review' set to either 'approved' or
+    'rejected' sets. Returns False if the `change_id` is not in the
+    review set.
+
+    Args:
+        cursor: A MySQLdb cursor for executing SQL queries.
+        change_type (EditStatus): The action to perform, either
+            `EditStatus.approved` or `EditStatus.rejected`.
+        change_id (int): The ID of the change to process,
+            corresponding to a record in `caseattributes_audit`.
+        directory (Path): The base directory path for the LMDB
+        database.
+
+    Returns:
+        bool: True if the change was successfully applied or rejected,
+            False if `change_id` is not found in the LMDB 'review'
+            set.
+
+    Notes:
+        - Opens an LMDB environment in the specified `directory` with
+          a map size of 8 MB.
+        - For `EditStatus.approved`, fetches `json_diff_data` from
+          `caseattributes_audit`, extracts modifications, and performs
+          bulk inserts into `CaseAttributeXRefNew` with `ON DUPLICATE
+          KEY UPDATE`.
+        - For `EditStatus.rejected`, updates the
+          `caseattributes_audit` status without modifying case
+          attributes.
+        - Uses bulk `SELECT` queries to fetch `StrainId` and
+          `CaseAttributeId` values efficiently.
+        - Assumes `CaseAttributeXRefNew` has a unique key on
+          `(InbredSetId, StrainId, CaseAttributeId)` for `ON DUPLICATE
+          KEY UPDATE`.
+        - The `json_diff_data` is expected to contain valid JSON with
+          an `inbredset_id` and `Modifications.Current` structure.
+        - The second column from `fetchone()` is ignored (denoted by
+          `_`).
+
+    Raises:
+        ValueError: If `change_type` is neither `EditStatus.approved`
+            nor `EditStatus.rejected`.
+        json.JSONDecodeError: If `json_diff_data` cannot be
+            deserialized for approved changes.
+        TypeError: If `cursor.fetchone()` returns None for
+            `json_diff_data` or if `strain_id` or `caseattr_id` are
+            missing during bulk insert preparation.
+        pickle.UnpicklingError: If LMDB data (e.g., 'review' or
+            'approved' sets) cannot be deserialized.
+
+    """
     review_ids, approved_ids, rejected_ids = set(), set(), set()
     env = lmdb.open(directory, map_size=8_000_000)  # 1 MB
     with env.begin(write=True) as txn:
@@ -226,10 +398,10 @@ def apply_change(cursor, change_type: EditStatus, change_id: int, directory: Pat
                     (str(change_type), change_id))
                 if approvals := txn.get(b"approved"):
                     approved_ids = pickle.loads(approvals)
-                review_ids.discard(change_id)
-                approved_ids.add(change_id)
-                txn.put(b"review", pickle.dumps(review_ids))
-                txn.put(b"approvals", pickle.dumps(approved_ids))
+                    review_ids.discard(change_id)
+                    approved_ids.add(change_id)
+                    txn.put(b"review", pickle.dumps(review_ids))
+                    txn.put(b"approvals", pickle.dumps(approved_ids))
                 return True
             case _:
                 raise ValueError