aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/deepdiff/summarize.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/deepdiff/summarize.py')
-rw-r--r--.venv/lib/python3.12/site-packages/deepdiff/summarize.py144
1 files changed, 144 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/deepdiff/summarize.py b/.venv/lib/python3.12/site-packages/deepdiff/summarize.py
new file mode 100644
index 00000000..f911b84c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/deepdiff/summarize.py
@@ -0,0 +1,144 @@
+from typing import Tuple
+from deepdiff.helper import JSON, SummaryNodeType
+from deepdiff.serialization import json_dumps
+
+
+def _truncate(s: str, max_len: int) -> str:
+ """
+ Truncate string s to max_len characters.
+ If possible, keep the first (max_len-5) characters, then '...' then the last 2 characters.
+ """
+ if len(s) <= max_len:
+ return s
+ if max_len <= 5:
+ return s[:max_len]
+ return s[:max_len - 5] + "..." + s[-2:]
+# Re-defining the functions due to environment reset
+
+
+# Function to calculate node weights recursively
def calculate_weights(node):
    """
    Recursively compute the weight of node and a weighted mirror of its shape.

    Returns a pair (weight, structure) where structure is one of:
      - (SummaryNodeType.dict, {key: (key_weight, child_weight, child_structure)})
      - (SummaryNodeType.list, [(0, child_weight, child_structure), ...])
      - (SummaryNodeType.leaf, original_value)

    A dict key weighs len(key), or 1 when the key has no length. List indexes
    weigh nothing. Leaves weigh: the length of a string, the number of
    characters in str(value) for an int, the number of characters in
    str(round(value, 2)) for a float, 1 for None, and 0 for anything else.
    """
    if isinstance(node, dict):
        entries = {}
        for key, value in node.items():
            try:
                key_weight = len(key)
            except TypeError:
                # Keys without a length (e.g. numbers) count as one unit.
                key_weight = 1
            value_weight, value_structure = calculate_weights(value)
            entries[key] = (key_weight, value_weight, value_structure)
        dict_weight = sum(kw + vw for kw, vw, _ in entries.values())
        return dict_weight, (SummaryNodeType.dict, entries)

    if isinstance(node, list):
        items = []
        for element in node:
            element_weight, element_structure = calculate_weights(element)
            # The leading 0 is the edge weight: list indexes are free.
            items.append((0, element_weight, element_structure))
        list_weight = sum(ew + cw for ew, cw, _ in items)
        return list_weight, (SummaryNodeType.list, items)

    # Anything that is neither a dict nor a list is treated as a leaf.
    if isinstance(node, str):
        leaf_weight = len(node)
    elif isinstance(node, int):
        leaf_weight = len(str(node))
    elif isinstance(node, float):
        leaf_weight = len(str(round(node, 2)))
    elif node is None:
        leaf_weight = 1
    else:
        leaf_weight = 0
    return leaf_weight, (SummaryNodeType.leaf, node)
+
+# Tree shrinking with a per-branch balance threshold
+# (the balanced summarization algorithm is implemented directly below)
+
+# Balanced algorithm (simplified version):
def shrink_tree_balanced(node_structure, max_weight: int, balance_threshold: float) -> Tuple[JSON, float]:
    """
    Shrink a weighted tree so that it fits within max_weight.

    Parameters:
        node_structure: a (node_type, node_info) pair in the shape returned
            by calculate_weights.
        max_weight: the weight budget available to this node.
        balance_threshold: fraction of a container's budget that any single
            child branch may use, so one heavy branch cannot crowd out the
            others.

    Returns:
        A pair (shrunk_value, weight). (None, 0) is returned when nothing
        could be kept within the budget.

    Behavior per node type:
        - leaf: returned unchanged when it fits; otherwise strings are
          truncated with _truncate and numbers are cut to the budget's
          number of characters (converted back to int or float when the
          remaining text still parses, else left as text).
        - dict: children are visited heaviest first; a child is skipped when
          its key alone would use up its allowed share.
        - list: children are visited heaviest first; a trailing "..." marks
          the point where the budget was (nearly) exhausted.
    """
    node_type, node_info = node_structure

    if node_type is SummaryNodeType.leaf:
        leaf_value = node_info
        leaf_weight, _ = calculate_weights(leaf_value)
        if leaf_weight <= max_weight:
            return leaf_value, leaf_weight

        # A negative budget used as a slice bound would count from the end of
        # the text and keep almost all of it, so clamp it to zero first.
        budget = max(max_weight, 0)
        if isinstance(leaf_value, str):
            truncated_value = _truncate(leaf_value, budget)
            return truncated_value, len(truncated_value)
        if isinstance(leaf_value, (int, float)):
            truncated_str = str(leaf_value)[:budget]
            # Prefer a numeric result; fall back to the raw text when the cut
            # leaves something that is no longer a valid number (e.g. "-").
            for convert in (int, float):
                try:
                    return convert(truncated_str), len(truncated_str)
                except ValueError:
                    continue
            return truncated_str, len(truncated_str)
        if leaf_value is None:
            return None, 1 if max_weight >= 1 else 0
        return None, 0

    if node_type is SummaryNodeType.dict:
        shrunk_dict = {}
        total_weight = 0
        # Heaviest branches (key weight + child weight) are handled first.
        sorted_children = sorted(node_info.items(), key=lambda x: x[1][0] + x[1][1], reverse=True)

        for k, (edge_w, _, child_struct) in sorted_children:
            allowed_branch_weight = min(max_weight * balance_threshold, max_weight - total_weight)
            if allowed_branch_weight <= edge_w:
                # The key alone would consume this branch's whole share.
                continue

            remaining_weight = int(allowed_branch_weight - edge_w)
            shrunk_child, shrunk_weight = shrink_tree_balanced(child_struct, remaining_weight, balance_threshold)
            if shrunk_child is not None:
                # Keep the key as-is. With weights from calculate_weights,
                # edge_w is len(k) for sized keys, so slicing k[:edge_w] never
                # shortened anything, and it raised TypeError for keys that
                # cannot be sliced (e.g. ints, whose edge weight is 1).
                shrunk_dict[k] = shrunk_child
                total_weight += edge_w + shrunk_weight

            if total_weight >= max_weight:
                break
        if not shrunk_dict:
            return None, 0

        return shrunk_dict, total_weight

    if node_type is SummaryNodeType.list:
        shrunk_list = []
        total_weight = 0
        sorted_children = sorted(node_info, key=lambda x: x[0] + x[1], reverse=True)
        for edge_w, _, child_struct in sorted_children:
            allowed_branch_weight = int(min(max_weight * balance_threshold, max_weight - total_weight))
            shrunk_child, shrunk_weight = shrink_tree_balanced(child_struct, allowed_branch_weight, balance_threshold)
            if shrunk_child is not None:
                shrunk_list.append(shrunk_child)
                total_weight += shrunk_weight
            if total_weight >= max_weight - 1:
                # Budget (nearly) used up: mark the cut and stop.
                shrunk_list.append("...")
                break
        if not shrunk_list:
            return None, 0
        return shrunk_list, total_weight

    return None, 0
+
+
def greedy_tree_summarization_balanced(json_data: JSON, max_weight: int, balance_threshold=0.6) -> JSON:
    """
    Return json_data unchanged when its computed weight fits within
    max_weight; otherwise return the tree shrunk by shrink_tree_balanced.

    balance_threshold is passed through to shrink_tree_balanced unchanged.
    """
    weight, structure = calculate_weights(json_data)
    if weight <= max_weight:
        # Already small enough: hand back the original object itself.
        return json_data
    # Only the shrunk value is needed; the resulting weight is discarded.
    return shrink_tree_balanced(structure, max_weight, balance_threshold)[0]
+
+
def summarize(data: JSON, max_length:int=200, balance_threshold:float=0.6) -> str:
    """
    Return a string summary of data.

    The data is reduced with greedy_tree_summarization_balanced, using
    max_length as the weight budget, and the result is serialized with
    json_dumps. If any step raises, str(data) is returned instead, so this
    function is a best-effort helper that does not propagate errors.
    """
    try:
        condensed = greedy_tree_summarization_balanced(data, max_length, balance_threshold)
        return json_dumps(condensed)
    except Exception:
        # Deliberate catch-all: fall back to the plain string form.
        return str(data)