aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py')
-rw-r--r--.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py451
1 files changed, 451 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py
new file mode 100644
index 00000000..f465e01c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py
@@ -0,0 +1,451 @@
+"""Functions for detecting communities based on modularity."""
+
+from collections import defaultdict
+
+import networkx as nx
+from networkx.algorithms.community.quality import modularity
+from networkx.utils import not_implemented_for
+from networkx.utils.mapped_queue import MappedQueue
+
+__all__ = [
+ "greedy_modularity_communities",
+ "naive_greedy_modularity_communities",
+]
+
+
+def _greedy_modularity_communities_generator(G, weight=None, resolution=1):
+ r"""Yield community partitions of G and the modularity change at each step.
+
+ This function performs Clauset-Newman-Moore greedy modularity maximization [2]_
+ At each step of the process it yields the change in modularity that will occur in
+ the next step followed by yielding the new community partition after that step.
+
+ Greedy modularity maximization begins with each node in its own community
+ and repeatedly joins the pair of communities that lead to the largest
+ modularity until one community contains all nodes (the partition has one set).
+
+ This function maximizes the generalized modularity, where `resolution`
+ is the resolution parameter, often expressed as $\gamma$.
+ See :func:`~networkx.algorithms.community.quality.modularity`.
+
+ Parameters
+ ----------
+ G : NetworkX graph
+
+ weight : string or None, optional (default=None)
+ The name of an edge attribute that holds the numerical value used
+ as a weight. If None, then each edge has weight 1.
+ The degree is the sum of the edge weights adjacent to the node.
+
+ resolution : float (default=1)
+ If resolution is less than 1, modularity favors larger communities.
+ Greater than 1 favors smaller communities.
+
+ Yields
+ ------
+ Alternating yield statements produce the following two objects:
+
+ communities: dict_values
+ A dict_values of frozensets of nodes, one for each community.
+ This represents a partition of the nodes of the graph into communities.
+ The first yield is the partition with each node in its own community.
+
+ dq: float
+ The change in modularity when merging the next two communities
+ that leads to the largest modularity.
+
+ See Also
+ --------
+ modularity
+
+ References
+ ----------
+ .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
+ Oxford University Press 2011.
+ .. [2] Clauset, A., Newman, M. E., & Moore, C.
+ "Finding community structure in very large networks."
+ Physical Review E 70(6), 2004.
+ .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
+ Detection" Phys. Rev. E74, 2006.
+ .. [4] Newman, M. E. J."Analysis of weighted networks"
+ Physical Review E 70(5 Pt 2):056131, 2004.
+ """
+ directed = G.is_directed()
+ N = G.number_of_nodes()
+
+ # Count edges (or the sum of edge-weights for weighted graphs)
+ m = G.size(weight)
+ q0 = 1 / m
+
+ # Calculate degrees (notation from the papers)
+ # a : the fraction of (weighted) out-degree for each node
+ # b : the fraction of (weighted) in-degree for each node
+ if directed:
+ a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)}
+ b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)}
+ else:
+ a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)}
+
+ # this preliminary step collects the edge weights for each node pair
+ # It handles multigraph and digraph and works fine for graph.
+ dq_dict = defaultdict(lambda: defaultdict(float))
+ for u, v, wt in G.edges(data=weight, default=1):
+ if u == v:
+ continue
+ dq_dict[u][v] += wt
+ dq_dict[v][u] += wt
+
+ # now scale and subtract the expected edge-weights term
+ for u, nbrdict in dq_dict.items():
+ for v, wt in nbrdict.items():
+ dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v])
+
+ # Use -dq to get a max_heap instead of a min_heap
+ # dq_heap holds a heap for each node's neighbors
+ dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G}
+ # H -> all_dq_heap holds a heap with the best items for each node
+ H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0])
+
+ # Initialize single-node communities
+ communities = {n: frozenset([n]) for n in G}
+ yield communities.values()
+
+ # Merge the two communities that lead to the largest modularity
+ while len(H) > 1:
+ # Find best merge
+ # Remove from heap of row maxes
+ # Ties will be broken by choosing the pair with lowest min community id
+ try:
+ negdq, u, v = H.pop()
+ except IndexError:
+ break
+ dq = -negdq
+ yield dq
+ # Remove best merge from row u heap
+ dq_heap[u].pop()
+ # Push new row max onto H
+ if len(dq_heap[u]) > 0:
+ H.push(dq_heap[u].heap[0])
+ # If this element was also at the root of row v, we need to remove the
+ # duplicate entry from H
+ if dq_heap[v].heap[0] == (v, u):
+ H.remove((v, u))
+ # Remove best merge from row v heap
+ dq_heap[v].remove((v, u))
+ # Push new row max onto H
+ if len(dq_heap[v]) > 0:
+ H.push(dq_heap[v].heap[0])
+ else:
+ # Duplicate wasn't in H, just remove from row v heap
+ dq_heap[v].remove((v, u))
+
+ # Perform merge
+ communities[v] = frozenset(communities[u] | communities[v])
+ del communities[u]
+
+ # Get neighbor communities connected to the merged communities
+ u_nbrs = set(dq_dict[u])
+ v_nbrs = set(dq_dict[v])
+ all_nbrs = (u_nbrs | v_nbrs) - {u, v}
+ both_nbrs = u_nbrs & v_nbrs
+ # Update dq for merge of u into v
+ for w in all_nbrs:
+ # Calculate new dq value
+ if w in both_nbrs:
+ dq_vw = dq_dict[v][w] + dq_dict[u][w]
+ elif w in v_nbrs:
+ dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u])
+ else: # w in u_nbrs
+ dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v])
+ # Update rows v and w
+ for row, col in [(v, w), (w, v)]:
+ dq_heap_row = dq_heap[row]
+ # Update dict for v,w only (u is removed below)
+ dq_dict[row][col] = dq_vw
+ # Save old max of per-row heap
+ if len(dq_heap_row) > 0:
+ d_oldmax = dq_heap_row.heap[0]
+ else:
+ d_oldmax = None
+ # Add/update heaps
+ d = (row, col)
+ d_negdq = -dq_vw
+ # Save old value for finding heap index
+ if w in v_nbrs:
+ # Update existing element in per-row heap
+ dq_heap_row.update(d, d, priority=d_negdq)
+ else:
+ # We're creating a new nonzero element, add to heap
+ dq_heap_row.push(d, priority=d_negdq)
+ # Update heap of row maxes if necessary
+ if d_oldmax is None:
+ # No entries previously in this row, push new max
+ H.push(d, priority=d_negdq)
+ else:
+ # We've updated an entry in this row, has the max changed?
+ row_max = dq_heap_row.heap[0]
+ if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
+ H.update(d_oldmax, row_max)
+
+ # Remove row/col u from dq_dict matrix
+ for w in dq_dict[u]:
+ # Remove from dict
+ dq_old = dq_dict[w][u]
+ del dq_dict[w][u]
+ # Remove from heaps if we haven't already
+ if w != v:
+ # Remove both row and column
+ for row, col in [(w, u), (u, w)]:
+ dq_heap_row = dq_heap[row]
+ # Check if replaced dq is row max
+ d_old = (row, col)
+ if dq_heap_row.heap[0] == d_old:
+ # Update per-row heap and heap of row maxes
+ dq_heap_row.remove(d_old)
+ H.remove(d_old)
+ # Update row max
+ if len(dq_heap_row) > 0:
+ H.push(dq_heap_row.heap[0])
+ else:
+ # Only update per-row heap
+ dq_heap_row.remove(d_old)
+
+ del dq_dict[u]
+ # Mark row u as deleted, but keep placeholder
+ dq_heap[u] = MappedQueue()
+ # Merge u into v and update a
+ a[v] += a[u]
+ a[u] = 0
+ if directed:
+ b[v] += b[u]
+ b[u] = 0
+
+ yield communities.values()
+
+
+@nx._dispatchable(edge_attrs="weight")
+def greedy_modularity_communities(
+ G,
+ weight=None,
+ resolution=1,
+ cutoff=1,
+ best_n=None,
+):
+ r"""Find communities in G using greedy modularity maximization.
+
+ This function uses Clauset-Newman-Moore greedy modularity maximization [2]_
+ to find the community partition with the largest modularity.
+
+ Greedy modularity maximization begins with each node in its own community
+ and repeatedly joins the pair of communities that lead to the largest
+ modularity until no further increase in modularity is possible (a maximum).
+ Two keyword arguments adjust the stopping condition. `cutoff` is a lower
+ limit on the number of communities so you can stop the process before
+ reaching a maximum (used to save computation time). `best_n` is an upper
+ limit on the number of communities so you can make the process continue
+ until at most n communities remain even if the maximum modularity occurs
+ for more. To obtain exactly n communities, set both `cutoff` and `best_n` to n.
+
+ This function maximizes the generalized modularity, where `resolution`
+ is the resolution parameter, often expressed as $\gamma$.
+ See :func:`~networkx.algorithms.community.quality.modularity`.
+
+ Parameters
+ ----------
+ G : NetworkX graph
+
+ weight : string or None, optional (default=None)
+ The name of an edge attribute that holds the numerical value used
+ as a weight. If None, then each edge has weight 1.
+ The degree is the sum of the edge weights adjacent to the node.
+
+ resolution : float, optional (default=1)
+ If resolution is less than 1, modularity favors larger communities.
+ Greater than 1 favors smaller communities.
+
+ cutoff : int, optional (default=1)
+ A minimum number of communities below which the merging process stops.
+ The process stops at this number of communities even if modularity
+ is not maximized. The goal is to let the user stop the process early.
+ The process stops before the cutoff if it finds a maximum of modularity.
+
+ best_n : int or None, optional (default=None)
+ A maximum number of communities above which the merging process will
+ not stop. This forces community merging to continue after modularity
+ starts to decrease until `best_n` communities remain.
+ If ``None``, don't force it to continue beyond a maximum.
+
+ Raises
+ ------
+ ValueError : If the `cutoff` or `best_n` value is not in the range
+ ``[1, G.number_of_nodes()]``, or if `best_n` < `cutoff`.
+
+ Returns
+ -------
+ communities: list
+ A list of frozensets of nodes, one for each community.
+ Sorted by length with largest communities first.
+
+ Examples
+ --------
+ >>> G = nx.karate_club_graph()
+ >>> c = nx.community.greedy_modularity_communities(G)
+ >>> sorted(c[0])
+ [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+
+ See Also
+ --------
+ modularity
+
+ References
+ ----------
+ .. [1] Newman, M. E. J. "Networks: An Introduction", page 224
+ Oxford University Press 2011.
+ .. [2] Clauset, A., Newman, M. E., & Moore, C.
+ "Finding community structure in very large networks."
+ Physical Review E 70(6), 2004.
+ .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
+ Detection" Phys. Rev. E74, 2006.
+ .. [4] Newman, M. E. J."Analysis of weighted networks"
+ Physical Review E 70(5 Pt 2):056131, 2004.
+ """
+ if not G.size():
+ return [{n} for n in G]
+
+ if (cutoff < 1) or (cutoff > G.number_of_nodes()):
+ raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.")
+ if best_n is not None:
+ if (best_n < 1) or (best_n > G.number_of_nodes()):
+ raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.")
+ if best_n < cutoff:
+ raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}")
+ if best_n == 1:
+ return [set(G)]
+ else:
+ best_n = G.number_of_nodes()
+
+ # retrieve generator object to construct output
+ community_gen = _greedy_modularity_communities_generator(
+ G, weight=weight, resolution=resolution
+ )
+
+ # construct the first best community
+ communities = next(community_gen)
+
+ # continue merging communities until one of the breaking criteria is satisfied
+ while len(communities) > cutoff:
+ try:
+ dq = next(community_gen)
+ # StopIteration occurs when communities are the connected components
+ except StopIteration:
+ communities = sorted(communities, key=len, reverse=True)
+ # if best_n requires more merging, merge big sets for highest modularity
+ while len(communities) > best_n:
+ comm1, comm2, *rest = communities
+ communities = [comm1 ^ comm2]
+ communities.extend(rest)
+ return communities
+
+ # keep going unless max_mod is reached or best_n says to merge more
+ if dq < 0 and len(communities) <= best_n:
+ break
+ communities = next(community_gen)
+
+ return sorted(communities, key=len, reverse=True)
+
+
+@not_implemented_for("directed")
+@not_implemented_for("multigraph")
+@nx._dispatchable(edge_attrs="weight")
+def naive_greedy_modularity_communities(G, resolution=1, weight=None):
+ r"""Find communities in G using greedy modularity maximization.
+
+ This implementation is O(n^4), much slower than alternatives, but it is
+ provided as an easy-to-understand reference implementation.
+
+ Greedy modularity maximization begins with each node in its own community
+ and joins the pair of communities that most increases modularity until no
+ such pair exists.
+
+ This function maximizes the generalized modularity, where `resolution`
+ is the resolution parameter, often expressed as $\gamma$.
+ See :func:`~networkx.algorithms.community.quality.modularity`.
+
+ Parameters
+ ----------
+ G : NetworkX graph
+ Graph must be simple and undirected.
+
+ resolution : float (default=1)
+ If resolution is less than 1, modularity favors larger communities.
+ Greater than 1 favors smaller communities.
+
+ weight : string or None, optional (default=None)
+ The name of an edge attribute that holds the numerical value used
+ as a weight. If None, then each edge has weight 1.
+ The degree is the sum of the edge weights adjacent to the node.
+
+ Returns
+ -------
+ list
+ A list of sets of nodes, one for each community.
+ Sorted by length with largest communities first.
+
+ Examples
+ --------
+ >>> G = nx.karate_club_graph()
+ >>> c = nx.community.naive_greedy_modularity_communities(G)
+ >>> sorted(c[0])
+ [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+
+ See Also
+ --------
+ greedy_modularity_communities
+ modularity
+ """
+ # First create one community for each node
+ communities = [frozenset([u]) for u in G.nodes()]
+ # Track merges
+ merges = []
+ # Greedily merge communities until no improvement is possible
+ old_modularity = None
+ new_modularity = modularity(G, communities, resolution=resolution, weight=weight)
+ while old_modularity is None or new_modularity > old_modularity:
+ # Save modularity for comparison
+ old_modularity = new_modularity
+ # Find best pair to merge
+ trial_communities = list(communities)
+ to_merge = None
+ for i, u in enumerate(communities):
+ for j, v in enumerate(communities):
+ # Skip i==j and empty communities
+ if j <= i or len(u) == 0 or len(v) == 0:
+ continue
+ # Merge communities u and v
+ trial_communities[j] = u | v
+ trial_communities[i] = frozenset([])
+ trial_modularity = modularity(
+ G, trial_communities, resolution=resolution, weight=weight
+ )
+ if trial_modularity >= new_modularity:
+ # Check if strictly better or tie
+ if trial_modularity > new_modularity:
+ # Found new best, save modularity and group indexes
+ new_modularity = trial_modularity
+ to_merge = (i, j, new_modularity - old_modularity)
+ elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
+ # Break ties by choosing pair with lowest min id
+ new_modularity = trial_modularity
+ to_merge = (i, j, new_modularity - old_modularity)
+ # Un-merge
+ trial_communities[i] = u
+ trial_communities[j] = v
+ if to_merge is not None:
+ # If the best merge improves modularity, use it
+ merges.append(to_merge)
+ i, j, dq = to_merge
+ u, v = communities[i], communities[j]
+ communities[j] = u | v
+ communities[i] = frozenset([])
+ # Remove empty communities and sort
+ return sorted((c for c in communities if len(c) > 0), key=len, reverse=True)