diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community')
24 files changed, 4228 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/__init__.py new file mode 100644 index 00000000..4dfa8481 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/__init__.py @@ -0,0 +1,26 @@ +"""Functions for computing and measuring community structure. + +The ``community`` subpackage can be accessed by using :mod:`networkx.community`, then accessing the +functions as attributes of ``community``. For example:: + + >>> import networkx as nx + >>> G = nx.barbell_graph(5, 1) + >>> communities_generator = nx.community.girvan_newman(G) + >>> top_level_communities = next(communities_generator) + >>> next_level_communities = next(communities_generator) + >>> sorted(map(sorted, next_level_communities)) + [[0, 1, 2, 3, 4], [5], [6, 7, 8, 9, 10]] + +""" + +from networkx.algorithms.community.asyn_fluid import * +from networkx.algorithms.community.centrality import * +from networkx.algorithms.community.divisive import * +from networkx.algorithms.community.kclique import * +from networkx.algorithms.community.kernighan_lin import * +from networkx.algorithms.community.label_propagation import * +from networkx.algorithms.community.lukes import * +from networkx.algorithms.community.modularity_max import * +from networkx.algorithms.community.quality import * +from networkx.algorithms.community.community_utils import * +from networkx.algorithms.community.louvain import * diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/asyn_fluid.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/asyn_fluid.py new file mode 100644 index 00000000..fea72c1b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/asyn_fluid.py @@ -0,0 +1,151 @@ +"""Asynchronous Fluid Communities algorithm for community detection.""" + +from collections import Counter + +import networkx as nx +from networkx.algorithms.components import is_connected +from networkx.exception import NetworkXError +from networkx.utils import groups, not_implemented_for, py_random_state + +__all__ = ["asyn_fluidc"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@py_random_state(3) +@nx._dispatchable +def asyn_fluidc(G, k, max_iter=100, seed=None): + """Returns communities in `G` as detected by Fluid Communities algorithm. + + The asynchronous fluid communities algorithm is described in + [1]_. The algorithm is based on the simple idea of fluids interacting + in an environment, expanding and pushing each other. Its initialization is + random, so found communities may vary on different executions. + + The algorithm proceeds as follows. First each of the initial k communities + is initialized in a random vertex in the graph. Then the algorithm iterates + over all vertices in a random order, updating the community of each vertex + based on its own community and the communities of its neighbors. This + process is performed several times until convergence. + At all times, each community has a total density of 1, which is equally + distributed among the vertices it contains. If a vertex changes of + community, vertex densities of affected communities are adjusted + immediately. When a complete iteration over all vertices is done, such that + no vertex changes the community it belongs to, the algorithm has converged + and returns. + + This is the original version of the algorithm described in [1]_. + Unfortunately, it does not support weighted graphs yet. + + Parameters + ---------- + G : NetworkX graph + Graph must be simple and undirected. + + k : integer + The number of communities to be found. + + max_iter : integer + The number of maximum iterations allowed. By default 100. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + communities : iterable + Iterable of communities given as sets of nodes. + + Notes + ----- + k variable is not an optional argument. + + References + ---------- + .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A + Competitive and Highly Scalable Community Detection Algorithm". + [https://arxiv.org/pdf/1703.09307.pdf]. + """ + # Initial checks + if not isinstance(k, int): + raise NetworkXError("k must be an integer.") + if not k > 0: + raise NetworkXError("k must be greater than 0.") + if not is_connected(G): + raise NetworkXError("Fluid Communities require connected Graphs.") + if len(G) < k: + raise NetworkXError("k cannot be bigger than the number of nodes.") + # Initialization + max_density = 1.0 + vertices = list(G) + seed.shuffle(vertices) + communities = {n: i for i, n in enumerate(vertices[:k])} + density = {} + com_to_numvertices = {} + for vertex in communities: + com_to_numvertices[communities[vertex]] = 1 + density[communities[vertex]] = max_density + # Set up control variables and start iterating + iter_count = 0 + cont = True + while cont: + cont = False + iter_count += 1 + # Loop over all vertices in graph in a random order + vertices = list(G) + seed.shuffle(vertices) + for vertex in vertices: + # Updating rule + com_counter = Counter() + # Take into account self vertex community + try: + com_counter.update({communities[vertex]: density[communities[vertex]]}) + except KeyError: + pass + # Gather neighbor vertex communities + for v in G[vertex]: + try: + com_counter.update({communities[v]: density[communities[v]]}) + except KeyError: + continue + # Check which is the community with highest density + new_com = -1 + if len(com_counter.keys()) > 0: + max_freq = max(com_counter.values()) + best_communities = [ + com + for com, freq in com_counter.items() + if (max_freq - freq) < 0.0001 + ] + # If actual vertex com in best communities, it is preserved + try: + if communities[vertex] in best_communities: + new_com = communities[vertex] + except KeyError: + pass + # If vertex community changes... + if new_com == -1: + # Set flag of non-convergence + cont = True + # Randomly chose a new community from candidates + new_com = seed.choice(best_communities) + # Update previous community status + try: + com_to_numvertices[communities[vertex]] -= 1 + density[communities[vertex]] = ( + max_density / com_to_numvertices[communities[vertex]] + ) + except KeyError: + pass + # Update new community status + communities[vertex] = new_com + com_to_numvertices[communities[vertex]] += 1 + density[communities[vertex]] = ( + max_density / com_to_numvertices[communities[vertex]] + ) + # If maximum iterations reached --> output actual results + if iter_count > max_iter: + break + # Return results by grouping communities as list of vertices + return iter(groups(communities).values()) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py new file mode 100644 index 00000000..43281701 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/centrality.py @@ -0,0 +1,171 @@ +"""Functions for computing communities based on centrality notions.""" + +import networkx as nx + +__all__ = ["girvan_newman"] + + +@nx._dispatchable(preserve_edge_attrs="most_valuable_edge") +def girvan_newman(G, most_valuable_edge=None): + """Finds communities in a graph using the Girvan–Newman method. + + Parameters + ---------- + G : NetworkX graph + + most_valuable_edge : function + Function that takes a graph as input and outputs an edge. The + edge returned by this function will be recomputed and removed at + each iteration of the algorithm. + + If not specified, the edge with the highest + :func:`networkx.edge_betweenness_centrality` will be used. + + Returns + ------- + iterator + Iterator over tuples of sets of nodes in `G`. Each set of node + is a community, each tuple is a sequence of communities at a + particular level of the algorithm. + + Examples + -------- + To get the first pair of communities:: + + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) + + To get only the first *k* tuples of communities, use + :func:`itertools.islice`:: + + >>> import itertools + >>> G = nx.path_graph(8) + >>> k = 2 + >>> comp = nx.community.girvan_newman(G) + >>> for communities in itertools.islice(comp, k): + ... print(tuple(sorted(c) for c in communities)) + ... + ([0, 1, 2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5, 6, 7]) + + To stop getting tuples of communities once the number of communities + is greater than *k*, use :func:`itertools.takewhile`:: + + >>> import itertools + >>> G = nx.path_graph(8) + >>> k = 4 + >>> comp = nx.community.girvan_newman(G) + >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp) + >>> for communities in limited: + ... print(tuple(sorted(c) for c in communities)) + ... + ([0, 1, 2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5, 6, 7]) + ([0, 1], [2, 3], [4, 5], [6, 7]) + + To just choose an edge to remove based on the weight:: + + >>> from operator import itemgetter + >>> G = nx.path_graph(10) + >>> edges = G.edges() + >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, "weight") + >>> def heaviest(G): + ... u, v, w = max(G.edges(data="weight"), key=itemgetter(2)) + ... return (u, v) + ... + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=heaviest) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9]) + + To utilize edge weights when choosing an edge with, for example, the + highest betweenness centrality:: + + >>> from networkx import edge_betweenness_centrality as betweenness + >>> def most_central_edge(G): + ... centrality = betweenness(G, weight="weight") + ... return max(centrality, key=centrality.get) + ... + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=most_central_edge) + >>> tuple(sorted(c) for c in next(comp)) + ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9]) + + To specify a different ranking algorithm for edges, use the + `most_valuable_edge` keyword argument:: + + >>> from networkx import edge_betweenness_centrality + >>> from random import random + >>> def most_central_edge(G): + ... centrality = edge_betweenness_centrality(G) + ... max_cent = max(centrality.values()) + ... # Scale the centrality values so they are between 0 and 1, + ... # and add some random noise. + ... centrality = {e: c / max_cent for e, c in centrality.items()} + ... # Add some random noise. + ... centrality = {e: c + random() for e, c in centrality.items()} + ... return max(centrality, key=centrality.get) + ... + >>> G = nx.path_graph(10) + >>> comp = nx.community.girvan_newman(G, most_valuable_edge=most_central_edge) + + Notes + ----- + The Girvan–Newman algorithm detects communities by progressively + removing edges from the original graph. The algorithm removes the + "most valuable" edge, traditionally the edge with the highest + betweenness centrality, at each step. As the graph breaks down into + pieces, the tightly knit community structure is exposed and the + result can be depicted as a dendrogram. + + """ + # If the graph is already empty, simply return its connected + # components. + if G.number_of_edges() == 0: + yield tuple(nx.connected_components(G)) + return + # If no function is provided for computing the most valuable edge, + # use the edge betweenness centrality. + if most_valuable_edge is None: + + def most_valuable_edge(G): + """Returns the edge with the highest betweenness centrality + in the graph `G`. + + """ + # We have guaranteed that the graph is non-empty, so this + # dictionary will never be empty. + betweenness = nx.edge_betweenness_centrality(G) + return max(betweenness, key=betweenness.get) + + # The copy of G here must include the edge weight data. + g = G.copy().to_undirected() + # Self-loops must be removed because their removal has no effect on + # the connected components of the graph. + g.remove_edges_from(nx.selfloop_edges(g)) + while g.number_of_edges() > 0: + yield _without_most_central_edges(g, most_valuable_edge) + + +def _without_most_central_edges(G, most_valuable_edge): + """Returns the connected components of the graph that results from + repeatedly removing the most "valuable" edge in the graph. + + `G` must be a non-empty graph. This function modifies the graph `G` + in-place; that is, it removes edges on the graph `G`. + + `most_valuable_edge` is a function that takes the graph `G` as input + (or a subgraph with one or more edges of `G` removed) and returns an + edge. That edge will be removed and this process will be repeated + until the number of connected components in the graph increases. + + """ + original_num_components = nx.number_connected_components(G) + num_new_components = original_num_components + while num_new_components <= original_num_components: + edge = most_valuable_edge(G) + G.remove_edge(*edge) + new_components = tuple(nx.connected_components(G)) + num_new_components = len(new_components) + return new_components diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/community_utils.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/community_utils.py new file mode 100644 index 00000000..ba73a6b3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/community_utils.py @@ -0,0 +1,30 @@ +"""Helper functions for community-finding algorithms.""" + +import networkx as nx + +__all__ = ["is_partition"] + + +@nx._dispatchable +def is_partition(G, communities): + """Returns *True* if `communities` is a partition of the nodes of `G`. + + A partition of a universe set is a family of pairwise disjoint sets + whose union is the entire universe set. + + Parameters + ---------- + G : NetworkX graph. + + communities : list or iterable of sets of nodes + If not a list, the iterable is converted internally to a list. + If it is an iterator it is exhausted. + + """ + # Alternate implementation: + # return all(sum(1 if v in c else 0 for c in communities) == 1 for v in G) + if not isinstance(communities, list): + communities = list(communities) + nodes = {n for c in communities for n in c if n in G} + + return len(G) == len(nodes) == sum(len(c) for c in communities) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/divisive.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/divisive.py new file mode 100644 index 00000000..be3c7d86 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/divisive.py @@ -0,0 +1,216 @@ +import functools + +import networkx as nx + +__all__ = [ + "edge_betweenness_partition", + "edge_current_flow_betweenness_partition", +] + + +@nx._dispatchable(edge_attrs="weight") +def edge_betweenness_partition(G, number_of_sets, *, weight=None): + """Partition created by iteratively removing the highest edge betweenness edge. + + This algorithm works by calculating the edge betweenness for all + edges and removing the edge with the highest value. It is then + determined whether the graph has been broken into at least + `number_of_sets` connected components. + If not the process is repeated. + + Parameters + ---------- + G : NetworkX Graph, DiGraph or MultiGraph + Graph to be partitioned + + number_of_sets : int + Number of sets in the desired partition of the graph + + weight : key, optional, default=None + The key to use if using weights for edge betweenness calculation + + Returns + ------- + C : list of sets + Partition of the nodes of G + + Raises + ------ + NetworkXError + If number_of_sets is <= 0 or if number_of_sets > len(G) + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> part = nx.community.edge_betweenness_partition(G, 2) + >>> {0, 1, 3, 4, 5, 6, 7, 10, 11, 12, 13, 16, 17, 19, 21} in part + True + >>> { + ... 2, + ... 8, + ... 9, + ... 14, + ... 15, + ... 18, + ... 20, + ... 22, + ... 23, + ... 24, + ... 25, + ... 26, + ... 27, + ... 28, + ... 29, + ... 30, + ... 31, + ... 32, + ... 33, + ... } in part + True + + See Also + -------- + edge_current_flow_betweenness_partition + + Notes + ----- + This algorithm is fairly slow, as both the calculation of connected + components and edge betweenness relies on all pairs shortest + path algorithms. They could potentially be combined to cut down + on overall computation time. + + References + ---------- + .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports + Volume 486, Issue 3-5 p. 75-174 + http://arxiv.org/abs/0906.0612 + """ + if number_of_sets <= 0: + raise nx.NetworkXError("number_of_sets must be >0") + if number_of_sets == 1: + return [set(G)] + if number_of_sets == len(G): + return [{n} for n in G] + if number_of_sets > len(G): + raise nx.NetworkXError("number_of_sets must be <= len(G)") + + H = G.copy() + partition = list(nx.connected_components(H)) + while len(partition) < number_of_sets: + ranking = nx.edge_betweenness_centrality(H, weight=weight) + edge = max(ranking, key=ranking.get) + H.remove_edge(*edge) + partition = list(nx.connected_components(H)) + return partition + + +@nx._dispatchable(edge_attrs="weight") +def edge_current_flow_betweenness_partition(G, number_of_sets, *, weight=None): + """Partition created by removing the highest edge current flow betweenness edge. + + This algorithm works by calculating the edge current flow + betweenness for all edges and removing the edge with the + highest value. It is then determined whether the graph has + been broken into at least `number_of_sets` connected + components. If not the process is repeated. + + Parameters + ---------- + G : NetworkX Graph, DiGraph or MultiGraph + Graph to be partitioned + + number_of_sets : int + Number of sets in the desired partition of the graph + + weight : key, optional (default=None) + The edge attribute key to use as weights for + edge current flow betweenness calculations + + Returns + ------- + C : list of sets + Partition of G + + Raises + ------ + NetworkXError + If number_of_sets is <= 0 or number_of_sets > len(G) + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> part = nx.community.edge_current_flow_betweenness_partition(G, 2) + >>> {0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 16, 17, 19, 21} in part + True + >>> {8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33} in part + True + + + See Also + -------- + edge_betweenness_partition + + Notes + ----- + This algorithm is extremely slow, as the recalculation of the edge + current flow betweenness is extremely slow. + + References + ---------- + .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports + Volume 486, Issue 3-5 p. 75-174 + http://arxiv.org/abs/0906.0612 + """ + if number_of_sets <= 0: + raise nx.NetworkXError("number_of_sets must be >0") + elif number_of_sets == 1: + return [set(G)] + elif number_of_sets == len(G): + return [{n} for n in G] + elif number_of_sets > len(G): + raise nx.NetworkXError("number_of_sets must be <= len(G)") + + rank = functools.partial( + nx.edge_current_flow_betweenness_centrality, normalized=False, weight=weight + ) + + # current flow requires a connected network so we track the components explicitly + H = G.copy() + partition = list(nx.connected_components(H)) + if len(partition) > 1: + Hcc_subgraphs = [H.subgraph(cc).copy() for cc in partition] + else: + Hcc_subgraphs = [H] + + ranking = {} + for Hcc in Hcc_subgraphs: + ranking.update(rank(Hcc)) + + while len(partition) < number_of_sets: + edge = max(ranking, key=ranking.get) + for cc, Hcc in zip(partition, Hcc_subgraphs): + if edge[0] in cc: + Hcc.remove_edge(*edge) + del ranking[edge] + splitcc_list = list(nx.connected_components(Hcc)) + if len(splitcc_list) > 1: + # there are 2 connected components. split off smaller one + cc_new = min(splitcc_list, key=len) + Hcc_new = Hcc.subgraph(cc_new).copy() + # update edge rankings for Hcc_new + newranks = rank(Hcc_new) + for e, r in newranks.items(): + ranking[e if e in ranking else e[::-1]] = r + # append new cc and Hcc to their lists. + partition.append(cc_new) + Hcc_subgraphs.append(Hcc_new) + + # leave existing cc and Hcc in their lists, but shrink them + Hcc.remove_nodes_from(cc_new) + cc.difference_update(cc_new) + # update edge rankings for Hcc whether it was split or not + newranks = rank(Hcc) + for e, r in newranks.items(): + ranking[e if e in ranking else e[::-1]] = r + break + return partition diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kclique.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kclique.py new file mode 100644 index 00000000..c7249104 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kclique.py @@ -0,0 +1,79 @@ +from collections import defaultdict + +import networkx as nx + +__all__ = ["k_clique_communities"] + + +@nx._dispatchable +def k_clique_communities(G, k, cliques=None): + """Find k-clique communities in graph using the percolation method. + + A k-clique community is the union of all cliques of size k that + can be reached through adjacent (sharing k-1 nodes) k-cliques. + + Parameters + ---------- + G : NetworkX graph + + k : int + Size of smallest clique + + cliques: list or generator + Precomputed cliques (use networkx.find_cliques(G)) + + Returns + ------- + Yields sets of nodes, one for each k-clique community. + + Examples + -------- + >>> G = nx.complete_graph(5) + >>> K5 = nx.convert_node_labels_to_integers(G, first_label=2) + >>> G.add_edges_from(K5.edges()) + >>> c = list(nx.community.k_clique_communities(G, 4)) + >>> sorted(list(c[0])) + [0, 1, 2, 3, 4, 5, 6] + >>> list(nx.community.k_clique_communities(G, 6)) + [] + + References + ---------- + .. [1] Gergely Palla, Imre Derényi, Illés Farkas1, and Tamás Vicsek, + Uncovering the overlapping community structure of complex networks + in nature and society Nature 435, 814-818, 2005, + doi:10.1038/nature03607 + """ + if k < 2: + raise nx.NetworkXError(f"k={k}, k must be greater than 1.") + if cliques is None: + cliques = nx.find_cliques(G) + cliques = [frozenset(c) for c in cliques if len(c) >= k] + + # First index which nodes are in which cliques + membership_dict = defaultdict(list) + for clique in cliques: + for node in clique: + membership_dict[node].append(clique) + + # For each clique, see which adjacent cliques percolate + perc_graph = nx.Graph() + perc_graph.add_nodes_from(cliques) + for clique in cliques: + for adj_clique in _get_adjacent_cliques(clique, membership_dict): + if len(clique.intersection(adj_clique)) >= (k - 1): + perc_graph.add_edge(clique, adj_clique) + + # Connected components of clique graph with perc edges + # are the percolated cliques + for component in nx.connected_components(perc_graph): + yield (frozenset.union(*component)) + + +def _get_adjacent_cliques(clique, membership_dict): + adjacent_cliques = set() + for n in clique: + for adj_clique in membership_dict[n]: + if clique != adj_clique: + adjacent_cliques.add(adj_clique) + return adjacent_cliques diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kernighan_lin.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kernighan_lin.py new file mode 100644 index 00000000..f6397d82 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/kernighan_lin.py @@ -0,0 +1,139 @@ +"""Functions for computing the Kernighan–Lin bipartition algorithm.""" + +from itertools import count + +import networkx as nx +from networkx.algorithms.community.community_utils import is_partition +from networkx.utils import BinaryHeap, not_implemented_for, py_random_state + +__all__ = ["kernighan_lin_bisection"] + + +def _kernighan_lin_sweep(edges, side): + """ + This is a modified form of Kernighan-Lin, which moves single nodes at a + time, alternating between sides to keep the bisection balanced. We keep + two min-heaps of swap costs to make optimal-next-move selection fast. + """ + costs0, costs1 = costs = BinaryHeap(), BinaryHeap() + for u, side_u, edges_u in zip(count(), side, edges): + cost_u = sum(w if side[v] else -w for v, w in edges_u) + costs[side_u].insert(u, cost_u if side_u else -cost_u) + + def _update_costs(costs_x, x): + for y, w in edges[x]: + costs_y = costs[side[y]] + cost_y = costs_y.get(y) + if cost_y is not None: + cost_y += 2 * (-w if costs_x is costs_y else w) + costs_y.insert(y, cost_y, True) + + i = 0 + totcost = 0 + while costs0 and costs1: + u, cost_u = costs0.pop() + _update_costs(costs0, u) + v, cost_v = costs1.pop() + _update_costs(costs1, v) + totcost += cost_u + cost_v + i += 1 + yield totcost, i, (u, v) + + +@not_implemented_for("directed") +@py_random_state(4) +@nx._dispatchable(edge_attrs="weight") +def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None): + """Partition a graph into two blocks using the Kernighan–Lin + algorithm. + + This algorithm partitions a network into two sets by iteratively + swapping pairs of nodes to reduce the edge cut between the two sets. The + pairs are chosen according to a modified form of Kernighan-Lin [1]_, which + moves node individually, alternating between sides to keep the bisection + balanced. + + Parameters + ---------- + G : NetworkX graph + Graph must be undirected. + + partition : tuple + Pair of iterables containing an initial partition. If not + specified, a random balanced partition is used. + + max_iter : int + Maximum number of times to attempt swaps to find an + improvement before giving up. + + weight : key + Edge data key to use as weight. If None, the weights are all + set to one. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + Only used if partition is None + + Returns + ------- + partition : tuple + A pair of sets of nodes representing the bipartition. + + Raises + ------ + NetworkXError + If partition is not a valid partition of the nodes of the graph. + + References + ---------- + .. [1] Kernighan, B. W.; Lin, Shen (1970). + "An efficient heuristic procedure for partitioning graphs." + *Bell Systems Technical Journal* 49: 291--307. + Oxford University Press 2011. + + """ + n = len(G) + labels = list(G) + seed.shuffle(labels) + index = {v: i for i, v in enumerate(labels)} + + if partition is None: + side = [0] * (n // 2) + [1] * ((n + 1) // 2) + else: + try: + A, B = partition + except (TypeError, ValueError) as err: + raise nx.NetworkXError("partition must be two sets") from err + if not is_partition(G, (A, B)): + raise nx.NetworkXError("partition invalid") + side = [0] * n + for a in A: + side[index[a]] = 1 + + if G.is_multigraph(): + edges = [ + [ + (index[u], sum(e.get(weight, 1) for e in d.values())) + for u, d in G[v].items() + ] + for v in labels + ] + else: + edges = [ + [(index[u], e.get(weight, 1)) for u, e in G[v].items()] for v in labels + ] + + for i in range(max_iter): + costs = list(_kernighan_lin_sweep(edges, side)) + min_cost, min_i, _ = min(costs) + if min_cost >= 0: + break + + for _, _, (u, v) in costs[:min_i]: + side[u] = 1 + side[v] = 0 + + A = {u for u, s in zip(labels, side) if s == 0} + B = {u for u, s in zip(labels, side) if s == 1} + return A, B diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/label_propagation.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/label_propagation.py new file mode 100644 index 00000000..74880286 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/label_propagation.py @@ -0,0 +1,338 @@ +""" +Label propagation community detection algorithms. +""" + +from collections import Counter, defaultdict, deque + +import networkx as nx +from networkx.utils import groups, not_implemented_for, py_random_state + +__all__ = [ + "label_propagation_communities", + "asyn_lpa_communities", + "fast_label_propagation_communities", +] + + +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight") +def fast_label_propagation_communities(G, *, weight=None, seed=None): + """Returns communities in `G` as detected by fast label propagation. + + The fast label propagation algorithm is described in [1]_. The algorithm is + probabilistic and the found communities may vary in different executions. + + The algorithm operates as follows. First, the community label of each node is + set to a unique label. The algorithm then repeatedly updates the labels of + the nodes to the most frequent label in their neighborhood. In case of ties, + a random label is chosen from the most frequent labels. + + The algorithm maintains a queue of nodes that still need to be processed. + Initially, all nodes are added to the queue in a random order. Then the nodes + are removed from the queue one by one and processed. If a node updates its label, + all its neighbors that have a different label are added to the queue (if not + already in the queue). The algorithm stops when the queue is empty. + + Parameters + ---------- + G : Graph, DiGraph, MultiGraph, or MultiDiGraph + Any NetworkX graph. + + weight : string, or None (default) + The edge attribute representing a non-negative weight of an edge. If None, + each edge is assumed to have weight one. The weight of an edge is used in + determining the frequency with which a label appears among the neighbors of + a node (edge with weight `w` is equivalent to `w` unweighted edges). + + seed : integer, random_state, or None (default) + Indicator of random number generation state. See :ref:`Randomness<randomness>`. + + Returns + ------- + communities : iterable + Iterable of communities given as sets of nodes. + + Notes + ----- + Edge directions are ignored for directed graphs. + Edge weights must be non-negative numbers. + + References + ---------- + .. [1] Vincent A. Traag & Lovro Šubelj. "Large network community detection by + fast label propagation." Scientific Reports 13 (2023): 2701. + https://doi.org/10.1038/s41598-023-29610-z + """ + + # Queue of nodes to be processed. + nodes_queue = deque(G) + seed.shuffle(nodes_queue) + + # Set of nodes in the queue. + nodes_set = set(G) + + # Assign unique label to each node. + comms = {node: i for i, node in enumerate(G)} + + while nodes_queue: + # Remove next node from the queue to process. + node = nodes_queue.popleft() + nodes_set.remove(node) + + # Isolated nodes retain their initial label. + if G.degree(node) > 0: + # Compute frequency of labels in node's neighborhood. + label_freqs = _fast_label_count(G, comms, node, weight) + max_freq = max(label_freqs.values()) + + # Always sample new label from most frequent labels. + comm = seed.choice( + [comm for comm in label_freqs if label_freqs[comm] == max_freq] + ) + + if comms[node] != comm: + comms[node] = comm + + # Add neighbors that have different label to the queue. + for nbr in nx.all_neighbors(G, node): + if comms[nbr] != comm and nbr not in nodes_set: + nodes_queue.append(nbr) + nodes_set.add(nbr) + + yield from groups(comms).values() + + +def _fast_label_count(G, comms, node, weight=None): + """Computes the frequency of labels in the neighborhood of a node. + + Returns a dictionary keyed by label to the frequency of that label. + """ + + if weight is None: + # Unweighted (un)directed simple graph. + if not G.is_multigraph(): + label_freqs = Counter(map(comms.get, nx.all_neighbors(G, node))) + + # Unweighted (un)directed multigraph. + else: + label_freqs = defaultdict(int) + for nbr in G[node]: + label_freqs[comms[nbr]] += len(G[node][nbr]) + + if G.is_directed(): + for nbr in G.pred[node]: + label_freqs[comms[nbr]] += len(G.pred[node][nbr]) + + else: + # Weighted undirected simple/multigraph. + label_freqs = defaultdict(float) + for _, nbr, w in G.edges(node, data=weight, default=1): + label_freqs[comms[nbr]] += w + + # Weighted directed simple/multigraph. + if G.is_directed(): + for nbr, _, w in G.in_edges(node, data=weight, default=1): + label_freqs[comms[nbr]] += w + + return label_freqs + + +@py_random_state(2) +@nx._dispatchable(edge_attrs="weight") +def asyn_lpa_communities(G, weight=None, seed=None): + """Returns communities in `G` as detected by asynchronous label + propagation. + + The asynchronous label propagation algorithm is described in + [1]_. The algorithm is probabilistic and the found communities may + vary on different executions. + + The algorithm proceeds as follows. After initializing each node with + a unique label, the algorithm repeatedly sets the label of a node to + be the label that appears most frequently among that nodes + neighbors. The algorithm halts when each node has the label that + appears most frequently among its neighbors. The algorithm is + asynchronous because each node is updated without waiting for + updates on the remaining nodes. + + This generalized version of the algorithm in [1]_ accepts edge + weights. + + Parameters + ---------- + G : Graph + + weight : string + The edge attribute representing the weight of an edge. + If None, each edge is assumed to have weight one. In this + algorithm, the weight of an edge is used in determining the + frequency with which a label appears among the neighbors of a + node: a higher weight means the label appears more often. + + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + communities : iterable + Iterable of communities given as sets of nodes. + + Notes + ----- + Edge weight attributes must be numerical. + + References + ---------- + .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near + linear time algorithm to detect community structures in large-scale + networks." Physical Review E 76.3 (2007): 036106. + """ + + labels = {n: i for i, n in enumerate(G)} + cont = True + + while cont: + cont = False + nodes = list(G) + seed.shuffle(nodes) + + for node in nodes: + if not G[node]: + continue + + # Get label frequencies among adjacent nodes. + # Depending on the order they are processed in, + # some nodes will be in iteration t and others in t-1, + # making the algorithm asynchronous. + if weight is None: + # initialising a Counter from an iterator of labels is + # faster for getting unweighted label frequencies + label_freq = Counter(map(labels.get, G[node])) + else: + # updating a defaultdict is substantially faster + # for getting weighted label frequencies + label_freq = defaultdict(float) + for _, v, wt in G.edges(node, data=weight, default=1): + label_freq[labels[v]] += wt + + # Get the labels that appear with maximum frequency. + max_freq = max(label_freq.values()) + best_labels = [ + label for label, freq in label_freq.items() if freq == max_freq + ] + + # If the node does not have one of the maximum frequency labels, + # randomly choose one of them and update the node's label. + # Continue the iteration as long as at least one node + # doesn't have a maximum frequency label. + if labels[node] not in best_labels: + labels[node] = seed.choice(best_labels) + cont = True + + yield from groups(labels).values() + + +@not_implemented_for("directed") +@nx._dispatchable +def label_propagation_communities(G): + """Generates community sets determined by label propagation + + Finds communities in `G` using a semi-synchronous label propagation + method [1]_. This method combines the advantages of both the synchronous + and asynchronous models. Not implemented for directed graphs. + + Parameters + ---------- + G : graph + An undirected NetworkX graph. + + Returns + ------- + communities : iterable + A dict_values object that contains a set of nodes for each community. + + Raises + ------ + NetworkXNotImplemented + If the graph is directed + + References + ---------- + .. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection + via semi-synchronous label propagation algorithms. In Business + Applications of Social Network Analysis (BASNA), 2010 IEEE International + Workshop on (pp. 1-8). IEEE. + """ + coloring = _color_network(G) + # Create a unique label for each node in the graph + labeling = {v: k for k, v in enumerate(G)} + while not _labeling_complete(labeling, G): + # Update the labels of every node with the same color. + for color, nodes in coloring.items(): + for n in nodes: + _update_label(n, labeling, G) + + clusters = defaultdict(set) + for node, label in labeling.items(): + clusters[label].add(node) + return clusters.values() + + +def _color_network(G): + """Colors the network so that neighboring nodes all have distinct colors. + + Returns a dict keyed by color to a set of nodes with that color. + """ + coloring = {} # color => set(node) + colors = nx.coloring.greedy_color(G) + for node, color in colors.items(): + if color in coloring: + coloring[color].add(node) + else: + coloring[color] = {node} + return coloring + + +def _labeling_complete(labeling, G): + """Determines whether or not LPA is done. + + Label propagation is complete when all nodes have a label that is + in the set of highest frequency labels amongst its neighbors. + + Nodes with no neighbors are considered complete. + """ + return all( + labeling[v] in _most_frequent_labels(v, labeling, G) for v in G if len(G[v]) > 0 + ) + + +def _most_frequent_labels(node, labeling, G): + """Returns a set of all labels with maximum frequency in `labeling`. + + Input `labeling` should be a dict keyed by node to labels. + """ + if not G[node]: + # Nodes with no neighbors are themselves a community and are labeled + # accordingly, hence the immediate if statement. + return {labeling[node]} + + # Compute the frequencies of all neighbors of node + freqs = Counter(labeling[q] for q in G[node]) + max_freq = max(freqs.values()) + return {label for label, freq in freqs.items() if freq == max_freq} + + +def _update_label(node, labeling, G): + """Updates the label of a node using the Prec-Max tie breaking algorithm + + The algorithm is explained in: 'Community Detection via Semi-Synchronous + Label Propagation Algorithms' Cordasco and Gargano, 2011 + """ + high_labels = _most_frequent_labels(node, labeling, G) + if len(high_labels) == 1: + labeling[node] = high_labels.pop() + elif len(high_labels) > 1: + # Prec-Max + if labeling[node] not in high_labels: + labeling[node] = max(high_labels) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py new file mode 100644 index 00000000..959c93a5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py @@ -0,0 +1,382 @@ +"""Function for detecting communities based on Louvain Community Detection +Algorithm""" + +import itertools +from collections import defaultdict, deque + +import networkx as nx +from networkx.algorithms.community import modularity +from networkx.utils import py_random_state + +__all__ = ["louvain_communities", "louvain_partitions"] + + +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight") +def louvain_communities( + G, weight="weight", resolution=1, threshold=0.0000001, max_level=None, seed=None +): + r"""Find the best partition of a graph using the Louvain Community Detection + Algorithm. + + Louvain Community Detection Algorithm is a simple method to extract the community + structure of a network. This is a heuristic method based on modularity optimization. [1]_ + + The algorithm works in 2 steps. On the first step it assigns every node to be + in its own community and then for each node it tries to find the maximum positive + modularity gain by moving each node to all of its neighbor communities. If no positive + gain is achieved the node remains in its original community. + + The modularity gain obtained by moving an isolated node $i$ into a community $C$ can + easily be calculated by the following formula (combining [1]_ [2]_ and some algebra): + + .. math:: + \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2} + + where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links + from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$, + $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$ + is the resolution parameter. + + For the directed case the modularity gain can be computed using this formula according to [3]_ + + .. math:: + \Delta Q = \frac{k_{i,in}}{m} + - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2} + + where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and + $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident + to nodes in $C$. + + The first phase continues until no individual move can improve the modularity. + + The second phase consists in building a new network whose nodes are now the communities + found in the first phase. To do so, the weights of the links between the new nodes are given by + the sum of the weight of the links between nodes in the corresponding two communities. Once this + phase is complete it is possible to reapply the first phase creating bigger communities with + increased modularity. + + The above two phases are executed until no modularity gain is achieved (or is less than + the `threshold`, or until `max_levels` is reached). + + Be careful with self-loops in the input graph. These are treated as + previously reduced communities -- as if the process had been started + in the middle of the algorithm. Large self-loop edge weights thus + represent strong communities and in practice may be hard to add + other nodes to. If your input graph edge weights for self-loops + do not represent already reduced communities you may want to remove + the self-loops before inputting that graph. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities + threshold : float, optional (default=0.0000001) + Modularity gain threshold for each level. If the gain of modularity + between 2 levels of the algorithm is less than the given threshold + then the algorithm stops and returns the resulting communities. + max_level : int or None, optional (default=None) + The maximum number of levels (steps of the algorithm) to compute. + Must be a positive integer or None. If None, then there is no max + level and the threshold parameter determines the stopping condition. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + list + A list of sets (partition of `G`). Each set represents one community and contains + all the nodes that constitute it. + + Examples + -------- + >>> import networkx as nx + >>> G = nx.petersen_graph() + >>> nx.community.louvain_communities(G, seed=123) + [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}] + + Notes + ----- + The order in which the nodes are considered can affect the final output. In the algorithm + the ordering happens using a random shuffle. + + References + ---------- + .. [1] Blondel, V.D. et al. Fast unfolding of communities in + large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008 + .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing + well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z + .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks. + [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784 + + See Also + -------- + louvain_partitions + """ + + partitions = louvain_partitions(G, weight, resolution, threshold, seed) + if max_level is not None: + if max_level <= 0: + raise ValueError("max_level argument must be a positive integer or None") + partitions = itertools.islice(partitions, max_level) + final_partition = deque(partitions, maxlen=1) + return final_partition.pop() + + +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight") +def louvain_partitions( + G, weight="weight", resolution=1, threshold=0.0000001, seed=None +): + """Yields partitions for each level of the Louvain Community Detection Algorithm + + Louvain Community Detection Algorithm is a simple method to extract the community + structure of a network. This is a heuristic method based on modularity optimization. [1]_ + + The partitions at each level (step of the algorithm) form a dendrogram of communities. + A dendrogram is a diagram representing a tree and each level represents + a partition of the G graph. The top level contains the smallest communities + and as you traverse to the bottom of the tree the communities get bigger + and the overall modularity increases making the partition better. + + Each level is generated by executing the two phases of the Louvain Community + Detection Algorithm. + + Be careful with self-loops in the input graph. These are treated as + previously reduced communities -- as if the process had been started + in the middle of the algorithm. Large self-loop edge weights thus + represent strong communities and in practice may be hard to add + other nodes to. If your input graph edge weights for self-loops + do not represent already reduced communities you may want to remove + the self-loops before inputting that graph. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities + threshold : float, optional (default=0.0000001) + Modularity gain threshold for each level. If the gain of modularity + between 2 levels of the algorithm is less than the given threshold + then the algorithm stops and returns the resulting communities. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Yields + ------ + list + A list of sets (partition of `G`). Each set represents one community and contains + all the nodes that constitute it. + + References + ---------- + .. [1] Blondel, V.D. et al. Fast unfolding of communities in + large networks. J. Stat. Mech 10008, 1-12(2008) + + See Also + -------- + louvain_communities + """ + + partition = [{u} for u in G.nodes()] + if nx.is_empty(G): + yield partition + return + mod = modularity(G, partition, resolution=resolution, weight=weight) + is_directed = G.is_directed() + if G.is_multigraph(): + graph = _convert_multigraph(G, weight, is_directed) + else: + graph = G.__class__() + graph.add_nodes_from(G) + graph.add_weighted_edges_from(G.edges(data=weight, default=1)) + + m = graph.size(weight="weight") + partition, inner_partition, improvement = _one_level( + graph, m, partition, resolution, is_directed, seed + ) + improvement = True + while improvement: + # gh-5901 protect the sets in the yielded list from further manipulation here + yield [s.copy() for s in partition] + new_mod = modularity( + graph, inner_partition, resolution=resolution, weight="weight" + ) + if new_mod - mod <= threshold: + return + mod = new_mod + graph = _gen_graph(graph, inner_partition) + partition, inner_partition, improvement = _one_level( + graph, m, partition, resolution, is_directed, seed + ) + + +def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None): + """Calculate one level of the Louvain partitions tree + + Parameters + ---------- + G : NetworkX Graph/DiGraph + The graph from which to detect communities + m : number + The size of the graph `G`. + partition : list of sets of nodes + A valid partition of the graph `G` + resolution : positive number + The resolution parameter for computing the modularity of a partition + is_directed : bool + True if `G` is a directed graph. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + """ + node2com = {u: i for i, u in enumerate(G.nodes())} + inner_partition = [{u} for u in G.nodes()] + if is_directed: + in_degrees = dict(G.in_degree(weight="weight")) + out_degrees = dict(G.out_degree(weight="weight")) + Stot_in = list(in_degrees.values()) + Stot_out = list(out_degrees.values()) + # Calculate weights for both in and out neighbors without considering self-loops + nbrs = {} + for u in G: + nbrs[u] = defaultdict(float) + for _, n, wt in G.out_edges(u, data="weight"): + if u != n: + nbrs[u][n] += wt + for n, _, wt in G.in_edges(u, data="weight"): + if u != n: + nbrs[u][n] += wt + else: + degrees = dict(G.degree(weight="weight")) + Stot = list(degrees.values()) + nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G} + rand_nodes = list(G.nodes) + seed.shuffle(rand_nodes) + nb_moves = 1 + improvement = False + while nb_moves > 0: + nb_moves = 0 + for u in rand_nodes: + best_mod = 0 + best_com = node2com[u] + weights2com = _neighbor_weights(nbrs[u], node2com) + if is_directed: + in_degree = in_degrees[u] + out_degree = out_degrees[u] + Stot_in[best_com] -= in_degree + Stot_out[best_com] -= out_degree + remove_cost = ( + -weights2com[best_com] / m + + resolution + * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com]) + / m**2 + ) + else: + degree = degrees[u] + Stot[best_com] -= degree + remove_cost = -weights2com[best_com] / m + resolution * ( + Stot[best_com] * degree + ) / (2 * m**2) + for nbr_com, wt in weights2com.items(): + if is_directed: + gain = ( + remove_cost + + wt / m + - resolution + * ( + out_degree * Stot_in[nbr_com] + + in_degree * Stot_out[nbr_com] + ) + / m**2 + ) + else: + gain = ( + remove_cost + + wt / m + - resolution * (Stot[nbr_com] * degree) / (2 * m**2) + ) + if gain > best_mod: + best_mod = gain + best_com = nbr_com + if is_directed: + Stot_in[best_com] += in_degree + Stot_out[best_com] += out_degree + else: + Stot[best_com] += degree + if best_com != node2com[u]: + com = G.nodes[u].get("nodes", {u}) + partition[node2com[u]].difference_update(com) + inner_partition[node2com[u]].remove(u) + partition[best_com].update(com) + inner_partition[best_com].add(u) + improvement = True + nb_moves += 1 + node2com[u] = best_com + partition = list(filter(len, partition)) + inner_partition = list(filter(len, inner_partition)) + return partition, inner_partition, improvement + + +def _neighbor_weights(nbrs, node2com): + """Calculate weights between node and its neighbor communities. + + Parameters + ---------- + nbrs : dictionary + Dictionary with nodes' neighbors as keys and their edge weight as value. + node2com : dictionary + Dictionary with all graph's nodes as keys and their community index as value. + + """ + weights = defaultdict(float) + for nbr, wt in nbrs.items(): + weights[node2com[nbr]] += wt + return weights + + +def _gen_graph(G, partition): + """Generate a new graph based on the partitions of a given graph""" + H = G.__class__() + node2com = {} + for i, part in enumerate(partition): + nodes = set() + for node in part: + node2com[node] = i + nodes.update(G.nodes[node].get("nodes", {node})) + H.add_node(i, nodes=nodes) + + for node1, node2, wt in G.edges(data=True): + wt = wt["weight"] + com1 = node2com[node1] + com2 = node2com[node2] + temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"] + H.add_edge(com1, com2, weight=wt + temp) + return H + + +def _convert_multigraph(G, weight, is_directed): + """Convert a Multigraph to normal Graph""" + if is_directed: + H = nx.DiGraph() + else: + H = nx.Graph() + H.add_nodes_from(G) + for u, v, wt in G.edges(data=weight, default=1): + if H.has_edge(u, v): + H[u][v]["weight"] += wt + else: + H.add_edge(u, v, weight=wt) + return H diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/lukes.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/lukes.py new file mode 100644 index 00000000..08dd7cd5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/lukes.py @@ -0,0 +1,227 @@ +"""Lukes Algorithm for exact optimal weighted tree partitioning.""" + +from copy import deepcopy +from functools import lru_cache +from random import choice + +import networkx as nx +from networkx.utils import not_implemented_for + +__all__ = ["lukes_partitioning"] + +D_EDGE_W = "weight" +D_EDGE_VALUE = 1.0 +D_NODE_W = "weight" +D_NODE_VALUE = 1 +PKEY = "partitions" +CLUSTER_EVAL_CACHE_SIZE = 2048 + + +def _split_n_from(n, min_size_of_first_part): + # splits j in two parts of which the first is at least + # the second argument + assert n >= min_size_of_first_part + for p1 in range(min_size_of_first_part, n + 1): + yield p1, n - p1 + + +@nx._dispatchable(node_attrs="node_weight", edge_attrs="edge_weight") +def lukes_partitioning(G, max_size, node_weight=None, edge_weight=None): + """Optimal partitioning of a weighted tree using the Lukes algorithm. + + This algorithm partitions a connected, acyclic graph featuring integer + node weights and float edge weights. The resulting clusters are such + that the total weight of the nodes in each cluster does not exceed + max_size and that the weight of the edges that are cut by the partition + is minimum. The algorithm is based on [1]_. + + Parameters + ---------- + G : NetworkX graph + + max_size : int + Maximum weight a partition can have in terms of sum of + node_weight for all nodes in the partition + + edge_weight : key + Edge data key to use as weight. If None, the weights are all + set to one. + + node_weight : key + Node data key to use as weight. If None, the weights are all + set to one. The data must be int. + + Returns + ------- + partition : list + A list of sets of nodes representing the clusters of the + partition. + + Raises + ------ + NotATree + If G is not a tree. + TypeError + If any of the values of node_weight is not int. + + References + ---------- + .. [1] Lukes, J. A. (1974). + "Efficient Algorithm for the Partitioning of Trees." + IBM Journal of Research and Development, 18(3), 217–224. + + """ + # First sanity check and tree preparation + if not nx.is_tree(G): + raise nx.NotATree("lukes_partitioning works only on trees") + else: + if nx.is_directed(G): + root = [n for n, d in G.in_degree() if d == 0] + assert len(root) == 1 + root = root[0] + t_G = deepcopy(G) + else: + root = choice(list(G.nodes)) + # this has the desirable side effect of not inheriting attributes + t_G = nx.dfs_tree(G, root) + + # Since we do not want to screw up the original graph, + # if we have a blank attribute, we make a deepcopy + if edge_weight is None or node_weight is None: + safe_G = deepcopy(G) + if edge_weight is None: + nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W) + edge_weight = D_EDGE_W + if node_weight is None: + nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W) + node_weight = D_NODE_W + else: + safe_G = G + + # Second sanity check + # The values of node_weight MUST BE int. + # I cannot see any room for duck typing without incurring serious + # danger of subtle bugs. + all_n_attr = nx.get_node_attributes(safe_G, node_weight).values() + for x in all_n_attr: + if not isinstance(x, int): + raise TypeError( + "lukes_partitioning needs integer " + f"values for node_weight ({node_weight})" + ) + + # SUBROUTINES ----------------------- + # these functions are defined here for two reasons: + # - brevity: we can leverage global "safe_G" + # - caching: signatures are hashable + + @not_implemented_for("undirected") + # this is intended to be called only on t_G + def _leaves(gr): + for x in gr.nodes: + if not nx.descendants(gr, x): + yield x + + @not_implemented_for("undirected") + def _a_parent_of_leaves_only(gr): + tleaves = set(_leaves(gr)) + for n in set(gr.nodes) - tleaves: + if all(x in tleaves for x in nx.descendants(gr, n)): + return n + + @lru_cache(CLUSTER_EVAL_CACHE_SIZE) + def _value_of_cluster(cluster): + valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster] + return sum(safe_G.edges[e][edge_weight] for e in valid_edges) + + def _value_of_partition(partition): + return sum(_value_of_cluster(frozenset(c)) for c in partition) + + @lru_cache(CLUSTER_EVAL_CACHE_SIZE) + def _weight_of_cluster(cluster): + return sum(safe_G.nodes[n][node_weight] for n in cluster) + + def _pivot(partition, node): + ccx = [c for c in partition if node in c] + assert len(ccx) == 1 + return ccx[0] + + def _concatenate_or_merge(partition_1, partition_2, x, i, ref_weight): + ccx = _pivot(partition_1, x) + cci = _pivot(partition_2, i) + merged_xi = ccx.union(cci) + + # We first check if we can do the merge. + # If so, we do the actual calculations, otherwise we concatenate + if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight: + cp1 = list(filter(lambda x: x != ccx, partition_1)) + cp2 = list(filter(lambda x: x != cci, partition_2)) + + option_2 = [merged_xi] + cp1 + cp2 + return option_2, _value_of_partition(option_2) + else: + option_1 = partition_1 + partition_2 + return option_1, _value_of_partition(option_1) + + # INITIALIZATION ----------------------- + leaves = set(_leaves(t_G)) + for lv in leaves: + t_G.nodes[lv][PKEY] = {} + slot = safe_G.nodes[lv][node_weight] + t_G.nodes[lv][PKEY][slot] = [{lv}] + t_G.nodes[lv][PKEY][0] = [{lv}] + + for inner in [x for x in t_G.nodes if x not in leaves]: + t_G.nodes[inner][PKEY] = {} + slot = safe_G.nodes[inner][node_weight] + t_G.nodes[inner][PKEY][slot] = [{inner}] + nx._clear_cache(t_G) + + # CORE ALGORITHM ----------------------- + while True: + x_node = _a_parent_of_leaves_only(t_G) + weight_of_x = safe_G.nodes[x_node][node_weight] + best_value = 0 + best_partition = None + bp_buffer = {} + x_descendants = nx.descendants(t_G, x_node) + for i_node in x_descendants: + for j in range(weight_of_x, max_size + 1): + for a, b in _split_n_from(j, weight_of_x): + if ( + a not in t_G.nodes[x_node][PKEY] + or b not in t_G.nodes[i_node][PKEY] + ): + # it's not possible to form this particular weight sum + continue + + part1 = t_G.nodes[x_node][PKEY][a] + part2 = t_G.nodes[i_node][PKEY][b] + part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j) + + if j not in bp_buffer or bp_buffer[j][1] < value: + # we annotate in the buffer the best partition for j + bp_buffer[j] = part, value + + # we also keep track of the overall best partition + if best_value <= value: + best_value = value + best_partition = part + + # as illustrated in Lukes, once we finished a child, we can + # discharge the partitions we found into the graph + # (the key phrase is make all x == x') + # so that they are used by the subsequent children + for w, (best_part_for_vl, vl) in bp_buffer.items(): + t_G.nodes[x_node][PKEY][w] = best_part_for_vl + bp_buffer.clear() + + # the absolute best partition for this node + # across all weights has to be stored at 0 + t_G.nodes[x_node][PKEY][0] = best_partition + t_G.remove_nodes_from(x_descendants) + + if x_node == root: + # the 0-labeled partition of root + # is the optimal one for the whole tree + return t_G.nodes[root][PKEY][0] diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py new file mode 100644 index 00000000..f465e01c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/modularity_max.py @@ -0,0 +1,451 @@ +"""Functions for detecting communities based on modularity.""" + +from collections import defaultdict + +import networkx as nx +from networkx.algorithms.community.quality import modularity +from networkx.utils import not_implemented_for +from networkx.utils.mapped_queue import MappedQueue + +__all__ = [ + "greedy_modularity_communities", + "naive_greedy_modularity_communities", +] + + +def _greedy_modularity_communities_generator(G, weight=None, resolution=1): + r"""Yield community partitions of G and the modularity change at each step. + + This function performs Clauset-Newman-Moore greedy modularity maximization [2]_ + At each step of the process it yields the change in modularity that will occur in + the next step followed by yielding the new community partition after that step. + + Greedy modularity maximization begins with each node in its own community + and repeatedly joins the pair of communities that lead to the largest + modularity until one community contains all nodes (the partition has one set). + + This function maximizes the generalized modularity, where `resolution` + is the resolution parameter, often expressed as $\gamma$. + See :func:`~networkx.algorithms.community.quality.modularity`. + + Parameters + ---------- + G : NetworkX graph + + weight : string or None, optional (default=None) + The name of an edge attribute that holds the numerical value used + as a weight. If None, then each edge has weight 1. + The degree is the sum of the edge weights adjacent to the node. + + resolution : float (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + Yields + ------ + Alternating yield statements produce the following two objects: + + communities: dict_values + A dict_values of frozensets of nodes, one for each community. + This represents a partition of the nodes of the graph into communities. + The first yield is the partition with each node in its own community. + + dq: float + The change in modularity when merging the next two communities + that leads to the largest modularity. + + See Also + -------- + modularity + + References + ---------- + .. [1] Newman, M. E. J. "Networks: An Introduction", page 224 + Oxford University Press 2011. + .. [2] Clauset, A., Newman, M. E., & Moore, C. + "Finding community structure in very large networks." + Physical Review E 70(6), 2004. + .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community + Detection" Phys. Rev. E74, 2006. + .. [4] Newman, M. E. J."Analysis of weighted networks" + Physical Review E 70(5 Pt 2):056131, 2004. + """ + directed = G.is_directed() + N = G.number_of_nodes() + + # Count edges (or the sum of edge-weights for weighted graphs) + m = G.size(weight) + q0 = 1 / m + + # Calculate degrees (notation from the papers) + # a : the fraction of (weighted) out-degree for each node + # b : the fraction of (weighted) in-degree for each node + if directed: + a = {node: deg_out * q0 for node, deg_out in G.out_degree(weight=weight)} + b = {node: deg_in * q0 for node, deg_in in G.in_degree(weight=weight)} + else: + a = b = {node: deg * q0 * 0.5 for node, deg in G.degree(weight=weight)} + + # this preliminary step collects the edge weights for each node pair + # It handles multigraph and digraph and works fine for graph. + dq_dict = defaultdict(lambda: defaultdict(float)) + for u, v, wt in G.edges(data=weight, default=1): + if u == v: + continue + dq_dict[u][v] += wt + dq_dict[v][u] += wt + + # now scale and subtract the expected edge-weights term + for u, nbrdict in dq_dict.items(): + for v, wt in nbrdict.items(): + dq_dict[u][v] = q0 * wt - resolution * (a[u] * b[v] + b[u] * a[v]) + + # Use -dq to get a max_heap instead of a min_heap + # dq_heap holds a heap for each node's neighbors + dq_heap = {u: MappedQueue({(u, v): -dq for v, dq in dq_dict[u].items()}) for u in G} + # H -> all_dq_heap holds a heap with the best items for each node + H = MappedQueue([dq_heap[n].heap[0] for n in G if len(dq_heap[n]) > 0]) + + # Initialize single-node communities + communities = {n: frozenset([n]) for n in G} + yield communities.values() + + # Merge the two communities that lead to the largest modularity + while len(H) > 1: + # Find best merge + # Remove from heap of row maxes + # Ties will be broken by choosing the pair with lowest min community id + try: + negdq, u, v = H.pop() + except IndexError: + break + dq = -negdq + yield dq + # Remove best merge from row u heap + dq_heap[u].pop() + # Push new row max onto H + if len(dq_heap[u]) > 0: + H.push(dq_heap[u].heap[0]) + # If this element was also at the root of row v, we need to remove the + # duplicate entry from H + if dq_heap[v].heap[0] == (v, u): + H.remove((v, u)) + # Remove best merge from row v heap + dq_heap[v].remove((v, u)) + # Push new row max onto H + if len(dq_heap[v]) > 0: + H.push(dq_heap[v].heap[0]) + else: + # Duplicate wasn't in H, just remove from row v heap + dq_heap[v].remove((v, u)) + + # Perform merge + communities[v] = frozenset(communities[u] | communities[v]) + del communities[u] + + # Get neighbor communities connected to the merged communities + u_nbrs = set(dq_dict[u]) + v_nbrs = set(dq_dict[v]) + all_nbrs = (u_nbrs | v_nbrs) - {u, v} + both_nbrs = u_nbrs & v_nbrs + # Update dq for merge of u into v + for w in all_nbrs: + # Calculate new dq value + if w in both_nbrs: + dq_vw = dq_dict[v][w] + dq_dict[u][w] + elif w in v_nbrs: + dq_vw = dq_dict[v][w] - resolution * (a[u] * b[w] + a[w] * b[u]) + else: # w in u_nbrs + dq_vw = dq_dict[u][w] - resolution * (a[v] * b[w] + a[w] * b[v]) + # Update rows v and w + for row, col in [(v, w), (w, v)]: + dq_heap_row = dq_heap[row] + # Update dict for v,w only (u is removed below) + dq_dict[row][col] = dq_vw + # Save old max of per-row heap + if len(dq_heap_row) > 0: + d_oldmax = dq_heap_row.heap[0] + else: + d_oldmax = None + # Add/update heaps + d = (row, col) + d_negdq = -dq_vw + # Save old value for finding heap index + if w in v_nbrs: + # Update existing element in per-row heap + dq_heap_row.update(d, d, priority=d_negdq) + else: + # We're creating a new nonzero element, add to heap + dq_heap_row.push(d, priority=d_negdq) + # Update heap of row maxes if necessary + if d_oldmax is None: + # No entries previously in this row, push new max + H.push(d, priority=d_negdq) + else: + # We've updated an entry in this row, has the max changed? + row_max = dq_heap_row.heap[0] + if d_oldmax != row_max or d_oldmax.priority != row_max.priority: + H.update(d_oldmax, row_max) + + # Remove row/col u from dq_dict matrix + for w in dq_dict[u]: + # Remove from dict + dq_old = dq_dict[w][u] + del dq_dict[w][u] + # Remove from heaps if we haven't already + if w != v: + # Remove both row and column + for row, col in [(w, u), (u, w)]: + dq_heap_row = dq_heap[row] + # Check if replaced dq is row max + d_old = (row, col) + if dq_heap_row.heap[0] == d_old: + # Update per-row heap and heap of row maxes + dq_heap_row.remove(d_old) + H.remove(d_old) + # Update row max + if len(dq_heap_row) > 0: + H.push(dq_heap_row.heap[0]) + else: + # Only update per-row heap + dq_heap_row.remove(d_old) + + del dq_dict[u] + # Mark row u as deleted, but keep placeholder + dq_heap[u] = MappedQueue() + # Merge u into v and update a + a[v] += a[u] + a[u] = 0 + if directed: + b[v] += b[u] + b[u] = 0 + + yield communities.values() + + +@nx._dispatchable(edge_attrs="weight") +def greedy_modularity_communities( + G, + weight=None, + resolution=1, + cutoff=1, + best_n=None, +): + r"""Find communities in G using greedy modularity maximization. + + This function uses Clauset-Newman-Moore greedy modularity maximization [2]_ + to find the community partition with the largest modularity. + + Greedy modularity maximization begins with each node in its own community + and repeatedly joins the pair of communities that lead to the largest + modularity until no further increase in modularity is possible (a maximum). + Two keyword arguments adjust the stopping condition. `cutoff` is a lower + limit on the number of communities so you can stop the process before + reaching a maximum (used to save computation time). `best_n` is an upper + limit on the number of communities so you can make the process continue + until at most n communities remain even if the maximum modularity occurs + for more. To obtain exactly n communities, set both `cutoff` and `best_n` to n. + + This function maximizes the generalized modularity, where `resolution` + is the resolution parameter, often expressed as $\gamma$. + See :func:`~networkx.algorithms.community.quality.modularity`. + + Parameters + ---------- + G : NetworkX graph + + weight : string or None, optional (default=None) + The name of an edge attribute that holds the numerical value used + as a weight. If None, then each edge has weight 1. + The degree is the sum of the edge weights adjacent to the node. + + resolution : float, optional (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + cutoff : int, optional (default=1) + A minimum number of communities below which the merging process stops. + The process stops at this number of communities even if modularity + is not maximized. The goal is to let the user stop the process early. + The process stops before the cutoff if it finds a maximum of modularity. + + best_n : int or None, optional (default=None) + A maximum number of communities above which the merging process will + not stop. This forces community merging to continue after modularity + starts to decrease until `best_n` communities remain. + If ``None``, don't force it to continue beyond a maximum. + + Raises + ------ + ValueError : If the `cutoff` or `best_n` value is not in the range + ``[1, G.number_of_nodes()]``, or if `best_n` < `cutoff`. + + Returns + ------- + communities: list + A list of frozensets of nodes, one for each community. + Sorted by length with largest communities first. + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> c = nx.community.greedy_modularity_communities(G) + >>> sorted(c[0]) + [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + + See Also + -------- + modularity + + References + ---------- + .. [1] Newman, M. E. J. "Networks: An Introduction", page 224 + Oxford University Press 2011. + .. [2] Clauset, A., Newman, M. E., & Moore, C. + "Finding community structure in very large networks." + Physical Review E 70(6), 2004. + .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community + Detection" Phys. Rev. E74, 2006. + .. [4] Newman, M. E. J."Analysis of weighted networks" + Physical Review E 70(5 Pt 2):056131, 2004. + """ + if not G.size(): + return [{n} for n in G] + + if (cutoff < 1) or (cutoff > G.number_of_nodes()): + raise ValueError(f"cutoff must be between 1 and {len(G)}. Got {cutoff}.") + if best_n is not None: + if (best_n < 1) or (best_n > G.number_of_nodes()): + raise ValueError(f"best_n must be between 1 and {len(G)}. Got {best_n}.") + if best_n < cutoff: + raise ValueError(f"Must have best_n >= cutoff. Got {best_n} < {cutoff}") + if best_n == 1: + return [set(G)] + else: + best_n = G.number_of_nodes() + + # retrieve generator object to construct output + community_gen = _greedy_modularity_communities_generator( + G, weight=weight, resolution=resolution + ) + + # construct the first best community + communities = next(community_gen) + + # continue merging communities until one of the breaking criteria is satisfied + while len(communities) > cutoff: + try: + dq = next(community_gen) + # StopIteration occurs when communities are the connected components + except StopIteration: + communities = sorted(communities, key=len, reverse=True) + # if best_n requires more merging, merge big sets for highest modularity + while len(communities) > best_n: + comm1, comm2, *rest = communities + communities = [comm1 ^ comm2] + communities.extend(rest) + return communities + + # keep going unless max_mod is reached or best_n says to merge more + if dq < 0 and len(communities) <= best_n: + break + communities = next(community_gen) + + return sorted(communities, key=len, reverse=True) + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@nx._dispatchable(edge_attrs="weight") +def naive_greedy_modularity_communities(G, resolution=1, weight=None): + r"""Find communities in G using greedy modularity maximization. + + This implementation is O(n^4), much slower than alternatives, but it is + provided as an easy-to-understand reference implementation. + + Greedy modularity maximization begins with each node in its own community + and joins the pair of communities that most increases modularity until no + such pair exists. + + This function maximizes the generalized modularity, where `resolution` + is the resolution parameter, often expressed as $\gamma$. + See :func:`~networkx.algorithms.community.quality.modularity`. + + Parameters + ---------- + G : NetworkX graph + Graph must be simple and undirected. + + resolution : float (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + weight : string or None, optional (default=None) + The name of an edge attribute that holds the numerical value used + as a weight. If None, then each edge has weight 1. + The degree is the sum of the edge weights adjacent to the node. + + Returns + ------- + list + A list of sets of nodes, one for each community. + Sorted by length with largest communities first. + + Examples + -------- + >>> G = nx.karate_club_graph() + >>> c = nx.community.naive_greedy_modularity_communities(G) + >>> sorted(c[0]) + [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + + See Also + -------- + greedy_modularity_communities + modularity + """ + # First create one community for each node + communities = [frozenset([u]) for u in G.nodes()] + # Track merges + merges = [] + # Greedily merge communities until no improvement is possible + old_modularity = None + new_modularity = modularity(G, communities, resolution=resolution, weight=weight) + while old_modularity is None or new_modularity > old_modularity: + # Save modularity for comparison + old_modularity = new_modularity + # Find best pair to merge + trial_communities = list(communities) + to_merge = None + for i, u in enumerate(communities): + for j, v in enumerate(communities): + # Skip i==j and empty communities + if j <= i or len(u) == 0 or len(v) == 0: + continue + # Merge communities u and v + trial_communities[j] = u | v + trial_communities[i] = frozenset([]) + trial_modularity = modularity( + G, trial_communities, resolution=resolution, weight=weight + ) + if trial_modularity >= new_modularity: + # Check if strictly better or tie + if trial_modularity > new_modularity: + # Found new best, save modularity and group indexes + new_modularity = trial_modularity + to_merge = (i, j, new_modularity - old_modularity) + elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]): + # Break ties by choosing pair with lowest min id + new_modularity = trial_modularity + to_merge = (i, j, new_modularity - old_modularity) + # Un-merge + trial_communities[i] = u + trial_communities[j] = v + if to_merge is not None: + # If the best merge improves modularity, use it + merges.append(to_merge) + i, j, dq = to_merge + u, v = communities[i], communities[j] + communities[j] = u | v + communities[i] = frozenset([]) + # Remove empty communities and sort + return sorted((c for c in communities if len(c) > 0), key=len, reverse=True) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py new file mode 100644 index 00000000..f09a6d45 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py @@ -0,0 +1,346 @@ +"""Functions for measuring the quality of a partition (into +communities). + +""" + +from itertools import combinations + +import networkx as nx +from networkx import NetworkXError +from networkx.algorithms.community.community_utils import is_partition +from networkx.utils.decorators import argmap + +__all__ = ["modularity", "partition_quality"] + + +class NotAPartition(NetworkXError): + """Raised if a given collection is not a partition.""" + + def __init__(self, G, collection): + msg = f"{collection} is not a valid partition of the graph {G}" + super().__init__(msg) + + +def _require_partition(G, partition): + """Decorator to check that a valid partition is input to a function + + Raises :exc:`networkx.NetworkXError` if the partition is not valid. + + This decorator should be used on functions whose first two arguments + are a graph and a partition of the nodes of that graph (in that + order):: + + >>> @require_partition + ... def foo(G, partition): + ... print("partition is valid!") + ... + >>> G = nx.complete_graph(5) + >>> partition = [{0, 1}, {2, 3}, {4}] + >>> foo(G, partition) + partition is valid! + >>> partition = [{0}, {2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + >>> partition = [{0, 1}, {1, 2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + + """ + if is_partition(G, partition): + return G, partition + raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") + + +require_partition = argmap(_require_partition, (0, 1)) + + +@nx._dispatchable +def intra_community_edges(G, partition): + """Returns the number of intra-community edges for a partition of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The "intra-community edges" are those edges joining a pair of nodes + in the same block of the partition. + + """ + return sum(G.subgraph(block).size() for block in partition) + + +@nx._dispatchable +def inter_community_edges(G, partition): + """Returns the number of inter-community edges for a partition of `G`. + according to the given + partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The *inter-community edges* are those edges joining a pair of nodes + in different blocks of the partition. + + Implementation note: this function creates an intermediate graph + that may require the same amount of memory as that of `G`. + + """ + # Alternate implementation that does not require constructing a new + # graph object (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in G.edges() if aff[u] != aff[v]) + # + MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph + return nx.quotient_graph(G, partition, create_using=MG).size() + + +@nx._dispatchable +def inter_community_non_edges(G, partition): + """Returns the number of inter-community non-edges according to the + given partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + A *non-edge* is a pair of nodes (undirected if `G` is undirected) + that are not adjacent in `G`. The *inter-community non-edges* are + those non-edges on a pair of nodes in different blocks of the + partition. + + Implementation note: this function creates two intermediate graphs, + which may require up to twice the amount of memory as required to + store `G`. + + """ + # Alternate implementation that does not require constructing two + # new graph objects (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v]) + # + return inter_community_edges(nx.complement(G), partition) + + +@nx._dispatchable(edge_attrs="weight") +def modularity(G, communities, weight="weight", resolution=1): + r"""Returns the modularity of the given partition of the graph. + + Modularity is defined in [1]_ as + + .. math:: + Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) + \delta(c_i,c_j) + + where $m$ is the number of edges (or sum of all edge weights as in [5]_), + $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, + $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and + $j$ are in the same community else 0. + + According to [2]_ (and verified by some algebra) this can be reduced to + + .. math:: + Q = \sum_{c=1}^{n} + \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] + + where the sum iterates over all communities $c$, $m$ is the number of edges, + $L_c$ is the number of intra-community links for community $c$, + $k_c$ is the sum of degrees of the nodes in community $c$, + and $\gamma$ is the resolution parameter. + + The resolution parameter sets an arbitrary tradeoff between intra-group + edges and inter-group edges. More complex grouping patterns can be + discovered by analyzing the same network with multiple values of gamma + and then combining the results [3]_. That said, it is very common to + simply use gamma=1. More on the choice of gamma is in [4]_. + + The second formula is the one actually used in calculation of the modularity. + For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. + + Parameters + ---------- + G : NetworkX Graph + + communities : list or iterable of set of nodes + These node sets must represent a partition of G's nodes. + + weight : string or None, optional (default="weight") + The edge attribute that holds the numerical value used + as a weight. If None or an edge does not have that attribute, + then that edge has weight 1. + + resolution : float (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + Returns + ------- + Q : float + The modularity of the partition. + + Raises + ------ + NotAPartition + If `communities` is not a partition of the nodes of `G`. + + Examples + -------- + >>> G = nx.barbell_graph(3, 0) + >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) + 0.35714285714285715 + >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) + 0.35714285714285715 + + References + ---------- + .. [1] M. E. J. Newman "Networks: An Introduction", page 224. + Oxford University Press, 2011. + .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. + "Finding community structure in very large networks." + Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187> + .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" + Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 + .. [4] M. E. J. Newman, "Equivalence between modularity optimization and + maximum likelihood methods for community detection" + Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 + .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large + networks" J. Stat. Mech 10008, 1-12 (2008). + https://doi.org/10.1088/1742-5468/2008/10/P10008 + """ + if not isinstance(communities, list): + communities = list(communities) + if not is_partition(G, communities): + raise NotAPartition(G, communities) + + directed = G.is_directed() + if directed: + out_degree = dict(G.out_degree(weight=weight)) + in_degree = dict(G.in_degree(weight=weight)) + m = sum(out_degree.values()) + norm = 1 / m**2 + else: + out_degree = in_degree = dict(G.degree(weight=weight)) + deg_sum = sum(out_degree.values()) + m = deg_sum / 2 + norm = 1 / deg_sum**2 + + def community_contribution(community): + comm = set(community) + L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) + + out_degree_sum = sum(out_degree[u] for u in comm) + in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum + + return L_c / m - resolution * out_degree_sum * in_degree_sum * norm + + return sum(map(community_contribution, communities)) + + +@require_partition +@nx._dispatchable +def partition_quality(G, partition): + """Returns the coverage and performance of a partition of G. + + The *coverage* of a partition is the ratio of the number of + intra-community edges to the total number of edges in the graph. + + The *performance* of a partition is the number of + intra-community edges plus inter-community non-edges divided by the total + number of potential edges. + + This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links. + + Parameters + ---------- + G : NetworkX graph + + partition : sequence + Partition of the nodes of `G`, represented as a sequence of + sets of nodes (blocks). Each block of the partition represents a + community. + + Returns + ------- + (float, float) + The (coverage, performance) tuple of the partition, as defined above. + + Raises + ------ + NetworkXError + If `partition` is not a valid partition of the nodes of `G`. + + Notes + ----- + If `G` is a multigraph; + - for coverage, the multiplicity of edges is counted + - for performance, the result is -1 (total number of possible edges is not defined) + + References + ---------- + .. [1] Santo Fortunato. + "Community Detection in Graphs". + *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 + <https://arxiv.org/abs/0906.0612> + """ + + node_community = {} + for i, community in enumerate(partition): + for node in community: + node_community[node] = i + + # `performance` is not defined for multigraphs + if not G.is_multigraph(): + # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges` + possible_inter_community_edges = sum( + len(p1) * len(p2) for p1, p2 in combinations(partition, 2) + ) + + if G.is_directed(): + possible_inter_community_edges *= 2 + else: + possible_inter_community_edges = 0 + + # Compute the number of edges in the complete graph -- `n` nodes, + # directed or undirected, depending on `G` + n = len(G) + total_pairs = n * (n - 1) + if not G.is_directed(): + total_pairs //= 2 + + intra_community_edges = 0 + inter_community_non_edges = possible_inter_community_edges + + # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges` + for e in G.edges(): + if node_community[e[0]] == node_community[e[1]]: + intra_community_edges += 1 + else: + inter_community_non_edges -= 1 + + coverage = intra_community_edges / len(G.edges) + + if G.is_multigraph(): + performance = -1.0 + else: + performance = (intra_community_edges + inter_community_non_edges) / total_pairs + + return coverage, performance diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/__init__.py diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_asyn_fluid.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_asyn_fluid.py new file mode 100644 index 00000000..6c023be7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_asyn_fluid.py @@ -0,0 +1,136 @@ +import pytest + +import networkx as nx +from networkx import Graph, NetworkXError +from networkx.algorithms.community import asyn_fluidc + + +@pytest.mark.parametrize("graph_constructor", (nx.DiGraph, nx.MultiGraph)) +def test_raises_on_directed_and_multigraphs(graph_constructor): + G = graph_constructor([(0, 1), (1, 2)]) + with pytest.raises(nx.NetworkXNotImplemented): + nx.community.asyn_fluidc(G, 1) + + +def test_exceptions(): + test = Graph() + test.add_node("a") + pytest.raises(NetworkXError, asyn_fluidc, test, "hi") + pytest.raises(NetworkXError, asyn_fluidc, test, -1) + pytest.raises(NetworkXError, asyn_fluidc, test, 3) + test.add_node("b") + pytest.raises(NetworkXError, asyn_fluidc, test, 1) + + +def test_single_node(): + test = Graph() + + test.add_node("a") + + # ground truth + ground_truth = {frozenset(["a"])} + + communities = asyn_fluidc(test, 1) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + +def test_two_nodes(): + test = Graph() + + test.add_edge("a", "b") + + # ground truth + ground_truth = {frozenset(["a"]), frozenset(["b"])} + + communities = asyn_fluidc(test, 2) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + +def test_two_clique_communities(): + test = Graph() + + # c1 + test.add_edge("a", "b") + test.add_edge("a", "c") + test.add_edge("b", "c") + + # connection + test.add_edge("c", "d") + + # c2 + test.add_edge("d", "e") + test.add_edge("d", "f") + test.add_edge("f", "e") + + # ground truth + ground_truth = {frozenset(["a", "c", "b"]), frozenset(["e", "d", "f"])} + + communities = asyn_fluidc(test, 2, seed=7) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + +def test_five_clique_ring(): + test = Graph() + + # c1 + test.add_edge("1a", "1b") + test.add_edge("1a", "1c") + test.add_edge("1a", "1d") + test.add_edge("1b", "1c") + test.add_edge("1b", "1d") + test.add_edge("1c", "1d") + + # c2 + test.add_edge("2a", "2b") + test.add_edge("2a", "2c") + test.add_edge("2a", "2d") + test.add_edge("2b", "2c") + test.add_edge("2b", "2d") + test.add_edge("2c", "2d") + + # c3 + test.add_edge("3a", "3b") + test.add_edge("3a", "3c") + test.add_edge("3a", "3d") + test.add_edge("3b", "3c") + test.add_edge("3b", "3d") + test.add_edge("3c", "3d") + + # c4 + test.add_edge("4a", "4b") + test.add_edge("4a", "4c") + test.add_edge("4a", "4d") + test.add_edge("4b", "4c") + test.add_edge("4b", "4d") + test.add_edge("4c", "4d") + + # c5 + test.add_edge("5a", "5b") + test.add_edge("5a", "5c") + test.add_edge("5a", "5d") + test.add_edge("5b", "5c") + test.add_edge("5b", "5d") + test.add_edge("5c", "5d") + + # connections + test.add_edge("1a", "2c") + test.add_edge("2a", "3c") + test.add_edge("3a", "4c") + test.add_edge("4a", "5c") + test.add_edge("5a", "1c") + + # ground truth + ground_truth = { + frozenset(["1a", "1b", "1c", "1d"]), + frozenset(["2a", "2b", "2c", "2d"]), + frozenset(["3a", "3b", "3c", "3d"]), + frozenset(["4a", "4b", "4c", "4d"]), + frozenset(["5a", "5b", "5c", "5d"]), + } + + communities = asyn_fluidc(test, 5, seed=9) + result = {frozenset(c) for c in communities} + assert result == ground_truth diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_centrality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_centrality.py new file mode 100644 index 00000000..1a8982f0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_centrality.py @@ -0,0 +1,85 @@ +"""Unit tests for the :mod:`networkx.algorithms.community.centrality` +module. + +""" + +from operator import itemgetter + +import networkx as nx + + +def set_of_sets(iterable): + return set(map(frozenset, iterable)) + + +def validate_communities(result, expected): + assert set_of_sets(result) == set_of_sets(expected) + + +def validate_possible_communities(result, *expected): + assert any(set_of_sets(result) == set_of_sets(p) for p in expected) + + +class TestGirvanNewman: + """Unit tests for the + :func:`networkx.algorithms.community.centrality.girvan_newman` + function. + + """ + + def test_no_edges(self): + G = nx.empty_graph(3) + communities = list(nx.community.girvan_newman(G)) + assert len(communities) == 1 + validate_communities(communities[0], [{0}, {1}, {2}]) + + def test_undirected(self): + # Start with the graph .-.-.-. + G = nx.path_graph(4) + communities = list(nx.community.girvan_newman(G)) + assert len(communities) == 3 + # After one removal, we get the graph .-. .-. + validate_communities(communities[0], [{0, 1}, {2, 3}]) + # After the next, we get the graph .-. . ., but there are two + # symmetric possible versions. + validate_possible_communities( + communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}] + ) + # After the last removal, we always get the empty graph. + validate_communities(communities[2], [{0}, {1}, {2}, {3}]) + + def test_directed(self): + G = nx.DiGraph(nx.path_graph(4)) + communities = list(nx.community.girvan_newman(G)) + assert len(communities) == 3 + validate_communities(communities[0], [{0, 1}, {2, 3}]) + validate_possible_communities( + communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}] + ) + validate_communities(communities[2], [{0}, {1}, {2}, {3}]) + + def test_selfloops(self): + G = nx.path_graph(4) + G.add_edge(0, 0) + G.add_edge(2, 2) + communities = list(nx.community.girvan_newman(G)) + assert len(communities) == 3 + validate_communities(communities[0], [{0, 1}, {2, 3}]) + validate_possible_communities( + communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}] + ) + validate_communities(communities[2], [{0}, {1}, {2}, {3}]) + + def test_most_valuable_edge(self): + G = nx.Graph() + G.add_weighted_edges_from([(0, 1, 3), (1, 2, 2), (2, 3, 1)]) + # Let the most valuable edge be the one with the highest weight. + + def heaviest(G): + return max(G.edges(data="weight"), key=itemgetter(2))[:2] + + communities = list(nx.community.girvan_newman(G, heaviest)) + assert len(communities) == 3 + validate_communities(communities[0], [{0}, {1, 2, 3}]) + validate_communities(communities[1], [{0}, {1}, {2, 3}]) + validate_communities(communities[2], [{0}, {1}, {2}, {3}]) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_divisive.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_divisive.py new file mode 100644 index 00000000..6331503f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_divisive.py @@ -0,0 +1,106 @@ +import pytest + +import networkx as nx + + +def test_edge_betweenness_partition(): + G = nx.barbell_graph(3, 0) + C = nx.community.edge_betweenness_partition(G, 2) + answer = [{0, 1, 2}, {3, 4, 5}] + assert len(C) == len(answer) + for s in answer: + assert s in C + + G = nx.barbell_graph(3, 1) + C = nx.community.edge_betweenness_partition(G, 3) + answer = [{0, 1, 2}, {4, 5, 6}, {3}] + assert len(C) == len(answer) + for s in answer: + assert s in C + + C = nx.community.edge_betweenness_partition(G, 7) + answer = [{n} for n in G] + assert len(C) == len(answer) + for s in answer: + assert s in C + + C = nx.community.edge_betweenness_partition(G, 1) + assert C == [set(G)] + + C = nx.community.edge_betweenness_partition(G, 1, weight="weight") + assert C == [set(G)] + + with pytest.raises(nx.NetworkXError): + nx.community.edge_betweenness_partition(G, 0) + + with pytest.raises(nx.NetworkXError): + nx.community.edge_betweenness_partition(G, -1) + + with pytest.raises(nx.NetworkXError): + nx.community.edge_betweenness_partition(G, 10) + + +def test_edge_current_flow_betweenness_partition(): + pytest.importorskip("scipy") + + G = nx.barbell_graph(3, 0) + C = nx.community.edge_current_flow_betweenness_partition(G, 2) + answer = [{0, 1, 2}, {3, 4, 5}] + assert len(C) == len(answer) + for s in answer: + assert s in C + + G = nx.barbell_graph(3, 1) + C = nx.community.edge_current_flow_betweenness_partition(G, 2) + answers = [[{0, 1, 2, 3}, {4, 5, 6}], [{0, 1, 2}, {3, 4, 5, 6}]] + assert len(C) == len(answers[0]) + assert any(all(s in answer for s in C) for answer in answers) + + C = nx.community.edge_current_flow_betweenness_partition(G, 3) + answer = [{0, 1, 2}, {4, 5, 6}, {3}] + assert len(C) == len(answer) + for s in answer: + assert s in C + + C = nx.community.edge_current_flow_betweenness_partition(G, 4) + answers = [[{1, 2}, {4, 5, 6}, {3}, {0}], [{0, 1, 2}, {5, 6}, {3}, {4}]] + assert len(C) == len(answers[0]) + assert any(all(s in answer for s in C) for answer in answers) + + C = nx.community.edge_current_flow_betweenness_partition(G, 5) + answer = [{1, 2}, {5, 6}, {3}, {0}, {4}] + assert len(C) == len(answer) + for s in answer: + assert s in C + + C = nx.community.edge_current_flow_betweenness_partition(G, 6) + answers = [[{2}, {5, 6}, {3}, {0}, {4}, {1}], [{1, 2}, {6}, {3}, {0}, {4}, {5}]] + assert len(C) == len(answers[0]) + assert any(all(s in answer for s in C) for answer in answers) + + C = nx.community.edge_current_flow_betweenness_partition(G, 7) + answer = [{n} for n in G] + assert len(C) == len(answer) + for s in answer: + assert s in C + + C = nx.community.edge_current_flow_betweenness_partition(G, 1) + assert C == [set(G)] + + C = nx.community.edge_current_flow_betweenness_partition(G, 1, weight="weight") + assert C == [set(G)] + + with pytest.raises(nx.NetworkXError): + nx.community.edge_current_flow_betweenness_partition(G, 0) + + with pytest.raises(nx.NetworkXError): + nx.community.edge_current_flow_betweenness_partition(G, -1) + + with pytest.raises(nx.NetworkXError): + nx.community.edge_current_flow_betweenness_partition(G, 10) + + N = 10 + G = nx.empty_graph(N) + for i in range(2, N - 1): + C = nx.community.edge_current_flow_betweenness_partition(G, i) + assert C == [{n} for n in G] diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kclique.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kclique.py new file mode 100644 index 00000000..aa0b7e82 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kclique.py @@ -0,0 +1,91 @@ +from itertools import combinations + +import pytest + +import networkx as nx + + +def test_overlapping_K5(): + G = nx.Graph() + G.add_edges_from(combinations(range(5), 2)) # Add a five clique + G.add_edges_from(combinations(range(2, 7), 2)) # Add another five clique + c = list(nx.community.k_clique_communities(G, 4)) + assert c == [frozenset(range(7))] + c = set(nx.community.k_clique_communities(G, 5)) + assert c == {frozenset(range(5)), frozenset(range(2, 7))} + + +def test_isolated_K5(): + G = nx.Graph() + G.add_edges_from(combinations(range(5), 2)) # Add a five clique + G.add_edges_from(combinations(range(5, 10), 2)) # Add another five clique + c = set(nx.community.k_clique_communities(G, 5)) + assert c == {frozenset(range(5)), frozenset(range(5, 10))} + + +class TestZacharyKarateClub: + def setup_method(self): + self.G = nx.karate_club_graph() + + def _check_communities(self, k, expected): + communities = set(nx.community.k_clique_communities(self.G, k)) + assert communities == expected + + def test_k2(self): + # clique percolation with k=2 is just connected components + expected = {frozenset(self.G)} + self._check_communities(2, expected) + + def test_k3(self): + comm1 = [ + 0, + 1, + 2, + 3, + 7, + 8, + 12, + 13, + 14, + 15, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + ] + comm2 = [0, 4, 5, 6, 10, 16] + comm3 = [24, 25, 31] + expected = {frozenset(comm1), frozenset(comm2), frozenset(comm3)} + self._check_communities(3, expected) + + def test_k4(self): + expected = { + frozenset([0, 1, 2, 3, 7, 13]), + frozenset([8, 32, 30, 33]), + frozenset([32, 33, 29, 23]), + } + self._check_communities(4, expected) + + def test_k5(self): + expected = {frozenset([0, 1, 2, 3, 7, 13])} + self._check_communities(5, expected) + + def test_k6(self): + expected = set() + self._check_communities(6, expected) + + +def test_bad_k(): + with pytest.raises(nx.NetworkXError): + list(nx.community.k_clique_communities(nx.Graph(), 1)) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kernighan_lin.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kernighan_lin.py new file mode 100644 index 00000000..25d53d5f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_kernighan_lin.py @@ -0,0 +1,92 @@ +"""Unit tests for the :mod:`networkx.algorithms.community.kernighan_lin` +module. +""" + +from itertools import permutations + +import pytest + +import networkx as nx +from networkx.algorithms.community import kernighan_lin_bisection + + +def assert_partition_equal(x, y): + assert set(map(frozenset, x)) == set(map(frozenset, y)) + + +def test_partition(): + G = nx.barbell_graph(3, 0) + C = kernighan_lin_bisection(G) + assert_partition_equal(C, [{0, 1, 2}, {3, 4, 5}]) + + +def test_partition_argument(): + G = nx.barbell_graph(3, 0) + partition = [{0, 1, 2}, {3, 4, 5}] + C = kernighan_lin_bisection(G, partition) + assert_partition_equal(C, partition) + + +def test_partition_argument_non_integer_nodes(): + G = nx.Graph([("A", "B"), ("A", "C"), ("B", "C"), ("C", "D")]) + partition = ({"A", "B"}, {"C", "D"}) + C = kernighan_lin_bisection(G, partition) + assert_partition_equal(C, partition) + + +def test_seed_argument(): + G = nx.barbell_graph(3, 0) + C = kernighan_lin_bisection(G, seed=1) + assert_partition_equal(C, [{0, 1, 2}, {3, 4, 5}]) + + +def test_non_disjoint_partition(): + with pytest.raises(nx.NetworkXError): + G = nx.barbell_graph(3, 0) + partition = ({0, 1, 2}, {2, 3, 4, 5}) + kernighan_lin_bisection(G, partition) + + +def test_too_many_blocks(): + with pytest.raises(nx.NetworkXError): + G = nx.barbell_graph(3, 0) + partition = ({0, 1}, {2}, {3, 4, 5}) + kernighan_lin_bisection(G, partition) + + +def test_multigraph(): + G = nx.cycle_graph(4) + M = nx.MultiGraph(G.edges()) + M.add_edges_from(G.edges()) + M.remove_edge(1, 2) + for labels in permutations(range(4)): + mapping = dict(zip(M, labels)) + A, B = kernighan_lin_bisection(nx.relabel_nodes(M, mapping), seed=0) + assert_partition_equal( + [A, B], [{mapping[0], mapping[1]}, {mapping[2], mapping[3]}] + ) + + +def test_max_iter_argument(): + G = nx.Graph( + [ + ("A", "B", {"weight": 1}), + ("A", "C", {"weight": 2}), + ("A", "D", {"weight": 3}), + ("A", "E", {"weight": 2}), + ("A", "F", {"weight": 4}), + ("B", "C", {"weight": 1}), + ("B", "D", {"weight": 4}), + ("B", "E", {"weight": 2}), + ("B", "F", {"weight": 1}), + ("C", "D", {"weight": 3}), + ("C", "E", {"weight": 2}), + ("C", "F", {"weight": 1}), + ("D", "E", {"weight": 4}), + ("D", "F", {"weight": 3}), + ("E", "F", {"weight": 2}), + ] + ) + partition = ({"A", "B", "C"}, {"D", "E", "F"}) + C = kernighan_lin_bisection(G, partition, max_iter=1) + assert_partition_equal(C, ({"A", "F", "C"}, {"D", "E", "B"})) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_label_propagation.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_label_propagation.py new file mode 100644 index 00000000..4be72dbf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_label_propagation.py @@ -0,0 +1,241 @@ +from itertools import chain, combinations + +import pytest + +import networkx as nx + + +def test_directed_not_supported(): + with pytest.raises(nx.NetworkXNotImplemented): + # not supported for directed graphs + test = nx.DiGraph() + test.add_edge("a", "b") + test.add_edge("a", "c") + test.add_edge("b", "d") + result = nx.community.label_propagation_communities(test) + + +def test_iterator_vs_iterable(): + G = nx.empty_graph("a") + assert list(nx.community.label_propagation_communities(G)) == [{"a"}] + for community in nx.community.label_propagation_communities(G): + assert community == {"a"} + pytest.raises(TypeError, next, nx.community.label_propagation_communities(G)) + + +def test_one_node(): + test = nx.Graph() + test.add_node("a") + + # The expected communities are: + ground_truth = {frozenset(["a"])} + + communities = nx.community.label_propagation_communities(test) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + +def test_unconnected_communities(): + test = nx.Graph() + # community 1 + test.add_edge("a", "c") + test.add_edge("a", "d") + test.add_edge("d", "c") + # community 2 + test.add_edge("b", "e") + test.add_edge("e", "f") + test.add_edge("f", "b") + + # The expected communities are: + ground_truth = {frozenset(["a", "c", "d"]), frozenset(["b", "e", "f"])} + + communities = nx.community.label_propagation_communities(test) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + +def test_connected_communities(): + test = nx.Graph() + # community 1 + test.add_edge("a", "b") + test.add_edge("c", "a") + test.add_edge("c", "b") + test.add_edge("d", "a") + test.add_edge("d", "b") + test.add_edge("d", "c") + test.add_edge("e", "a") + test.add_edge("e", "b") + test.add_edge("e", "c") + test.add_edge("e", "d") + # community 2 + test.add_edge("1", "2") + test.add_edge("3", "1") + test.add_edge("3", "2") + test.add_edge("4", "1") + test.add_edge("4", "2") + test.add_edge("4", "3") + test.add_edge("5", "1") + test.add_edge("5", "2") + test.add_edge("5", "3") + test.add_edge("5", "4") + # edge between community 1 and 2 + test.add_edge("a", "1") + # community 3 + test.add_edge("x", "y") + # community 4 with only a single node + test.add_node("z") + + # The expected communities are: + ground_truth1 = { + frozenset(["a", "b", "c", "d", "e"]), + frozenset(["1", "2", "3", "4", "5"]), + frozenset(["x", "y"]), + frozenset(["z"]), + } + ground_truth2 = { + frozenset(["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"]), + frozenset(["x", "y"]), + frozenset(["z"]), + } + ground_truth = (ground_truth1, ground_truth2) + + communities = nx.community.label_propagation_communities(test) + result = {frozenset(c) for c in communities} + assert result in ground_truth + + +def test_termination(): + # ensure termination of asyn_lpa_communities in two cases + # that led to an endless loop in a previous version + test1 = nx.karate_club_graph() + test2 = nx.caveman_graph(2, 10) + test2.add_edges_from([(0, 20), (20, 10)]) + nx.community.asyn_lpa_communities(test1) + nx.community.asyn_lpa_communities(test2) + + +class TestAsynLpaCommunities: + def _check_communities(self, G, expected): + """Checks that the communities computed from the given graph ``G`` + using the :func:`~networkx.asyn_lpa_communities` function match + the set of nodes given in ``expected``. + + ``expected`` must be a :class:`set` of :class:`frozenset` + instances, each element of which is a node in the graph. + + """ + communities = nx.community.asyn_lpa_communities(G) + result = {frozenset(c) for c in communities} + assert result == expected + + def test_null_graph(self): + G = nx.null_graph() + ground_truth = set() + self._check_communities(G, ground_truth) + + def test_single_node(self): + G = nx.empty_graph(1) + ground_truth = {frozenset([0])} + self._check_communities(G, ground_truth) + + def test_simple_communities(self): + # This graph is the disjoint union of two triangles. + G = nx.Graph(["ab", "ac", "bc", "de", "df", "fe"]) + ground_truth = {frozenset("abc"), frozenset("def")} + self._check_communities(G, ground_truth) + + def test_seed_argument(self): + G = nx.Graph(["ab", "ac", "bc", "de", "df", "fe"]) + ground_truth = {frozenset("abc"), frozenset("def")} + communities = nx.community.asyn_lpa_communities(G, seed=1) + result = {frozenset(c) for c in communities} + assert result == ground_truth + + def test_several_communities(self): + # This graph is the disjoint union of five triangles. + ground_truth = {frozenset(range(3 * i, 3 * (i + 1))) for i in range(5)} + edges = chain.from_iterable(combinations(c, 2) for c in ground_truth) + G = nx.Graph(edges) + self._check_communities(G, ground_truth) + + +class TestFastLabelPropagationCommunities: + N = 100 # number of nodes + K = 15 # average node degree + + def _check_communities(self, G, truth, weight=None, seed=42): + C = nx.community.fast_label_propagation_communities(G, weight=weight, seed=seed) + assert {frozenset(c) for c in C} == truth + + def test_null_graph(self): + G = nx.null_graph() + truth = set() + self._check_communities(G, truth) + + def test_empty_graph(self): + G = nx.empty_graph(self.N) + truth = {frozenset([i]) for i in G} + self._check_communities(G, truth) + + def test_star_graph(self): + G = nx.star_graph(self.N) + truth = {frozenset(G)} + self._check_communities(G, truth) + + def test_complete_graph(self): + G = nx.complete_graph(self.N) + truth = {frozenset(G)} + self._check_communities(G, truth) + + def test_bipartite_graph(self): + G = nx.complete_bipartite_graph(self.N // 2, self.N // 2) + truth = {frozenset(G)} + self._check_communities(G, truth) + + def test_random_graph(self): + G = nx.gnm_random_graph(self.N, self.N * self.K // 2, seed=42) + truth = {frozenset(G)} + self._check_communities(G, truth) + + def test_disjoin_cliques(self): + G = nx.Graph(["ab", "AB", "AC", "BC", "12", "13", "14", "23", "24", "34"]) + truth = {frozenset("ab"), frozenset("ABC"), frozenset("1234")} + self._check_communities(G, truth) + + def test_ring_of_cliques(self): + N, K = self.N, self.K + G = nx.ring_of_cliques(N, K) + truth = {frozenset([K * i + k for k in range(K)]) for i in range(N)} + self._check_communities(G, truth) + + def test_larger_graph(self): + G = nx.gnm_random_graph(100 * self.N, 50 * self.N * self.K, seed=42) + nx.community.fast_label_propagation_communities(G) + + def test_graph_type(self): + G1 = nx.complete_graph(self.N, nx.MultiDiGraph()) + G2 = nx.MultiGraph(G1) + G3 = nx.DiGraph(G1) + G4 = nx.Graph(G1) + truth = {frozenset(G1)} + self._check_communities(G1, truth) + self._check_communities(G2, truth) + self._check_communities(G3, truth) + self._check_communities(G4, truth) + + def test_weight_argument(self): + G = nx.MultiDiGraph() + G.add_edge(1, 2, weight=1.41) + G.add_edge(2, 1, weight=1.41) + G.add_edge(2, 3) + G.add_edge(3, 4, weight=3.14) + truth = {frozenset({1, 2}), frozenset({3, 4})} + self._check_communities(G, truth, weight="weight") + + def test_seed_argument(self): + G = nx.karate_club_graph() + C = nx.community.fast_label_propagation_communities(G, seed=2023) + truth = {frozenset(c) for c in C} + self._check_communities(G, truth, seed=2023) + # smoke test that seed=None works + C = nx.community.fast_label_propagation_communities(G, seed=None) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_louvain.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_louvain.py new file mode 100644 index 00000000..816e6f14 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_louvain.py @@ -0,0 +1,264 @@ +import pytest + +import networkx as nx + + +def test_modularity_increase(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + partition = [{u} for u in G.nodes()] + mod = nx.community.modularity(G, partition) + partition = nx.community.louvain_communities(G) + + assert nx.community.modularity(G, partition) > mod + + +def test_valid_partition(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + H = G.to_directed() + partition = nx.community.louvain_communities(G) + partition2 = nx.community.louvain_communities(H) + + assert nx.community.is_partition(G, partition) + assert nx.community.is_partition(H, partition2) + + +def test_karate_club_partition(): + G = nx.karate_club_graph() + part = [ + {0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21}, + {16, 4, 5, 6, 10}, + {23, 25, 27, 28, 24, 31}, + {32, 33, 8, 14, 15, 18, 20, 22, 26, 29, 30}, + ] + partition = nx.community.louvain_communities(G, seed=2, weight=None) + + assert part == partition + + +def test_partition_iterator(): + G = nx.path_graph(15) + parts_iter = nx.community.louvain_partitions(G, seed=42) + first_part = next(parts_iter) + first_copy = [s.copy() for s in first_part] + + # gh-5901 reports sets changing after next partition is yielded + assert first_copy[0] == first_part[0] + second_part = next(parts_iter) + assert first_copy[0] == first_part[0] + + +def test_undirected_selfloops(): + G = nx.karate_club_graph() + expected_partition = nx.community.louvain_communities(G, seed=2, weight=None) + part = [ + {0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21}, + {16, 4, 5, 6, 10}, + {23, 25, 27, 28, 24, 31}, + {32, 33, 8, 14, 15, 18, 20, 22, 26, 29, 30}, + ] + assert expected_partition == part + + G.add_weighted_edges_from([(i, i, i * 1000) for i in range(9)]) + # large self-loop weight impacts partition + partition = nx.community.louvain_communities(G, seed=2, weight="weight") + assert part != partition + + # small self-loop weights aren't enough to impact partition in this graph + partition = nx.community.louvain_communities(G, seed=2, weight=None) + assert part == partition + + +def test_directed_selfloops(): + G = nx.DiGraph() + G.add_nodes_from(range(11)) + G_edges = [ + (0, 2), + (0, 1), + (1, 0), + (2, 1), + (2, 0), + (3, 4), + (4, 3), + (7, 8), + (8, 7), + (9, 10), + (10, 9), + ] + G.add_edges_from(G_edges) + G_expected_partition = nx.community.louvain_communities(G, seed=123, weight=None) + + G.add_weighted_edges_from([(i, i, i * 1000) for i in range(3)]) + # large self-loop weight impacts partition + G_partition = nx.community.louvain_communities(G, seed=123, weight="weight") + assert G_partition != G_expected_partition + + # small self-loop weights aren't enough to impact partition in this graph + G_partition = nx.community.louvain_communities(G, seed=123, weight=None) + assert G_partition == G_expected_partition + + +def test_directed_partition(): + """ + Test 2 cases that were looping infinitely + from issues #5175 and #5704 + """ + G = nx.DiGraph() + H = nx.DiGraph() + G.add_nodes_from(range(10)) + H.add_nodes_from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) + G_edges = [ + (0, 2), + (0, 1), + (1, 0), + (2, 1), + (2, 0), + (3, 4), + (4, 3), + (7, 8), + (8, 7), + (9, 10), + (10, 9), + ] + H_edges = [ + (1, 2), + (1, 6), + (1, 9), + (2, 3), + (2, 4), + (2, 5), + (3, 4), + (4, 3), + (4, 5), + (5, 4), + (6, 7), + (6, 8), + (9, 10), + (9, 11), + (10, 11), + (11, 10), + ] + G.add_edges_from(G_edges) + H.add_edges_from(H_edges) + + G_expected_partition = [{0, 1, 2}, {3, 4}, {5}, {6}, {8, 7}, {9, 10}] + G_partition = nx.community.louvain_communities(G, seed=123, weight=None) + + H_expected_partition = [{2, 3, 4, 5}, {8, 1, 6, 7}, {9, 10, 11}] + H_partition = nx.community.louvain_communities(H, seed=123, weight=None) + + assert G_partition == G_expected_partition + assert H_partition == H_expected_partition + + +def test_none_weight_param(): + G = nx.karate_club_graph() + nx.set_edge_attributes( + G, {edge: i * i for i, edge in enumerate(G.edges)}, name="foo" + ) + + part = [ + {0, 1, 2, 3, 7, 9, 11, 12, 13, 17, 19, 21}, + {16, 4, 5, 6, 10}, + {23, 25, 27, 28, 24, 31}, + {32, 33, 8, 14, 15, 18, 20, 22, 26, 29, 30}, + ] + partition1 = nx.community.louvain_communities(G, weight=None, seed=2) + partition2 = nx.community.louvain_communities(G, weight="foo", seed=2) + partition3 = nx.community.louvain_communities(G, weight="weight", seed=2) + + assert part == partition1 + assert part != partition2 + assert part != partition3 + assert partition2 != partition3 + + +def test_quality(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + H = nx.gn_graph(200, seed=1234) + I = nx.MultiGraph(G) + J = nx.MultiDiGraph(H) + + partition = nx.community.louvain_communities(G) + partition2 = nx.community.louvain_communities(H) + partition3 = nx.community.louvain_communities(I) + partition4 = nx.community.louvain_communities(J) + + quality = nx.community.partition_quality(G, partition)[0] + quality2 = nx.community.partition_quality(H, partition2)[0] + quality3 = nx.community.partition_quality(I, partition3)[0] + quality4 = nx.community.partition_quality(J, partition4)[0] + + assert quality >= 0.65 + assert quality2 >= 0.65 + assert quality3 >= 0.65 + assert quality4 >= 0.65 + + +def test_multigraph(): + G = nx.karate_club_graph() + H = nx.MultiGraph(G) + G.add_edge(0, 1, weight=10) + H.add_edge(0, 1, weight=9) + G.add_edge(0, 9, foo=20) + H.add_edge(0, 9, foo=20) + + partition1 = nx.community.louvain_communities(G, seed=1234) + partition2 = nx.community.louvain_communities(H, seed=1234) + partition3 = nx.community.louvain_communities(H, weight="foo", seed=1234) + + assert partition1 == partition2 != partition3 + + +def test_resolution(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + + partition1 = nx.community.louvain_communities(G, resolution=0.5, seed=12) + partition2 = nx.community.louvain_communities(G, seed=12) + partition3 = nx.community.louvain_communities(G, resolution=2, seed=12) + + assert len(partition1) <= len(partition2) <= len(partition3) + + +def test_threshold(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + partition1 = nx.community.louvain_communities(G, threshold=0.3, seed=2) + partition2 = nx.community.louvain_communities(G, seed=2) + mod1 = nx.community.modularity(G, partition1) + mod2 = nx.community.modularity(G, partition2) + + assert mod1 <= mod2 + + +def test_empty_graph(): + G = nx.Graph() + G.add_nodes_from(range(5)) + expected = [{0}, {1}, {2}, {3}, {4}] + assert nx.community.louvain_communities(G) == expected + + +def test_max_level(): + G = nx.LFR_benchmark_graph( + 250, 3, 1.5, 0.009, average_degree=5, min_community=20, seed=10 + ) + parts_iter = nx.community.louvain_partitions(G, seed=42) + for max_level, expected in enumerate(parts_iter, 1): + partition = nx.community.louvain_communities(G, max_level=max_level, seed=42) + assert partition == expected + assert max_level > 1 # Ensure we are actually testing max_level + # max_level is an upper limit; it's okay if we stop before it's hit. + partition = nx.community.louvain_communities(G, max_level=max_level + 1, seed=42) + assert partition == expected + with pytest.raises( + ValueError, match="max_level argument must be a positive integer" + ): + nx.community.louvain_communities(G, max_level=0) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_lukes.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_lukes.py new file mode 100644 index 00000000..cfa48f0f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_lukes.py @@ -0,0 +1,152 @@ +from itertools import product + +import pytest + +import networkx as nx + +EWL = "e_weight" +NWL = "n_weight" + + +# first test from the Lukes original paper +def paper_1_case(float_edge_wt=False, explicit_node_wt=True, directed=False): + # problem-specific constants + limit = 3 + + # configuration + if float_edge_wt: + shift = 0.001 + else: + shift = 0 + + if directed: + example_1 = nx.DiGraph() + else: + example_1 = nx.Graph() + + # graph creation + example_1.add_edge(1, 2, **{EWL: 3 + shift}) + example_1.add_edge(1, 4, **{EWL: 2 + shift}) + example_1.add_edge(2, 3, **{EWL: 4 + shift}) + example_1.add_edge(2, 5, **{EWL: 6 + shift}) + + # node weights + if explicit_node_wt: + nx.set_node_attributes(example_1, 1, NWL) + wtu = NWL + else: + wtu = None + + # partitioning + clusters_1 = { + frozenset(x) + for x in nx.community.lukes_partitioning( + example_1, limit, node_weight=wtu, edge_weight=EWL + ) + } + + return clusters_1 + + +# second test from the Lukes original paper +def paper_2_case(explicit_edge_wt=True, directed=False): + # problem specific constants + byte_block_size = 32 + + # configuration + if directed: + example_2 = nx.DiGraph() + else: + example_2 = nx.Graph() + + if explicit_edge_wt: + edic = {EWL: 1} + wtu = EWL + else: + edic = {} + wtu = None + + # graph creation + example_2.add_edge("name", "home_address", **edic) + example_2.add_edge("name", "education", **edic) + example_2.add_edge("education", "bs", **edic) + example_2.add_edge("education", "ms", **edic) + example_2.add_edge("education", "phd", **edic) + example_2.add_edge("name", "telephone", **edic) + example_2.add_edge("telephone", "home", **edic) + example_2.add_edge("telephone", "office", **edic) + example_2.add_edge("office", "no1", **edic) + example_2.add_edge("office", "no2", **edic) + + example_2.nodes["name"][NWL] = 20 + example_2.nodes["education"][NWL] = 10 + example_2.nodes["bs"][NWL] = 1 + example_2.nodes["ms"][NWL] = 1 + example_2.nodes["phd"][NWL] = 1 + example_2.nodes["home_address"][NWL] = 8 + example_2.nodes["telephone"][NWL] = 8 + example_2.nodes["home"][NWL] = 8 + example_2.nodes["office"][NWL] = 4 + example_2.nodes["no1"][NWL] = 1 + example_2.nodes["no2"][NWL] = 1 + + # partitioning + clusters_2 = { + frozenset(x) + for x in nx.community.lukes_partitioning( + example_2, byte_block_size, node_weight=NWL, edge_weight=wtu + ) + } + + return clusters_2 + + +def test_paper_1_case(): + ground_truth = {frozenset([1, 4]), frozenset([2, 3, 5])} + + tf = (True, False) + for flt, nwt, drc in product(tf, tf, tf): + part = paper_1_case(flt, nwt, drc) + assert part == ground_truth + + +def test_paper_2_case(): + ground_truth = { + frozenset(["education", "bs", "ms", "phd"]), + frozenset(["name", "home_address"]), + frozenset(["telephone", "home", "office", "no1", "no2"]), + } + + tf = (True, False) + for ewt, drc in product(tf, tf): + part = paper_2_case(ewt, drc) + assert part == ground_truth + + +def test_mandatory_tree(): + not_a_tree = nx.complete_graph(4) + + with pytest.raises(nx.NotATree): + nx.community.lukes_partitioning(not_a_tree, 5) + + +def test_mandatory_integrality(): + byte_block_size = 32 + + ex_1_broken = nx.DiGraph() + + ex_1_broken.add_edge(1, 2, **{EWL: 3.2}) + ex_1_broken.add_edge(1, 4, **{EWL: 2.4}) + ex_1_broken.add_edge(2, 3, **{EWL: 4.0}) + ex_1_broken.add_edge(2, 5, **{EWL: 6.3}) + + ex_1_broken.nodes[1][NWL] = 1.2 # ! + ex_1_broken.nodes[2][NWL] = 1 + ex_1_broken.nodes[3][NWL] = 1 + ex_1_broken.nodes[4][NWL] = 1 + ex_1_broken.nodes[5][NWL] = 2 + + with pytest.raises(TypeError): + nx.community.lukes_partitioning( + ex_1_broken, byte_block_size, node_weight=NWL, edge_weight=EWL + ) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_modularity_max.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_modularity_max.py new file mode 100644 index 00000000..0121367f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_modularity_max.py @@ -0,0 +1,340 @@ +import pytest + +import networkx as nx +from networkx.algorithms.community import ( + greedy_modularity_communities, + naive_greedy_modularity_communities, +) + + +@pytest.mark.parametrize( + "func", (greedy_modularity_communities, naive_greedy_modularity_communities) +) +def test_modularity_communities(func): + G = nx.karate_club_graph() + john_a = frozenset( + [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] + ) + mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19]) + overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21]) + expected = {john_a, overlap, mr_hi} + assert set(func(G, weight=None)) == expected + + +@pytest.mark.parametrize( + "func", (greedy_modularity_communities, naive_greedy_modularity_communities) +) +def test_modularity_communities_categorical_labels(func): + # Using other than 0-starting contiguous integers as node-labels. + G = nx.Graph( + [ + ("a", "b"), + ("a", "c"), + ("b", "c"), + ("b", "d"), # inter-community edge + ("d", "e"), + ("d", "f"), + ("d", "g"), + ("f", "g"), + ("d", "e"), + ("f", "e"), + ] + ) + expected = {frozenset({"f", "g", "e", "d"}), frozenset({"a", "b", "c"})} + assert set(func(G)) == expected + + +def test_greedy_modularity_communities_components(): + # Test for gh-5530 + G = nx.Graph([(0, 1), (2, 3), (4, 5), (5, 6)]) + # usual case with 3 components + assert greedy_modularity_communities(G) == [{4, 5, 6}, {0, 1}, {2, 3}] + # best_n can make the algorithm continue even when modularity goes down + assert greedy_modularity_communities(G, best_n=3) == [{4, 5, 6}, {0, 1}, {2, 3}] + assert greedy_modularity_communities(G, best_n=2) == [{0, 1, 4, 5, 6}, {2, 3}] + assert greedy_modularity_communities(G, best_n=1) == [{0, 1, 2, 3, 4, 5, 6}] + + +def test_greedy_modularity_communities_relabeled(): + # Test for gh-4966 + G = nx.balanced_tree(2, 2) + mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h"} + G = nx.relabel_nodes(G, mapping) + expected = [frozenset({"e", "d", "a", "b"}), frozenset({"c", "f", "g"})] + assert greedy_modularity_communities(G) == expected + + +def test_greedy_modularity_communities_directed(): + G = nx.DiGraph( + [ + ("a", "b"), + ("a", "c"), + ("b", "c"), + ("b", "d"), # inter-community edge + ("d", "e"), + ("d", "f"), + ("d", "g"), + ("f", "g"), + ("d", "e"), + ("f", "e"), + ] + ) + expected = [frozenset({"f", "g", "e", "d"}), frozenset({"a", "b", "c"})] + assert greedy_modularity_communities(G) == expected + + # with loops + G = nx.DiGraph() + G.add_edges_from( + [(1, 1), (1, 2), (1, 3), (2, 3), (1, 4), (4, 4), (5, 5), (4, 5), (4, 6), (5, 6)] + ) + expected = [frozenset({1, 2, 3}), frozenset({4, 5, 6})] + assert greedy_modularity_communities(G) == expected + + +@pytest.mark.parametrize( + "func", (greedy_modularity_communities, naive_greedy_modularity_communities) +) +def test_modularity_communities_weighted(func): + G = nx.balanced_tree(2, 3) + for a, b in G.edges: + if ((a == 1) or (a == 2)) and (b != 0): + G[a][b]["weight"] = 10.0 + else: + G[a][b]["weight"] = 1.0 + + expected = [{0, 1, 3, 4, 7, 8, 9, 10}, {2, 5, 6, 11, 12, 13, 14}] + + assert func(G, weight="weight") == expected + assert func(G, weight="weight", resolution=0.9) == expected + assert func(G, weight="weight", resolution=0.3) == expected + assert func(G, weight="weight", resolution=1.1) != expected + + +def test_modularity_communities_floating_point(): + # check for floating point error when used as key in the mapped_queue dict. + # Test for gh-4992 and gh-5000 + G = nx.Graph() + G.add_weighted_edges_from( + [(0, 1, 12), (1, 4, 71), (2, 3, 15), (2, 4, 10), (3, 6, 13)] + ) + expected = [{0, 1, 4}, {2, 3, 6}] + assert greedy_modularity_communities(G, weight="weight") == expected + assert ( + greedy_modularity_communities(G, weight="weight", resolution=0.99) == expected + ) + + +def test_modularity_communities_directed_weighted(): + G = nx.DiGraph() + G.add_weighted_edges_from( + [ + (1, 2, 5), + (1, 3, 3), + (2, 3, 6), + (2, 6, 1), + (1, 4, 1), + (4, 5, 3), + (4, 6, 7), + (5, 6, 2), + (5, 7, 5), + (5, 8, 4), + (6, 8, 3), + ] + ) + expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})] + assert greedy_modularity_communities(G, weight="weight") == expected + + # A large weight of the edge (2, 6) causes 6 to change group, even if it shares + # only one connection with the new group and 3 with the old one. + G[2][6]["weight"] = 20 + expected = [frozenset({1, 2, 3, 6}), frozenset({4, 5, 7, 8})] + assert greedy_modularity_communities(G, weight="weight") == expected + + +def test_greedy_modularity_communities_multigraph(): + G = nx.MultiGraph() + G.add_edges_from( + [ + (1, 2), + (1, 2), + (1, 3), + (2, 3), + (1, 4), + (2, 4), + (4, 5), + (5, 6), + (5, 7), + (5, 7), + (6, 7), + (7, 8), + (5, 8), + ] + ) + expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})] + assert greedy_modularity_communities(G) == expected + + # Converting (4, 5) into a multi-edge causes node 4 to change group. + G.add_edge(4, 5) + expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})] + assert greedy_modularity_communities(G) == expected + + +def test_greedy_modularity_communities_multigraph_weighted(): + G = nx.MultiGraph() + G.add_weighted_edges_from( + [ + (1, 2, 5), + (1, 2, 3), + (1, 3, 6), + (1, 3, 6), + (2, 3, 4), + (1, 4, 1), + (1, 4, 1), + (2, 4, 3), + (2, 4, 3), + (4, 5, 1), + (5, 6, 3), + (5, 6, 7), + (5, 6, 4), + (5, 7, 9), + (5, 7, 9), + (6, 7, 8), + (7, 8, 2), + (7, 8, 2), + (5, 8, 6), + (5, 8, 6), + ] + ) + expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})] + assert greedy_modularity_communities(G, weight="weight") == expected + + # Adding multi-edge (4, 5, 16) causes node 4 to change group. + G.add_edge(4, 5, weight=16) + expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})] + assert greedy_modularity_communities(G, weight="weight") == expected + + # Increasing the weight of edge (1, 4) causes node 4 to return to the former group. + G[1][4][1]["weight"] = 3 + expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})] + assert greedy_modularity_communities(G, weight="weight") == expected + + +def test_greed_modularity_communities_multidigraph(): + G = nx.MultiDiGraph() + G.add_edges_from( + [ + (1, 2), + (1, 2), + (3, 1), + (2, 3), + (2, 3), + (3, 2), + (1, 4), + (2, 4), + (4, 2), + (4, 5), + (5, 6), + (5, 6), + (6, 5), + (5, 7), + (6, 7), + (7, 8), + (5, 8), + (8, 4), + ] + ) + expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})] + assert greedy_modularity_communities(G, weight="weight") == expected + + +def test_greed_modularity_communities_multidigraph_weighted(): + G = nx.MultiDiGraph() + G.add_weighted_edges_from( + [ + (1, 2, 5), + (1, 2, 3), + (3, 1, 6), + (1, 3, 6), + (3, 2, 4), + (1, 4, 2), + (1, 4, 5), + (2, 4, 3), + (3, 2, 8), + (4, 2, 3), + (4, 3, 5), + (4, 5, 2), + (5, 6, 3), + (5, 6, 7), + (6, 5, 4), + (5, 7, 9), + (5, 7, 9), + (7, 6, 8), + (7, 8, 2), + (8, 7, 2), + (5, 8, 6), + (5, 8, 6), + ] + ) + expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})] + assert greedy_modularity_communities(G, weight="weight") == expected + + +def test_resolution_parameter_impact(): + G = nx.barbell_graph(5, 3) + + gamma = 1 + expected = [frozenset(range(5)), frozenset(range(8, 13)), frozenset(range(5, 8))] + assert greedy_modularity_communities(G, resolution=gamma) == expected + assert naive_greedy_modularity_communities(G, resolution=gamma) == expected + + gamma = 2.5 + expected = [{0, 1, 2, 3}, {9, 10, 11, 12}, {5, 6, 7}, {4}, {8}] + assert greedy_modularity_communities(G, resolution=gamma) == expected + assert naive_greedy_modularity_communities(G, resolution=gamma) == expected + + gamma = 0.3 + expected = [frozenset(range(8)), frozenset(range(8, 13))] + assert greedy_modularity_communities(G, resolution=gamma) == expected + assert naive_greedy_modularity_communities(G, resolution=gamma) == expected + + +def test_cutoff_parameter(): + G = nx.circular_ladder_graph(4) + + # No aggregation: + expected = [{k} for k in range(8)] + assert greedy_modularity_communities(G, cutoff=8) == expected + + # Aggregation to half order (number of nodes) + expected = [{k, k + 1} for k in range(0, 8, 2)] + assert greedy_modularity_communities(G, cutoff=4) == expected + + # Default aggregation case (here, 2 communities emerge) + expected = [frozenset(range(4)), frozenset(range(4, 8))] + assert greedy_modularity_communities(G, cutoff=1) == expected + + +def test_best_n(): + G = nx.barbell_graph(5, 3) + + # Same result as without enforcing cutoff: + best_n = 3 + expected = [frozenset(range(5)), frozenset(range(8, 13)), frozenset(range(5, 8))] + assert greedy_modularity_communities(G, best_n=best_n) == expected + + # One additional merging step: + best_n = 2 + expected = [frozenset(range(8)), frozenset(range(8, 13))] + assert greedy_modularity_communities(G, best_n=best_n) == expected + + # Two additional merging steps: + best_n = 1 + expected = [frozenset(range(13))] + assert greedy_modularity_communities(G, best_n=best_n) == expected + + +def test_greedy_modularity_communities_corner_cases(): + G = nx.empty_graph() + assert nx.community.greedy_modularity_communities(G) == [] + G.add_nodes_from(range(3)) + assert nx.community.greedy_modularity_communities(G) == [{0}, {1}, {2}] diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_quality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_quality.py new file mode 100644 index 00000000..c502c7e3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_quality.py @@ -0,0 +1,139 @@ +"""Unit tests for the :mod:`networkx.algorithms.community.quality` +module. + +""" + +import pytest + +import networkx as nx +from networkx import barbell_graph +from networkx.algorithms.community import modularity, partition_quality +from networkx.algorithms.community.quality import inter_community_edges + + +class TestPerformance: + """Unit tests for the :func:`performance` function.""" + + def test_bad_partition(self): + """Tests that a poor partition has a low performance measure.""" + G = barbell_graph(3, 0) + partition = [{0, 1, 4}, {2, 3, 5}] + assert 8 / 15 == pytest.approx(partition_quality(G, partition)[1], abs=1e-7) + + def test_good_partition(self): + """Tests that a good partition has a high performance measure.""" + G = barbell_graph(3, 0) + partition = [{0, 1, 2}, {3, 4, 5}] + assert 14 / 15 == pytest.approx(partition_quality(G, partition)[1], abs=1e-7) + + +class TestCoverage: + """Unit tests for the :func:`coverage` function.""" + + def test_bad_partition(self): + """Tests that a poor partition has a low coverage measure.""" + G = barbell_graph(3, 0) + partition = [{0, 1, 4}, {2, 3, 5}] + assert 3 / 7 == pytest.approx(partition_quality(G, partition)[0], abs=1e-7) + + def test_good_partition(self): + """Tests that a good partition has a high coverage measure.""" + G = barbell_graph(3, 0) + partition = [{0, 1, 2}, {3, 4, 5}] + assert 6 / 7 == pytest.approx(partition_quality(G, partition)[0], abs=1e-7) + + +def test_modularity(): + G = nx.barbell_graph(3, 0) + C = [{0, 1, 4}, {2, 3, 5}] + assert (-16 / (14**2)) == pytest.approx(modularity(G, C), abs=1e-7) + C = [{0, 1, 2}, {3, 4, 5}] + assert (35 * 2) / (14**2) == pytest.approx(modularity(G, C), abs=1e-7) + + n = 1000 + G = nx.erdos_renyi_graph(n, 0.09, seed=42, directed=True) + C = [set(range(n // 2)), set(range(n // 2, n))] + assert 0.00017154251389292754 == pytest.approx(modularity(G, C), abs=1e-7) + + G = nx.margulis_gabber_galil_graph(10) + mid_value = G.number_of_nodes() // 2 + nodes = list(G.nodes) + C = [set(nodes[:mid_value]), set(nodes[mid_value:])] + assert 0.13 == pytest.approx(modularity(G, C), abs=1e-7) + + G = nx.DiGraph() + G.add_edges_from([(2, 1), (2, 3), (3, 4)]) + C = [{1, 2}, {3, 4}] + assert 2 / 9 == pytest.approx(modularity(G, C), abs=1e-7) + + +def test_modularity_resolution(): + G = nx.barbell_graph(3, 0) + C = [{0, 1, 4}, {2, 3, 5}] + assert modularity(G, C) == pytest.approx(3 / 7 - 100 / 14**2) + gamma = 2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx(3 / 7 - gamma * 100 / 14**2) + gamma = 0.2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx(3 / 7 - gamma * 100 / 14**2) + + C = [{0, 1, 2}, {3, 4, 5}] + assert modularity(G, C) == pytest.approx(6 / 7 - 98 / 14**2) + gamma = 2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx(6 / 7 - gamma * 98 / 14**2) + gamma = 0.2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx(6 / 7 - gamma * 98 / 14**2) + + G = nx.barbell_graph(5, 3) + C = [frozenset(range(5)), frozenset(range(8, 13)), frozenset(range(5, 8))] + gamma = 1 + result = modularity(G, C, resolution=gamma) + # This C is maximal for gamma=1: modularity = 0.518229 + assert result == pytest.approx((22 / 24) - gamma * (918 / (48**2))) + gamma = 2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((22 / 24) - gamma * (918 / (48**2))) + gamma = 0.2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((22 / 24) - gamma * (918 / (48**2))) + + C = [{0, 1, 2, 3}, {9, 10, 11, 12}, {5, 6, 7}, {4}, {8}] + gamma = 1 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((14 / 24) - gamma * (598 / (48**2))) + gamma = 2.5 + result = modularity(G, C, resolution=gamma) + # This C is maximal for gamma=2.5: modularity = -0.06553819 + assert result == pytest.approx((14 / 24) - gamma * (598 / (48**2))) + gamma = 0.2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((14 / 24) - gamma * (598 / (48**2))) + + C = [frozenset(range(8)), frozenset(range(8, 13))] + gamma = 1 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((23 / 24) - gamma * (1170 / (48**2))) + gamma = 2 + result = modularity(G, C, resolution=gamma) + assert result == pytest.approx((23 / 24) - gamma * (1170 / (48**2))) + gamma = 0.3 + result = modularity(G, C, resolution=gamma) + # This C is maximal for gamma=0.3: modularity = 0.805990 + assert result == pytest.approx((23 / 24) - gamma * (1170 / (48**2))) + + +def test_inter_community_edges_with_digraphs(): + G = nx.complete_graph(2, create_using=nx.DiGraph()) + partition = [{0}, {1}] + assert inter_community_edges(G, partition) == 2 + + G = nx.complete_graph(10, create_using=nx.DiGraph()) + partition = [{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}] + assert inter_community_edges(G, partition) == 70 + + G = nx.cycle_graph(4, create_using=nx.DiGraph()) + partition = [{0, 1}, {2, 3}] + assert inter_community_edges(G, partition) == 2 diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_utils.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_utils.py new file mode 100644 index 00000000..ea019db9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/tests/test_utils.py @@ -0,0 +1,26 @@ +"""Unit tests for the :mod:`networkx.algorithms.community.utils` module.""" + +import networkx as nx + + +def test_is_partition(): + G = nx.empty_graph(3) + assert nx.community.is_partition(G, [{0, 1}, {2}]) + assert nx.community.is_partition(G, ({0, 1}, {2})) + assert nx.community.is_partition(G, ([0, 1], [2])) + assert nx.community.is_partition(G, [[0, 1], [2]]) + + +def test_not_covering(): + G = nx.empty_graph(3) + assert not nx.community.is_partition(G, [{0}, {1}]) + + +def test_not_disjoint(): + G = nx.empty_graph(3) + assert not nx.community.is_partition(G, [{0, 1}, {1, 2}]) + + +def test_not_node(): + G = nx.empty_graph(3) + assert not nx.community.is_partition(G, [{0, 1}, {3}]) |