diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py new file mode 100644 index 00000000..959c93a5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py @@ -0,0 +1,382 @@ +"""Function for detecting communities based on Louvain Community Detection +Algorithm""" + +import itertools +from collections import defaultdict, deque + +import networkx as nx +from networkx.algorithms.community import modularity +from networkx.utils import py_random_state + +__all__ = ["louvain_communities", "louvain_partitions"] + + +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight") +def louvain_communities( + G, weight="weight", resolution=1, threshold=0.0000001, max_level=None, seed=None +): + r"""Find the best partition of a graph using the Louvain Community Detection + Algorithm. + + Louvain Community Detection Algorithm is a simple method to extract the community + structure of a network. This is a heuristic method based on modularity optimization. [1]_ + + The algorithm works in 2 steps. On the first step it assigns every node to be + in its own community and then for each node it tries to find the maximum positive + modularity gain by moving each node to all of its neighbor communities. If no positive + gain is achieved the node remains in its original community. + + The modularity gain obtained by moving an isolated node $i$ into a community $C$ can + easily be calculated by the following formula (combining [1]_ [2]_ and some algebra): + + .. math:: + \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2} + + where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links + from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$, + $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$ + is the resolution parameter. + + For the directed case the modularity gain can be computed using this formula according to [3]_ + + .. math:: + \Delta Q = \frac{k_{i,in}}{m} + - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2} + + where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and + $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident + to nodes in $C$. + + The first phase continues until no individual move can improve the modularity. + + The second phase consists in building a new network whose nodes are now the communities + found in the first phase. To do so, the weights of the links between the new nodes are given by + the sum of the weight of the links between nodes in the corresponding two communities. Once this + phase is complete it is possible to reapply the first phase creating bigger communities with + increased modularity. + + The above two phases are executed until no modularity gain is achieved (or is less than + the `threshold`, or until `max_levels` is reached). + + Be careful with self-loops in the input graph. These are treated as + previously reduced communities -- as if the process had been started + in the middle of the algorithm. Large self-loop edge weights thus + represent strong communities and in practice may be hard to add + other nodes to. If your input graph edge weights for self-loops + do not represent already reduced communities you may want to remove + the self-loops before inputting that graph. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities + threshold : float, optional (default=0.0000001) + Modularity gain threshold for each level. If the gain of modularity + between 2 levels of the algorithm is less than the given threshold + then the algorithm stops and returns the resulting communities. + max_level : int or None, optional (default=None) + The maximum number of levels (steps of the algorithm) to compute. + Must be a positive integer or None. If None, then there is no max + level and the threshold parameter determines the stopping condition. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Returns + ------- + list + A list of sets (partition of `G`). Each set represents one community and contains + all the nodes that constitute it. + + Examples + -------- + >>> import networkx as nx + >>> G = nx.petersen_graph() + >>> nx.community.louvain_communities(G, seed=123) + [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}] + + Notes + ----- + The order in which the nodes are considered can affect the final output. In the algorithm + the ordering happens using a random shuffle. + + References + ---------- + .. [1] Blondel, V.D. et al. Fast unfolding of communities in + large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008 + .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing + well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z + .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks. + [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784 + + See Also + -------- + louvain_partitions + """ + + partitions = louvain_partitions(G, weight, resolution, threshold, seed) + if max_level is not None: + if max_level <= 0: + raise ValueError("max_level argument must be a positive integer or None") + partitions = itertools.islice(partitions, max_level) + final_partition = deque(partitions, maxlen=1) + return final_partition.pop() + + +@py_random_state("seed") +@nx._dispatchable(edge_attrs="weight") +def louvain_partitions( + G, weight="weight", resolution=1, threshold=0.0000001, seed=None +): + """Yields partitions for each level of the Louvain Community Detection Algorithm + + Louvain Community Detection Algorithm is a simple method to extract the community + structure of a network. This is a heuristic method based on modularity optimization. [1]_ + + The partitions at each level (step of the algorithm) form a dendrogram of communities. + A dendrogram is a diagram representing a tree and each level represents + a partition of the G graph. The top level contains the smallest communities + and as you traverse to the bottom of the tree the communities get bigger + and the overall modularity increases making the partition better. + + Each level is generated by executing the two phases of the Louvain Community + Detection Algorithm. + + Be careful with self-loops in the input graph. These are treated as + previously reduced communities -- as if the process had been started + in the middle of the algorithm. Large self-loop edge weights thus + represent strong communities and in practice may be hard to add + other nodes to. If your input graph edge weights for self-loops + do not represent already reduced communities you may want to remove + the self-loops before inputting that graph. + + Parameters + ---------- + G : NetworkX graph + weight : string or None, optional (default="weight") + The name of an edge attribute that holds the numerical value + used as a weight. If None then each edge has weight 1. + resolution : float, optional (default=1) + If resolution is less than 1, the algorithm favors larger communities. + Greater than 1 favors smaller communities + threshold : float, optional (default=0.0000001) + Modularity gain threshold for each level. If the gain of modularity + between 2 levels of the algorithm is less than the given threshold + then the algorithm stops and returns the resulting communities. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + Yields + ------ + list + A list of sets (partition of `G`). Each set represents one community and contains + all the nodes that constitute it. + + References + ---------- + .. [1] Blondel, V.D. et al. Fast unfolding of communities in + large networks. J. Stat. Mech 10008, 1-12(2008) + + See Also + -------- + louvain_communities + """ + + partition = [{u} for u in G.nodes()] + if nx.is_empty(G): + yield partition + return + mod = modularity(G, partition, resolution=resolution, weight=weight) + is_directed = G.is_directed() + if G.is_multigraph(): + graph = _convert_multigraph(G, weight, is_directed) + else: + graph = G.__class__() + graph.add_nodes_from(G) + graph.add_weighted_edges_from(G.edges(data=weight, default=1)) + + m = graph.size(weight="weight") + partition, inner_partition, improvement = _one_level( + graph, m, partition, resolution, is_directed, seed + ) + improvement = True + while improvement: + # gh-5901 protect the sets in the yielded list from further manipulation here + yield [s.copy() for s in partition] + new_mod = modularity( + graph, inner_partition, resolution=resolution, weight="weight" + ) + if new_mod - mod <= threshold: + return + mod = new_mod + graph = _gen_graph(graph, inner_partition) + partition, inner_partition, improvement = _one_level( + graph, m, partition, resolution, is_directed, seed + ) + + +def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None): + """Calculate one level of the Louvain partitions tree + + Parameters + ---------- + G : NetworkX Graph/DiGraph + The graph from which to detect communities + m : number + The size of the graph `G`. + partition : list of sets of nodes + A valid partition of the graph `G` + resolution : positive number + The resolution parameter for computing the modularity of a partition + is_directed : bool + True if `G` is a directed graph. + seed : integer, random_state, or None (default) + Indicator of random number generation state. + See :ref:`Randomness<randomness>`. + + """ + node2com = {u: i for i, u in enumerate(G.nodes())} + inner_partition = [{u} for u in G.nodes()] + if is_directed: + in_degrees = dict(G.in_degree(weight="weight")) + out_degrees = dict(G.out_degree(weight="weight")) + Stot_in = list(in_degrees.values()) + Stot_out = list(out_degrees.values()) + # Calculate weights for both in and out neighbors without considering self-loops + nbrs = {} + for u in G: + nbrs[u] = defaultdict(float) + for _, n, wt in G.out_edges(u, data="weight"): + if u != n: + nbrs[u][n] += wt + for n, _, wt in G.in_edges(u, data="weight"): + if u != n: + nbrs[u][n] += wt + else: + degrees = dict(G.degree(weight="weight")) + Stot = list(degrees.values()) + nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G} + rand_nodes = list(G.nodes) + seed.shuffle(rand_nodes) + nb_moves = 1 + improvement = False + while nb_moves > 0: + nb_moves = 0 + for u in rand_nodes: + best_mod = 0 + best_com = node2com[u] + weights2com = _neighbor_weights(nbrs[u], node2com) + if is_directed: + in_degree = in_degrees[u] + out_degree = out_degrees[u] + Stot_in[best_com] -= in_degree + Stot_out[best_com] -= out_degree + remove_cost = ( + -weights2com[best_com] / m + + resolution + * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com]) + / m**2 + ) + else: + degree = degrees[u] + Stot[best_com] -= degree + remove_cost = -weights2com[best_com] / m + resolution * ( + Stot[best_com] * degree + ) / (2 * m**2) + for nbr_com, wt in weights2com.items(): + if is_directed: + gain = ( + remove_cost + + wt / m + - resolution + * ( + out_degree * Stot_in[nbr_com] + + in_degree * Stot_out[nbr_com] + ) + / m**2 + ) + else: + gain = ( + remove_cost + + wt / m + - resolution * (Stot[nbr_com] * degree) / (2 * m**2) + ) + if gain > best_mod: + best_mod = gain + best_com = nbr_com + if is_directed: + Stot_in[best_com] += in_degree + Stot_out[best_com] += out_degree + else: + Stot[best_com] += degree + if best_com != node2com[u]: + com = G.nodes[u].get("nodes", {u}) + partition[node2com[u]].difference_update(com) + inner_partition[node2com[u]].remove(u) + partition[best_com].update(com) + inner_partition[best_com].add(u) + improvement = True + nb_moves += 1 + node2com[u] = best_com + partition = list(filter(len, partition)) + inner_partition = list(filter(len, inner_partition)) + return partition, inner_partition, improvement + + +def _neighbor_weights(nbrs, node2com): + """Calculate weights between node and its neighbor communities. + + Parameters + ---------- + nbrs : dictionary + Dictionary with nodes' neighbors as keys and their edge weight as value. + node2com : dictionary + Dictionary with all graph's nodes as keys and their community index as value. + + """ + weights = defaultdict(float) + for nbr, wt in nbrs.items(): + weights[node2com[nbr]] += wt + return weights + + +def _gen_graph(G, partition): + """Generate a new graph based on the partitions of a given graph""" + H = G.__class__() + node2com = {} + for i, part in enumerate(partition): + nodes = set() + for node in part: + node2com[node] = i + nodes.update(G.nodes[node].get("nodes", {node})) + H.add_node(i, nodes=nodes) + + for node1, node2, wt in G.edges(data=True): + wt = wt["weight"] + com1 = node2com[node1] + com2 = node2com[node2] + temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"] + H.add_edge(com1, com2, weight=wt + temp) + return H + + +def _convert_multigraph(G, weight, is_directed): + """Convert a Multigraph to normal Graph""" + if is_directed: + H = nx.DiGraph() + else: + H = nx.Graph() + H.add_nodes_from(G) + for u, v, wt in G.edges(data=weight, default=1): + if H.has_edge(u, v): + H[u][v]["weight"] += wt + else: + H.add_edge(u, v, weight=wt) + return H |