about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py')
-rw-r--r--.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py382
1 files changed, 382 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py
new file mode 100644
index 00000000..959c93a5
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/louvain.py
@@ -0,0 +1,382 @@
+"""Function for detecting communities based on Louvain Community Detection
+Algorithm"""
+
+import itertools
+from collections import defaultdict, deque
+
+import networkx as nx
+from networkx.algorithms.community import modularity
+from networkx.utils import py_random_state
+
+__all__ = ["louvain_communities", "louvain_partitions"]
+
+
+@py_random_state("seed")
+@nx._dispatchable(edge_attrs="weight")
+def louvain_communities(
+    G, weight="weight", resolution=1, threshold=0.0000001, max_level=None, seed=None
+):
+    r"""Find the best partition of a graph using the Louvain Community Detection
+    Algorithm.
+
+    Louvain Community Detection Algorithm is a simple method to extract the community
+    structure of a network. This is a heuristic method based on modularity optimization. [1]_
+
+    The algorithm works in 2 steps. On the first step it assigns every node to be
+    in its own community and then for each node it tries to find the maximum positive
+    modularity gain by moving each node to all of its neighbor communities. If no positive
+    gain is achieved the node remains in its original community.
+
+    The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
+    easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
+
+    .. math::
+        \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
+
+    where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
+    from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
+    $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
+    is the resolution parameter.
+
+    For the directed case the modularity gain can be computed using this formula according to [3]_
+
+    .. math::
+        \Delta Q = \frac{k_{i,in}}{m}
+        - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
+
+    where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
+    $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
+    to nodes in $C$.
+
+    The first phase continues until no individual move can improve the modularity.
+
+    The second phase consists in building a new network whose nodes are now the communities
+    found in the first phase. To do so, the weights of the links between the new nodes are given by
+    the sum of the weight of the links between nodes in the corresponding two communities. Once this
+    phase is complete it is possible to reapply the first phase creating bigger communities with
+    increased modularity.
+
+    The above two phases are executed until no modularity gain is achieved (or is less than
+    the `threshold`, or until `max_levels` is reached).
+
+    Be careful with self-loops in the input graph. These are treated as
+    previously reduced communities -- as if the process had been started
+    in the middle of the algorithm. Large self-loop edge weights thus
+    represent strong communities and in practice may be hard to add
+    other nodes to.  If your input graph edge weights for self-loops
+    do not represent already reduced communities you may want to remove
+    the self-loops before inputting that graph.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+    weight : string or None, optional (default="weight")
+        The name of an edge attribute that holds the numerical value
+        used as a weight. If None then each edge has weight 1.
+    resolution : float, optional (default=1)
+        If resolution is less than 1, the algorithm favors larger communities.
+        Greater than 1 favors smaller communities
+    threshold : float, optional (default=0.0000001)
+        Modularity gain threshold for each level. If the gain of modularity
+        between 2 levels of the algorithm is less than the given threshold
+        then the algorithm stops and returns the resulting communities.
+    max_level : int or None, optional (default=None)
+        The maximum number of levels (steps of the algorithm) to compute.
+        Must be a positive integer or None. If None, then there is no max
+        level and the threshold parameter determines the stopping condition.
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    Returns
+    -------
+    list
+        A list of sets (partition of `G`). Each set represents one community and contains
+        all the nodes that constitute it.
+
+    Examples
+    --------
+    >>> import networkx as nx
+    >>> G = nx.petersen_graph()
+    >>> nx.community.louvain_communities(G, seed=123)
+    [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
+
+    Notes
+    -----
+    The order in which the nodes are considered can affect the final output. In the algorithm
+    the ordering happens using a random shuffle.
+
+    References
+    ----------
+    .. [1] Blondel, V.D. et al. Fast unfolding of communities in
+       large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
+    .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
+       well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
+    .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
+        [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
+
+    See Also
+    --------
+    louvain_partitions
+    """
+
+    partitions = louvain_partitions(G, weight, resolution, threshold, seed)
+    if max_level is not None:
+        if max_level <= 0:
+            raise ValueError("max_level argument must be a positive integer or None")
+        partitions = itertools.islice(partitions, max_level)
+    final_partition = deque(partitions, maxlen=1)
+    return final_partition.pop()
+
+
+@py_random_state("seed")
+@nx._dispatchable(edge_attrs="weight")
+def louvain_partitions(
+    G, weight="weight", resolution=1, threshold=0.0000001, seed=None
+):
+    """Yields partitions for each level of the Louvain Community Detection Algorithm
+
+    Louvain Community Detection Algorithm is a simple method to extract the community
+    structure of a network. This is a heuristic method based on modularity optimization. [1]_
+
+    The partitions at each level (step of the algorithm) form a dendrogram of communities.
+    A dendrogram is a diagram representing a tree and each level represents
+    a partition of the G graph. The top level contains the smallest communities
+    and as you traverse to the bottom of the tree the communities get bigger
+    and the overall modularity increases making the partition better.
+
+    Each level is generated by executing the two phases of the Louvain Community
+    Detection Algorithm.
+
+    Be careful with self-loops in the input graph. These are treated as
+    previously reduced communities -- as if the process had been started
+    in the middle of the algorithm. Large self-loop edge weights thus
+    represent strong communities and in practice may be hard to add
+    other nodes to.  If your input graph edge weights for self-loops
+    do not represent already reduced communities you may want to remove
+    the self-loops before inputting that graph.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+    weight : string or None, optional (default="weight")
+     The name of an edge attribute that holds the numerical value
+     used as a weight. If None then each edge has weight 1.
+    resolution : float, optional (default=1)
+        If resolution is less than 1, the algorithm favors larger communities.
+        Greater than 1 favors smaller communities
+    threshold : float, optional (default=0.0000001)
+     Modularity gain threshold for each level. If the gain of modularity
+     between 2 levels of the algorithm is less than the given threshold
+     then the algorithm stops and returns the resulting communities.
+    seed : integer, random_state, or None (default)
+     Indicator of random number generation state.
+     See :ref:`Randomness<randomness>`.
+
+    Yields
+    ------
+    list
+        A list of sets (partition of `G`). Each set represents one community and contains
+        all the nodes that constitute it.
+
+    References
+    ----------
+    .. [1] Blondel, V.D. et al. Fast unfolding of communities in
+       large networks. J. Stat. Mech 10008, 1-12(2008)
+
+    See Also
+    --------
+    louvain_communities
+    """
+
+    partition = [{u} for u in G.nodes()]
+    if nx.is_empty(G):
+        yield partition
+        return
+    mod = modularity(G, partition, resolution=resolution, weight=weight)
+    is_directed = G.is_directed()
+    if G.is_multigraph():
+        graph = _convert_multigraph(G, weight, is_directed)
+    else:
+        graph = G.__class__()
+        graph.add_nodes_from(G)
+        graph.add_weighted_edges_from(G.edges(data=weight, default=1))
+
+    m = graph.size(weight="weight")
+    partition, inner_partition, improvement = _one_level(
+        graph, m, partition, resolution, is_directed, seed
+    )
+    improvement = True
+    while improvement:
+        # gh-5901 protect the sets in the yielded list from further manipulation here
+        yield [s.copy() for s in partition]
+        new_mod = modularity(
+            graph, inner_partition, resolution=resolution, weight="weight"
+        )
+        if new_mod - mod <= threshold:
+            return
+        mod = new_mod
+        graph = _gen_graph(graph, inner_partition)
+        partition, inner_partition, improvement = _one_level(
+            graph, m, partition, resolution, is_directed, seed
+        )
+
+
+def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
+    """Calculate one level of the Louvain partitions tree
+
+    Parameters
+    ----------
+    G : NetworkX Graph/DiGraph
+        The graph from which to detect communities
+    m : number
+        The size of the graph `G`.
+    partition : list of sets of nodes
+        A valid partition of the graph `G`
+    resolution : positive number
+        The resolution parameter for computing the modularity of a partition
+    is_directed : bool
+        True if `G` is a directed graph.
+    seed : integer, random_state, or None (default)
+        Indicator of random number generation state.
+        See :ref:`Randomness<randomness>`.
+
+    """
+    node2com = {u: i for i, u in enumerate(G.nodes())}
+    inner_partition = [{u} for u in G.nodes()]
+    if is_directed:
+        in_degrees = dict(G.in_degree(weight="weight"))
+        out_degrees = dict(G.out_degree(weight="weight"))
+        Stot_in = list(in_degrees.values())
+        Stot_out = list(out_degrees.values())
+        # Calculate weights for both in and out neighbors without considering self-loops
+        nbrs = {}
+        for u in G:
+            nbrs[u] = defaultdict(float)
+            for _, n, wt in G.out_edges(u, data="weight"):
+                if u != n:
+                    nbrs[u][n] += wt
+            for n, _, wt in G.in_edges(u, data="weight"):
+                if u != n:
+                    nbrs[u][n] += wt
+    else:
+        degrees = dict(G.degree(weight="weight"))
+        Stot = list(degrees.values())
+        nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
+    rand_nodes = list(G.nodes)
+    seed.shuffle(rand_nodes)
+    nb_moves = 1
+    improvement = False
+    while nb_moves > 0:
+        nb_moves = 0
+        for u in rand_nodes:
+            best_mod = 0
+            best_com = node2com[u]
+            weights2com = _neighbor_weights(nbrs[u], node2com)
+            if is_directed:
+                in_degree = in_degrees[u]
+                out_degree = out_degrees[u]
+                Stot_in[best_com] -= in_degree
+                Stot_out[best_com] -= out_degree
+                remove_cost = (
+                    -weights2com[best_com] / m
+                    + resolution
+                    * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
+                    / m**2
+                )
+            else:
+                degree = degrees[u]
+                Stot[best_com] -= degree
+                remove_cost = -weights2com[best_com] / m + resolution * (
+                    Stot[best_com] * degree
+                ) / (2 * m**2)
+            for nbr_com, wt in weights2com.items():
+                if is_directed:
+                    gain = (
+                        remove_cost
+                        + wt / m
+                        - resolution
+                        * (
+                            out_degree * Stot_in[nbr_com]
+                            + in_degree * Stot_out[nbr_com]
+                        )
+                        / m**2
+                    )
+                else:
+                    gain = (
+                        remove_cost
+                        + wt / m
+                        - resolution * (Stot[nbr_com] * degree) / (2 * m**2)
+                    )
+                if gain > best_mod:
+                    best_mod = gain
+                    best_com = nbr_com
+            if is_directed:
+                Stot_in[best_com] += in_degree
+                Stot_out[best_com] += out_degree
+            else:
+                Stot[best_com] += degree
+            if best_com != node2com[u]:
+                com = G.nodes[u].get("nodes", {u})
+                partition[node2com[u]].difference_update(com)
+                inner_partition[node2com[u]].remove(u)
+                partition[best_com].update(com)
+                inner_partition[best_com].add(u)
+                improvement = True
+                nb_moves += 1
+                node2com[u] = best_com
+    partition = list(filter(len, partition))
+    inner_partition = list(filter(len, inner_partition))
+    return partition, inner_partition, improvement
+
+
+def _neighbor_weights(nbrs, node2com):
+    """Calculate weights between node and its neighbor communities.
+
+    Parameters
+    ----------
+    nbrs : dictionary
+           Dictionary with nodes' neighbors as keys and their edge weight as value.
+    node2com : dictionary
+           Dictionary with all graph's nodes as keys and their community index as value.
+
+    """
+    weights = defaultdict(float)
+    for nbr, wt in nbrs.items():
+        weights[node2com[nbr]] += wt
+    return weights
+
+
+def _gen_graph(G, partition):
+    """Generate a new graph based on the partitions of a given graph"""
+    H = G.__class__()
+    node2com = {}
+    for i, part in enumerate(partition):
+        nodes = set()
+        for node in part:
+            node2com[node] = i
+            nodes.update(G.nodes[node].get("nodes", {node}))
+        H.add_node(i, nodes=nodes)
+
+    for node1, node2, wt in G.edges(data=True):
+        wt = wt["weight"]
+        com1 = node2com[node1]
+        com2 = node2com[node2]
+        temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
+        H.add_edge(com1, com2, weight=wt + temp)
+    return H
+
+
+def _convert_multigraph(G, weight, is_directed):
+    """Convert a Multigraph to normal Graph"""
+    if is_directed:
+        H = nx.DiGraph()
+    else:
+        H = nx.Graph()
+    H.add_nodes_from(G)
+    for u, v, wt in G.edges(data=weight, default=1):
+        if H.has_edge(u, v):
+            H[u][v]["weight"] += wt
+        else:
+            H.add_edge(u, v, weight=wt)
+    return H