aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py')
-rw-r--r--.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py346
1 files changed, 346 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py
new file mode 100644
index 00000000..f09a6d45
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py
@@ -0,0 +1,346 @@
+"""Functions for measuring the quality of a partition (into
+communities).
+
+"""
+
+from itertools import combinations
+
+import networkx as nx
+from networkx import NetworkXError
+from networkx.algorithms.community.community_utils import is_partition
+from networkx.utils.decorators import argmap
+
+__all__ = ["modularity", "partition_quality"]
+
+
+class NotAPartition(NetworkXError):
+ """Raised if a given collection is not a partition."""
+
+ def __init__(self, G, collection):
+ msg = f"{collection} is not a valid partition of the graph {G}"
+ super().__init__(msg)
+
+
+def _require_partition(G, partition):
+ """Decorator to check that a valid partition is input to a function
+
+ Raises :exc:`networkx.NetworkXError` if the partition is not valid.
+
+ This decorator should be used on functions whose first two arguments
+ are a graph and a partition of the nodes of that graph (in that
+ order)::
+
+ >>> @require_partition
+ ... def foo(G, partition):
+ ... print("partition is valid!")
+ ...
+ >>> G = nx.complete_graph(5)
+ >>> partition = [{0, 1}, {2, 3}, {4}]
+ >>> foo(G, partition)
+ partition is valid!
+ >>> partition = [{0}, {2, 3}, {4}]
+ >>> foo(G, partition)
+ Traceback (most recent call last):
+ ...
+ networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
+ >>> partition = [{0, 1}, {1, 2, 3}, {4}]
+ >>> foo(G, partition)
+ Traceback (most recent call last):
+ ...
+ networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
+
+ """
+ if is_partition(G, partition):
+ return G, partition
+ raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G")
+
+
+require_partition = argmap(_require_partition, (0, 1))
+
+
+@nx._dispatchable
+def intra_community_edges(G, partition):
+ """Returns the number of intra-community edges for a partition of `G`.
+
+ Parameters
+ ----------
+ G : NetworkX graph.
+
+ partition : iterable of sets of nodes
+ This must be a partition of the nodes of `G`.
+
+ The "intra-community edges" are those edges joining a pair of nodes
+ in the same block of the partition.
+
+ """
+ return sum(G.subgraph(block).size() for block in partition)
+
+
+@nx._dispatchable
+def inter_community_edges(G, partition):
+ """Returns the number of inter-community edges for a partition of `G`.
+ according to the given
+ partition of the nodes of `G`.
+
+ Parameters
+ ----------
+ G : NetworkX graph.
+
+ partition : iterable of sets of nodes
+ This must be a partition of the nodes of `G`.
+
+ The *inter-community edges* are those edges joining a pair of nodes
+ in different blocks of the partition.
+
+ Implementation note: this function creates an intermediate graph
+ that may require the same amount of memory as that of `G`.
+
+ """
+ # Alternate implementation that does not require constructing a new
+ # graph object (but does require constructing an affiliation
+ # dictionary):
+ #
+ # aff = dict(chain.from_iterable(((v, block) for v in block)
+ # for block in partition))
+ # return sum(1 for u, v in G.edges() if aff[u] != aff[v])
+ #
+ MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
+ return nx.quotient_graph(G, partition, create_using=MG).size()
+
+
+@nx._dispatchable
+def inter_community_non_edges(G, partition):
+ """Returns the number of inter-community non-edges according to the
+ given partition of the nodes of `G`.
+
+ Parameters
+ ----------
+ G : NetworkX graph.
+
+ partition : iterable of sets of nodes
+ This must be a partition of the nodes of `G`.
+
+ A *non-edge* is a pair of nodes (undirected if `G` is undirected)
+ that are not adjacent in `G`. The *inter-community non-edges* are
+ those non-edges on a pair of nodes in different blocks of the
+ partition.
+
+ Implementation note: this function creates two intermediate graphs,
+ which may require up to twice the amount of memory as required to
+ store `G`.
+
+ """
+ # Alternate implementation that does not require constructing two
+ # new graph objects (but does require constructing an affiliation
+ # dictionary):
+ #
+ # aff = dict(chain.from_iterable(((v, block) for v in block)
+ # for block in partition))
+ # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
+ #
+ return inter_community_edges(nx.complement(G), partition)
+
+
+@nx._dispatchable(edge_attrs="weight")
+def modularity(G, communities, weight="weight", resolution=1):
+ r"""Returns the modularity of the given partition of the graph.
+
+ Modularity is defined in [1]_ as
+
+ .. math::
+ Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
+ \delta(c_i,c_j)
+
+ where $m$ is the number of edges (or sum of all edge weights as in [5]_),
+ $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$,
+ $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and
+ $j$ are in the same community else 0.
+
+ According to [2]_ (and verified by some algebra) this can be reduced to
+
+ .. math::
+ Q = \sum_{c=1}^{n}
+ \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]
+
+ where the sum iterates over all communities $c$, $m$ is the number of edges,
+ $L_c$ is the number of intra-community links for community $c$,
+ $k_c$ is the sum of degrees of the nodes in community $c$,
+ and $\gamma$ is the resolution parameter.
+
+ The resolution parameter sets an arbitrary tradeoff between intra-group
+ edges and inter-group edges. More complex grouping patterns can be
+ discovered by analyzing the same network with multiple values of gamma
+ and then combining the results [3]_. That said, it is very common to
+ simply use gamma=1. More on the choice of gamma is in [4]_.
+
+ The second formula is the one actually used in calculation of the modularity.
+ For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.
+
+ Parameters
+ ----------
+ G : NetworkX Graph
+
+ communities : list or iterable of set of nodes
+ These node sets must represent a partition of G's nodes.
+
+ weight : string or None, optional (default="weight")
+ The edge attribute that holds the numerical value used
+ as a weight. If None or an edge does not have that attribute,
+ then that edge has weight 1.
+
+ resolution : float (default=1)
+ If resolution is less than 1, modularity favors larger communities.
+ Greater than 1 favors smaller communities.
+
+ Returns
+ -------
+ Q : float
+ The modularity of the partition.
+
+ Raises
+ ------
+ NotAPartition
+ If `communities` is not a partition of the nodes of `G`.
+
+ Examples
+ --------
+ >>> G = nx.barbell_graph(3, 0)
+ >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
+ 0.35714285714285715
+ >>> nx.community.modularity(G, nx.community.label_propagation_communities(G))
+ 0.35714285714285715
+
+ References
+ ----------
+ .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
+ Oxford University Press, 2011.
+ .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
+ "Finding community structure in very large networks."
+ Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
+ .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
+ Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
+ .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
+ maximum likelihood methods for community detection"
+ Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
+ .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large
+ networks" J. Stat. Mech 10008, 1-12 (2008).
+ https://doi.org/10.1088/1742-5468/2008/10/P10008
+ """
+ if not isinstance(communities, list):
+ communities = list(communities)
+ if not is_partition(G, communities):
+ raise NotAPartition(G, communities)
+
+ directed = G.is_directed()
+ if directed:
+ out_degree = dict(G.out_degree(weight=weight))
+ in_degree = dict(G.in_degree(weight=weight))
+ m = sum(out_degree.values())
+ norm = 1 / m**2
+ else:
+ out_degree = in_degree = dict(G.degree(weight=weight))
+ deg_sum = sum(out_degree.values())
+ m = deg_sum / 2
+ norm = 1 / deg_sum**2
+
+ def community_contribution(community):
+ comm = set(community)
+ L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
+
+ out_degree_sum = sum(out_degree[u] for u in comm)
+ in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
+
+ return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
+
+ return sum(map(community_contribution, communities))
+
+
+@require_partition
+@nx._dispatchable
+def partition_quality(G, partition):
+ """Returns the coverage and performance of a partition of G.
+
+ The *coverage* of a partition is the ratio of the number of
+ intra-community edges to the total number of edges in the graph.
+
+ The *performance* of a partition is the number of
+ intra-community edges plus inter-community non-edges divided by the total
+ number of potential edges.
+
+ This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links.
+
+ Parameters
+ ----------
+ G : NetworkX graph
+
+ partition : sequence
+ Partition of the nodes of `G`, represented as a sequence of
+ sets of nodes (blocks). Each block of the partition represents a
+ community.
+
+ Returns
+ -------
+ (float, float)
+ The (coverage, performance) tuple of the partition, as defined above.
+
+ Raises
+ ------
+ NetworkXError
+ If `partition` is not a valid partition of the nodes of `G`.
+
+ Notes
+ -----
+ If `G` is a multigraph;
+ - for coverage, the multiplicity of edges is counted
+ - for performance, the result is -1 (total number of possible edges is not defined)
+
+ References
+ ----------
+ .. [1] Santo Fortunato.
+ "Community Detection in Graphs".
+ *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
+ <https://arxiv.org/abs/0906.0612>
+ """
+
+ node_community = {}
+ for i, community in enumerate(partition):
+ for node in community:
+ node_community[node] = i
+
+ # `performance` is not defined for multigraphs
+ if not G.is_multigraph():
+ # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges`
+ possible_inter_community_edges = sum(
+ len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
+ )
+
+ if G.is_directed():
+ possible_inter_community_edges *= 2
+ else:
+ possible_inter_community_edges = 0
+
+ # Compute the number of edges in the complete graph -- `n` nodes,
+ # directed or undirected, depending on `G`
+ n = len(G)
+ total_pairs = n * (n - 1)
+ if not G.is_directed():
+ total_pairs //= 2
+
+ intra_community_edges = 0
+ inter_community_non_edges = possible_inter_community_edges
+
+ # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges`
+ for e in G.edges():
+ if node_community[e[0]] == node_community[e[1]]:
+ intra_community_edges += 1
+ else:
+ inter_community_non_edges -= 1
+
+ coverage = intra_community_edges / len(G.edges)
+
+ if G.is_multigraph():
+ performance = -1.0
+ else:
+ performance = (intra_community_edges + inter_community_non_edges) / total_pairs
+
+ return coverage, performance