diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py | 346 |
1 files changed, 346 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py new file mode 100644 index 00000000..f09a6d45 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/community/quality.py @@ -0,0 +1,346 @@ +"""Functions for measuring the quality of a partition (into +communities). + +""" + +from itertools import combinations + +import networkx as nx +from networkx import NetworkXError +from networkx.algorithms.community.community_utils import is_partition +from networkx.utils.decorators import argmap + +__all__ = ["modularity", "partition_quality"] + + +class NotAPartition(NetworkXError): + """Raised if a given collection is not a partition.""" + + def __init__(self, G, collection): + msg = f"{collection} is not a valid partition of the graph {G}" + super().__init__(msg) + + +def _require_partition(G, partition): + """Decorator to check that a valid partition is input to a function + + Raises :exc:`networkx.NetworkXError` if the partition is not valid. + + This decorator should be used on functions whose first two arguments + are a graph and a partition of the nodes of that graph (in that + order):: + + >>> @require_partition + ... def foo(G, partition): + ... print("partition is valid!") + ... + >>> G = nx.complete_graph(5) + >>> partition = [{0, 1}, {2, 3}, {4}] + >>> foo(G, partition) + partition is valid! + >>> partition = [{0}, {2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + >>> partition = [{0, 1}, {1, 2, 3}, {4}] + >>> foo(G, partition) + Traceback (most recent call last): + ... + networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G + + """ + if is_partition(G, partition): + return G, partition + raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") + + +require_partition = argmap(_require_partition, (0, 1)) + + +@nx._dispatchable +def intra_community_edges(G, partition): + """Returns the number of intra-community edges for a partition of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The "intra-community edges" are those edges joining a pair of nodes + in the same block of the partition. + + """ + return sum(G.subgraph(block).size() for block in partition) + + +@nx._dispatchable +def inter_community_edges(G, partition): + """Returns the number of inter-community edges for a partition of `G`. + according to the given + partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + The *inter-community edges* are those edges joining a pair of nodes + in different blocks of the partition. + + Implementation note: this function creates an intermediate graph + that may require the same amount of memory as that of `G`. + + """ + # Alternate implementation that does not require constructing a new + # graph object (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in G.edges() if aff[u] != aff[v]) + # + MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph + return nx.quotient_graph(G, partition, create_using=MG).size() + + +@nx._dispatchable +def inter_community_non_edges(G, partition): + """Returns the number of inter-community non-edges according to the + given partition of the nodes of `G`. + + Parameters + ---------- + G : NetworkX graph. + + partition : iterable of sets of nodes + This must be a partition of the nodes of `G`. + + A *non-edge* is a pair of nodes (undirected if `G` is undirected) + that are not adjacent in `G`. The *inter-community non-edges* are + those non-edges on a pair of nodes in different blocks of the + partition. + + Implementation note: this function creates two intermediate graphs, + which may require up to twice the amount of memory as required to + store `G`. + + """ + # Alternate implementation that does not require constructing two + # new graph objects (but does require constructing an affiliation + # dictionary): + # + # aff = dict(chain.from_iterable(((v, block) for v in block) + # for block in partition)) + # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v]) + # + return inter_community_edges(nx.complement(G), partition) + + +@nx._dispatchable(edge_attrs="weight") +def modularity(G, communities, weight="weight", resolution=1): + r"""Returns the modularity of the given partition of the graph. + + Modularity is defined in [1]_ as + + .. math:: + Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) + \delta(c_i,c_j) + + where $m$ is the number of edges (or sum of all edge weights as in [5]_), + $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, + $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and + $j$ are in the same community else 0. + + According to [2]_ (and verified by some algebra) this can be reduced to + + .. math:: + Q = \sum_{c=1}^{n} + \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] + + where the sum iterates over all communities $c$, $m$ is the number of edges, + $L_c$ is the number of intra-community links for community $c$, + $k_c$ is the sum of degrees of the nodes in community $c$, + and $\gamma$ is the resolution parameter. + + The resolution parameter sets an arbitrary tradeoff between intra-group + edges and inter-group edges. More complex grouping patterns can be + discovered by analyzing the same network with multiple values of gamma + and then combining the results [3]_. That said, it is very common to + simply use gamma=1. More on the choice of gamma is in [4]_. + + The second formula is the one actually used in calculation of the modularity. + For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. + + Parameters + ---------- + G : NetworkX Graph + + communities : list or iterable of set of nodes + These node sets must represent a partition of G's nodes. + + weight : string or None, optional (default="weight") + The edge attribute that holds the numerical value used + as a weight. If None or an edge does not have that attribute, + then that edge has weight 1. + + resolution : float (default=1) + If resolution is less than 1, modularity favors larger communities. + Greater than 1 favors smaller communities. + + Returns + ------- + Q : float + The modularity of the partition. + + Raises + ------ + NotAPartition + If `communities` is not a partition of the nodes of `G`. + + Examples + -------- + >>> G = nx.barbell_graph(3, 0) + >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) + 0.35714285714285715 + >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) + 0.35714285714285715 + + References + ---------- + .. [1] M. E. J. Newman "Networks: An Introduction", page 224. + Oxford University Press, 2011. + .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. + "Finding community structure in very large networks." + Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187> + .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" + Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 + .. [4] M. E. J. Newman, "Equivalence between modularity optimization and + maximum likelihood methods for community detection" + Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 + .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large + networks" J. Stat. Mech 10008, 1-12 (2008). + https://doi.org/10.1088/1742-5468/2008/10/P10008 + """ + if not isinstance(communities, list): + communities = list(communities) + if not is_partition(G, communities): + raise NotAPartition(G, communities) + + directed = G.is_directed() + if directed: + out_degree = dict(G.out_degree(weight=weight)) + in_degree = dict(G.in_degree(weight=weight)) + m = sum(out_degree.values()) + norm = 1 / m**2 + else: + out_degree = in_degree = dict(G.degree(weight=weight)) + deg_sum = sum(out_degree.values()) + m = deg_sum / 2 + norm = 1 / deg_sum**2 + + def community_contribution(community): + comm = set(community) + L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) + + out_degree_sum = sum(out_degree[u] for u in comm) + in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum + + return L_c / m - resolution * out_degree_sum * in_degree_sum * norm + + return sum(map(community_contribution, communities)) + + +@require_partition +@nx._dispatchable +def partition_quality(G, partition): + """Returns the coverage and performance of a partition of G. + + The *coverage* of a partition is the ratio of the number of + intra-community edges to the total number of edges in the graph. + + The *performance* of a partition is the number of + intra-community edges plus inter-community non-edges divided by the total + number of potential edges. + + This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links. + + Parameters + ---------- + G : NetworkX graph + + partition : sequence + Partition of the nodes of `G`, represented as a sequence of + sets of nodes (blocks). Each block of the partition represents a + community. + + Returns + ------- + (float, float) + The (coverage, performance) tuple of the partition, as defined above. + + Raises + ------ + NetworkXError + If `partition` is not a valid partition of the nodes of `G`. + + Notes + ----- + If `G` is a multigraph; + - for coverage, the multiplicity of edges is counted + - for performance, the result is -1 (total number of possible edges is not defined) + + References + ---------- + .. [1] Santo Fortunato. + "Community Detection in Graphs". + *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 + <https://arxiv.org/abs/0906.0612> + """ + + node_community = {} + for i, community in enumerate(partition): + for node in community: + node_community[node] = i + + # `performance` is not defined for multigraphs + if not G.is_multigraph(): + # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges` + possible_inter_community_edges = sum( + len(p1) * len(p2) for p1, p2 in combinations(partition, 2) + ) + + if G.is_directed(): + possible_inter_community_edges *= 2 + else: + possible_inter_community_edges = 0 + + # Compute the number of edges in the complete graph -- `n` nodes, + # directed or undirected, depending on `G` + n = len(G) + total_pairs = n * (n - 1) + if not G.is_directed(): + total_pairs //= 2 + + intra_community_edges = 0 + inter_community_non_edges = possible_inter_community_edges + + # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges` + for e in G.edges(): + if node_community[e[0]] == node_community[e[1]]: + intra_community_edges += 1 + else: + inter_community_non_edges -= 1 + + coverage = intra_community_edges / len(G.edges) + + if G.is_multigraph(): + performance = -1.0 + else: + performance = (intra_community_edges + inter_community_non_edges) / total_pairs + + return coverage, performance |