diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py | 500 |
1 files changed, 500 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py new file mode 100644 index 00000000..de9f95ba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @@ -0,0 +1,500 @@ +"""PageRank analysis of graph structure.""" + +from warnings import warn + +import networkx as nx + +__all__ = ["pagerank", "google_matrix"] + + +@nx._dispatchable(edge_attrs="weight") +def pagerank( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + max_iter : integer, optional + Maximum number of iterations in power method eigenvalue solver. + + tol : float, optional + Error tolerance used to check convergence in power method solver. + The iteration will stop after a tolerance of ``len(G) * tol`` is reached. + + nstart : dictionary, optional + Starting value of PageRank iteration for each node. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified). This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value + + Examples + -------- + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = nx.pagerank(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop after + an error tolerance of ``len(G) * tol`` has been reached. If the + number of iterations exceed `max_iter`, a + :exc:`networkx.exception.PowerIterationFailedConvergence` exception + is raised. + + The PageRank algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs by converting each edge in the + directed graph to two edges. + + See Also + -------- + google_matrix + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + + """ + return _pagerank_scipy( + G, alpha, personalization, max_iter, tol, nstart, weight, dangling + ) + + +def _pagerank_python( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + if len(G) == 0: + return {} + + D = G.to_directed() + + # Create a copy in (right) stochastic form + W = nx.stochastic_graph(D, weight=weight) + N = W.number_of_nodes() + + # Choose fixed starting vector if not given + if nstart is None: + x = dict.fromkeys(W, 1.0 / N) + else: + # Normalized nstart vector + s = sum(nstart.values()) + x = {k: v / s for k, v in nstart.items()} + + if personalization is None: + # Assign uniform personalization vector if not given + p = dict.fromkeys(W, 1.0 / N) + else: + s = sum(personalization.values()) + p = {k: v / s for k, v in personalization.items()} + + if dangling is None: + # Use personalization vector if dangling vector not specified + dangling_weights = p + else: + s = sum(dangling.values()) + dangling_weights = {k: v / s for k, v in dangling.items()} + dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] + + # power iteration: make up to max_iter iterations + for _ in range(max_iter): + xlast = x + x = dict.fromkeys(xlast.keys(), 0) + danglesum = alpha * sum(xlast[n] for n in dangling_nodes) + for n in x: + # this matrix multiply looks odd because it is + # doing a left multiply x^T=xlast^T*W + for _, nbr, wt in W.edges(n, data=weight): + x[nbr] += alpha * xlast[n] * wt + x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0) + # check convergence, l1 norm + err = sum(abs(x[n] - xlast[n]) for n in x) + if err < N * tol: + return x + raise nx.PowerIterationFailedConvergence(max_iter) + + +@nx._dispatchable(edge_attrs="weight") +def google_matrix( + G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None +): + """Returns the Google matrix of the graph. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float + The damping factor. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in nodelist. + If nodelist is None, then the ordering is produced by G.nodes(). + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes below). It may be common to have the dangling dict to + be the same as the personalization dict. + + Returns + ------- + A : 2D NumPy ndarray + Google matrix of the graph + + Notes + ----- + The array returned represents the transition matrix that describes the + Markov chain used in PageRank. For PageRank to converge to a unique + solution (i.e., a unique stationary distribution in a Markov chain), the + transition matrix must be irreducible. In other words, it must be that + there exists a path between every pair of nodes in the graph, or else there + is the potential of "rank sinks." + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank + """ + import numpy as np + + if nodelist is None: + nodelist = list(G) + + A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight) + N = len(G) + if N == 0: + return A + + # Personalization vector + if personalization is None: + p = np.repeat(1.0 / N, N) + else: + p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) + if p.sum() == 0: + raise ZeroDivisionError + p /= p.sum() + + # Dangling nodes + if dangling is None: + dangling_weights = p + else: + # Convert the dangling dictionary into an array in nodelist order + dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) + dangling_weights /= dangling_weights.sum() + dangling_nodes = np.where(A.sum(axis=1) == 0)[0] + + # Assign dangling_weights to any dangling nodes (nodes with no out links) + A[dangling_nodes] = dangling_weights + + A /= A.sum(axis=1)[:, np.newaxis] # Normalize rows to sum to 1 + + return alpha * A + (1 - alpha) * p + + +def _pagerank_numpy( + G, alpha=0.85, personalization=None, weight="weight", dangling=None +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value. + + Examples + -------- + >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = _pagerank_numpy(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation uses NumPy's interface to the LAPACK + eigenvalue solvers. This will be the fastest and most accurate + for small graphs. + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank, google_matrix + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + """ + import numpy as np + + if len(G) == 0: + return {} + M = google_matrix( + G, alpha, personalization=personalization, weight=weight, dangling=dangling + ) + # use numpy LAPACK solver + eigenvalues, eigenvectors = np.linalg.eig(M.T) + ind = np.argmax(eigenvalues) + # eigenvector of largest eigenvalue is at ind, normalized + largest = np.array(eigenvectors[:, ind]).flatten().real + norm = largest.sum() + return dict(zip(G, map(float, largest / norm))) + + +def _pagerank_scipy( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + max_iter : integer, optional + Maximum number of iterations in power method eigenvalue solver. + + tol : float, optional + Error tolerance used to check convergence in power method solver. + The iteration will stop after a tolerance of ``len(G) * tol`` is reached. + + nstart : dictionary, optional + Starting value of PageRank iteration for each node. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value + + Examples + -------- + >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = _pagerank_scipy(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation uses power iteration with a SciPy + sparse matrix representation. + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + """ + import numpy as np + import scipy as sp + + N = len(G) + if N == 0: + return {} + + nodelist = list(G) + A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float) + S = A.sum(axis=1) + S[S != 0] = 1.0 / S[S != 0] + # TODO: csr_array + Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) + A = Q @ A + + # initial vector + if nstart is None: + x = np.repeat(1.0 / N, N) + else: + x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float) + x /= x.sum() + + # Personalization vector + if personalization is None: + p = np.repeat(1.0 / N, N) + else: + p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) + if p.sum() == 0: + raise ZeroDivisionError + p /= p.sum() + # Dangling nodes + if dangling is None: + dangling_weights = p + else: + # Convert the dangling dictionary into an array in nodelist order + dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) + dangling_weights /= dangling_weights.sum() + is_dangling = np.where(S == 0)[0] + + # power iteration: make up to max_iter iterations + for _ in range(max_iter): + xlast = x + x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p + # check convergence, l1 norm + err = np.absolute(x - xlast).sum() + if err < N * tol: + return dict(zip(nodelist, map(float, x))) + raise nx.PowerIterationFailedConvergence(max_iter) |