diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis')
6 files changed, 1131 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py new file mode 100644 index 00000000..6009f000 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py @@ -0,0 +1,2 @@ +from networkx.algorithms.link_analysis.hits_alg import * +from networkx.algorithms.link_analysis.pagerank_alg import * diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py new file mode 100644 index 00000000..d9e3069d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py @@ -0,0 +1,337 @@ +"""Hubs and authorities analysis of graph structure.""" + +import networkx as nx + +__all__ = ["hits"] + + +@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}}) +def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + max_iter : integer, optional + Maximum number of iterations in power method. + + tol : float, optional + Error tolerance used to check convergence in power method iteration. + + nstart : dictionary, optional + Starting value of each node for power method iteration. + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + Examples + -------- + >>> G = nx.path_graph(4) + >>> h, a = nx.hits(G) + + Notes + ----- + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop + after max_iter iterations or an error tolerance of + number_of_nodes(G)*tol has been reached. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-32, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + import numpy as np + import scipy as sp + + if len(G) == 0: + return {}, {} + A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float) + + if nstart is not None: + nstart = np.array(list(nstart.values())) + if max_iter <= 0: + raise nx.PowerIterationFailedConvergence(max_iter) + try: + _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol) + except sp.sparse.linalg.ArpackNoConvergence as exc: + raise nx.PowerIterationFailedConvergence(max_iter) from exc + + a = vt.flatten().real + h = A @ a + if normalized: + h /= h.sum() + a /= a.sum() + hubs = dict(zip(G, map(float, h))) + authorities = dict(zip(G, map(float, a))) + return hubs, authorities + + +def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True): + if isinstance(G, nx.MultiGraph | nx.MultiDiGraph): + raise Exception("hits() not defined for graphs with multiedges.") + if len(G) == 0: + return {}, {} + # choose fixed starting vector if not given + if nstart is None: + h = dict.fromkeys(G, 1.0 / G.number_of_nodes()) + else: + h = nstart + # normalize starting vector + s = 1.0 / sum(h.values()) + for k in h: + h[k] *= s + for _ in range(max_iter): # power iteration: make up to max_iter iterations + hlast = h + h = dict.fromkeys(hlast.keys(), 0) + a = dict.fromkeys(hlast.keys(), 0) + # this "matrix multiply" looks odd because it is + # doing a left multiply a^T=hlast^T*G + for n in h: + for nbr in G[n]: + a[nbr] += hlast[n] * G[n][nbr].get("weight", 1) + # now multiply h=Ga + for n in h: + for nbr in G[n]: + h[n] += a[nbr] * G[n][nbr].get("weight", 1) + # normalize vector + s = 1.0 / max(h.values()) + for n in h: + h[n] *= s + # normalize vector + s = 1.0 / max(a.values()) + for n in a: + a[n] *= s + # check convergence, l1 norm + err = sum(abs(h[n] - hlast[n]) for n in h) + if err < tol: + break + else: + raise nx.PowerIterationFailedConvergence(max_iter) + if normalized: + s = 1.0 / sum(a.values()) + for n in a: + a[n] *= s + s = 1.0 / sum(h.values()) + for n in h: + h[n] *= s + return h, a + + +def _hits_numpy(G, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Examples + -------- + >>> G = nx.path_graph(4) + + The `hubs` and `authorities` are given by the eigenvectors corresponding to the + maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively. + + The ``hubs`` and ``authority`` matrices are computed from the adjacency + matrix: + + >>> adj_ary = nx.to_numpy_array(G) + >>> hubs_matrix = adj_ary @ adj_ary.T + >>> authority_matrix = adj_ary.T @ adj_ary + + `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue + of the respective matrices to the nodes in `G`: + + >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy + >>> hubs, authority = _hits_numpy(G) + + Notes + ----- + The eigenvector calculation uses NumPy's interface to LAPACK. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-32, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + import numpy as np + + if len(G) == 0: + return {}, {} + adj_ary = nx.to_numpy_array(G) + # Hub matrix + H = adj_ary @ adj_ary.T + e, ev = np.linalg.eig(H) + h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue + # Authority matrix + A = adj_ary.T @ adj_ary + e, ev = np.linalg.eig(A) + a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue + if normalized: + h /= h.sum() + a /= a.sum() + else: + h /= h.max() + a /= a.max() + hubs = dict(zip(G, map(float, h))) + authorities = dict(zip(G, map(float, a))) + return hubs, authorities + + +def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True): + """Returns HITS hubs and authorities values for nodes. + + + The HITS algorithm computes two numbers for a node. + Authorities estimates the node value based on the incoming links. + Hubs estimates the node value based on outgoing links. + + Parameters + ---------- + G : graph + A NetworkX graph + + max_iter : integer, optional + Maximum number of iterations in power method. + + tol : float, optional + Error tolerance used to check convergence in power method iteration. + + nstart : dictionary, optional + Starting value of each node for power method iteration. + + normalized : bool (default=True) + Normalize results by the sum of all of the values. + + Returns + ------- + (hubs,authorities) : two-tuple of dictionaries + Two dictionaries keyed by node containing the hub and authority + values. + + Examples + -------- + >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy + >>> G = nx.path_graph(4) + >>> h, a = _hits_scipy(G) + + Notes + ----- + This implementation uses SciPy sparse matrices. + + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop + after max_iter iterations or an error tolerance of + number_of_nodes(G)*tol has been reached. + + The HITS algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs. + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Jon Kleinberg, + Authoritative sources in a hyperlinked environment + Journal of the ACM 46 (5): 604-632, 1999. + doi:10.1145/324133.324140. + http://www.cs.cornell.edu/home/kleinber/auth.pdf. + """ + import numpy as np + + if len(G) == 0: + return {}, {} + A = nx.to_scipy_sparse_array(G, nodelist=list(G)) + (n, _) = A.shape # should be square + ATA = A.T @ A # authority matrix + # choose fixed starting vector if not given + if nstart is None: + x = np.ones((n, 1)) / n + else: + x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float) + x /= x.sum() + + # power iteration on authority matrix + i = 0 + while True: + xlast = x + x = ATA @ x + x /= x.max() + # check convergence, l1 norm + err = np.absolute(x - xlast).sum() + if err < tol: + break + if i > max_iter: + raise nx.PowerIterationFailedConvergence(max_iter) + i += 1 + + a = x.flatten() + h = A @ a + if normalized: + h /= h.sum() + a /= a.sum() + hubs = dict(zip(G, map(float, h))) + authorities = dict(zip(G, map(float, a))) + return hubs, authorities diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py new file mode 100644 index 00000000..de9f95ba --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py @@ -0,0 +1,500 @@ +"""PageRank analysis of graph structure.""" + +from warnings import warn + +import networkx as nx + +__all__ = ["pagerank", "google_matrix"] + + +@nx._dispatchable(edge_attrs="weight") +def pagerank( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + max_iter : integer, optional + Maximum number of iterations in power method eigenvalue solver. + + tol : float, optional + Error tolerance used to check convergence in power method solver. + The iteration will stop after a tolerance of ``len(G) * tol`` is reached. + + nstart : dictionary, optional + Starting value of PageRank iteration for each node. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified). This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value + + Examples + -------- + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = nx.pagerank(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation is done by the power iteration method + and has no guarantee of convergence. The iteration will stop after + an error tolerance of ``len(G) * tol`` has been reached. If the + number of iterations exceed `max_iter`, a + :exc:`networkx.exception.PowerIterationFailedConvergence` exception + is raised. + + The PageRank algorithm was designed for directed graphs but this + algorithm does not check if the input graph is directed and will + execute on undirected graphs by converting each edge in the + directed graph to two edges. + + See Also + -------- + google_matrix + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + + """ + return _pagerank_scipy( + G, alpha, personalization, max_iter, tol, nstart, weight, dangling + ) + + +def _pagerank_python( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + if len(G) == 0: + return {} + + D = G.to_directed() + + # Create a copy in (right) stochastic form + W = nx.stochastic_graph(D, weight=weight) + N = W.number_of_nodes() + + # Choose fixed starting vector if not given + if nstart is None: + x = dict.fromkeys(W, 1.0 / N) + else: + # Normalized nstart vector + s = sum(nstart.values()) + x = {k: v / s for k, v in nstart.items()} + + if personalization is None: + # Assign uniform personalization vector if not given + p = dict.fromkeys(W, 1.0 / N) + else: + s = sum(personalization.values()) + p = {k: v / s for k, v in personalization.items()} + + if dangling is None: + # Use personalization vector if dangling vector not specified + dangling_weights = p + else: + s = sum(dangling.values()) + dangling_weights = {k: v / s for k, v in dangling.items()} + dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0] + + # power iteration: make up to max_iter iterations + for _ in range(max_iter): + xlast = x + x = dict.fromkeys(xlast.keys(), 0) + danglesum = alpha * sum(xlast[n] for n in dangling_nodes) + for n in x: + # this matrix multiply looks odd because it is + # doing a left multiply x^T=xlast^T*W + for _, nbr, wt in W.edges(n, data=weight): + x[nbr] += alpha * xlast[n] * wt + x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0) + # check convergence, l1 norm + err = sum(abs(x[n] - xlast[n]) for n in x) + if err < N * tol: + return x + raise nx.PowerIterationFailedConvergence(max_iter) + + +@nx._dispatchable(edge_attrs="weight") +def google_matrix( + G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None +): + """Returns the Google matrix of the graph. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float + The damping factor. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + nodelist : list, optional + The rows and columns are ordered according to the nodes in nodelist. + If nodelist is None, then the ordering is produced by G.nodes(). + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes below). It may be common to have the dangling dict to + be the same as the personalization dict. + + Returns + ------- + A : 2D NumPy ndarray + Google matrix of the graph + + Notes + ----- + The array returned represents the transition matrix that describes the + Markov chain used in PageRank. For PageRank to converge to a unique + solution (i.e., a unique stationary distribution in a Markov chain), the + transition matrix must be irreducible. In other words, it must be that + there exists a path between every pair of nodes in the graph, or else there + is the potential of "rank sinks." + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank + """ + import numpy as np + + if nodelist is None: + nodelist = list(G) + + A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight) + N = len(G) + if N == 0: + return A + + # Personalization vector + if personalization is None: + p = np.repeat(1.0 / N, N) + else: + p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) + if p.sum() == 0: + raise ZeroDivisionError + p /= p.sum() + + # Dangling nodes + if dangling is None: + dangling_weights = p + else: + # Convert the dangling dictionary into an array in nodelist order + dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) + dangling_weights /= dangling_weights.sum() + dangling_nodes = np.where(A.sum(axis=1) == 0)[0] + + # Assign dangling_weights to any dangling nodes (nodes with no out links) + A[dangling_nodes] = dangling_weights + + A /= A.sum(axis=1)[:, np.newaxis] # Normalize rows to sum to 1 + + return alpha * A + (1 - alpha) * p + + +def _pagerank_numpy( + G, alpha=0.85, personalization=None, weight="weight", dangling=None +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value. + + Examples + -------- + >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = _pagerank_numpy(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation uses NumPy's interface to the LAPACK + eigenvalue solvers. This will be the fastest and most accurate + for small graphs. + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank, google_matrix + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + """ + import numpy as np + + if len(G) == 0: + return {} + M = google_matrix( + G, alpha, personalization=personalization, weight=weight, dangling=dangling + ) + # use numpy LAPACK solver + eigenvalues, eigenvectors = np.linalg.eig(M.T) + ind = np.argmax(eigenvalues) + # eigenvector of largest eigenvalue is at ind, normalized + largest = np.array(eigenvectors[:, ind]).flatten().real + norm = largest.sum() + return dict(zip(G, map(float, largest / norm))) + + +def _pagerank_scipy( + G, + alpha=0.85, + personalization=None, + max_iter=100, + tol=1.0e-6, + nstart=None, + weight="weight", + dangling=None, +): + """Returns the PageRank of the nodes in the graph. + + PageRank computes a ranking of the nodes in the graph G based on + the structure of the incoming links. It was originally designed as + an algorithm to rank web pages. + + Parameters + ---------- + G : graph + A NetworkX graph. Undirected graphs will be converted to a directed + graph with two directed edges for each undirected edge. + + alpha : float, optional + Damping parameter for PageRank, default=0.85. + + personalization: dict, optional + The "personalization vector" consisting of a dictionary with a + key some subset of graph nodes and personalization value each of those. + At least one personalization value must be non-zero. + If not specified, a nodes personalization value will be zero. + By default, a uniform distribution is used. + + max_iter : integer, optional + Maximum number of iterations in power method eigenvalue solver. + + tol : float, optional + Error tolerance used to check convergence in power method solver. + The iteration will stop after a tolerance of ``len(G) * tol`` is reached. + + nstart : dictionary, optional + Starting value of PageRank iteration for each node. + + weight : key, optional + Edge data key to use as weight. If None weights are set to 1. + + dangling: dict, optional + The outedges to be assigned to any "dangling" nodes, i.e., nodes without + any outedges. The dict key is the node the outedge points to and the dict + value is the weight of that outedge. By default, dangling nodes are given + outedges according to the personalization vector (uniform if not + specified) This must be selected to result in an irreducible transition + matrix (see notes under google_matrix). It may be common to have the + dangling dict to be the same as the personalization dict. + + Returns + ------- + pagerank : dictionary + Dictionary of nodes with PageRank as value + + Examples + -------- + >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy + >>> G = nx.DiGraph(nx.path_graph(4)) + >>> pr = _pagerank_scipy(G, alpha=0.9) + + Notes + ----- + The eigenvector calculation uses power iteration with a SciPy + sparse matrix representation. + + This implementation works with Multi(Di)Graphs. For multigraphs the + weight between two nodes is set to be the sum of all edge weights + between those nodes. + + See Also + -------- + pagerank + + Raises + ------ + PowerIterationFailedConvergence + If the algorithm fails to converge to the specified tolerance + within the specified number of iterations of the power iteration + method. + + References + ---------- + .. [1] A. Langville and C. Meyer, + "A survey of eigenvector methods of web information retrieval." + http://citeseer.ist.psu.edu/713792.html + .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry, + The PageRank citation ranking: Bringing order to the Web. 1999 + http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf + """ + import numpy as np + import scipy as sp + + N = len(G) + if N == 0: + return {} + + nodelist = list(G) + A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float) + S = A.sum(axis=1) + S[S != 0] = 1.0 / S[S != 0] + # TODO: csr_array + Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) + A = Q @ A + + # initial vector + if nstart is None: + x = np.repeat(1.0 / N, N) + else: + x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float) + x /= x.sum() + + # Personalization vector + if personalization is None: + p = np.repeat(1.0 / N, N) + else: + p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float) + if p.sum() == 0: + raise ZeroDivisionError + p /= p.sum() + # Dangling nodes + if dangling is None: + dangling_weights = p + else: + # Convert the dangling dictionary into an array in nodelist order + dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float) + dangling_weights /= dangling_weights.sum() + is_dangling = np.where(S == 0)[0] + + # power iteration: make up to max_iter iterations + for _ in range(max_iter): + xlast = x + x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p + # check convergence, l1 norm + err = np.absolute(x - xlast).sum() + if err < N * tol: + return dict(zip(nodelist, map(float, x))) + raise nx.PowerIterationFailedConvergence(max_iter) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py new file mode 100644 index 00000000..54713eb4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py @@ -0,0 +1,78 @@ +import pytest + +import networkx as nx + +np = pytest.importorskip("numpy") +sp = pytest.importorskip("scipy") + +from networkx.algorithms.link_analysis.hits_alg import ( + _hits_numpy, + _hits_python, + _hits_scipy, +) + +# Example from +# A. Langville and C. Meyer, "A survey of eigenvector methods of web +# information retrieval." http://citeseer.ist.psu.edu/713792.html + + +class TestHITS: + @classmethod + def setup_class(cls): + G = nx.DiGraph() + + edges = [(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)] + + G.add_edges_from(edges, weight=1) + cls.G = G + cls.G.a = dict( + zip(sorted(G), [0.000000, 0.000000, 0.366025, 0.133975, 0.500000, 0.000000]) + ) + cls.G.h = dict( + zip(sorted(G), [0.366025, 0.000000, 0.211325, 0.000000, 0.211325, 0.211325]) + ) + + def test_hits_numpy(self): + G = self.G + h, a = _hits_numpy(G) + for n in G: + assert h[n] == pytest.approx(G.h[n], abs=1e-4) + for n in G: + assert a[n] == pytest.approx(G.a[n], abs=1e-4) + + @pytest.mark.parametrize("hits_alg", (nx.hits, _hits_python, _hits_scipy)) + def test_hits(self, hits_alg): + G = self.G + h, a = hits_alg(G, tol=1.0e-08) + for n in G: + assert h[n] == pytest.approx(G.h[n], abs=1e-4) + for n in G: + assert a[n] == pytest.approx(G.a[n], abs=1e-4) + nstart = {i: 1.0 / 2 for i in G} + h, a = hits_alg(G, nstart=nstart) + for n in G: + assert h[n] == pytest.approx(G.h[n], abs=1e-4) + for n in G: + assert a[n] == pytest.approx(G.a[n], abs=1e-4) + + def test_empty(self): + G = nx.Graph() + assert nx.hits(G) == ({}, {}) + assert _hits_numpy(G) == ({}, {}) + assert _hits_python(G) == ({}, {}) + assert _hits_scipy(G) == ({}, {}) + + def test_hits_not_convergent(self): + G = nx.path_graph(50) + with pytest.raises(nx.PowerIterationFailedConvergence): + _hits_scipy(G, max_iter=1) + with pytest.raises(nx.PowerIterationFailedConvergence): + _hits_python(G, max_iter=1) + with pytest.raises(nx.PowerIterationFailedConvergence): + _hits_scipy(G, max_iter=0) + with pytest.raises(nx.PowerIterationFailedConvergence): + _hits_python(G, max_iter=0) + with pytest.raises(nx.PowerIterationFailedConvergence): + nx.hits(G, max_iter=0) + with pytest.raises(nx.PowerIterationFailedConvergence): + nx.hits(G, max_iter=1) diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py new file mode 100644 index 00000000..44038fd4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py @@ -0,0 +1,214 @@ +import random + +import pytest + +import networkx as nx +from networkx.classes.tests import dispatch_interface + +np = pytest.importorskip("numpy") +pytest.importorskip("scipy") + +from networkx.algorithms.link_analysis.pagerank_alg import ( + _pagerank_numpy, + _pagerank_python, + _pagerank_scipy, +) + +# Example from +# A. Langville and C. Meyer, "A survey of eigenvector methods of web +# information retrieval." http://citeseer.ist.psu.edu/713792.html + + +class TestPageRank: + @classmethod + def setup_class(cls): + G = nx.DiGraph() + edges = [ + (1, 2), + (1, 3), + # 2 is a dangling node + (3, 1), + (3, 2), + (3, 5), + (4, 5), + (4, 6), + (5, 4), + (5, 6), + (6, 4), + ] + G.add_edges_from(edges) + cls.G = G + cls.G.pagerank = dict( + zip( + sorted(G), + [ + 0.03721197, + 0.05395735, + 0.04150565, + 0.37508082, + 0.20599833, + 0.28624589, + ], + ) + ) + cls.dangling_node_index = 1 + cls.dangling_edges = {1: 2, 2: 3, 3: 0, 4: 0, 5: 0, 6: 0} + cls.G.dangling_pagerank = dict( + zip( + sorted(G), + [0.10844518, 0.18618601, 0.0710892, 0.2683668, 0.15919783, 0.20671497], + ) + ) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python)) + def test_pagerank(self, alg): + G = self.G + p = alg(G, alpha=0.9, tol=1.0e-08) + for n in G: + assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4) + + nstart = {n: random.random() for n in G} + p = alg(G, alpha=0.9, tol=1.0e-08, nstart=nstart) + for n in G: + assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python)) + def test_pagerank_max_iter(self, alg): + with pytest.raises(nx.PowerIterationFailedConvergence): + alg(self.G, max_iter=0) + + def test_numpy_pagerank(self): + G = self.G + p = _pagerank_numpy(G, alpha=0.9) + for n in G: + assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4) + + def test_google_matrix(self): + G = self.G + M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G)) + _, ev = np.linalg.eig(M.T) + p = ev[:, 0] / ev[:, 0].sum() + for a, b in zip(p, self.G.pagerank.values()): + assert a == pytest.approx(b, abs=1e-7) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, _pagerank_numpy)) + def test_personalization(self, alg): + G = nx.complete_graph(4) + personalize = {0: 1, 1: 1, 2: 4, 3: 4} + answer = { + 0: 0.23246732615667579, + 1: 0.23246732615667579, + 2: 0.267532673843324, + 3: 0.2675326738433241, + } + p = alg(G, alpha=0.85, personalization=personalize) + for n in G: + assert p[n] == pytest.approx(answer[n], abs=1e-4) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, nx.google_matrix)) + def test_zero_personalization_vector(self, alg): + G = nx.complete_graph(4) + personalize = {0: 0, 1: 0, 2: 0, 3: 0} + pytest.raises(ZeroDivisionError, alg, G, personalization=personalize) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python)) + def test_one_nonzero_personalization_value(self, alg): + G = nx.complete_graph(4) + personalize = {0: 0, 1: 0, 2: 0, 3: 1} + answer = { + 0: 0.22077931820379187, + 1: 0.22077931820379187, + 2: 0.22077931820379187, + 3: 0.3376620453886241, + } + p = alg(G, alpha=0.85, personalization=personalize) + for n in G: + assert p[n] == pytest.approx(answer[n], abs=1e-4) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python)) + def test_incomplete_personalization(self, alg): + G = nx.complete_graph(4) + personalize = {3: 1} + answer = { + 0: 0.22077931820379187, + 1: 0.22077931820379187, + 2: 0.22077931820379187, + 3: 0.3376620453886241, + } + p = alg(G, alpha=0.85, personalization=personalize) + for n in G: + assert p[n] == pytest.approx(answer[n], abs=1e-4) + + def test_dangling_matrix(self): + """ + Tests that the google_matrix doesn't change except for the dangling + nodes. + """ + G = self.G + dangling = self.dangling_edges + dangling_sum = sum(dangling.values()) + M1 = nx.google_matrix(G, personalization=dangling) + M2 = nx.google_matrix(G, personalization=dangling, dangling=dangling) + for i in range(len(G)): + for j in range(len(G)): + if i == self.dangling_node_index and (j + 1) in dangling: + assert M2[i, j] == pytest.approx( + dangling[j + 1] / dangling_sum, abs=1e-4 + ) + else: + assert M2[i, j] == pytest.approx(M1[i, j], abs=1e-4) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, _pagerank_numpy)) + def test_dangling_pagerank(self, alg): + pr = alg(self.G, dangling=self.dangling_edges) + for n in self.G: + assert pr[n] == pytest.approx(self.G.dangling_pagerank[n], abs=1e-4) + + def test_empty(self): + G = nx.Graph() + assert nx.pagerank(G) == {} + assert _pagerank_python(G) == {} + assert _pagerank_numpy(G) == {} + assert nx.google_matrix(G).shape == (0, 0) + + @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python)) + def test_multigraph(self, alg): + G = nx.MultiGraph() + G.add_edges_from([(1, 2), (1, 2), (1, 2), (2, 3), (2, 3), ("3", 3), ("3", 3)]) + answer = { + 1: 0.21066048614468322, + 2: 0.3395308825985378, + 3: 0.28933951385531687, + "3": 0.16046911740146227, + } + p = alg(G) + for n in G: + assert p[n] == pytest.approx(answer[n], abs=1e-4) + + +class TestPageRankScipy(TestPageRank): + def test_scipy_pagerank(self): + G = self.G + p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08) + for n in G: + assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4) + personalize = {n: random.random() for n in G} + p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08, personalization=personalize) + + nstart = {n: random.random() for n in G} + p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08, nstart=nstart) + for n in G: + assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4) + + def test_scipy_pagerank_max_iter(self): + with pytest.raises(nx.PowerIterationFailedConvergence): + _pagerank_scipy(self.G, max_iter=0) + + def test_dangling_scipy_pagerank(self): + pr = _pagerank_scipy(self.G, dangling=self.dangling_edges) + for n in self.G: + assert pr[n] == pytest.approx(self.G.dangling_pagerank[n], abs=1e-4) + + def test_empty_scipy(self): + G = nx.Graph() + assert _pagerank_scipy(G) == {} |