author    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
two versions of R2R are here (HEAD -> master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis')
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py                 2
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py               337
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py           500
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py           0
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py         78
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py    214
6 files changed, 1131 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py
new file mode 100644
index 00000000..6009f000
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/__init__.py
@@ -0,0 +1,2 @@
+from networkx.algorithms.link_analysis.hits_alg import *
+from networkx.algorithms.link_analysis.pagerank_alg import *
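The two star imports above re-export the subpackage's public entry points, which are also reachable from the top-level networkx namespace. A minimal usage sketch, assuming a standard networkx install:

    import networkx as nx

    G = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
    h, a = nx.hits(G)    # hub and authority dicts, keyed by node
    pr = nx.pagerank(G)  # PageRank dict, keyed by node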
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py
new file mode 100644
index 00000000..d9e3069d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/hits_alg.py
@@ -0,0 +1,337 @@
+"""Hubs and authorities analysis of graph structure."""
+
+import networkx as nx
+
+__all__ = ["hits"]
+
+
+@nx._dispatchable(preserve_edge_attrs={"G": {"weight": 1}})
+def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
+ """Returns HITS hubs and authorities values for nodes.
+
+    The HITS algorithm computes two numbers for each node.
+    The authority value estimates a node's importance from its incoming
+    links; the hub value estimates it from its outgoing links.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph
+
+ max_iter : integer, optional
+ Maximum number of iterations in power method.
+
+ tol : float, optional
+ Error tolerance used to check convergence in power method iteration.
+
+ nstart : dictionary, optional
+ Starting value of each node for power method iteration.
+
+ normalized : bool (default=True)
+ Normalize results by the sum of all of the values.
+
+ Returns
+ -------
+    (hubs, authorities) : two-tuple of dictionaries
+ Two dictionaries keyed by node containing the hub and authority
+ values.
+
+ Raises
+ ------
+ PowerIterationFailedConvergence
+ If the algorithm fails to converge to the specified tolerance
+ within the specified number of iterations of the power iteration
+ method.
+
+ Examples
+ --------
+ >>> G = nx.path_graph(4)
+ >>> h, a = nx.hits(G)
+
+ Notes
+ -----
+    The eigenvector calculation is done by the power iteration method
+    and has no guarantee of convergence. The iteration will stop after
+    ``max_iter`` iterations or once an error tolerance of
+    ``number_of_nodes(G) * tol`` has been reached.
+
+ The HITS algorithm was designed for directed graphs but this
+ algorithm does not check if the input graph is directed and will
+ execute on undirected graphs.
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Jon Kleinberg,
+ Authoritative sources in a hyperlinked environment
+       Journal of the ACM 46 (5): 604-632, 1999.
+ doi:10.1145/324133.324140.
+ http://www.cs.cornell.edu/home/kleinber/auth.pdf.
+ """
+ import numpy as np
+ import scipy as sp
+
+ if len(G) == 0:
+ return {}, {}
+ A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)
+
+ if nstart is not None:
+ nstart = np.array(list(nstart.values()))
+ if max_iter <= 0:
+ raise nx.PowerIterationFailedConvergence(max_iter)
+ try:
+ _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)
+ except sp.sparse.linalg.ArpackNoConvergence as exc:
+ raise nx.PowerIterationFailedConvergence(max_iter) from exc
+
+ a = vt.flatten().real
+ h = A @ a
+ if normalized:
+ h /= h.sum()
+ a /= a.sum()
+ hubs = dict(zip(G, map(float, h)))
+ authorities = dict(zip(G, map(float, a)))
+ return hubs, authorities
+
+
+def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
+ if isinstance(G, nx.MultiGraph | nx.MultiDiGraph):
+ raise Exception("hits() not defined for graphs with multiedges.")
+ if len(G) == 0:
+ return {}, {}
+ # choose fixed starting vector if not given
+ if nstart is None:
+ h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
+ else:
+ h = nstart
+ # normalize starting vector
+ s = 1.0 / sum(h.values())
+ for k in h:
+ h[k] *= s
+ for _ in range(max_iter): # power iteration: make up to max_iter iterations
+ hlast = h
+ h = dict.fromkeys(hlast.keys(), 0)
+ a = dict.fromkeys(hlast.keys(), 0)
+ # this "matrix multiply" looks odd because it is
+ # doing a left multiply a^T=hlast^T*G
+ for n in h:
+ for nbr in G[n]:
+ a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
+ # now multiply h=Ga
+ for n in h:
+ for nbr in G[n]:
+ h[n] += a[nbr] * G[n][nbr].get("weight", 1)
+ # normalize vector
+ s = 1.0 / max(h.values())
+ for n in h:
+ h[n] *= s
+ # normalize vector
+ s = 1.0 / max(a.values())
+ for n in a:
+ a[n] *= s
+ # check convergence, l1 norm
+ err = sum(abs(h[n] - hlast[n]) for n in h)
+ if err < tol:
+ break
+ else:
+ raise nx.PowerIterationFailedConvergence(max_iter)
+ if normalized:
+ s = 1.0 / sum(a.values())
+ for n in a:
+ a[n] *= s
+ s = 1.0 / sum(h.values())
+ for n in h:
+ h[n] *= s
+ return h, a
+
+
+def _hits_numpy(G, normalized=True):
+ """Returns HITS hubs and authorities values for nodes.
+
+    The HITS algorithm computes two numbers for each node.
+    The authority value estimates a node's importance from its incoming
+    links; the hub value estimates it from its outgoing links.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph
+
+ normalized : bool (default=True)
+ Normalize results by the sum of all of the values.
+
+ Returns
+ -------
+    (hubs, authorities) : two-tuple of dictionaries
+ Two dictionaries keyed by node containing the hub and authority
+ values.
+
+ Examples
+ --------
+ >>> G = nx.path_graph(4)
+
+ The `hubs` and `authorities` are given by the eigenvectors corresponding to the
+ maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.
+
+ The ``hubs`` and ``authority`` matrices are computed from the adjacency
+ matrix:
+
+ >>> adj_ary = nx.to_numpy_array(G)
+ >>> hubs_matrix = adj_ary @ adj_ary.T
+ >>> authority_matrix = adj_ary.T @ adj_ary
+
+ `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue
+ of the respective matrices to the nodes in `G`:
+
+ >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy
+ >>> hubs, authority = _hits_numpy(G)
+
+ Notes
+ -----
+ The eigenvector calculation uses NumPy's interface to LAPACK.
+
+ The HITS algorithm was designed for directed graphs but this
+ algorithm does not check if the input graph is directed and will
+ execute on undirected graphs.
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Jon Kleinberg,
+ Authoritative sources in a hyperlinked environment
+       Journal of the ACM 46 (5): 604-632, 1999.
+ doi:10.1145/324133.324140.
+ http://www.cs.cornell.edu/home/kleinber/auth.pdf.
+ """
+ import numpy as np
+
+ if len(G) == 0:
+ return {}, {}
+ adj_ary = nx.to_numpy_array(G)
+ # Hub matrix
+ H = adj_ary @ adj_ary.T
+ e, ev = np.linalg.eig(H)
+ h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
+ # Authority matrix
+ A = adj_ary.T @ adj_ary
+ e, ev = np.linalg.eig(A)
+ a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
+ if normalized:
+ h /= h.sum()
+ a /= a.sum()
+ else:
+ h /= h.max()
+ a /= a.max()
+ hubs = dict(zip(G, map(float, h)))
+ authorities = dict(zip(G, map(float, a)))
+ return hubs, authorities
+
+
+def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
+ """Returns HITS hubs and authorities values for nodes.
+
+    The HITS algorithm computes two numbers for each node.
+    The authority value estimates a node's importance from its incoming
+    links; the hub value estimates it from its outgoing links.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph
+
+ max_iter : integer, optional
+ Maximum number of iterations in power method.
+
+ tol : float, optional
+ Error tolerance used to check convergence in power method iteration.
+
+ nstart : dictionary, optional
+ Starting value of each node for power method iteration.
+
+ normalized : bool (default=True)
+ Normalize results by the sum of all of the values.
+
+ Returns
+ -------
+    (hubs, authorities) : two-tuple of dictionaries
+ Two dictionaries keyed by node containing the hub and authority
+ values.
+
+ Examples
+ --------
+ >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy
+ >>> G = nx.path_graph(4)
+ >>> h, a = _hits_scipy(G)
+
+ Notes
+ -----
+ This implementation uses SciPy sparse matrices.
+
+    The eigenvector calculation is done by the power iteration method
+    and has no guarantee of convergence. The iteration will stop after
+    ``max_iter`` iterations or once an error tolerance of
+    ``number_of_nodes(G) * tol`` has been reached.
+
+ The HITS algorithm was designed for directed graphs but this
+ algorithm does not check if the input graph is directed and will
+ execute on undirected graphs.
+
+ Raises
+ ------
+ PowerIterationFailedConvergence
+ If the algorithm fails to converge to the specified tolerance
+ within the specified number of iterations of the power iteration
+ method.
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Jon Kleinberg,
+ Authoritative sources in a hyperlinked environment
+ Journal of the ACM 46 (5): 604-632, 1999.
+ doi:10.1145/324133.324140.
+ http://www.cs.cornell.edu/home/kleinber/auth.pdf.
+ """
+ import numpy as np
+
+ if len(G) == 0:
+ return {}, {}
+ A = nx.to_scipy_sparse_array(G, nodelist=list(G))
+ (n, _) = A.shape # should be square
+ ATA = A.T @ A # authority matrix
+ # choose fixed starting vector if not given
+ if nstart is None:
+ x = np.ones((n, 1)) / n
+ else:
+ x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
+ x /= x.sum()
+
+ # power iteration on authority matrix
+ i = 0
+ while True:
+ xlast = x
+ x = ATA @ x
+ x /= x.max()
+ # check convergence, l1 norm
+ err = np.absolute(x - xlast).sum()
+ if err < tol:
+ break
+ if i > max_iter:
+ raise nx.PowerIterationFailedConvergence(max_iter)
+ i += 1
+
+ a = x.flatten()
+ h = A @ a
+ if normalized:
+ h /= h.sum()
+ a /= a.sum()
+ hubs = dict(zip(G, map(float, h)))
+ authorities = dict(zip(G, map(float, a)))
+ return hubs, authorities
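hits_alg.py thus ships one public solver (the SVD-based `hits`) plus three private reference implementations (`_hits_python`, `_hits_numpy`, `_hits_scipy`). A hedged cross-check sketch on the Langville-Meyer example graph used by the tests below; since the tests pin each implementation to the same fixtures within 1e-4, pairwise agreement to roughly twice that is expected:

    import networkx as nx
    from networkx.algorithms.link_analysis.hits_alg import _hits_numpy, _hits_python

    G = nx.DiGraph([(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)])
    h_svd, a_svd = nx.hits(G, tol=1e-8)      # public SVD-based solver
    h_py, a_py = _hits_python(G, tol=1e-8)   # pure-Python power iteration
    h_np, a_np = _hits_numpy(G)              # dense eigendecomposition
    for n in G:
        assert abs(h_svd[n] - h_py[n]) < 1e-3 and abs(a_svd[n] - a_py[n]) < 1e-3
        assert abs(h_svd[n] - h_np[n]) < 1e-3 and abs(a_svd[n] - a_np[n]) < 1e-3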
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
new file mode 100644
index 00000000..de9f95ba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
@@ -0,0 +1,500 @@
+"""PageRank analysis of graph structure."""
+
+from warnings import warn
+
+import networkx as nx
+
+__all__ = ["pagerank", "google_matrix"]
+
+
+@nx._dispatchable(edge_attrs="weight")
+def pagerank(
+ G,
+ alpha=0.85,
+ personalization=None,
+ max_iter=100,
+ tol=1.0e-6,
+ nstart=None,
+ weight="weight",
+ dangling=None,
+):
+ """Returns the PageRank of the nodes in the graph.
+
+ PageRank computes a ranking of the nodes in the graph G based on
+ the structure of the incoming links. It was originally designed as
+ an algorithm to rank web pages.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph. Undirected graphs will be converted to a directed
+ graph with two directed edges for each undirected edge.
+
+ alpha : float, optional
+ Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a
+      subset of the graph's nodes, with a personalization value for each.
+      At least one personalization value must be non-zero.
+      Nodes not listed in the dictionary get a personalization value of zero.
+      If this argument is not given, a uniform distribution over all
+      nodes is used.
+
+ max_iter : integer, optional
+ Maximum number of iterations in power method eigenvalue solver.
+
+ tol : float, optional
+ Error tolerance used to check convergence in power method solver.
+      The iteration will stop once an error tolerance of
+      ``len(G) * tol`` has been reached.
+
+ nstart : dictionary, optional
+ Starting value of PageRank iteration for each node.
+
+ weight : key, optional
+      Edge data key to use as weight. If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+      any outedges. The dict key is the node the outedge points to and the dict
+      value is the weight of that outedge. By default, dangling nodes are given
+      outedges according to the personalization vector (uniform if not
+      specified). This must be selected to result in an irreducible transition
+      matrix (see notes under google_matrix). It is common for the dangling
+      dict to be the same as the personalization dict.
+
+ Returns
+ -------
+ pagerank : dictionary
+ Dictionary of nodes with PageRank as value
+
+ Examples
+ --------
+ >>> G = nx.DiGraph(nx.path_graph(4))
+ >>> pr = nx.pagerank(G, alpha=0.9)
+
+ Notes
+ -----
+ The eigenvector calculation is done by the power iteration method
+ and has no guarantee of convergence. The iteration will stop after
+ an error tolerance of ``len(G) * tol`` has been reached. If the
+    number of iterations exceeds `max_iter`, a
+ :exc:`networkx.exception.PowerIterationFailedConvergence` exception
+ is raised.
+
+ The PageRank algorithm was designed for directed graphs but this
+ algorithm does not check if the input graph is directed and will
+ execute on undirected graphs by converting each edge in the
+ directed graph to two edges.
+
+ See Also
+ --------
+ google_matrix
+
+ Raises
+ ------
+ PowerIterationFailedConvergence
+ If the algorithm fails to converge to the specified tolerance
+ within the specified number of iterations of the power iteration
+ method.
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+ The PageRank citation ranking: Bringing order to the Web. 1999
+ http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+
+ """
+ return _pagerank_scipy(
+ G, alpha, personalization, max_iter, tol, nstart, weight, dangling
+ )
+
+
+def _pagerank_python(
+ G,
+ alpha=0.85,
+ personalization=None,
+ max_iter=100,
+ tol=1.0e-6,
+ nstart=None,
+ weight="weight",
+ dangling=None,
+):
+ if len(G) == 0:
+ return {}
+
+ D = G.to_directed()
+
+ # Create a copy in (right) stochastic form
+ W = nx.stochastic_graph(D, weight=weight)
+ N = W.number_of_nodes()
+
+ # Choose fixed starting vector if not given
+ if nstart is None:
+ x = dict.fromkeys(W, 1.0 / N)
+ else:
+ # Normalized nstart vector
+ s = sum(nstart.values())
+ x = {k: v / s for k, v in nstart.items()}
+
+ if personalization is None:
+ # Assign uniform personalization vector if not given
+ p = dict.fromkeys(W, 1.0 / N)
+ else:
+ s = sum(personalization.values())
+ p = {k: v / s for k, v in personalization.items()}
+
+ if dangling is None:
+ # Use personalization vector if dangling vector not specified
+ dangling_weights = p
+ else:
+ s = sum(dangling.values())
+ dangling_weights = {k: v / s for k, v in dangling.items()}
+ dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
+
+ # power iteration: make up to max_iter iterations
+ for _ in range(max_iter):
+ xlast = x
+ x = dict.fromkeys(xlast.keys(), 0)
+ danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
+ for n in x:
+ # this matrix multiply looks odd because it is
+ # doing a left multiply x^T=xlast^T*W
+ for _, nbr, wt in W.edges(n, data=weight):
+ x[nbr] += alpha * xlast[n] * wt
+ x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
+ # check convergence, l1 norm
+ err = sum(abs(x[n] - xlast[n]) for n in x)
+ if err < N * tol:
+ return x
+ raise nx.PowerIterationFailedConvergence(max_iter)
+
+
+@nx._dispatchable(edge_attrs="weight")
+def google_matrix(
+ G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
+):
+ """Returns the Google matrix of the graph.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph. Undirected graphs will be converted to a directed
+ graph with two directed edges for each undirected edge.
+
+ alpha : float
+ The damping factor.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a
+      subset of the graph's nodes, with a personalization value for each.
+      At least one personalization value must be non-zero.
+      Nodes not listed in the dictionary get a personalization value of zero.
+      If this argument is not given, a uniform distribution over all
+      nodes is used.
+
+ nodelist : list, optional
+ The rows and columns are ordered according to the nodes in nodelist.
+ If nodelist is None, then the ordering is produced by G.nodes().
+
+ weight : key, optional
+      Edge data key to use as weight. If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+      any outedges. The dict key is the node the outedge points to and the dict
+      value is the weight of that outedge. By default, dangling nodes are given
+      outedges according to the personalization vector (uniform if not
+      specified). This must be selected to result in an irreducible transition
+      matrix (see notes below). It is common for the dangling dict to
+      be the same as the personalization dict.
+
+ Returns
+ -------
+ A : 2D NumPy ndarray
+ Google matrix of the graph
+
+ Notes
+ -----
+ The array returned represents the transition matrix that describes the
+ Markov chain used in PageRank. For PageRank to converge to a unique
+ solution (i.e., a unique stationary distribution in a Markov chain), the
+ transition matrix must be irreducible. In other words, it must be that
+ there exists a path between every pair of nodes in the graph, or else there
+    is the potential for "rank sinks."
+
+ This implementation works with Multi(Di)Graphs. For multigraphs the
+ weight between two nodes is set to be the sum of all edge weights
+ between those nodes.
+
+ See Also
+ --------
+ pagerank
+ """
+ import numpy as np
+
+ if nodelist is None:
+ nodelist = list(G)
+
+ A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
+ N = len(G)
+ if N == 0:
+ return A
+
+ # Personalization vector
+ if personalization is None:
+ p = np.repeat(1.0 / N, N)
+ else:
+ p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+        if p.sum() == 0:
+            raise ZeroDivisionError("sum of personalization values must be nonzero")
+ p /= p.sum()
+
+ # Dangling nodes
+ if dangling is None:
+ dangling_weights = p
+ else:
+ # Convert the dangling dictionary into an array in nodelist order
+ dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+ dangling_weights /= dangling_weights.sum()
+ dangling_nodes = np.where(A.sum(axis=1) == 0)[0]
+
+ # Assign dangling_weights to any dangling nodes (nodes with no out links)
+ A[dangling_nodes] = dangling_weights
+
+ A /= A.sum(axis=1)[:, np.newaxis] # Normalize rows to sum to 1
+
+ return alpha * A + (1 - alpha) * p
+
+
+def _pagerank_numpy(
+ G, alpha=0.85, personalization=None, weight="weight", dangling=None
+):
+ """Returns the PageRank of the nodes in the graph.
+
+ PageRank computes a ranking of the nodes in the graph G based on
+ the structure of the incoming links. It was originally designed as
+ an algorithm to rank web pages.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph. Undirected graphs will be converted to a directed
+ graph with two directed edges for each undirected edge.
+
+ alpha : float, optional
+ Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a
+      subset of the graph's nodes, with a personalization value for each.
+      At least one personalization value must be non-zero.
+      Nodes not listed in the dictionary get a personalization value of zero.
+      If this argument is not given, a uniform distribution over all
+      nodes is used.
+
+ weight : key, optional
+      Edge data key to use as weight. If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+      any outedges. The dict key is the node the outedge points to and the dict
+      value is the weight of that outedge. By default, dangling nodes are given
+      outedges according to the personalization vector (uniform if not
+      specified). This must be selected to result in an irreducible transition
+      matrix (see notes under google_matrix). It is common for the dangling
+      dict to be the same as the personalization dict.
+
+ Returns
+ -------
+ pagerank : dictionary
+ Dictionary of nodes with PageRank as value.
+
+ Examples
+ --------
+ >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
+ >>> G = nx.DiGraph(nx.path_graph(4))
+ >>> pr = _pagerank_numpy(G, alpha=0.9)
+
+ Notes
+ -----
+ The eigenvector calculation uses NumPy's interface to the LAPACK
+ eigenvalue solvers. This will be the fastest and most accurate
+ for small graphs.
+
+ This implementation works with Multi(Di)Graphs. For multigraphs the
+ weight between two nodes is set to be the sum of all edge weights
+ between those nodes.
+
+ See Also
+ --------
+ pagerank, google_matrix
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+ The PageRank citation ranking: Bringing order to the Web. 1999
+ http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+ """
+ import numpy as np
+
+ if len(G) == 0:
+ return {}
+ M = google_matrix(
+ G, alpha, personalization=personalization, weight=weight, dangling=dangling
+ )
+ # use numpy LAPACK solver
+ eigenvalues, eigenvectors = np.linalg.eig(M.T)
+ ind = np.argmax(eigenvalues)
+ # eigenvector of largest eigenvalue is at ind, normalized
+ largest = np.array(eigenvectors[:, ind]).flatten().real
+ norm = largest.sum()
+ return dict(zip(G, map(float, largest / norm)))
+
+
+def _pagerank_scipy(
+ G,
+ alpha=0.85,
+ personalization=None,
+ max_iter=100,
+ tol=1.0e-6,
+ nstart=None,
+ weight="weight",
+ dangling=None,
+):
+ """Returns the PageRank of the nodes in the graph.
+
+ PageRank computes a ranking of the nodes in the graph G based on
+ the structure of the incoming links. It was originally designed as
+ an algorithm to rank web pages.
+
+ Parameters
+ ----------
+ G : graph
+ A NetworkX graph. Undirected graphs will be converted to a directed
+ graph with two directed edges for each undirected edge.
+
+ alpha : float, optional
+ Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a
+      subset of the graph's nodes, with a personalization value for each.
+      At least one personalization value must be non-zero.
+      Nodes not listed in the dictionary get a personalization value of zero.
+      If this argument is not given, a uniform distribution over all
+      nodes is used.
+
+ max_iter : integer, optional
+ Maximum number of iterations in power method eigenvalue solver.
+
+ tol : float, optional
+ Error tolerance used to check convergence in power method solver.
+      The iteration will stop once an error tolerance of
+      ``len(G) * tol`` has been reached.
+
+ nstart : dictionary, optional
+ Starting value of PageRank iteration for each node.
+
+ weight : key, optional
+      Edge data key to use as weight. If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
+      any outedges. The dict key is the node the outedge points to and the dict
+      value is the weight of that outedge. By default, dangling nodes are given
+      outedges according to the personalization vector (uniform if not
+      specified). This must be selected to result in an irreducible transition
+      matrix (see notes under google_matrix). It is common for the dangling
+      dict to be the same as the personalization dict.
+
+ Returns
+ -------
+ pagerank : dictionary
+ Dictionary of nodes with PageRank as value
+
+ Examples
+ --------
+ >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
+ >>> G = nx.DiGraph(nx.path_graph(4))
+ >>> pr = _pagerank_scipy(G, alpha=0.9)
+
+ Notes
+ -----
+ The eigenvector calculation uses power iteration with a SciPy
+ sparse matrix representation.
+
+ This implementation works with Multi(Di)Graphs. For multigraphs the
+ weight between two nodes is set to be the sum of all edge weights
+ between those nodes.
+
+ See Also
+ --------
+ pagerank
+
+ Raises
+ ------
+ PowerIterationFailedConvergence
+ If the algorithm fails to converge to the specified tolerance
+ within the specified number of iterations of the power iteration
+ method.
+
+ References
+ ----------
+ .. [1] A. Langville and C. Meyer,
+ "A survey of eigenvector methods of web information retrieval."
+ http://citeseer.ist.psu.edu/713792.html
+ .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+ The PageRank citation ranking: Bringing order to the Web. 1999
+ http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+ """
+ import numpy as np
+ import scipy as sp
+
+ N = len(G)
+ if N == 0:
+ return {}
+
+ nodelist = list(G)
+ A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
+ S = A.sum(axis=1)
+ S[S != 0] = 1.0 / S[S != 0]
+ # TODO: csr_array
+ Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
+ A = Q @ A
+
+ # initial vector
+ if nstart is None:
+ x = np.repeat(1.0 / N, N)
+ else:
+ x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
+ x /= x.sum()
+
+ # Personalization vector
+ if personalization is None:
+ p = np.repeat(1.0 / N, N)
+ else:
+ p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+        if p.sum() == 0:
+            raise ZeroDivisionError("sum of personalization values must be nonzero")
+ p /= p.sum()
+ # Dangling nodes
+ if dangling is None:
+ dangling_weights = p
+ else:
+ # Convert the dangling dictionary into an array in nodelist order
+ dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+ dangling_weights /= dangling_weights.sum()
+ is_dangling = np.where(S == 0)[0]
+
+ # power iteration: make up to max_iter iterations
+ for _ in range(max_iter):
+ xlast = x
+ x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
+ # check convergence, l1 norm
+ err = np.absolute(x - xlast).sum()
+ if err < N * tol:
+ return dict(zip(nodelist, map(float, x)))
+ raise nx.PowerIterationFailedConvergence(max_iter)
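pagerank_alg.py mirrors that layout: the public `pagerank` delegates to the sparse power-iteration solver `_pagerank_scipy`, while `google_matrix` exposes the dense transition matrix. A hedged sketch relating the two; the row-sum and stationarity checks are illustrative and not part of the diff:

    import networkx as nx
    import numpy as np

    G = nx.DiGraph(nx.path_graph(4))
    pr = nx.pagerank(G, alpha=0.9, tol=1e-10)
    M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
    assert np.allclose(M.sum(axis=1), 1.0)   # each row of the Google matrix is a distribution
    x = np.array([pr[n] for n in sorted(G)])
    assert np.allclose(x @ M, x, atol=1e-8)  # PageRank is (approximately) stationary for M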
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/__init__.py
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py
new file mode 100644
index 00000000..54713eb4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_hits.py
@@ -0,0 +1,78 @@
+import pytest
+
+import networkx as nx
+
+np = pytest.importorskip("numpy")
+sp = pytest.importorskip("scipy")
+
+from networkx.algorithms.link_analysis.hits_alg import (
+ _hits_numpy,
+ _hits_python,
+ _hits_scipy,
+)
+
+# Example from
+# A. Langville and C. Meyer, "A survey of eigenvector methods of web
+# information retrieval." http://citeseer.ist.psu.edu/713792.html
+
+
+class TestHITS:
+ @classmethod
+ def setup_class(cls):
+ G = nx.DiGraph()
+
+ edges = [(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)]
+
+ G.add_edges_from(edges, weight=1)
+ cls.G = G
+ cls.G.a = dict(
+ zip(sorted(G), [0.000000, 0.000000, 0.366025, 0.133975, 0.500000, 0.000000])
+ )
+ cls.G.h = dict(
+ zip(sorted(G), [0.366025, 0.000000, 0.211325, 0.000000, 0.211325, 0.211325])
+ )
+
+ def test_hits_numpy(self):
+ G = self.G
+ h, a = _hits_numpy(G)
+ for n in G:
+ assert h[n] == pytest.approx(G.h[n], abs=1e-4)
+ for n in G:
+ assert a[n] == pytest.approx(G.a[n], abs=1e-4)
+
+ @pytest.mark.parametrize("hits_alg", (nx.hits, _hits_python, _hits_scipy))
+ def test_hits(self, hits_alg):
+ G = self.G
+ h, a = hits_alg(G, tol=1.0e-08)
+ for n in G:
+ assert h[n] == pytest.approx(G.h[n], abs=1e-4)
+ for n in G:
+ assert a[n] == pytest.approx(G.a[n], abs=1e-4)
+ nstart = {i: 1.0 / 2 for i in G}
+ h, a = hits_alg(G, nstart=nstart)
+ for n in G:
+ assert h[n] == pytest.approx(G.h[n], abs=1e-4)
+ for n in G:
+ assert a[n] == pytest.approx(G.a[n], abs=1e-4)
+
+ def test_empty(self):
+ G = nx.Graph()
+ assert nx.hits(G) == ({}, {})
+ assert _hits_numpy(G) == ({}, {})
+ assert _hits_python(G) == ({}, {})
+ assert _hits_scipy(G) == ({}, {})
+
+ def test_hits_not_convergent(self):
+ G = nx.path_graph(50)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ _hits_scipy(G, max_iter=1)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ _hits_python(G, max_iter=1)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ _hits_scipy(G, max_iter=0)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ _hits_python(G, max_iter=0)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ nx.hits(G, max_iter=0)
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ nx.hits(G, max_iter=1)
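The hard-coded hub/authority fixtures in setup_class (0.366025, 0.133975, 0.500000, ...) come from the Langville-Meyer example cited in the header. A hedged sketch for regenerating them with the dense solver, assuming the same edge list:

    import networkx as nx
    from networkx.algorithms.link_analysis.hits_alg import _hits_numpy

    G = nx.DiGraph([(1, 3), (1, 5), (2, 1), (3, 5), (5, 4), (5, 3), (6, 5)])
    h, a = _hits_numpy(G)
    print({n: round(h[n], 6) for n in sorted(G)})  # ~ {1: 0.366025, 2: 0.0, 3: 0.211325, ...}
    print({n: round(a[n], 6) for n in sorted(G)})  # ~ {3: 0.366025, 4: 0.133975, 5: 0.5, ...}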
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py
new file mode 100644
index 00000000..44038fd4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/tests/test_pagerank.py
@@ -0,0 +1,214 @@
+import random
+
+import pytest
+
+import networkx as nx
+from networkx.classes.tests import dispatch_interface
+
+np = pytest.importorskip("numpy")
+pytest.importorskip("scipy")
+
+from networkx.algorithms.link_analysis.pagerank_alg import (
+ _pagerank_numpy,
+ _pagerank_python,
+ _pagerank_scipy,
+)
+
+# Example from
+# A. Langville and C. Meyer, "A survey of eigenvector methods of web
+# information retrieval." http://citeseer.ist.psu.edu/713792.html
+
+
+class TestPageRank:
+ @classmethod
+ def setup_class(cls):
+ G = nx.DiGraph()
+ edges = [
+ (1, 2),
+ (1, 3),
+ # 2 is a dangling node
+ (3, 1),
+ (3, 2),
+ (3, 5),
+ (4, 5),
+ (4, 6),
+ (5, 4),
+ (5, 6),
+ (6, 4),
+ ]
+ G.add_edges_from(edges)
+ cls.G = G
+ cls.G.pagerank = dict(
+ zip(
+ sorted(G),
+ [
+ 0.03721197,
+ 0.05395735,
+ 0.04150565,
+ 0.37508082,
+ 0.20599833,
+ 0.28624589,
+ ],
+ )
+ )
+ cls.dangling_node_index = 1
+ cls.dangling_edges = {1: 2, 2: 3, 3: 0, 4: 0, 5: 0, 6: 0}
+ cls.G.dangling_pagerank = dict(
+ zip(
+ sorted(G),
+ [0.10844518, 0.18618601, 0.0710892, 0.2683668, 0.15919783, 0.20671497],
+ )
+ )
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python))
+ def test_pagerank(self, alg):
+ G = self.G
+ p = alg(G, alpha=0.9, tol=1.0e-08)
+ for n in G:
+ assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4)
+
+ nstart = {n: random.random() for n in G}
+ p = alg(G, alpha=0.9, tol=1.0e-08, nstart=nstart)
+ for n in G:
+ assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python))
+ def test_pagerank_max_iter(self, alg):
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ alg(self.G, max_iter=0)
+
+ def test_numpy_pagerank(self):
+ G = self.G
+ p = _pagerank_numpy(G, alpha=0.9)
+ for n in G:
+ assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4)
+
+ def test_google_matrix(self):
+ G = self.G
+ M = nx.google_matrix(G, alpha=0.9, nodelist=sorted(G))
+ _, ev = np.linalg.eig(M.T)
+ p = ev[:, 0] / ev[:, 0].sum()
+ for a, b in zip(p, self.G.pagerank.values()):
+ assert a == pytest.approx(b, abs=1e-7)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, _pagerank_numpy))
+ def test_personalization(self, alg):
+ G = nx.complete_graph(4)
+ personalize = {0: 1, 1: 1, 2: 4, 3: 4}
+ answer = {
+ 0: 0.23246732615667579,
+ 1: 0.23246732615667579,
+ 2: 0.267532673843324,
+ 3: 0.2675326738433241,
+ }
+ p = alg(G, alpha=0.85, personalization=personalize)
+ for n in G:
+ assert p[n] == pytest.approx(answer[n], abs=1e-4)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, nx.google_matrix))
+ def test_zero_personalization_vector(self, alg):
+ G = nx.complete_graph(4)
+ personalize = {0: 0, 1: 0, 2: 0, 3: 0}
+ pytest.raises(ZeroDivisionError, alg, G, personalization=personalize)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python))
+ def test_one_nonzero_personalization_value(self, alg):
+ G = nx.complete_graph(4)
+ personalize = {0: 0, 1: 0, 2: 0, 3: 1}
+ answer = {
+ 0: 0.22077931820379187,
+ 1: 0.22077931820379187,
+ 2: 0.22077931820379187,
+ 3: 0.3376620453886241,
+ }
+ p = alg(G, alpha=0.85, personalization=personalize)
+ for n in G:
+ assert p[n] == pytest.approx(answer[n], abs=1e-4)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python))
+ def test_incomplete_personalization(self, alg):
+ G = nx.complete_graph(4)
+ personalize = {3: 1}
+ answer = {
+ 0: 0.22077931820379187,
+ 1: 0.22077931820379187,
+ 2: 0.22077931820379187,
+ 3: 0.3376620453886241,
+ }
+ p = alg(G, alpha=0.85, personalization=personalize)
+ for n in G:
+ assert p[n] == pytest.approx(answer[n], abs=1e-4)
+
+ def test_dangling_matrix(self):
+ """
+ Tests that the google_matrix doesn't change except for the dangling
+ nodes.
+ """
+ G = self.G
+ dangling = self.dangling_edges
+ dangling_sum = sum(dangling.values())
+ M1 = nx.google_matrix(G, personalization=dangling)
+ M2 = nx.google_matrix(G, personalization=dangling, dangling=dangling)
+ for i in range(len(G)):
+ for j in range(len(G)):
+ if i == self.dangling_node_index and (j + 1) in dangling:
+ assert M2[i, j] == pytest.approx(
+ dangling[j + 1] / dangling_sum, abs=1e-4
+ )
+ else:
+ assert M2[i, j] == pytest.approx(M1[i, j], abs=1e-4)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python, _pagerank_numpy))
+ def test_dangling_pagerank(self, alg):
+ pr = alg(self.G, dangling=self.dangling_edges)
+ for n in self.G:
+ assert pr[n] == pytest.approx(self.G.dangling_pagerank[n], abs=1e-4)
+
+ def test_empty(self):
+ G = nx.Graph()
+ assert nx.pagerank(G) == {}
+ assert _pagerank_python(G) == {}
+ assert _pagerank_numpy(G) == {}
+ assert nx.google_matrix(G).shape == (0, 0)
+
+ @pytest.mark.parametrize("alg", (nx.pagerank, _pagerank_python))
+ def test_multigraph(self, alg):
+ G = nx.MultiGraph()
+ G.add_edges_from([(1, 2), (1, 2), (1, 2), (2, 3), (2, 3), ("3", 3), ("3", 3)])
+ answer = {
+ 1: 0.21066048614468322,
+ 2: 0.3395308825985378,
+ 3: 0.28933951385531687,
+ "3": 0.16046911740146227,
+ }
+ p = alg(G)
+ for n in G:
+ assert p[n] == pytest.approx(answer[n], abs=1e-4)
+
+
+class TestPageRankScipy(TestPageRank):
+ def test_scipy_pagerank(self):
+ G = self.G
+ p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08)
+ for n in G:
+ assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4)
+ personalize = {n: random.random() for n in G}
+ p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08, personalization=personalize)
+
+ nstart = {n: random.random() for n in G}
+ p = _pagerank_scipy(G, alpha=0.9, tol=1.0e-08, nstart=nstart)
+ for n in G:
+ assert p[n] == pytest.approx(G.pagerank[n], abs=1e-4)
+
+ def test_scipy_pagerank_max_iter(self):
+ with pytest.raises(nx.PowerIterationFailedConvergence):
+ _pagerank_scipy(self.G, max_iter=0)
+
+ def test_dangling_scipy_pagerank(self):
+ pr = _pagerank_scipy(self.G, dangling=self.dangling_edges)
+ for n in self.G:
+ assert pr[n] == pytest.approx(self.G.dangling_pagerank[n], abs=1e-4)
+
+ def test_empty_scipy(self):
+ G = nx.Graph()
+ assert _pagerank_scipy(G) == {}
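As in test_hits.py, the PageRank fixtures hard-coded in setup_class can be regenerated from the implementation under test. A hedged sketch; the rounding is illustrative, and the fixtures are only asserted to 1e-4 in the tests:

    import networkx as nx

    edges = [(1, 2), (1, 3), (3, 1), (3, 2), (3, 5), (4, 5), (4, 6), (5, 4), (5, 6), (6, 4)]
    G = nx.DiGraph(edges)
    pr = nx.pagerank(G, alpha=0.9, tol=1.0e-08)
    print({n: round(pr[n], 8) for n in sorted(G)})
    # ~ {1: 0.03721197, 2: 0.05395735, 3: 0.04150565, 4: 0.37508082, 5: 0.20599833, 6: 0.28624589}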