path: root/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
author    S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
parent    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download  gn-ai-master.tar.gz
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py')
-rw-r--r--  .venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py | 500
1 file changed, 500 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
new file mode 100644
index 00000000..de9f95ba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/link_analysis/pagerank_alg.py
@@ -0,0 +1,500 @@
+"""PageRank analysis of graph structure."""
+
+from warnings import warn
+
+import networkx as nx
+
+__all__ = ["pagerank", "google_matrix"]
+
+
+@nx._dispatchable(edge_attrs="weight")
+def pagerank(
+    G,
+    alpha=0.85,
+    personalization=None,
+    max_iter=100,
+    tol=1.0e-6,
+    nstart=None,
+    weight="weight",
+    dangling=None,
+):
+    """Returns the PageRank of the nodes in the graph.
+
+    PageRank computes a ranking of the nodes in the graph G based on
+    the structure of the incoming links. It was originally designed as
+    an algorithm to rank web pages.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.  Undirected graphs will be converted to a directed
+      graph with two directed edges for each undirected edge.
+
+    alpha : float, optional
+      Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a subset
+      of graph nodes, with each value giving that node's personalization
+      weight. At least one value must be non-zero. Nodes missing from the
+      dictionary are given a personalization value of zero. If not
+      specified, a uniform distribution over all nodes is used.
+
+    max_iter : integer, optional
+      Maximum number of iterations in power method eigenvalue solver.
+
+    tol : float, optional
+      Error tolerance used to check convergence in power method solver.
+      The iteration stops once the error falls below ``len(G) * tol``.
+
+    nstart : dictionary, optional
+      Starting value of PageRank iteration for each node.
+
+    weight : key, optional
+      Edge data key to use as weight.  If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes
+      without any outedges. The dict key is the node the outedge points to
+      and the dict value is the weight of that outedge. By default, dangling
+      nodes are given outedges according to the personalization vector
+      (uniform if not specified). This must be selected to result in an
+      irreducible transition matrix (see notes under google_matrix). It is
+      common for the dangling dict to be the same as the personalization
+      dict.
+
+
+    Returns
+    -------
+    pagerank : dictionary
+       Dictionary of nodes with PageRank as value
+
+    Examples
+    --------
+    >>> G = nx.DiGraph(nx.path_graph(4))
+    >>> pr = nx.pagerank(G, alpha=0.9)
+
+    Notes
+    -----
+    The eigenvector calculation is done by the power iteration method
+    and has no guarantee of convergence.  The iteration will stop after
+    an error tolerance of ``len(G) * tol`` has been reached. If the
+    number of iterations exceeds `max_iter`, a
+    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
+    is raised.
+
+    The PageRank algorithm was designed for directed graphs, but this
+    implementation does not check whether the input graph is directed; it
+    runs on undirected graphs by converting each undirected edge into two
+    directed edges.
+
+    See Also
+    --------
+    google_matrix
+
+    Raises
+    ------
+    PowerIterationFailedConvergence
+        If the algorithm fails to converge to the specified tolerance
+        within the specified number of iterations of the power iteration
+        method.
+
+    References
+    ----------
+    .. [1] A. Langville and C. Meyer,
+       "A survey of eigenvector methods of web information retrieval."
+       http://citeseer.ist.psu.edu/713792.html
+    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+       The PageRank citation ranking: Bringing order to the Web. 1999
+       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+
+    """
+    return _pagerank_scipy(
+        G, alpha, personalization, max_iter, tol, nstart, weight, dangling
+    )
+
+
+def _pagerank_python(
+    G,
+    alpha=0.85,
+    personalization=None,
+    max_iter=100,
+    tol=1.0e-6,
+    nstart=None,
+    weight="weight",
+    dangling=None,
+):
+    if len(G) == 0:
+        return {}
+
+    D = G.to_directed()
+
+    # Create a copy in (right) stochastic form
+    W = nx.stochastic_graph(D, weight=weight)
+    N = W.number_of_nodes()
+
+    # Choose fixed starting vector if not given
+    if nstart is None:
+        x = dict.fromkeys(W, 1.0 / N)
+    else:
+        # Normalized nstart vector
+        s = sum(nstart.values())
+        x = {k: v / s for k, v in nstart.items()}
+
+    if personalization is None:
+        # Assign uniform personalization vector if not given
+        p = dict.fromkeys(W, 1.0 / N)
+    else:
+        s = sum(personalization.values())
+        p = {k: v / s for k, v in personalization.items()}
+
+    if dangling is None:
+        # Use personalization vector if dangling vector not specified
+        dangling_weights = p
+    else:
+        s = sum(dangling.values())
+        dangling_weights = {k: v / s for k, v in dangling.items()}
+    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
+
+    # power iteration: make up to max_iter iterations
+    for _ in range(max_iter):
+        xlast = x
+        x = dict.fromkeys(xlast.keys(), 0)
+        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
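+        # danglesum already carries the alpha factor, so the loop below
+        # computes x = alpha * (xlast @ W + dangling_mass * w) + (1 - alpha) * p,
+        # where w is the dangling-weights distribution and p the personalization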
+        for n in x:
+            # this matrix multiply looks odd because it is
+            # doing a left multiply x^T=xlast^T*W
+            for _, nbr, wt in W.edges(n, data=weight):
+                x[nbr] += alpha * xlast[n] * wt
+            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
+        # check convergence, l1 norm
+        err = sum(abs(x[n] - xlast[n]) for n in x)
+        if err < N * tol:
+            return x
+    raise nx.PowerIterationFailedConvergence(max_iter)
+
+
+@nx._dispatchable(edge_attrs="weight")
+def google_matrix(
+    G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
+):
+    """Returns the Google matrix of the graph.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.  Undirected graphs will be converted to a directed
+      graph with two directed edges for each undirected edge.
+
+    alpha : float
+      The damping factor.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a subset
+      of graph nodes, with each value giving that node's personalization
+      weight. At least one value must be non-zero. Nodes missing from the
+      dictionary are given a personalization value of zero. If not
+      specified, a uniform distribution over all nodes is used.
+
+    nodelist : list, optional
+      The rows and columns are ordered according to the nodes in nodelist.
+      If nodelist is None, then the ordering is produced by G.nodes().
+
+    weight : key, optional
+      Edge data key to use as weight.  If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes
+      without any outedges. The dict key is the node the outedge points to
+      and the dict value is the weight of that outedge. By default, dangling
+      nodes are given outedges according to the personalization vector
+      (uniform if not specified). This must be selected to result in an
+      irreducible transition matrix (see notes below). It is common for the
+      dangling dict to be the same as the personalization dict.
+
+    Returns
+    -------
+    A : 2D NumPy ndarray
+       Google matrix of the graph
+
+    Notes
+    -----
+    The array returned represents the transition matrix that describes the
+    Markov chain used in PageRank. For PageRank to converge to a unique
+    solution (i.e., a unique stationary distribution in a Markov chain), the
+    transition matrix must be irreducible. In other words, a path must exist
+    between every pair of nodes in the graph, or else there is the potential
+    for "rank sinks."
+
+    This implementation works with Multi(Di)Graphs. For multigraphs the
+    weight between two nodes is set to be the sum of all edge weights
+    between those nodes.
+
+    See Also
+    --------
+    pagerank
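+
+    Examples
+    --------
+    A small sketch (illustrative, not exhaustive) checking that each row of
+    the returned matrix is a probability distribution:
+
+    >>> import numpy as np
+    >>> G = nx.DiGraph(nx.path_graph(4))
+    >>> M = nx.google_matrix(G, alpha=0.9)
+    >>> bool(np.allclose(M.sum(axis=1), 1.0))
+    True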
+    """
+    import numpy as np
+
+    if nodelist is None:
+        nodelist = list(G)
+
+    A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
+    N = len(G)
+    if N == 0:
+        return A
+
+    # Personalization vector
+    if personalization is None:
+        p = np.repeat(1.0 / N, N)
+    else:
+        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+        if p.sum() == 0:
+            raise ZeroDivisionError
+        p /= p.sum()
+
+    # Dangling nodes
+    if dangling is None:
+        dangling_weights = p
+    else:
+        # Convert the dangling dictionary into an array in nodelist order
+        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+        dangling_weights /= dangling_weights.sum()
+    dangling_nodes = np.where(A.sum(axis=1) == 0)[0]
+
+    # Assign dangling_weights to any dangling nodes (nodes with no out links)
+    A[dangling_nodes] = dangling_weights
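+    # NumPy broadcasting assigns the dangling distribution to every such row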
+
+    A /= A.sum(axis=1)[:, np.newaxis]  # Normalize rows to sum to 1
+
+    return alpha * A + (1 - alpha) * p
+
+
+def _pagerank_numpy(
+    G, alpha=0.85, personalization=None, weight="weight", dangling=None
+):
+    """Returns the PageRank of the nodes in the graph.
+
+    PageRank computes a ranking of the nodes in the graph G based on
+    the structure of the incoming links. It was originally designed as
+    an algorithm to rank web pages.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.  Undirected graphs will be converted to a directed
+      graph with two directed edges for each undirected edge.
+
+    alpha : float, optional
+      Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a subset
+      of graph nodes, with each value giving that node's personalization
+      weight. At least one value must be non-zero. Nodes missing from the
+      dictionary are given a personalization value of zero. If not
+      specified, a uniform distribution over all nodes is used.
+
+    weight : key, optional
+      Edge data key to use as weight.  If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes
+      without any outedges. The dict key is the node the outedge points to
+      and the dict value is the weight of that outedge. By default, dangling
+      nodes are given outedges according to the personalization vector
+      (uniform if not specified). This must be selected to result in an
+      irreducible transition matrix (see notes under google_matrix). It is
+      common for the dangling dict to be the same as the personalization
+      dict.
+
+    Returns
+    -------
+    pagerank : dictionary
+       Dictionary of nodes with PageRank as value.
+
+    Examples
+    --------
+    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
+    >>> G = nx.DiGraph(nx.path_graph(4))
+    >>> pr = _pagerank_numpy(G, alpha=0.9)
+
+    Notes
+    -----
+    The eigenvector calculation uses NumPy's interface to the LAPACK
+    eigenvalue solvers.  This will be the fastest and most accurate
+    for small graphs.
+
+    This implementation works with Multi(Di)Graphs. For multigraphs the
+    weight between two nodes is set to be the sum of all edge weights
+    between those nodes.
+
+    See Also
+    --------
+    pagerank, google_matrix
+
+    References
+    ----------
+    .. [1] A. Langville and C. Meyer,
+       "A survey of eigenvector methods of web information retrieval."
+       http://citeseer.ist.psu.edu/713792.html
+    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+       The PageRank citation ranking: Bringing order to the Web. 1999
+       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+    """
+    import numpy as np
+
+    if len(G) == 0:
+        return {}
+    M = google_matrix(
+        G, alpha, personalization=personalization, weight=weight, dangling=dangling
+    )
+    # use numpy LAPACK solver
+    eigenvalues, eigenvectors = np.linalg.eig(M.T)
+    ind = np.argmax(eigenvalues)
+    # eigenvector of largest eigenvalue is at ind, normalized
+    largest = np.array(eigenvectors[:, ind]).flatten().real
+    norm = largest.sum()
+    return dict(zip(G, map(float, largest / norm)))
+
+
+def _pagerank_scipy(
+    G,
+    alpha=0.85,
+    personalization=None,
+    max_iter=100,
+    tol=1.0e-6,
+    nstart=None,
+    weight="weight",
+    dangling=None,
+):
+    """Returns the PageRank of the nodes in the graph.
+
+    PageRank computes a ranking of the nodes in the graph G based on
+    the structure of the incoming links. It was originally designed as
+    an algorithm to rank web pages.
+
+    Parameters
+    ----------
+    G : graph
+      A NetworkX graph.  Undirected graphs will be converted to a directed
+      graph with two directed edges for each undirected edge.
+
+    alpha : float, optional
+      Damping parameter for PageRank, default=0.85.
+
+    personalization : dict, optional
+      The "personalization vector", given as a dictionary keyed by a subset
+      of graph nodes, with each value giving that node's personalization
+      weight. At least one value must be non-zero. Nodes missing from the
+      dictionary are given a personalization value of zero. If not
+      specified, a uniform distribution over all nodes is used.
+
+    max_iter : integer, optional
+      Maximum number of iterations in power method eigenvalue solver.
+
+    tol : float, optional
+      Error tolerance used to check convergence in power method solver.
+      The iteration stops once the error falls below ``len(G) * tol``.
+
+    nstart : dictionary, optional
+      Starting value of PageRank iteration for each node.
+
+    weight : key, optional
+      Edge data key to use as weight.  If None, all edge weights are set to 1.
+
+    dangling : dict, optional
+      The outedges to be assigned to any "dangling" nodes, i.e., nodes
+      without any outedges. The dict key is the node the outedge points to
+      and the dict value is the weight of that outedge. By default, dangling
+      nodes are given outedges according to the personalization vector
+      (uniform if not specified). This must be selected to result in an
+      irreducible transition matrix (see notes under google_matrix). It is
+      common for the dangling dict to be the same as the personalization
+      dict.
+
+    Returns
+    -------
+    pagerank : dictionary
+       Dictionary of nodes with PageRank as value
+
+    Examples
+    --------
+    >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
+    >>> G = nx.DiGraph(nx.path_graph(4))
+    >>> pr = _pagerank_scipy(G, alpha=0.9)
+
+    Notes
+    -----
+    The eigenvector calculation uses power iteration with a SciPy
+    sparse matrix representation.
+
+    This implementation works with Multi(Di)Graphs. For multigraphs the
+    weight between two nodes is set to be the sum of all edge weights
+    between those nodes.
+
+    See Also
+    --------
+    pagerank
+
+    Raises
+    ------
+    PowerIterationFailedConvergence
+        If the algorithm fails to converge to the specified tolerance
+        within the specified number of iterations of the power iteration
+        method.
+
+    References
+    ----------
+    .. [1] A. Langville and C. Meyer,
+       "A survey of eigenvector methods of web information retrieval."
+       http://citeseer.ist.psu.edu/713792.html
+    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
+       The PageRank citation ranking: Bringing order to the Web. 1999
+       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
+    """
+    import numpy as np
+    import scipy as sp
+
+    N = len(G)
+    if N == 0:
+        return {}
+
+    nodelist = list(G)
+    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
+    S = A.sum(axis=1)
+    S[S != 0] = 1.0 / S[S != 0]
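+    # S now holds 1 / out-degree for each node (0 for dangling nodes), so
+    # left-multiplying by diag(S) below makes A row-stochastic; dangling
+    # rows stay all-zero and are handled separately via dangling_weights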
+    # TODO: csr_array
+    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
+    A = Q @ A
+
+    # initial vector
+    if nstart is None:
+        x = np.repeat(1.0 / N, N)
+    else:
+        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
+        x /= x.sum()
+
+    # Personalization vector
+    if personalization is None:
+        p = np.repeat(1.0 / N, N)
+    else:
+        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
+        if p.sum() == 0:
+            raise ZeroDivisionError
+        p /= p.sum()
+    # Dangling nodes
+    if dangling is None:
+        dangling_weights = p
+    else:
+        # Convert the dangling dictionary into an array in nodelist order
+        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
+        dangling_weights /= dangling_weights.sum()
+    is_dangling = np.where(S == 0)[0]
+
+    # power iteration: make up to max_iter iterations
+    for _ in range(max_iter):
+        xlast = x
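+        # vectorized update: x @ A spreads rank along out-edges, the dangling
+        # mass sum(x[is_dangling]) is redistributed via dangling_weights, and
+        # (1 - alpha) * p is the teleportation term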
+        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
+        # check convergence, l1 norm
+        err = np.absolute(x - xlast).sum()
+        if err < N * tol:
+            return dict(zip(nodelist, map(float, x)))
+    raise nx.PowerIterationFailedConvergence(max_iter)