about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py')
-rw-r--r--.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py328
1 files changed, 328 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py b/.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py
new file mode 100644
index 00000000..7ded847f
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/networkx/algorithms/graph_hashing.py
@@ -0,0 +1,328 @@
+"""
+Functions for hashing graphs to strings.
+Isomorphic graphs should be assigned identical hashes.
+For now, only Weisfeiler-Lehman hashing is implemented.
+"""
+
+from collections import Counter, defaultdict
+from hashlib import blake2b
+
+import networkx as nx
+
+__all__ = ["weisfeiler_lehman_graph_hash", "weisfeiler_lehman_subgraph_hashes"]
+
+
+def _hash_label(label, digest_size):
+    return blake2b(label.encode("ascii"), digest_size=digest_size).hexdigest()
+
+
+def _init_node_labels(G, edge_attr, node_attr):
+    if node_attr:
+        return {u: str(dd[node_attr]) for u, dd in G.nodes(data=True)}
+    elif edge_attr:
+        return {u: "" for u in G}
+    else:
+        return {u: str(deg) for u, deg in G.degree()}
+
+
+def _neighborhood_aggregate(G, node, node_labels, edge_attr=None):
+    """
+    Compute new labels for given node by aggregating
+    the labels of each node's neighbors.
+    """
+    label_list = []
+    for nbr in G.neighbors(node):
+        prefix = "" if edge_attr is None else str(G[node][nbr][edge_attr])
+        label_list.append(prefix + node_labels[nbr])
+    return node_labels[node] + "".join(sorted(label_list))
+
+
+@nx.utils.not_implemented_for("multigraph")
+@nx._dispatchable(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
+def weisfeiler_lehman_graph_hash(
+    G, edge_attr=None, node_attr=None, iterations=3, digest_size=16
+):
+    """Return Weisfeiler Lehman (WL) graph hash.
+
+    The function iteratively aggregates and hashes neighborhoods of each node.
+    After each node's neighbors are hashed to obtain updated node labels,
+    a hashed histogram of resulting labels is returned as the final hash.
+
+    Hashes are identical for isomorphic graphs and strong guarantees that
+    non-isomorphic graphs will get different hashes. See [1]_ for details.
+
+    If no node or edge attributes are provided, the degree of each node
+    is used as its initial label.
+    Otherwise, node and/or edge labels are used to compute the hash.
+
+    Parameters
+    ----------
+    G : graph
+        The graph to be hashed.
+        Can have node and/or edge attributes. Can also have no attributes.
+    edge_attr : string, optional (default=None)
+        The key in edge attribute dictionary to be used for hashing.
+        If None, edge labels are ignored.
+    node_attr: string, optional (default=None)
+        The key in node attribute dictionary to be used for hashing.
+        If None, and no edge_attr given, use the degrees of the nodes as labels.
+    iterations: int, optional (default=3)
+        Number of neighbor aggregations to perform.
+        Should be larger for larger graphs.
+    digest_size: int, optional (default=16)
+        Size (in bits) of blake2b hash digest to use for hashing node labels.
+
+    Returns
+    -------
+    h : string
+        Hexadecimal string corresponding to hash of the input graph.
+
+    Examples
+    --------
+    Two graphs with edge attributes that are isomorphic, except for
+    differences in the edge labels.
+
+    >>> G1 = nx.Graph()
+    >>> G1.add_edges_from(
+    ...     [
+    ...         (1, 2, {"label": "A"}),
+    ...         (2, 3, {"label": "A"}),
+    ...         (3, 1, {"label": "A"}),
+    ...         (1, 4, {"label": "B"}),
+    ...     ]
+    ... )
+    >>> G2 = nx.Graph()
+    >>> G2.add_edges_from(
+    ...     [
+    ...         (5, 6, {"label": "B"}),
+    ...         (6, 7, {"label": "A"}),
+    ...         (7, 5, {"label": "A"}),
+    ...         (7, 8, {"label": "A"}),
+    ...     ]
+    ... )
+
+    Omitting the `edge_attr` option, results in identical hashes.
+
+    >>> nx.weisfeiler_lehman_graph_hash(G1)
+    '7bc4dde9a09d0b94c5097b219891d81a'
+    >>> nx.weisfeiler_lehman_graph_hash(G2)
+    '7bc4dde9a09d0b94c5097b219891d81a'
+
+    With edge labels, the graphs are no longer assigned
+    the same hash digest.
+
+    >>> nx.weisfeiler_lehman_graph_hash(G1, edge_attr="label")
+    'c653d85538bcf041d88c011f4f905f10'
+    >>> nx.weisfeiler_lehman_graph_hash(G2, edge_attr="label")
+    '3dcd84af1ca855d0eff3c978d88e7ec7'
+
+    Notes
+    -----
+    To return the WL hashes of each subgraph of a graph, use
+    `weisfeiler_lehman_subgraph_hashes`
+
+    Similarity between hashes does not imply similarity between graphs.
+
+    References
+    ----------
+    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
+       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
+       Graph Kernels. Journal of Machine Learning Research. 2011.
+       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
+
+    See also
+    --------
+    weisfeiler_lehman_subgraph_hashes
+    """
+
+    def weisfeiler_lehman_step(G, labels, edge_attr=None):
+        """
+        Apply neighborhood aggregation to each node
+        in the graph.
+        Computes a dictionary with labels for each node.
+        """
+        new_labels = {}
+        for node in G.nodes():
+            label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
+            new_labels[node] = _hash_label(label, digest_size)
+        return new_labels
+
+    # set initial node labels
+    node_labels = _init_node_labels(G, edge_attr, node_attr)
+
+    subgraph_hash_counts = []
+    for _ in range(iterations):
+        node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr)
+        counter = Counter(node_labels.values())
+        # sort the counter, extend total counts
+        subgraph_hash_counts.extend(sorted(counter.items(), key=lambda x: x[0]))
+
+    # hash the final counter
+    return _hash_label(str(tuple(subgraph_hash_counts)), digest_size)
+
+
+@nx.utils.not_implemented_for("multigraph")
+@nx._dispatchable(edge_attrs={"edge_attr": None}, node_attrs="node_attr")
+def weisfeiler_lehman_subgraph_hashes(
+    G,
+    edge_attr=None,
+    node_attr=None,
+    iterations=3,
+    digest_size=16,
+    include_initial_labels=False,
+):
+    """
+    Return a dictionary of subgraph hashes by node.
+
+    Dictionary keys are nodes in `G`, and values are a list of hashes.
+    Each hash corresponds to a subgraph rooted at a given node u in `G`.
+    Lists of subgraph hashes are sorted in increasing order of depth from
+    their root node, with the hash at index i corresponding to a subgraph
+    of nodes at most i edges distance from u. Thus, each list will contain
+    `iterations` elements - a hash for a subgraph at each depth. If
+    `include_initial_labels` is set to `True`, each list will additionally
+    have contain a hash of the initial node label (or equivalently a
+    subgraph of depth 0) prepended, totalling ``iterations + 1`` elements.
+
+    The function iteratively aggregates and hashes neighborhoods of each node.
+    This is achieved for each step by replacing for each node its label from
+    the previous iteration with its hashed 1-hop neighborhood aggregate.
+    The new node label is then appended to a list of node labels for each
+    node.
+
+    To aggregate neighborhoods for a node $u$ at each step, all labels of
+    nodes adjacent to $u$ are concatenated. If the `edge_attr` parameter is set,
+    labels for each neighboring node are prefixed with the value of this attribute
+    along the connecting edge from this neighbor to node $u$. The resulting string
+    is then hashed to compress this information into a fixed digest size.
+
+    Thus, at the $i$-th iteration, nodes within $i$ hops influence any given
+    hashed node label. We can therefore say that at depth $i$ for node $u$
+    we have a hash for a subgraph induced by the $i$-hop neighborhood of $u$.
+
+    The output can be used to create general Weisfeiler-Lehman graph kernels,
+    or generate features for graphs or nodes - for example to generate 'words' in
+    a graph as seen in the 'graph2vec' algorithm.
+    See [1]_ & [2]_ respectively for details.
+
+    Hashes are identical for isomorphic subgraphs and there exist strong
+    guarantees that non-isomorphic graphs will get different hashes.
+    See [1]_ for details.
+
+    If no node or edge attributes are provided, the degree of each node
+    is used as its initial label.
+    Otherwise, node and/or edge labels are used to compute the hash.
+
+    Parameters
+    ----------
+    G : graph
+        The graph to be hashed.
+        Can have node and/or edge attributes. Can also have no attributes.
+    edge_attr : string, optional (default=None)
+        The key in edge attribute dictionary to be used for hashing.
+        If None, edge labels are ignored.
+    node_attr : string, optional (default=None)
+        The key in node attribute dictionary to be used for hashing.
+        If None, and no edge_attr given, use the degrees of the nodes as labels.
+        If None, and edge_attr is given, each node starts with an identical label.
+    iterations : int, optional (default=3)
+        Number of neighbor aggregations to perform.
+        Should be larger for larger graphs.
+    digest_size : int, optional (default=16)
+        Size (in bits) of blake2b hash digest to use for hashing node labels.
+        The default size is 16 bits.
+    include_initial_labels : bool, optional (default=False)
+        If True, include the hashed initial node label as the first subgraph
+        hash for each node.
+
+    Returns
+    -------
+    node_subgraph_hashes : dict
+        A dictionary with each key given by a node in G, and each value given
+        by the subgraph hashes in order of depth from the key node.
+
+    Examples
+    --------
+    Finding similar nodes in different graphs:
+
+    >>> G1 = nx.Graph()
+    >>> G1.add_edges_from([(1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7)])
+    >>> G2 = nx.Graph()
+    >>> G2.add_edges_from([(1, 3), (2, 3), (1, 6), (1, 5), (4, 6)])
+    >>> g1_hashes = nx.weisfeiler_lehman_subgraph_hashes(
+    ...     G1, iterations=3, digest_size=8
+    ... )
+    >>> g2_hashes = nx.weisfeiler_lehman_subgraph_hashes(
+    ...     G2, iterations=3, digest_size=8
+    ... )
+
+    Even though G1 and G2 are not isomorphic (they have different numbers of edges),
+    the hash sequence of depth 3 for node 1 in G1 and node 5 in G2 are similar:
+
+    >>> g1_hashes[1]
+    ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0']
+    >>> g2_hashes[5]
+    ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc']
+
+    The first 2 WL subgraph hashes match. From this we can conclude that it's very
+    likely the neighborhood of 2 hops around these nodes are isomorphic.
+
+    However the 3-hop neighborhoods of ``G1`` and ``G2`` are not isomorphic since the
+    3rd hashes in the lists above are not equal.
+
+    These nodes may be candidates to be classified together since their local topology
+    is similar.
+
+    Notes
+    -----
+    To hash the full graph when subgraph hashes are not needed, use
+    `weisfeiler_lehman_graph_hash` for efficiency.
+
+    Similarity between hashes does not imply similarity between graphs.
+
+    References
+    ----------
+    .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen,
+       Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman
+       Graph Kernels. Journal of Machine Learning Research. 2011.
+       http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf
+    .. [2] Annamalai Narayanan, Mahinthan Chandramohan, Rajasekar Venkatesan,
+       Lihui Chen, Yang Liu and Shantanu Jaiswa. graph2vec: Learning
+       Distributed Representations of Graphs. arXiv. 2017
+       https://arxiv.org/pdf/1707.05005.pdf
+
+    See also
+    --------
+    weisfeiler_lehman_graph_hash
+    """
+
+    def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None):
+        """
+        Apply neighborhood aggregation to each node
+        in the graph.
+        Computes a dictionary with labels for each node.
+        Appends the new hashed label to the dictionary of subgraph hashes
+        originating from and indexed by each node in G
+        """
+        new_labels = {}
+        for node in G.nodes():
+            label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr)
+            hashed_label = _hash_label(label, digest_size)
+            new_labels[node] = hashed_label
+            node_subgraph_hashes[node].append(hashed_label)
+        return new_labels
+
+    node_labels = _init_node_labels(G, edge_attr, node_attr)
+    if include_initial_labels:
+        node_subgraph_hashes = {
+            k: [_hash_label(v, digest_size)] for k, v in node_labels.items()
+        }
+    else:
+        node_subgraph_hashes = defaultdict(list)
+
+    for _ in range(iterations):
+        node_labels = weisfeiler_lehman_step(
+            G, node_labels, node_subgraph_hashes, edge_attr
+        )
+
+    return dict(node_subgraph_hashes)