about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/PyPDF2/_security.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/PyPDF2/_security.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/PyPDF2/_security.py 252
1 files changed, 252 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/PyPDF2/_security.py b/.venv/lib/python3.12/site-packages/PyPDF2/_security.py
new file mode 100644
index 00000000..47e5c373
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/PyPDF2/_security.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2006, Mathieu Fenniak
+# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+"""Anything related to encryption / decryption."""
+
+import struct
+from hashlib import md5
+from typing import Tuple, Union
+
+from ._utils import b_, ord_, str_
+from .generic import ByteStringObject
+
+try:
+    from typing import Literal  # type: ignore[attr-defined]
+except ImportError:
+    # PEP 586 introduced typing.Literal with Python 3.8
+    # For older Python versions, the backport typing_extensions is necessary:
+    from typing_extensions import Literal  # type: ignore[misc]
+
+# 32-byte password padding string used by step 1 of algorithm 3.2 of the
+# standard security handler (see section 3.5.2 of the PDF reference).
+_encryption_padding = bytes.fromhex(
+    "28bf4e5e 4e758a41 64004e56 fffa0108"
+    "2e2e00b6 d0683e80 2f0ca9fe 6453697a"
+)
+
+
+def _alg32(
+    password: str,
+    rev: Literal[2, 3, 4],
+    keylen: int,
+    owner_entry: ByteStringObject,
+    p_entry: int,
+    id1_entry: ByteStringObject,
+    metadata_encrypt: bool = True,
+) -> bytes:
+    """
+    Compute the encryption key (algorithm 3.2, standard security handler).
+
+    See section 3.5.2 of the PDF 1.6 reference.
+
+    Args:
+        password: User password; padded/truncated to 32 characters.
+        rev: Revision of the standard security handler.
+        keylen: Key length in bytes (the spec fixes it at 5 for revision 2).
+        owner_entry: The encryption dictionary's /O entry.
+        p_entry: The /P permission flags; signed or unsigned 32-bit form.
+        id1_entry: First element of the file identifier array.
+        metadata_encrypt: False if document metadata is left unencrypted.
+
+    Returns:
+        The first ``keylen`` bytes of the final MD5 digest.
+    """
+    # Step 1: pad or truncate the password to 32 bytes, taking the missing
+    # bytes from the beginning of _encryption_padding.
+    padded_password = b_((str_(password) + str_(_encryption_padding))[:32])
+    # Steps 2-3: hash the padded password followed by the /O entry.
+    hasher = md5(padded_password)
+    hasher.update(owner_entry.original_bytes)
+    # Step 4: feed /P as an unsigned 4-byte integer, low-order byte first.
+    # Masking to 32 bits yields the same bytes as a signed pack for negative
+    # values, and also accepts a /P stored as its unsigned equivalent, which
+    # struct.pack("<i", ...) rejects with struct.error.
+    hasher.update(struct.pack("<I", p_entry & 0xFFFFFFFF))
+    # Step 5: feed the first element of the file identifier array.
+    hasher.update(id1_entry.original_bytes)
+    # Step 6 (revision 3 or greater): if document metadata is not being
+    # encrypted, feed 4 bytes with the value 0xFFFFFFFF.
+    if rev >= 3 and not metadata_encrypt:
+        hasher.update(b"\xff\xff\xff\xff")
+    # Step 7: finish the hash.
+    digest = hasher.digest()
+    # Step 8 (revision 3 or greater): re-hash the first keylen bytes of the
+    # previous digest, 50 times.
+    if rev >= 3:
+        for _ in range(50):
+            digest = md5(digest[:keylen]).digest()
+    # Step 9: the key is the first keylen bytes of the final digest.
+    return digest[:keylen]
+
+
+def _alg33(
+    owner_password: str, user_password: str, rev: Literal[2, 3, 4], keylen: int
+) -> bytes:
+    """
+    Compute the /O entry (algorithm 3.3, standard security handler).
+
+    See section 3.5.2 of the PDF 1.6 reference.
+
+    Args:
+        owner_password: Owner password from which the RC4 key is derived.
+        user_password: User password that is encrypted with that key.
+        rev: Revision of the standard security handler.
+        keylen: RC4 key length in bytes.
+
+    Returns:
+        The output of the final RC4 invocation, stored as the /O entry.
+    """
+    # Steps 1-4: derive the RC4 key from the owner password.
+    key = _alg33_1(owner_password, rev, keylen)
+    # Step 5: pad or truncate the user password as in step 1 of algorithm
+    # 3.2; str_() normalises the password exactly as _alg32 does.
+    padded = b_((str_(user_password) + str_(_encryption_padding))[:32])
+    # Step 6: RC4-encrypt the padded password with the key from step 4.
+    val = RC4_encrypt(key, padded)
+    # Step 7 (revision 3 or greater): 19 further RC4 passes, each keyed with
+    # every byte of the step-4 key XORed with the pass counter (1 to 19).
+    if rev >= 3:
+        for i in range(1, 20):
+            val = RC4_encrypt(bytes(ord_(k) ^ i for k in key), val)
+    return val
+
+
+def _alg33_1(password: str, rev: Literal[2, 3, 4], keylen: int) -> bytes:
+    """Derive the RC4 key for the /O entry (steps 1-4 of algorithm 3.3).
+
+    Args:
+        password: Owner password, or the user password if there is none.
+        rev: Revision of the standard security handler.
+        keylen: Key length in bytes (the spec fixes it at 5 for revision 2).
+
+    Returns:
+        The first ``keylen`` bytes of the final MD5 digest.
+    """
+    # Step 1: pad or truncate the password as in _alg32 (including str_()).
+    padded = b_((str_(password) + str_(_encryption_padding))[:32])
+    # Step 2: hash the padded password with MD5.
+    digest = md5(padded).digest()
+    # Step 3 (revision 3 or greater): re-hash the whole digest 50 times.
+    if rev >= 3:
+        for _ in range(50):
+            digest = md5(digest).digest()
+    # Step 4: the RC4 key is the first keylen bytes of the final digest.
+    return digest[:keylen]
+
+
+def _alg34(
+    password: str,
+    owner_entry: ByteStringObject,
+    p_entry: int,
+    id1_entry: ByteStringObject,
+) -> Tuple[bytes, bytes]:
+    """
+    Compute the revision 2 /U entry (algorithm 3.4, standard security handler).
+
+    See section 3.5.2 of the PDF 1.6 reference.
+
+    Returns:
+        A ``(U, key)`` pair: the RC4-encrypted padding string and the
+        encryption key that produced it.
+    """
+    # Step 1: build the encryption key from the user password via algorithm
+    # 3.2, always with revision 2 and a 5-byte key.
+    revision: Literal[2] = 2
+    encryption_key = _alg32(
+        password, revision, 5, owner_entry, p_entry, id1_entry
+    )
+    # Steps 2-3: RC4-encrypt the 32-byte padding string; the result is /U.
+    return RC4_encrypt(encryption_key, _encryption_padding), encryption_key
+
+
+def _alg35(
+    password: str,
+    rev: Literal[2, 3, 4],
+    keylen: int,
+    owner_entry: ByteStringObject,
+    p_entry: int,
+    id1_entry: ByteStringObject,
+    metadata_encrypt: bool,
+) -> Tuple[bytes, bytes]:
+    """
+    Compute the /U entry (algorithm 3.5, standard security handler).
+
+    See section 3.5.2 of the PDF 1.6 reference.
+
+    Args:
+        password: User password.
+        rev: Revision of the standard security handler.
+        keylen: Encryption key length in bytes.
+        owner_entry: The encryption dictionary's /O entry.
+        p_entry: The encryption dictionary's /P entry.
+        id1_entry: First element of the file identifier array.
+        metadata_encrypt: False if document metadata is left unencrypted.
+
+    Returns:
+        A ``(U, key)`` pair: the 32-byte /U value and the encryption key.
+    """
+    # Step 1: build the encryption key from the user password via algorithm
+    # 3.2.  metadata_encrypt must be forwarded: step 6 of that algorithm
+    # alters the key when metadata is not encrypted, and leaving it out
+    # silently falls back to _alg32's default of True.
+    key = _alg32(
+        password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt
+    )
+    # Steps 2-3: MD5 the 32-byte padding string followed by the first element
+    # of the file identifier array, then finish the hash.
+    hasher = md5(_encryption_padding)
+    hasher.update(id1_entry.original_bytes)
+    digest = hasher.digest()
+    # Step 4: RC4-encrypt the 16-byte digest with the key from step 1.
+    val = RC4_encrypt(key, digest)
+    # Step 5: 19 further RC4 passes, each keyed with every byte of the step-1
+    # key XORed with the pass counter (1 to 19).
+    for i in range(1, 20):
+        val = RC4_encrypt(bytes(ord_(k) ^ i for k in key), val)
+    # Step 6: append 16 bytes of arbitrary padding to reach the 32-byte /U
+    # value.  Null bytes are used; the original implementer noted this seems
+    # to match a few other implementations.
+    return val + (b"\x00" * 16), key
+
+
+def RC4_encrypt(key: Union[str, bytes], plaintext: bytes) -> bytes:  # TODO
+    """Apply the RC4 stream cipher keyed by ``key`` to ``plaintext``."""
+    S = list(range(256))
+    j = 0
+    for i in range(256):  # key scheduling: permute the state under the key
+        j = (j + S[i] + ord_(key[i % len(key)])) % 256
+        S[i], S[j] = S[j], S[i]
+    i = j = 0
+    out = bytearray()
+    for char in plaintext:  # one state swap and one keystream byte per input
+        i = (i + 1) % 256
+        j = (j + S[i]) % 256
+        S[i], S[j] = S[j], S[i]
+        out.append(ord_(char) ^ S[(S[i] + S[j]) % 256])
+    return bytes(out)