aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/pypdf/_encryption.py
diff options
context:
space:
mode:
author: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/pypdf/_encryption.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz
two version of R2R are here [HEAD, master]
Diffstat (limited to '.venv/lib/python3.12/site-packages/pypdf/_encryption.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/pypdf/_encryption.py 1168
1 files changed, 1168 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pypdf/_encryption.py b/.venv/lib/python3.12/site-packages/pypdf/_encryption.py
new file mode 100644
index 00000000..5ddd8d0e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pypdf/_encryption.py
@@ -0,0 +1,1168 @@
+# Copyright (c) 2022, exiledkingcc
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# * The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+import hashlib
+import secrets
+import struct
+from enum import Enum, IntEnum
+from typing import Any, Dict, Optional, Tuple, Union, cast
+
+from pypdf._crypt_providers import (
+ CryptAES,
+ CryptBase,
+ CryptIdentity,
+ CryptRC4,
+ aes_cbc_decrypt,
+ aes_cbc_encrypt,
+ aes_ecb_decrypt,
+ aes_ecb_encrypt,
+ rc4_decrypt,
+ rc4_encrypt,
+)
+
+from ._utils import b_, logger_warning
+from .generic import (
+ ArrayObject,
+ ByteStringObject,
+ DictionaryObject,
+ NameObject,
+ NumberObject,
+ PdfObject,
+ StreamObject,
+ TextStringObject,
+ create_string_object,
+)
+
+
class CryptFilter:
    """
    Group of ciphers applied to one PDF object and everything nested in it.

    Args:
        stm_crypt: Cipher applied to the data of stream objects.
        str_crypt: Cipher applied to string objects.
        ef_crypt: Cipher kept on the instance as ``ef_crypt``; the methods of
            this class do not reference it.
    """

    def __init__(
        self,
        stm_crypt: CryptBase,
        str_crypt: CryptBase,
        ef_crypt: CryptBase,
    ) -> None:
        self.stm_crypt = stm_crypt
        self.str_crypt = str_crypt
        self.ef_crypt = ef_crypt

    def encrypt_object(self, obj: PdfObject) -> PdfObject:
        """
        Return an encrypted counterpart of ``obj``.

        Strings become new ``ByteStringObject`` instances, streams,
        dictionaries and arrays are rebuilt recursively, and any other
        object is returned unchanged.

        Args:
            obj: The object to encrypt.

        Returns:
            The encrypted object (a new object for every handled type).
        """
        if isinstance(obj, ByteStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.original_bytes))

        if isinstance(obj, TextStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.get_encoded_bytes()))

        if isinstance(obj, StreamObject):
            encrypted_stream = StreamObject()
            encrypted_stream.update(obj)
            encrypted_stream.set_data(self.stm_crypt.encrypt(b_(obj._data)))
            # The stream dictionary entries need encrypting as well.
            for name, entry in obj.items():
                encrypted_stream[name] = self.encrypt_object(entry)
            return encrypted_stream

        if isinstance(obj, DictionaryObject):
            encrypted_dict = DictionaryObject()
            for name, entry in obj.items():
                encrypted_dict[name] = self.encrypt_object(entry)
            return encrypted_dict

        if isinstance(obj, ArrayObject):
            return ArrayObject(map(self.encrypt_object, obj))

        return obj

    def decrypt_object(self, obj: PdfObject) -> PdfObject:
        """
        Decrypt ``obj``, descending into containers.

        Strings are replaced by the result of ``create_string_object``;
        streams, dictionaries and arrays are updated in place and returned.

        Args:
            obj: The object to decrypt.

        Returns:
            The decrypted object.
        """
        if isinstance(obj, (ByteStringObject, TextStringObject)):
            return create_string_object(self.str_crypt.decrypt(obj.original_bytes))

        if isinstance(obj, StreamObject):
            obj._data = self.stm_crypt.decrypt(b_(obj._data))
            # The stream dictionary entries need decrypting as well.
            for name, entry in obj.items():
                obj[name] = self.decrypt_object(entry)
        elif isinstance(obj, DictionaryObject):
            for name, entry in obj.items():
                obj[name] = self.decrypt_object(entry)
        elif isinstance(obj, ArrayObject):
            for index, item in enumerate(obj):
                obj[index] = self.decrypt_object(item)
        return obj
+
+
+_PADDING = (
+ b"\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56\xff\xfa\x01\x08"
+ b"\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c\xa9\xfe\x64\x53\x69\x7a"
+)
+
+
+def _padding(data: bytes) -> bytes:
+ return (data + _PADDING)[:32]
+
+
class AlgV4:
    """
    Password and key algorithms built on MD5 and RC4.

    The static methods implement "Algorithm 2" to "Algorithm 7" as quoted in
    their docstrings. ``rev`` selects between the revision 2 behaviour and the
    behaviour for revision 3 or greater.
    """

    @staticmethod
    def compute_key(
        password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 2: Computing an encryption key.

        a) Pad or truncate the password string to exactly 32 bytes. If the
           password is longer than 32 bytes, use only its first 32 bytes; if
           it is shorter, append the required number of bytes taken from the
           beginning of the padding string
           < 28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08
             2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A >.
           An empty password (no user password) is replaced by the entire
           padding string.
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) Pass the value of the encryption dictionary's O entry to the MD5
           hash function.
        d) Convert the integer value of the P entry to a 32-bit unsigned
           binary number and pass these bytes to the MD5 hash function,
           low-order byte first.
        e) Pass the first element of the file's file identifier array (the
           ID entry of the trailer dictionary) to the MD5 hash function.
        f) (Revision 4 or greater) If document metadata is not being
           encrypted, pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash
           function.
        g) Finish the hash.
        h) (Revision 3 or greater) Do the following 50 times: take the
           output from the previous MD5 hash and pass its first n bytes as
           input into a new MD5 hash, where n is the key length in bytes.
        i) Set the encryption key to the first n bytes of the output from
           the final MD5 hash.

        Args:
            password: The encryption secret as a bytes-string
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits (it is divided by 8 to get
                the number of key bytes)
            o_entry: The owner entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are accepted and reduced to their unsigned 32-bit
                form before hashing.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The first ``key_size // 8`` bytes of the final MD5 digest
        """
        md5 = hashlib.md5(_padding(password))
        md5.update(o_entry)
        # P is stored as a signed 32-bit integer in PDF files; the mask
        # yields the unsigned form step (d) asks for and keeps struct.pack
        # from rejecting negative values.
        md5.update(struct.pack("<I", P & 0xFFFFFFFF))
        md5.update(id1_entry)
        if rev >= 4 and not metadata_encrypted:
            md5.update(b"\xff\xff\xff\xff")
        digest = md5.digest()
        length = key_size // 8
        if rev >= 3:
            for _ in range(50):
                digest = hashlib.md5(digest[:length]).digest()
        return digest[:length]

    @staticmethod
    def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes:
        """
        Algorithm 3, steps (a) to (d): the RC4 key behind the O value.

        a) Pad or truncate the owner password string as described in step
           (a) of "Algorithm 2: Computing an encryption key". If there is no
           owner password, use the user password instead (left to the
           caller; this method only pads what it is given).
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) (Revision 3 or greater) Do the following 50 times: take the
           output from the previous MD5 hash and pass it as input into a new
           MD5 hash.
        d) Create an RC4 encryption key using the first n bytes of the
           output from the final MD5 hash.

        Steps (e) to (h) are carried out by :func:`compute_O_value`.

        Args:
            owner_password: The owner password as bytes
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits

        Returns:
            The RC4 key
        """
        digest = hashlib.md5(_padding(owner_password)).digest()
        if rev >= 3:
            for _ in range(50):
                digest = hashlib.md5(digest).digest()
        return digest[: key_size // 8]

    @staticmethod
    def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
        """
        Algorithm 3, steps (e) to (h): the O (owner password) value.

        e) Pad or truncate the user password string as described in step
           (a) of "Algorithm 2: Computing an encryption key".
        f) Encrypt the result of step (e), using an RC4 encryption function
           with the key from :func:`compute_O_value_key`.
        g) (Revision 3 or greater) Do the following 19 times: take the
           output from the previous RC4 invocation and encrypt it again with
           a key made by XOR-ing every byte of the original key with the
           iteration counter (from 1 to 19).
        h) The output of the final RC4 invocation is the O entry.

        Args:
            rc4_key: The key returned by :func:`compute_O_value_key`
            user_password: The user password as bytes
            rev: The encryption revision (see PDF standard)

        Returns:
            The RC4 encrypted, padded user password
        """
        encrypted = rc4_encrypt(rc4_key, _padding(user_password))
        if rev >= 3:
            for i in range(1, 20):
                round_key = bytes(x ^ i for x in rc4_key)
                encrypted = rc4_encrypt(round_key, encrypted)
        return encrypted

    @staticmethod
    def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
        """
        Algorithms 4 and 5: Computing the U (user password) value.

        Algorithm 4 (revision 2):

        a) Create an encryption key based on the user password string, as
           described in "Algorithm 2: Computing an encryption key".
        b) Encrypt the 32-byte padding string using an RC4 encryption
           function with the encryption key from the preceding step.
        c) Store the result of step (b) as the value of the U entry.

        Algorithm 5 (revision 3 or greater):

        a) Create an encryption key as in Algorithm 2.
        b) Initialize the MD5 hash function and pass the 32-byte padding
           string as input to this function.
        c) Pass the first element of the file identifier array to the hash
           function and finish the hash.
        d) Encrypt the 16-byte result of the hash, using an RC4 encryption
           function with the encryption key from step (a).
        e) Do the following 19 times: encrypt the previous RC4 output again
           with a key made by XOR-ing every byte of the original key with
           the iteration counter (from 1 to 19).
        f) Append 16 bytes of arbitrary padding to the final RC4 output and
           store the 32-byte result as the U entry.

        Args:
            key: The encryption key (step (a) is expected to be done by the
                caller)
            rev: The encryption revision (see PDF standard)
            id1_entry: The first element of the file identifier array

        Returns:
            The 32-byte U value
        """
        if rev <= 2:
            # Algorithm 4: a single RC4 pass over the padding string.
            return rc4_encrypt(key, _PADDING)

        # Algorithm 5, steps (b) to (e).
        md5 = hashlib.md5(_PADDING)
        md5.update(id1_entry)
        encrypted = rc4_encrypt(key, md5.digest())
        for i in range(1, 20):
            round_key = bytes(x ^ i for x in key)
            encrypted = rc4_encrypt(round_key, encrypted)
        # Step (f): the "arbitrary padding" used here is the start of the
        # standard padding string.
        return _padding(encrypted)

    @staticmethod
    def verify_user_password(
        user_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 6: Authenticating the user password.

        a) Perform all but the last step of Algorithm 4 (revision 2) or
           Algorithm 5 (revision 3 or greater) using the supplied password
           string.
        b) If the result of step (a) is equal to the value of the
           encryption dictionary's U entry (comparing on the first 16 bytes
           for revision 3 or greater), the password supplied is the correct
           user password. The key obtained in step (a) shall be used to
           decrypt the document.

        Args:
            user_password: The user password as a bytes stream
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The key if the password matches, otherwise ``b""``
        """
        key = AlgV4.compute_key(
            user_password, rev, key_size, o_entry, P, id1_entry, metadata_encrypted
        )
        u_value = AlgV4.compute_U_value(key, rev, id1_entry)
        if rev >= 3:
            # Only the first 16 bytes are significant; the rest is padding.
            u_value = u_value[:16]
            u_entry = u_entry[:16]
        if u_value != u_entry:
            return b""
        return key

    @staticmethod
    def verify_owner_password(
        owner_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 7: Authenticating the owner password.

        a) Compute an encryption key from the supplied password string, as
           described in steps (a) to (d) of Algorithm 3.
        b) (Revision 2 only) Decrypt the value of the encryption
           dictionary's O entry, using an RC4 encryption function with the
           encryption key computed in step (a).
           (Revision 3 or greater) Do the following 20 times: decrypt the O
           entry (first iteration) or the output from the previous iteration
           (all subsequent iterations) with RC4, using a key made by XOR-ing
           every byte of the original key with the iteration counter (from
           19 to 0).
        c) The result of step (b) purports to be the user password.
           Authenticate this user password using Algorithm 6. If it is
           correct, the password supplied is the correct owner password.

        Args:
            owner_password: The owner password as bytes
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The result of :func:`verify_user_password` for the recovered
            user password: the key on success, ``b""`` otherwise
        """
        rc4_key = AlgV4.compute_O_value_key(owner_password, rev, key_size)

        if rev <= 2:
            user_password = rc4_decrypt(rc4_key, o_entry)
        else:
            user_password = o_entry
            for i in range(19, -1, -1):
                round_key = bytes(x ^ i for x in rc4_key)
                user_password = rc4_decrypt(round_key, user_password)
        return AlgV4.verify_user_password(
            user_password,
            rev,
            key_size,
            o_entry,
            u_entry,
            P,
            id1_entry,
            metadata_encrypted,
        )
+
+
class AlgV5:
    """
    Password and key algorithms built on SHA-2 and AES-256.

    The O and U strings handled here consist of three sections: the first
    32 bytes are a hash value, the next 8 bytes are the Validation Salt and
    the final 8 bytes are the Key Salt. Only the first 48 bytes of either
    string are ever used.
    """

    @staticmethod
    def verify_owner_password(
        R: int, password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes
    ) -> bytes:
        """
        Algorithm 3.2a Computing an encryption key.

        1. The password string is generated from Unicode input by processing
           the input string with the SASLprep (IETF RFC 4013) profile of
           stringprep (IETF RFC 3454), and then converting to a UTF-8
           representation. (Not done here; the caller passes bytes.)
        2. Truncate the UTF-8 representation to 127 bytes if it is longer
           than 127 bytes.
        3. Test the password against the owner key by computing the hash of
           the UTF-8 password concatenated with the 8 bytes of owner
           Validation Salt, concatenated with the 48-byte U string. If the
           32-byte result matches the first 32 bytes of the O string, this
           is the owner password.
           Compute an intermediate owner key by computing the hash of the
           UTF-8 password concatenated with the 8 bytes of owner Key Salt,
           concatenated with the 48-byte U string. The 32-byte result is the
           key used to decrypt the 32-byte OE string using AES-256 in CBC
           mode with no padding and an initialization vector of zero.
           The 32-byte result is the file encryption key.
        4. Test the password against the user key by computing the hash of
           the UTF-8 password concatenated with the 8 bytes of user
           Validation Salt. If the 32-byte result matches the first 32 bytes
           of the U string, this is the user password.
           Compute an intermediate user key by computing the hash of the
           UTF-8 password concatenated with the 8 bytes of user Key Salt.
           The 32-byte result is the key used to decrypt the 32-byte UE
           string using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The 32-byte result is the file
           encryption key.
        5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with
           the file encryption key as the key. Verify that bytes 9-11 of the
           result are the characters 'a', 'd', 'b'. Bytes 0-3 of the
           decrypted Perms entry, treated as a little-endian integer, are
           the user permissions. They should match the value in the P key.

        This method covers steps 2 and 3; see :func:`verify_user_password`
        for step 4 and :func:`verify_perms` for step 5.

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The owner password
            o_value: The O string (hash, Validation Salt, Key Salt)
            oe_value: The OE string holding the encrypted file encryption key
            u_value: The U string; its first 48 bytes enter the hashes

        Returns:
            The decrypted file encryption key, or ``b""`` if the password
            does not match
        """
        password = password[:127]
        u_string = u_value[:48]
        if AlgV5.calculate_hash(R, password, o_value[32:40], u_string) != o_value[:32]:
            return b""
        tmp_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_string)
        return aes_cbc_decrypt(tmp_key, bytes(16), oe_value)

    @staticmethod
    def verify_user_password(
        R: int, password: bytes, u_value: bytes, ue_value: bytes
    ) -> bytes:
        """
        See :func:`verify_owner_password` (step 4 of Algorithm 3.2a).

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The user password
            u_value: The U string (hash, Validation Salt, Key Salt)
            ue_value: The UE string holding the encrypted file encryption key

        Returns:
            The decrypted file encryption key, or ``b""`` if the password
            does not match
        """
        password = password[:127]
        if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]:
            return b""
        tmp_key = AlgV5.calculate_hash(R, password, u_value[40:48], b"")
        return aes_cbc_decrypt(tmp_key, bytes(16), ue_value)

    @staticmethod
    def calculate_hash(R: int, password: bytes, salt: bytes, udata: bytes) -> bytes:
        """
        Hash a password together with a salt and optional user data.

        For ``R < 6`` the result is the plain SHA-256 digest of
        ``password + salt + udata``. Otherwise that digest seeds an iterated
        scheme (taken from qpdf, see the link below): every round
        AES-CBC-encrypts 64 copies of ``password + k + udata`` and hashes
        the ciphertext with SHA-256, SHA-384 or SHA-512, chosen by the sum
        of the first 16 ciphertext bytes modulo 3. At least 64 rounds are
        run; the loop ends once the last ciphertext byte is not larger than
        the round count minus 32.

        Args:
            R: The revision of the standard security handler
            password: The (already truncated) password bytes
            salt: The 8-byte validation or key salt
            udata: The 48-byte U string for owner hashes, ``b""`` otherwise

        Returns:
            A 32-byte hash
        """
        # from https://github.com/qpdf/qpdf/blob/main/libqpdf/QPDF_encryption.cc
        k = hashlib.sha256(password + salt + udata).digest()
        if R < 6:
            return k
        hash_functions = (hashlib.sha256, hashlib.sha384, hashlib.sha512)
        count = 0
        while True:
            count += 1
            k1 = password + k + udata
            e = aes_cbc_encrypt(k[:16], k[16:32], k1 * 64)
            k = hash_functions[sum(e[:16]) % 3](e).digest()
            if count >= 64 and e[-1] <= count - 32:
                break
        return k[:32]

    @staticmethod
    def verify_perms(
        key: bytes, perms: bytes, p: int, metadata_encrypted: bool
    ) -> bool:
        """
        See :func:`verify_owner_password` and :func:`compute_Perms_value`.

        Args:
            key: The key used to AES-ECB-decrypt ``perms`` (the file
                encryption key according to step 5 of Algorithm 3.2a)
            perms: The 16-byte Perms string
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are accepted and reduced to their unsigned 32-bit
                form.
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            True if the first 12 decrypted bytes match the expected block
        """
        b8 = b"T" if metadata_encrypted else b"F"
        # Mask to 32 bits so a signed P does not make struct.pack fail.
        expected = struct.pack("<I", p & 0xFFFFFFFF) + b"\xff\xff\xff\xff" + b8 + b"adb"
        return expected == aes_ecb_decrypt(key, perms)[:12]

    @staticmethod
    def generate_values(
        R: int,
        user_password: bytes,
        owner_password: bytes,
        key: bytes,
        p: int,
        metadata_encrypted: bool,
    ) -> Dict[Any, Any]:
        """
        Compute the password-dependent entries of an encryption dictionary.

        Both passwords are truncated to 127 bytes first.

        Args:
            R: The revision of the standard security handler
            user_password: The user password
            owner_password: The owner password
            key: The file encryption key
            p: The permission flags
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            A dictionary with the keys "/U", "/UE", "/O", "/OE" and "/Perms"
        """
        user_password = user_password[:127]
        owner_password = owner_password[:127]
        u_value, ue_value = AlgV5.compute_U_value(R, user_password, key)
        o_value, oe_value = AlgV5.compute_O_value(R, owner_password, key, u_value)
        perms = AlgV5.compute_Perms_value(key, p, metadata_encrypted)
        return {
            "/U": u_value,
            "/UE": ue_value,
            "/O": o_value,
            "/OE": oe_value,
            "/Perms": perms,
        }

    @staticmethod
    def compute_U_value(R: int, password: bytes, key: bytes) -> Tuple[bytes, bytes]:
        """
        Algorithm 3.8 Computing the encryption dictionary's U (user password)
        and UE (user encryption key) values.

        1. Generate 16 random bytes of data using a strong random number
           generator. The first 8 bytes are the User Validation Salt. The
           second 8 bytes are the User Key Salt. Compute the 32-byte hash of
           the password concatenated with the User Validation Salt. The
           48-byte string consisting of the 32-byte hash followed by the
           User Validation Salt followed by the User Key Salt is stored as
           the U key.
        2. Compute the 32-byte hash of the password concatenated with the
           User Key Salt. Using this hash as the key, encrypt the file
           encryption key using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The resulting 32-byte string is
           stored as the UE key.

        Args:
            R: The revision of the standard security handler
            password: The user password
            key: The file encryption key

        Returns:
            A tuple (u-value, ue value)
        """
        random_bytes = secrets.token_bytes(16)
        val_salt, key_salt = random_bytes[:8], random_bytes[8:]
        u_value = AlgV5.calculate_hash(R, password, val_salt, b"") + val_salt + key_salt

        tmp_key = AlgV5.calculate_hash(R, password, key_salt, b"")
        ue_value = aes_cbc_encrypt(tmp_key, bytes(16), key)
        return u_value, ue_value

    @staticmethod
    def compute_O_value(
        R: int, password: bytes, key: bytes, u_value: bytes
    ) -> Tuple[bytes, bytes]:
        """
        Algorithm 3.9 Computing the encryption dictionary's O (owner password)
        and OE (owner encryption key) values.

        1. Generate 16 random bytes of data using a strong random number
           generator. The first 8 bytes are the Owner Validation Salt. The
           second 8 bytes are the Owner Key Salt. Compute the 32-byte hash
           of the password concatenated with the Owner Validation Salt and
           then concatenated with the 48-byte U string as generated in
           Algorithm 3.8. The 48-byte string consisting of the 32-byte hash
           followed by the Owner Validation Salt followed by the Owner Key
           Salt is stored as the O key.
        2. Compute the 32-byte hash of the password concatenated with the
           Owner Key Salt and then concatenated with the 48-byte U string as
           generated in Algorithm 3.8. Using this hash as the key, encrypt
           the file encryption key using AES-256 in CBC mode with no padding
           and an initialization vector of zero. The resulting 32-byte
           string is stored as the OE key.

        Args:
            R: The revision of the standard security handler
            password: The owner password
            key: The file encryption key
            u_value: The U string; only its first 48 bytes are used

        Returns:
            A tuple (O value, OE value)
        """
        random_bytes = secrets.token_bytes(16)
        val_salt, key_salt = random_bytes[:8], random_bytes[8:]
        # Both hashes take the same 48-byte U string that
        # verify_owner_password uses, so a longer u_value still verifies.
        u_string = u_value[:48]
        o_value = (
            AlgV5.calculate_hash(R, password, val_salt, u_string) + val_salt + key_salt
        )
        tmp_key = AlgV5.calculate_hash(R, password, key_salt, u_string)
        oe_value = aes_cbc_encrypt(tmp_key, bytes(16), key)
        return o_value, oe_value

    @staticmethod
    def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
        """
        Algorithm 3.10 Computing the encryption dictionary's Perms
        (permissions) value.

        1. Extend the permissions (contents of the P integer) to 64 bits by
           setting the upper 32 bits to all 1's.
           (This allows for future extension without changing the format.)
        2. Record the 8 bytes of permission in the bytes 0-7 of the block,
           low order byte first.
        3. Set byte 8 to the ASCII value 'T' or 'F' according to the
           EncryptMetadata Boolean.
        4. Set bytes 9-11 to the ASCII characters 'a', 'd', 'b'.
        5. Set bytes 12-15 to 4 bytes of random data, which will be ignored.
        6. Encrypt the 16-byte block using AES-256 in ECB mode, using the
           file encryption key as the key. The result (16 bytes) is stored
           as the Perms string, and checked for validity when the file is
           opened.

        Args:
            key: The file encryption key
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. Negative
                values are accepted and reduced to their unsigned 32-bit
                form.
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The perms value
        """
        b8 = b"T" if metadata_encrypted else b"F"
        random_tail = secrets.token_bytes(4)
        # Mask to 32 bits so a signed P does not make struct.pack fail.
        block = (
            struct.pack("<I", p & 0xFFFFFFFF)
            + b"\xff\xff\xff\xff"
            + b8
            + b"adb"
            + random_tail
        )
        return aes_ecb_encrypt(key, block)
+
+
class PasswordType(IntEnum):
    """
    Which kind of password, if any, has been accepted.

    ``Encryption`` starts out with ``NOT_DECRYPTED`` and reports itself as
    decrypted once its password type differs from that value.
    """

    NOT_DECRYPTED = 0
    USER_PASSWORD = 1
    OWNER_PASSWORD = 2
+
+
class EncryptAlgorithm(tuple, Enum):  # type: ignore # noqa: SLOT001
    """
    Named encryption schemes.

    Every member is a ``(V, R, Length)`` tuple: the algorithm code, the
    revision of the standard security handler and the key length in bits.
    """

    # V, R, Length
    RC4_40 = (1, 2, 40)
    RC4_128 = (2, 3, 128)
    AES_128 = (4, 4, 128)
    AES_256_R5 = (5, 5, 256)
    AES_256 = (5, 6, 256)
+
+
class EncryptionValues:
    """
    Plain attribute holder for the byte strings of an encryption dictionary.

    The attributes are only declared here, not assigned; the names match the
    "/O", "/U", "/OE", "/UE" and "/Perms" keys produced by
    ``AlgV5.generate_values``.
    """

    O: bytes  # noqa
    U: bytes
    OE: bytes
    UE: bytes
    Perms: bytes
+
+
+class Encryption:
+ """
+ Collects and manages parameters for PDF document encryption and decryption.
+
+ Args:
+ V: A code specifying the algorithm to be used in encrypting and
+ decrypting the document.
+ R: The revision of the standard security handler.
+ Length: The length of the encryption key in bits.
+ P: A set of flags specifying which operations shall be permitted
+ when the document is opened with user access
+ entry: The encryption dictionary object.
+ EncryptMetadata: Whether to encrypt metadata in the document.
+ first_id_entry: The first 16 bytes of the file's original ID.
+ StmF: The name of the crypt filter that shall be used by default
+ when decrypting streams.
+ StrF: The name of the crypt filter that shall be used when decrypting
+ all strings in the document.
+ EFF: The name of the crypt filter that shall be used when
+ encrypting embedded file streams that do not have their own
+ crypt filter specifier.
+ values: Additional encryption parameters.
+ """
+
+ def __init__(
+ self,
+ V: int,
+ R: int,
+ Length: int,
+ P: int,
+ entry: DictionaryObject,
+ EncryptMetadata: bool,
+ first_id_entry: bytes,
+ StmF: str,
+ StrF: str,
+ EFF: str,
+ values: Optional[EncryptionValues],
+ ) -> None:
+ # §7.6.2, entries common to all encryption dictionaries
+ # use same name as keys of encryption dictionaries entries
+ self.V = V
+ self.R = R
+ self.Length = Length # key_size
+ self.P = (P + 0x100000000) % 0x100000000 # maybe P < 0
+ self.EncryptMetadata = EncryptMetadata
+ self.id1_entry = first_id_entry
+ self.StmF = StmF
+ self.StrF = StrF
+ self.EFF = EFF
+ self.values: EncryptionValues = values if values else EncryptionValues()
+
+ self._password_type = PasswordType.NOT_DECRYPTED
+ self._key: Optional[bytes] = None
+
+ def is_decrypted(self) -> bool:
+ return self._password_type != PasswordType.NOT_DECRYPTED
+
+ def encrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
+ # skip calculate key
+ if not self._is_encryption_object(obj):
+ return obj
+
+ cf = self._make_crypt_filter(idnum, generation)
+ return cf.encrypt_object(obj)
+
+ def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
+ # skip calculate key
+ if not self._is_encryption_object(obj):
+ return obj
+
+ cf = self._make_crypt_filter(idnum, generation)
+ return cf.decrypt_object(obj)
+
+ @staticmethod
+ def _is_encryption_object(obj: PdfObject) -> bool:
+ return isinstance(
+ obj,
+ (
+ ByteStringObject,
+ TextStringObject,
+ StreamObject,
+ ArrayObject,
+ DictionaryObject,
+ ),
+ )
+
def _make_crypt_filter(self, idnum: int, generation: int) -> CryptFilter:
    """
    Build the crypt filter for the object ``idnum``/``generation``.

    Algorithm 1 (encryption of data using the RC4 or AES algorithms):

    a) Take the object number and generation number of the string or
       stream (for a direct string, those of the containing indirect
       object).
    b) Extend the n-byte file encryption key to n + 5 bytes by appending
       the low-order 3 bytes of the object number and the low-order
       2 bytes of the generation number, low-order byte first.
       n is 5 when V is 1, otherwise Length / 8. For AES, append the
       4 bytes "sAlT" (0x73, 0x41, 0x6C, 0x54) as well; this exists for
       backward compatibility and adds no security.
    c) Feed the result of step (b) to MD5.
    d) Use the first n + 5 bytes of the digest, capped at 16, as the
       RC4 or AES key. AES runs in CBC mode with a 16-byte block size;
       the 16-byte random initialization vector is stored as the first
       16 bytes of the encrypted string or stream.

    Algorithm 3.1a (AES-256): the 32-byte file encryption key is used
    directly, again in CBC mode with the 16-byte random initialization
    vector stored in front of the encrypted data.

    Args:
        idnum: Object number.
        generation: Generation number.

    Returns:
        A ``CryptFilter`` built from the stream, string and embedded-file
        crypt objects selected by ``StmF``, ``StrF`` and ``EFF``.
    """
    # Little-endian, keeping only the low-order bytes required by step (b).
    obj_bytes = struct.pack("<i", idnum)[:3]
    gen_bytes = struct.pack("<i", generation)[:2]

    assert self._key
    file_key = self._key

    n = self.Length // 8 if self.V != 1 else 5
    digest_len = min(n + 5, 16)

    md5 = hashlib.md5(file_key[:n] + obj_bytes + gen_bytes)
    rc4_key = md5.digest()[:digest_len]

    # AES-128 hashes the same input followed by the "sAlT" marker.
    md5.update(b"sAlT")
    aes128_key = md5.digest()[:digest_len]

    # AES-256 uses the file encryption key as is.
    aes256_key = file_key

    stm_crypt, str_crypt, ef_crypt = (
        self._get_crypt(method, rc4_key, aes128_key, aes256_key)
        for method in (self.StmF, self.StrF, self.EFF)
    )
    return CryptFilter(stm_crypt, str_crypt, ef_crypt)
+
@staticmethod
def _get_crypt(
    method: str, rc4_key: bytes, aes128_key: bytes, aes256_key: bytes
) -> CryptBase:
    """
    Pick the crypt implementation for a crypt filter method name.

    Args:
        method: Crypt filter method name.
        rc4_key: Key handed to ``CryptRC4``.
        aes128_key: Key handed to ``CryptAES`` for ``/AESV2``.
        aes256_key: Key handed to ``CryptAES`` for ``/AESV3``.

    Returns:
        ``CryptIdentity`` for ``/Identity``, ``CryptAES`` for ``/AESV2``
        and ``/AESV3``, and ``CryptRC4`` for every other method name.
    """
    if method == "/Identity":
        return CryptIdentity()
    if method == "/AESV2":
        return CryptAES(aes128_key)
    if method == "/AESV3":
        return CryptAES(aes256_key)
    # Any remaining method name is handled with RC4.
    return CryptRC4(rc4_key)
+
+ @staticmethod
+ def _encode_password(password: Union[bytes, str]) -> bytes:
+ if isinstance(password, str):
+ try:
+ pwd = password.encode("latin-1")
+ except Exception:
+ pwd = password.encode("utf-8")
+ else:
+ pwd = password
+ return pwd
+
def verify(self, password: Union[bytes, str]) -> PasswordType:
    """
    Check ``password`` and remember the key when it is accepted.

    Args:
        password: The password to try, as text or bytes.

    Returns:
        The password type reported by ``verify_v4`` (V <= 4) or
        ``verify_v5`` (otherwise). The stored key and password type are
        only updated when that result is not ``NOT_DECRYPTED``.
    """
    pwd = self._encode_password(password)
    if self.V <= 4:
        key, rc = self.verify_v4(pwd)
    else:
        key, rc = self.verify_v5(pwd)

    if rc == PasswordType.NOT_DECRYPTED:
        # Leave any previously stored key and password type untouched.
        return rc

    self._password_type = rc
    self._key = key
    return rc
+
def verify_v4(self, password: bytes) -> Tuple[bytes, PasswordType]:
    """
    Try ``password`` as owner password, then as user password (``AlgV4``).

    Args:
        password: The encoded password to try.

    Returns:
        ``(key, password_type)`` for the first check that yields a truthy
        key, or ``(b"", PasswordType.NOT_DECRYPTED)`` when both fail.
    """
    # Both checks take exactly the same arguments.
    check_args = (
        password,
        self.R,
        self.Length,
        self.values.O,
        self.values.U,
        self.P,
        self.id1_entry,
        self.EncryptMetadata,
    )
    # The owner password is verified first.
    attempts = (
        (AlgV4.verify_owner_password, PasswordType.OWNER_PASSWORD),
        (AlgV4.verify_user_password, PasswordType.USER_PASSWORD),
    )
    for check, password_type in attempts:
        key = check(*check_args)
        if key:
            return key, password_type
    return b"", PasswordType.NOT_DECRYPTED
+
def verify_v5(self, password: bytes) -> Tuple[bytes, PasswordType]:
    """
    Try ``password`` as owner password, then as user password (``AlgV5``).

    A failed ``/Perms`` check is only logged as a warning; the key is
    still returned.

    Args:
        password: The encoded password to try.

    Returns:
        ``(key, password_type)`` for the first check that yields a truthy
        key, or ``(b"", PasswordType.NOT_DECRYPTED)`` when both fail.
    """
    # TODO: use SASLprep process
    # The owner password is verified first.
    key = AlgV5.verify_owner_password(
        self.R, password, self.values.O, self.values.OE, self.values.U
    )
    if key:
        rc = PasswordType.OWNER_PASSWORD
    else:
        key = AlgV5.verify_user_password(
            self.R, password, self.values.U, self.values.UE
        )
        if not key:
            return b"", PasswordType.NOT_DECRYPTED
        rc = PasswordType.USER_PASSWORD

    perms_ok = AlgV5.verify_perms(
        key, self.values.Perms, self.P, self.EncryptMetadata
    )
    if not perms_ok:
        logger_warning("ignore '/Perms' verify failed", __name__)
    return key, rc
+
def write_entry(
    self, user_password: str, owner_password: Optional[str]
) -> DictionaryObject:
    """
    Compute the password-dependent values and build the encryption dictionary.

    Args:
        user_password: The user password.
        owner_password: The owner password; when it is ``None`` or empty
            the user password is used in its place.

    Returns:
        A new ``DictionaryObject`` holding ``/V``, ``/R``, ``/Length``,
        ``/P``, ``/Filter``, ``/O`` and ``/U``, plus the ``/CF``, ``/StmF``
        and ``/StrF`` entries when V >= 4 and ``/OE``, ``/UE`` and
        ``/Perms`` when V >= 5.
    """
    user_pwd = self._encode_password(user_password)
    owner_pwd = (
        self._encode_password(owner_password) if owner_password else user_pwd
    )

    if self.V > 4:
        # A fresh random file key is generated for this case.
        self._key = secrets.token_bytes(self.Length // 8)
        generated = AlgV5.generate_values(
            self.R, user_pwd, owner_pwd, self._key, self.P, self.EncryptMetadata
        )
        self.values.O = generated["/O"]
        self.values.U = generated["/U"]
        self.values.OE = generated["/OE"]
        self.values.UE = generated["/UE"]
        self.values.Perms = generated["/Perms"]
    else:
        self.compute_values_v4(user_pwd, owner_pwd)

    encrypt_dict = DictionaryObject()
    # /EncryptMetadata is not written.
    common_entries = (
        ("/V", NumberObject(self.V)),
        ("/R", NumberObject(self.R)),
        ("/Length", NumberObject(self.Length)),
        ("/P", NumberObject(self.P)),
        ("/Filter", NameObject("/Standard")),
        ("/O", ByteStringObject(self.values.O)),
        ("/U", ByteStringObject(self.values.U)),
    )
    for name, value in common_entries:
        encrypt_dict[NameObject(name)] = value

    if self.V >= 4:
        # TODO: allow different method
        std_filter = DictionaryObject()
        std_filter[NameObject("/AuthEvent")] = NameObject("/DocOpen")
        std_filter[NameObject("/CFM")] = NameObject(self.StmF)
        std_filter[NameObject("/Length")] = NumberObject(self.Length // 8)

        crypt_filters = DictionaryObject()
        crypt_filters[NameObject("/StdCF")] = std_filter

        encrypt_dict[NameObject("/CF")] = crypt_filters
        encrypt_dict[NameObject("/StmF")] = NameObject("/StdCF")
        encrypt_dict[NameObject("/StrF")] = NameObject("/StdCF")
        # /EFF is not written.

    if self.V >= 5:
        v5_entries = (
            ("/OE", ByteStringObject(self.values.OE)),
            ("/UE", ByteStringObject(self.values.UE)),
            ("/Perms", ByteStringObject(self.values.Perms)),
        )
        for name, value in v5_entries:
            encrypt_dict[NameObject(name)] = value
    return encrypt_dict
+
def compute_values_v4(self, user_password: bytes, owner_password: bytes) -> None:
    """
    Derive the file key and the ``/O`` and ``/U`` values with ``AlgV4``.

    The results are stored on the instance (``_key``, ``values.O``,
    ``values.U``) only after all of them have been computed.

    Args:
        user_password: The encoded user password.
        owner_password: The encoded owner password.
    """
    owner_rc4_key = AlgV4.compute_O_value_key(owner_password, self.R, self.Length)
    owner_value = AlgV4.compute_O_value(owner_rc4_key, user_password, self.R)

    # The file key depends on the freshly computed /O value.
    file_key = AlgV4.compute_key(
        user_password,
        self.R,
        self.Length,
        owner_value,
        self.P,
        self.id1_entry,
        self.EncryptMetadata,
    )
    user_value = AlgV4.compute_U_value(file_key, self.R, self.id1_entry)

    self.values.O = owner_value
    self.values.U = user_value
    self._key = file_key
+
@staticmethod
def read(encryption_entry: DictionaryObject, first_id_entry: bytes) -> "Encryption":
    """
    Build an ``Encryption`` from the encryption dictionary of a document.

    Args:
        encryption_entry: The encryption dictionary.
        first_id_entry: Passed through to the constructor as
            ``first_id_entry``.

    Returns:
        The ``Encryption`` instance describing ``encryption_entry``.

    Raises:
        NotImplementedError: If ``/Filter`` is not ``/Standard``, a
            ``/SubFilter`` is present, ``/V`` is not one of 1 to 5, or a
            resolved crypt method is not ``/Identity``, ``/V2``,
            ``/AESV2`` or ``/AESV3``.
        KeyError: If a required entry (``/R``, ``/P``, ``/O``, ``/U``) or
            a crypt filter named by ``/StmF``, ``/StrF`` or ``/EFF`` is
            missing.
    """
    if encryption_entry.get("/Filter") != "/Standard":
        raise NotImplementedError(
            "only Standard PDF encryption handler is available"
        )
    if "/SubFilter" in encryption_entry:
        raise NotImplementedError("/SubFilter NOT supported")

    # Method used for V < 4, where there are no crypt filters.
    stm_filter = "/V2"
    str_filter = "/V2"
    ef_filter = "/V2"

    alg_ver = encryption_entry.get("/V", 0)
    if alg_ver not in (1, 2, 3, 4, 5):
        raise NotImplementedError(f"Encryption V={alg_ver} NOT supported")
    if alg_ver >= 4:
        # /CF is optional: a document relying only on the default
        # /Identity filters may omit it, so its absence alone must not
        # fail. A named filter that is missing still raises KeyError below.
        filters = encryption_entry["/CF"] if "/CF" in encryption_entry else {}

        stm_filter = encryption_entry.get("/StmF", "/Identity")
        str_filter = encryption_entry.get("/StrF", "/Identity")
        # /EFF defaults to the stream filter *name*, read before that
        # name is resolved to its method below.
        ef_filter = encryption_entry.get("/EFF", stm_filter)

        # Replace each crypt filter name by the method (/CFM) it selects.
        if stm_filter != "/Identity":
            stm_filter = filters[stm_filter]["/CFM"]  # type: ignore
        if str_filter != "/Identity":
            str_filter = filters[str_filter]["/CFM"]  # type: ignore
        if ef_filter != "/Identity":
            ef_filter = filters[ef_filter]["/CFM"]  # type: ignore

        allowed_methods = ("/Identity", "/V2", "/AESV2", "/AESV3")
        if stm_filter not in allowed_methods:
            raise NotImplementedError(f"StmF Method {stm_filter} NOT supported!")
        if str_filter not in allowed_methods:
            raise NotImplementedError(f"StrF Method {str_filter} NOT supported!")
        if ef_filter not in allowed_methods:
            raise NotImplementedError(f"EFF Method {ef_filter} NOT supported!")

    alg_rev = cast(int, encryption_entry["/R"])
    perm_flags = cast(int, encryption_entry["/P"])
    key_bits = encryption_entry.get("/Length", 40)
    # A missing /EncryptMetadata means True.
    encrypt_metadata = encryption_entry.get("/EncryptMetadata")
    encrypt_metadata = (
        encrypt_metadata.value if encrypt_metadata is not None else True
    )
    values = EncryptionValues()
    values.O = cast(ByteStringObject, encryption_entry["/O"]).original_bytes
    values.U = cast(ByteStringObject, encryption_entry["/U"]).original_bytes
    # These three entries fall back to an empty byte string when absent.
    values.OE = encryption_entry.get("/OE", ByteStringObject()).original_bytes
    values.UE = encryption_entry.get("/UE", ByteStringObject()).original_bytes
    values.Perms = encryption_entry.get("/Perms", ByteStringObject()).original_bytes
    return Encryption(
        V=alg_ver,
        R=alg_rev,
        Length=key_bits,
        P=perm_flags,
        EncryptMetadata=encrypt_metadata,
        first_id_entry=first_id_entry,
        values=values,
        StrF=str_filter,
        StmF=stm_filter,
        EFF=ef_filter,
        entry=encryption_entry,  # Dummy entry for the moment; will get removed
    )
+
@staticmethod
def make(
    alg: EncryptAlgorithm, permissions: int, first_id_entry: bytes
) -> "Encryption":
    """
    Create an ``Encryption`` for writing with the given algorithm.

    Args:
        alg: The algorithm; it unpacks into ``(V, R, key length)``.
        permissions: Value used for ``P``.
        first_id_entry: Passed through to the constructor as
            ``first_id_entry``.

    Returns:
        An ``Encryption`` with ``EncryptMetadata`` set to True, no
        precomputed values, and the same crypt method for streams,
        strings and embedded files: ``/AESV2`` for ``AES_128``,
        ``/AESV3`` for ``AES_256_R5`` and ``AES_256``, ``/V2`` otherwise.
    """
    alg_ver, alg_rev, key_bits = alg

    if alg == EncryptAlgorithm.AES_128:
        method = "/AESV2"
    elif alg in (EncryptAlgorithm.AES_256_R5, EncryptAlgorithm.AES_256):
        method = "/AESV3"
    else:
        method = "/V2"

    return Encryption(
        V=alg_ver,
        R=alg_rev,
        Length=key_bits,
        P=permissions,
        EncryptMetadata=True,
        first_id_entry=first_id_entry,
        values=None,
        StrF=method,
        StmF=method,
        EFF=method,
        entry=DictionaryObject(),  # Dummy entry for the moment; will get removed
    )