diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/PyPDF2/_security.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/PyPDF2/_security.py | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/PyPDF2/_security.py b/.venv/lib/python3.12/site-packages/PyPDF2/_security.py new file mode 100644 index 00000000..47e5c373 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/PyPDF2/_security.py @@ -0,0 +1,252 @@ +# Copyright (c) 2006, Mathieu Fenniak +# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com> +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# * The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. 
"""Anything related to encryption / decryption."""

import struct
from hashlib import md5
from typing import Tuple, Union

from ._utils import b_, ord_, str_
from .generic import ByteStringObject

try:
    from typing import Literal  # type: ignore[attr-defined]
except ImportError:
    # PEP 586 introduced typing.Literal with Python 3.8
    # For older Python versions, the backport typing_extensions is necessary:
    from typing_extensions import Literal  # type: ignore[misc]

# ref: pdf1.8 spec section 3.5.2 algorithm 3.2
# 32-byte padding string used to pad passwords and as RC4 input for /U.
_encryption_padding = (
    b"\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56"
    b"\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c"
    b"\xa9\xfe\x64\x53\x69\x7a"
)


def _alg32(
    password: str,
    rev: Literal[2, 3, 4],
    keylen: int,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
    metadata_encrypt: bool = True,
) -> bytes:
    """
    Implementation of algorithm 3.2 of the PDF standard security handler.

    See section 3.5.2 of the PDF 1.6 reference.

    :param password: user password string.
    :param rev: security handler revision; steps 6 and 8 only run for
        revision 3 or greater.
    :param keylen: number of bytes of the resulting encryption key.
    :param owner_entry: the encryption dictionary's /O entry; its
        ``original_bytes`` are hashed.
    :param p_entry: the encryption dictionary's /P entry. Accepted either
        as a signed 32-bit value or as its unsigned 32-bit equivalent.
    :param id1_entry: first element of the file identifier array; its
        ``original_bytes`` are hashed.
    :param metadata_encrypt: when false (and ``rev >= 3``) four 0xFF bytes
        are added to the hash.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    """
    # 1. Pad or truncate the password string to exactly 32 bytes. If the
    # password string is more than 32 bytes long, use only its first 32 bytes;
    # if it is less than 32 bytes long, pad it by appending the required number
    # of additional bytes from the beginning of the padding string
    # (_encryption_padding).
    password_bytes = b_((str_(password) + str_(_encryption_padding))[:32])
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    m = md5(password_bytes)
    # 3. Pass the value of the encryption dictionary's /O entry to the MD5 hash
    # function.
    m.update(owner_entry.original_bytes)
    # 4. Treat the value of the /P entry as an unsigned 4-byte integer and pass
    # these bytes to the MD5 hash function, low-order byte first.
    # Masking to 32 bits produces the same bytes as a signed "<i" pack for
    # every value in the signed range, and additionally accepts the unsigned
    # spelling (>= 2**31), which "<i" rejects with struct.error.
    p_entry_bytes = struct.pack("<I", p_entry & 0xFFFFFFFF)
    m.update(p_entry_bytes)
    # 5. Pass the first element of the file's file identifier array to the MD5
    # hash function.
    m.update(id1_entry.original_bytes)
    # 6. (Revision 3 or greater) If document metadata is not being encrypted,
    # pass 4 bytes with the value 0xFFFFFFFF to the MD5 hash function.
    if rev >= 3 and not metadata_encrypt:
        m.update(b"\xff\xff\xff\xff")
    # 7. Finish the hash.
    md5_hash = m.digest()
    # 8. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass the first n bytes of the output as
    # input into a new MD5 hash, where n is the number of bytes of the
    # encryption key as defined by the value of the encryption dictionary's
    # /Length entry.
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash[:keylen]).digest()
    # 9. Set the encryption key to the first n bytes of the output from the
    # final MD5 hash, where n is always 5 for revision 2 but, for revision 3 or
    # greater, depends on the value of the encryption dictionary's /Length
    # entry.
    return md5_hash[:keylen]


def _alg33(
    owner_password: str, user_password: str, rev: Literal[2, 3, 4], keylen: int
) -> bytes:
    """
    Implementation of algorithm 3.3 of the PDF standard security handler,
    section 3.5.2 of the PDF 1.6 reference.

    :param owner_password: owner password string, used to derive the RC4 key.
    :param user_password: user password string, padded and then encrypted.
    :param rev: security handler revision; the 19 extra RC4 rounds only run
        for revision 3 or greater.
    :param keylen: number of bytes of the RC4 key.
    :return: the value to store as the /O entry.
    """
    # steps 1 - 4
    key = _alg33_1(owner_password, rev, keylen)
    # 5. Pad or truncate the user password string as described in step 1 of
    # algorithm 3.2.
    user_password_bytes = b_((user_password + str_(_encryption_padding))[:32])
    # 6. Encrypt the result of step 5, using an RC4 encryption function with
    # the encryption key obtained in step 4.
    val = RC4_encrypt(key, user_password_bytes)
    # 7. (Revision 3 or greater) Do the following 19 times: Take the output
    # from the previous invocation of the RC4 function and pass it as input to
    # a new invocation of the function; use an encryption key generated by
    # taking each byte of the encryption key obtained in step 4 and performing
    # an XOR operation between that byte and the single-byte value of the
    # iteration counter (from 1 to 19).
    if rev >= 3:
        for i in range(1, 20):
            # Built as bytes in one pass, the same way _alg35 derives its
            # per-round keys.
            new_key = bytes(ord_(key_byte) ^ i for key_byte in key)
            val = RC4_encrypt(new_key, val)
    # 8. Store the output from the final invocation of the RC4 as the value of
    # the /O entry in the encryption dictionary.
    return val


def _alg33_1(password: str, rev: Literal[2, 3, 4], keylen: int) -> bytes:
    """
    Steps 1-4 of algorithm 3.3.

    :param password: password string to derive the key from.
    :param rev: security handler revision; the 50 extra MD5 rounds only run
        for revision 3 or greater.
    :param keylen: number of bytes of the resulting RC4 key.
    :return: the first ``keylen`` bytes of the final MD5 digest.
    """
    # 1. Pad or truncate the owner password string as described in step 1 of
    # algorithm 3.2. If there is no owner password, use the user password
    # instead.
    # (The fallback to the user password is not performed here; this function
    # uses whatever password it is given.)
    password_bytes = b_((password + str_(_encryption_padding))[:32])
    # 2. Initialize the MD5 hash function and pass the result of step 1 as
    # input to this function.
    m = md5(password_bytes)
    # 3. (Revision 3 or greater) Do the following 50 times: Take the output
    # from the previous MD5 hash and pass it as input into a new MD5 hash.
    md5_hash = m.digest()
    if rev >= 3:
        for _ in range(50):
            md5_hash = md5(md5_hash).digest()
    # 4. Create an RC4 encryption key using the first n bytes of the output
    # from the final MD5 hash, where n is always 5 for revision 2 but, for
    # revision 3 or greater, depends on the value of the encryption
    # dictionary's /Length entry.
    return md5_hash[:keylen]


def _alg34(
    password: str,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
) -> Tuple[bytes, bytes]:
    """
    Implementation of algorithm 3.4 of the PDF standard security handler.

    See section 3.5.2 of the PDF 1.6 reference.

    The key is always derived with revision 2 and a 5-byte key length.

    :param password: user password string.
    :param owner_entry: the encryption dictionary's /O entry.
    :param p_entry: the encryption dictionary's /P entry.
    :param id1_entry: first element of the file identifier array.
    :return: ``(U, key)`` - the computed /U value and the encryption key.
    """
    # 1. Create an encryption key based on the user password string, as
    # described in algorithm 3.2.
    rev: Literal[2] = 2
    keylen = 5
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Encrypt the 32-byte padding string shown in step 1 of algorithm 3.2,
    # using an RC4 encryption function with the encryption key from the
    # preceding step.
    U = RC4_encrypt(key, _encryption_padding)
    # 3. Store the result of step 2 as the value of the /U entry in the
    # encryption dictionary.
    return U, key


def _alg35(
    password: str,
    rev: Literal[2, 3, 4],
    keylen: int,
    owner_entry: ByteStringObject,
    p_entry: int,
    id1_entry: ByteStringObject,
    metadata_encrypt: bool,
) -> Tuple[bytes, bytes]:
    """
    Implementation of algorithm 3.5 of the PDF standard security handler.

    See section 3.5.2 of the PDF 1.6 reference.

    :param password: user password string.
    :param rev: security handler revision, passed through to ``_alg32``.
    :param keylen: number of bytes of the encryption key.
    :param owner_entry: the encryption dictionary's /O entry.
    :param p_entry: the encryption dictionary's /P entry.
    :param id1_entry: first element of the file identifier array.
    :param metadata_encrypt: accepted for interface compatibility; currently
        not used by this function (see the note on step 1).
    :return: ``(U, key)`` - the 32-byte /U value (16 computed bytes followed
        by 16 null bytes) and the encryption key.
    """
    # 1. Create an encryption key based on the user password string, as
    # described in Algorithm 3.2.
    # NOTE(review): metadata_encrypt is not forwarded, so _alg32 always runs
    # with its default (True) and step 6 of algorithm 3.2 never fires from
    # here. Forwarding it would change the derived key for existing callers;
    # confirm what callers pass before changing this.
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
    # 2. Initialize the MD5 hash function and pass the 32-byte padding string
    # shown in step 1 of Algorithm 3.2 as input to this function.
    m = md5()
    m.update(_encryption_padding)
    # 3. Pass the first element of the file's file identifier array (the value
    # of the ID entry in the document's trailer dictionary; see Table 3.13 on
    # page 73) to the hash function and finish the hash. (See implementation
    # note 25 in Appendix H.)
    m.update(id1_entry.original_bytes)
    md5_hash = m.digest()
    # 4. Encrypt the 16-byte result of the hash, using an RC4 encryption
    # function with the encryption key from step 1.
    val = RC4_encrypt(key, md5_hash)
    # 5. Do the following 19 times: Take the output from the previous
    # invocation of the RC4 function and pass it as input to a new invocation
    # of the function; use an encryption key generated by taking each byte of
    # the original encryption key (obtained in step 1) and performing an XOR
    # operation between that byte and the single-byte value of the iteration
    # counter (from 1 to 19).
    for i in range(1, 20):
        new_key = bytes(ord_(k) ^ i for k in key)
        val = RC4_encrypt(new_key, val)
    # 6. Append 16 bytes of arbitrary padding to the output from the final
    # invocation of the RC4 function and store the 32-byte result as the value
    # of the U entry in the encryption dictionary.
    # (implementer note: I don't know what "arbitrary padding" is supposed to
    # mean, so I have used null bytes. This seems to match a few other
    # people's implementations)
    return val + (b"\x00" * 16), key


def RC4_encrypt(key: Union[str, bytes], plaintext: bytes) -> bytes:
    """
    Apply the RC4 stream cipher to ``plaintext`` using ``key``.

    :param key: cipher key; each element is converted with ``ord_`` so both
        ``str`` and ``bytes`` keys are accepted. Must not be empty (an empty
        key raises ``ZeroDivisionError`` during key scheduling).
    :param plaintext: bytes to process.
    :return: the XOR of ``plaintext`` with the generated key stream, same
        length as ``plaintext``.
    """
    # Key scheduling: permute the 256-entry state according to the key.
    S = list(range(256))
    key_length = len(key)
    j = 0
    for i in range(256):
        j = (j + S[i] + ord_(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]
    # Stream generation: one key-stream byte per input byte, XORed in place.
    i, j = 0, 0
    retval = bytearray()
    for plaintext_char in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        retval.append(ord_(plaintext_char) ^ t)
    return bytes(retval)