aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/pypdf/_page_labels.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/pypdf/_page_labels.py')
-rw-r--r--.venv/lib/python3.12/site-packages/pypdf/_page_labels.py280
1 files changed, 280 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pypdf/_page_labels.py b/.venv/lib/python3.12/site-packages/pypdf/_page_labels.py
new file mode 100644
index 00000000..b0252795
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/pypdf/_page_labels.py
@@ -0,0 +1,280 @@
+"""
+Page labels are shown by PDF viewers as "the page number".
+
+A page has a numeric index, starting at 0. Additionally, the page
+has a label. In the most simple case:
+
+ label = index + 1
+
+However, the title page and the table of contents might have Roman numerals as
+page labels. This makes things more complicated.
+
+Example 1
+---------
+
+>>> reader.root_object["/PageLabels"]["/Nums"]
+[0, IndirectObject(18, 0, 139929798197504),
+ 8, IndirectObject(19, 0, 139929798197504)]
+>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
+{'/S': '/r'}
+>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
+{'/S': '/D'}
+
+Example 2
+---------
+The following is a document with pages labeled
+i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...
+
+1 0 obj
+ << /Type /Catalog
+ /PageLabels << /Nums [
+ 0 << /S /r >>
+ 4 << /S /D >>
+ 7 << /S /D
+ /P ( A- )
+ /St 8
+ >>
+ % A number tree containing
+ % three page label dictionaries
+ ]
+ >>
+ ...
+ >>
+endobj
+
+
+§12.4.2 PDF Specification 1.7 and 2.0
+=====================================
+
+Entries in a page label dictionary
+----------------------------------
+The /S key:
+D Decimal Arabic numerals
+R Uppercase Roman numerals
+r Lowercase Roman numerals
+A Uppercase letters (A to Z for the first 26 pages,
+ AA to ZZ for the next 26, and so on)
+a Lowercase letters (a to z for the first 26 pages,
+ aa to zz for the next 26, and so on)
+"""
+
+from typing import Iterator, List, Optional, Tuple, cast
+
+from ._protocols import PdfCommonDocProtocol
+from ._utils import logger_warning
+from .generic import ArrayObject, DictionaryObject, NullObject, NumberObject
+
+
def number2uppercase_roman_numeral(num: int) -> str:
    """
    Convert an integer to an uppercase Roman numeral.

    Args:
        num: The number to convert.

    Returns:
        The Roman numeral, e.g. "XIV" for 14. Values of 4000 and above are
        written with repeated "M". Zero and negative numbers have no Roman
        representation and yield an empty string.
    """
    # Guard first: with floor division a negative input would otherwise wrap
    # around (e.g. -5 would produce "CMXCV") instead of producing nothing.
    if num <= 0:
        return ""

    # Ordered from the largest to the smallest value, including the
    # subtractive pairs (CM, CD, XC, XL, IX, IV).
    roman = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    parts: List[str] = []
    for decimal, roman_repr in roman:
        # Greedily take as many of the current symbol as fit.
        count, num = divmod(num, decimal)
        parts.append(roman_repr * count)
        if num == 0:
            break
    return "".join(parts)
+
+
def number2lowercase_roman_numeral(number: int) -> str:
    """Return *number* as a Roman numeral written in lowercase letters."""
    uppercase_numeral = number2uppercase_roman_numeral(number)
    return uppercase_numeral.lower()
+
+
def number2uppercase_letter(number: int) -> str:
    """
    Convert a positive integer to an uppercase letter label.

    The letters act as the digits of a base-26 system that has no zero:
    1 -> "A", 26 -> "Z", 27 -> "AA", 28 -> "AB", and so on.

    Args:
        number: The number to convert; must be at least 1.

    Returns:
        The letter label for the number.

    Raises:
        ValueError: If the number is zero or negative.
    """
    if number <= 0:
        raise ValueError("Expecting a positive number")

    letters = []
    remaining = number
    while remaining > 0:
        # Shifting by one maps 1..26 onto offsets 0..25, so a full group of
        # 26 ends on "Z" instead of carrying into the next position.
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + offset))
    # The least significant letter was produced first.
    return "".join(reversed(letters))
+
+
def number2lowercase_letter(number: int) -> str:
    """Return the letter label of *number* written in lowercase letters."""
    uppercase_label = number2uppercase_letter(number)
    return uppercase_label.lower()
+
+
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Compute the label of a page from the /Nums array of a number tree node.

    /Nums shall be a flat array ``[key_1 value_1 key_2 value_2 ...]`` whose
    integer keys are sorted in numerical order (see 7.9.6 "Name Trees" and
    7.9.7 "Number Trees"). Each key is the first page index of a labelling
    range and each value is the page label dictionary for that range.

    Args:
        dictionary_object: The node holding the "/Nums" entry.
        index: The zero-based index of the page.

    Returns:
        The prefix (/P) followed by the number formatted in the style /S.
        If the selected value is not a dictionary (for example when /Nums is
        empty), ``str(index + 1)`` is returned as a fallback.
    """
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    entry_count = len(nums)
    label_dict = None
    range_start = 0
    # Walk over the (key, value) pairs and stop at the last pair, or as soon
    # as the next key lies beyond the requested page index.
    for position in range(0, entry_count, 2):
        range_start = nums[position]
        label_dict = nums[position + 1].get_object()
        following = position + 2
        if following == entry_count or nums[following] > index:
            break

    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(label_dict, dict):
        return str(index + 1)  # Fallback

    first_number = label_dict.get("/St", 1)
    label_prefix = label_dict.get("/P", "")
    # A missing /S means that the label consists of the prefix only.
    formatters = {
        None: lambda _n: "",
        "/D": str,
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    formatter = formatters[label_dict.get("/S")]
    return label_prefix + formatter(index - range_start + first_number)
+
+
def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Determine the page label for a page index.

    The /PageLabels entry of the document catalog is a number tree, see
    7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4". When the document has no
        /PageLabels entry, or no label can be determined from the tree,
        ``str(index + 1)`` is returned.
    """
    catalog = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in catalog:
        return str(index + 1)  # Fallback

    number_tree = cast(DictionaryObject, catalog["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        return get_label_from_nums(number_tree, index)

    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # The depth counter limits how far the tree is descended.
        depth = 0
        while depth < 100:
            # Look for the first kid whose /Limits contain the page index.
            # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
            matching_kid = None
            for candidate in cast(List[DictionaryObject], number_tree["/Kids"]):
                limits = cast(List[int], candidate["/Limits"])
                if limits[0] <= index <= limits[1]:
                    matching_kid = candidate
                    break

            if matching_kid is None:
                # No kid covers the index: leave the loop and use the fallback.
                break
            if matching_kid.get("/Kids", None) is None:
                # A kid without further kids is resolved through its /Nums.
                return get_label_from_nums(matching_kid, index)

            # Intermediate node: descend one level and search its kids.
            depth += 1
            if depth == 100:  # pragma: no cover
                raise NotImplementedError("Too deep nesting is not supported.")
            number_tree = matching_kid

    logger_warning(
        f"Could not reliably determine page label for {index}.",
        __name__
    )
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree
+
+
def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Put a (key, value) entry into a Nums array, modifying it in place.

    The array is searched from its end for the position of the key. If the
    key is found there, only its value is replaced; otherwise the new pair is
    inserted at that position.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    Raises:
        ValueError: If the array has an odd number of elements.
    """
    if len(nums) % 2:
        raise ValueError("a nums like array must have an even number of elements")

    # Step backwards over the keys (even positions) for as long as they are
    # not smaller than the new key.
    position = len(nums)
    for slot in range(len(nums) - 2, -1, -2):
        if key <= nums[slot]:
            position = slot
        else:
            break

    if position < len(nums) and key == nums[position]:
        # The key already exists: only replace its value.
        nums[position + 1] = value
        return

    nums.insert(position, key)
    nums.insert(position + 1, value)
+
+
def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Delete the entries that follow a given entry up to a page index.

    Every entry located after the entry of ``key`` is removed from the array
    in place as long as its key is not greater than ``page_index_to``. The
    entry of ``key`` itself is kept.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    Raises:
        ValueError: If the array has an odd number of elements, if
            ``page_index_to`` is smaller than ``key``, or if ``key`` is not
            contained in the array.
    """
    if len(nums) % 2:
        raise ValueError("a nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # The first entry after the one identified by `key`.
    cursor = nums.index(key) + 2
    while cursor < len(nums) and nums[cursor] <= page_index_to:
        # Remove the key and then its value; the following entry moves up
        # to `cursor`.
        for _ in range(2):
            nums.pop(cursor)
+
+
def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> Tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Look up the entry that follows the entry of a given key.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The (key, value) pair of the following entry, or ``(None, None)``
        when the given entry is the last one.

    Raises:
        ValueError: If the array has an odd number of elements or if ``key``
            is not contained in the array.
    """
    if len(nums) % 2:
        raise ValueError("a nums like array must have an even number of elements")

    following = nums.index(key) + 2
    if following >= len(nums):
        return (None, None)
    return (nums[following], nums[following + 1])