diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/pypdf/pagerange.py | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-4a52a71956a8d46fcb7294ac71734504bb09bcc2.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/pypdf/pagerange.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/pypdf/pagerange.py | 192 |
1 files changed, 192 insertions, 0 deletions
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/py-pdf/pypdf/blob/main/LICENSE
"""

import re
from typing import Any, List, Tuple, Union

from .errors import ParseError

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
# Capture groups used below:
#   group 2          -- a lone int ("just this page")
#   groups 4, 6, 8   -- start, stop and step of the "start:stop:step" form
# (groups 1, 3, 5 and 7 are only structural parentheses.)


class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

    - PageRange(str) parses a string representing a page range.
    - PageRange(slice) directly "imports" a slice.
    - to_slice() gives the equivalent slice.
    - str() and repr() allow printing.
    - indices(n) is like slice.indices(n).
    - Instances compare equal when their slices are equal, and hash
      consistently with that equality.
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
        where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:

            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.

        The third, "stride" or "step" number is also recognized.

            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.

        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises:
            ParseError: if ``arg`` is not a slice, a PageRange, or a string
                matching ``PAGE_RANGE_RE``.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        # Non-string arguments short-circuit to False and are rejected below.
        m = isinstance(arg, str) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)

        if m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # "-1" cannot use stop = start + 1 = 0 (that slice is empty),
            # so the last page is expressed as the open-ended slice(-1, None).
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Missing parts of "start:stop:step" become None, as in a slice.
            self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)])

    @staticmethod
    def valid(input: Any) -> bool:
        """
        True if input is a valid initializer for a PageRange.

        Args:
            input: A possible PageRange string or a PageRange object.

        Returns:
            True, if the ``input`` is a valid PageRange.
        """
        return isinstance(input, (slice, PageRange)) or (
            isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))
        )

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        s = self._slice
        indices: Union[Tuple[int, int], Tuple[int, int, int]]
        if s.step is None:
            # A one-page range prints as a single int.  start == -1 is
            # excluded: slice(-1, 0) satisfies stop == start + 1 but is an
            # empty range, whereas the string "-1" parses to slice(-1, None).
            if s.start is not None and s.start != -1 and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ":".join("" if i is None else str(i) for i in indices)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n: int) -> Tuple[int, int, int]:
        """
        Assuming a sequence of length n, calculate the start and stop indices,
        and the stride length of the PageRange.

        See help(slice.indices).

        Args:
            n: the length of the list of pages to choose from.

        Returns:
            Arguments for range().
        """
        return self._slice.indices(n)

    def __eq__(self, other: object) -> bool:
        """Two PageRanges are equal when their underlying slices are equal."""
        if not isinstance(other, PageRange):
            return False
        return self._slice == other._slice

    def __hash__(self) -> int:
        """
        Hash consistently with ``__eq__``.

        Defining ``__eq__`` alone would make instances unhashable.  The three
        slice fields are hashed instead of the slice object itself because
        slice objects are only hashable from Python 3.12 on.
        """
        s = self._slice
        return hash((self.__class__, (s.start, s.stop, s.step)))

    def __add__(self, other: "PageRange") -> "PageRange":
        """
        Merge two overlapping or touching page ranges into one.

        Both ranges must have explicit integer start and stop values: the
        bounds are compared directly, so an open-ended bound (``None``)
        makes the comparisons below raise ``TypeError``.

        Raises:
            TypeError: if ``other`` is not a PageRange.
            ValueError: if either range has a step, or if there is a gap
                between the two ranges.
        """
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")
        a = self._slice.start, self._slice.stop
        b = other._slice.start, other._slice.stop

        if a[0] > b[0]:
            a, b = b, a

        # Now a[0] is the smallest
        if b[0] > a[1]:
            # There is a gap between a and b.
            raise ValueError("Can't add PageRanges with gap")
        return PageRange(slice(a[0], max(a[1], b[1])))


PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.
def parse_filename_page_ranges(
    args: List[Union[str, PageRange, None]]
) -> List[Tuple[str, PageRange]]:
    """
    Given a list of filenames and page ranges, return a list of (filename, page_range) pairs.

    Any argument accepted by ``PageRange.valid`` is treated as a page range
    for the most recent filename; every other argument starts a new filename.

    Args:
        args: A list where the first element is a filename. The other elements are
            filenames, page-range expressions, slice objects, or PageRange objects.
            A filename not followed by a page range indicates all pages of the file.
            Any iterable (for example a tuple) is accepted, not only a list.

    Returns:
        A list of (filename, page_range) pairs.

    Raises:
        ValueError: if a page range appears before any filename.
    """
    pairs: List[Tuple[str, PageRange]] = []
    pdf_filename = None
    did_page_range = False
    # The trailing None acts as an end-of-input sentinel so the last filename
    # is flushed by the same branch that handles "next filename".  Unpacking
    # (rather than ``args + [None]``) keeps this working for non-list inputs.
    for arg in [*args, None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError(
                    "The first argument must be a filename, not a page range."
                )

            pairs.append((pdf_filename, PageRange(arg)))
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append((pdf_filename, PAGE_RANGE_ALL))

            pdf_filename = arg
            did_page_range = False
    return pairs


PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]]