diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info')
6 files changed, 173 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/INSTALLER b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/INSTALLER new file mode 100644 index 00000000..a1b589e3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/LICENSE b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/LICENSE new file mode 100644 index 00000000..5aa3e44a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Edouard Belval + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/METADATA b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/METADATA new file mode 100644 index 00000000..b13ca2be --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/METADATA @@ -0,0 +1,128 @@ +Metadata-Version: 2.1 +Name: pdf2image +Version: 1.17.0 +Summary: A wrapper around the pdftoppm and pdftocairo command line tools to convert PDF to a PIL Image list. +Home-page: https://github.com/Belval/pdf2image +Author: Edouard Belval +Author-email: edouard@belval.org +License: MIT +Keywords: pdf image png jpeg jpg convert +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: MIT License +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: pillow + +# pdf2image +[](https://circleci.com/gh/Belval/pdf2image/tree/master) [](https://badge.fury.io/py/pdf2image) [](https://codecov.io/gh/Belval/pdf2image) [](https://pepy.tech/project/pdf2image) [](https://belval.github.io/pdf2image) + +A python (3.7+) module that wraps pdftoppm and pdftocairo to convert PDF to a PIL Image object + +## How to install + +`pip install pdf2image` + +### Windows + +Windows users will have to build or download poppler for Windows. I recommend [@oschwartz10612 version](https://github.com/oschwartz10612/poppler-windows/releases/) which is the most up-to-date. You will then have to add the `bin/` folder to [PATH](https://www.architectryan.com/2018/03/17/add-to-the-path-on-windows-10/) or use `poppler_path = r"C:\path\to\poppler-xx\bin"` as an argument in `convert_from_path`. 
+ +### Mac + +Mac users will have to install [poppler](https://poppler.freedesktop.org/). + +Installing using [Brew](https://brew.sh/): + +``` +brew install poppler +``` + +### Linux + +Most distros ship with `pdftoppm` and `pdftocairo`. If they are not installed, refer to your package manager to install `poppler-utils` + +### Platform-independent (Using `conda`) + +1. Install poppler: `conda install -c conda-forge poppler` +2. Install pdf2image: `pip install pdf2image` + +## How does it work? + + +```py +from pdf2image import convert_from_path, convert_from_bytes +from pdf2image.exceptions import ( + PDFInfoNotInstalledError, + PDFPageCountError, + PDFSyntaxError +) +``` + +Then simply do: + +```py +images = convert_from_path('/home/belval/example.pdf') +``` + +OR + +```py +images = convert_from_bytes(open('/home/belval/example.pdf', 'rb').read()) +``` + +OR better yet + +```py +import tempfile + +with tempfile.TemporaryDirectory() as path: + images_from_path = convert_from_path('/home/belval/example.pdf', output_folder=path) + # Do something here +``` + +`images` will be a list of PIL Image objects, one for each page of the PDF document. + +Here are the definitions: + +`convert_from_path(pdf_path, dpi=200, output_folder=None, first_page=None, last_page=None, fmt='ppm', jpegopt=None, thread_count=1, userpw=None, use_cropbox=False, strict=False, transparent=False, single_file=False, output_file=str(uuid.uuid4()), poppler_path=None, grayscale=False, size=None, paths_only=False, use_pdftocairo=False, timeout=600, hide_attributes=False)` + +`convert_from_bytes(pdf_file, dpi=200, output_folder=None, first_page=None, last_page=None, fmt='ppm', jpegopt=None, thread_count=1, userpw=None, use_cropbox=False, strict=False, transparent=False, single_file=False, output_file=str(uuid.uuid4()), poppler_path=None, grayscale=False, size=None, paths_only=False, use_pdftocairo=False, timeout=600, hide_attributes=False)` + +## What's new? 
+ +- Allow users to hide attributes when using pdftoppm with `hide_attributes` (Thank you @StaticRocket) +- Fix console opening on Windows (Thank you @OhMyAgnes!) +- Add `timeout` parameter which raises `PDFPopplerTimeoutError` after the given number of seconds. +- Add `use_pdftocairo` parameter which forces `pdf2image` to use `pdftocairo`. Should improve performance. +- Fixed a bug where using `pdf2image` with multiple threads (but not multiple processes) would cause an exception +- `jpegopt` parameter allows for tuning of the output JPEG when using `fmt="jpeg"` (`-jpegopt` in pdftoppm CLI) (Thank you @abieler) +- `pdfinfo_from_path` and `pdfinfo_from_bytes` which expose the output of the pdfinfo CLI +- `paths_only` parameter will return image paths instead of Image objects, to prevent OOM when converting a big PDF +- `size` parameter allows you to define the shape of the resulting images (`-scale-to` in pdftoppm CLI) + - `size=400` will fit the image to a 400x400 box, preserving aspect ratio + - `size=(400, None)` will make the image 400 pixels wide, preserving aspect ratio + - `size=(500, 500)` will resize the image to 500x500 pixels, not preserving aspect ratio +- `grayscale` parameter allows you to convert images to grayscale (`-gray` in pdftoppm CLI) +- `single_file` parameter allows you to convert the first PDF page only, without adding digits at the end of the `output_file` +- Allow the user to specify poppler's installation path with `poppler_path` + +## Performance tips + +- Using an output folder is significantly faster if you are using an SSD. Otherwise i/o usually becomes the bottleneck. +- Using multiple threads can give you some gains but avoid more than 4 as this will cause i/o bottleneck (even on my NVMe SSD!). +- If i/o is your bottleneck, using the JPEG format can lead to significant gains. +- PNG format is pretty slow because of the compression. 
+- If you want to know the best settings (most settings will be fine anyway) you can clone the project and run `python tests.py` to get timings. + +## Limitations / known issues + +- A relatively big PDF will use up all your memory and cause the process to be killed (unless you use an output folder) +- Sometimes fails to read PDFs signed using DocuSign, [Solution for DocuSign issue.](docs/installation.md) + + diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/RECORD b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/RECORD new file mode 100644 index 00000000..8e1584bd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/RECORD @@ -0,0 +1,17 @@ +pdf2image-1.17.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+pdf2image-1.17.0.dist-info/LICENSE,sha256=ZFGkknXd85O54RPnDNw1jbNeQ1bJf9logYPkm8jiRAk,1071
+pdf2image-1.17.0.dist-info/METADATA,sha256=iRYEWdfC-rgKeI3OprmaNrApSUiH4LCzeG6CMomyAhU,6180
+pdf2image-1.17.0.dist-info/RECORD,,
+pdf2image-1.17.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+pdf2image-1.17.0.dist-info/top_level.txt,sha256=NU3R3bbyOYsKIEHWDiAxGPlHDGtDnlV6J-ZlbM8VSfQ,10
+pdf2image/__init__.py,sha256=6ctYFqFPJsWqMW7pwEz_fiLXzPHtp0TN371OPaex5jk,298
+pdf2image/__pycache__/__init__.cpython-312.pyc,,
+pdf2image/__pycache__/exceptions.cpython-312.pyc,,
+pdf2image/__pycache__/generators.cpython-312.pyc,,
+pdf2image/__pycache__/parsers.cpython-312.pyc,,
+pdf2image/__pycache__/pdf2image.cpython-312.pyc,,
+pdf2image/exceptions.py,sha256=vcxscKFBkqbxiaiUtYIfJZkEc3PRUkUK_NzGw9OuIlQ,623
+pdf2image/generators.py,sha256=tp_LGvNaAuj9e8_lpLhDWF2COsAWtNtq3R4JTCxImpI,917
+pdf2image/parsers.py,sha256=0rDCXKJH2uoxgXyBG8TGCuhTjCX-mzOo7LpIlqMXJq4,2609
+pdf2image/pdf2image.py,sha256=MRinD8xTdbMSkJgIU32y-xaDyBu6To64RpbxguZp3Y8,24642
+pdf2image/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/WHEEL b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/WHEEL new file mode 100644 index 00000000..becc9a66 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.37.1) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/top_level.txt b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/top_level.txt new file mode 100644 index 00000000..721b8bd5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/pdf2image-1.17.0.dist-info/top_level.txt @@ -0,0 +1 @@ +pdf2image |