about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/epub/__init__.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/epub/__init__.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/epub/__init__.py')
-rw-r--r--.venv/lib/python3.12/site-packages/epub/__init__.py423
1 files changed, 423 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/epub/__init__.py b/.venv/lib/python3.12/site-packages/epub/__init__.py
new file mode 100644
index 00000000..7f6b76ae
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/epub/__init__.py
@@ -0,0 +1,423 @@
+# -*- coding: utf-8 -*-
+"""Library to open and read files in the epub version 2."""
+from __future__ import unicode_literals
+
+
+__author__ = 'Florian Strzelecki <florian.strzelecki@gmail.com>'
+__version__ = '0.5.2'
+__all__ = ['opf', 'ncx', 'utils']
+
+
+import os
+import shutil
+import tempfile
+import uuid
+import warnings
+import zipfile
+
+
+from xml.dom import minidom
+
+from epub import ncx, opf, utils
+
+
+MIMETYPE_EPUB = 'application/epub+zip'
+MIMETYPE_OPF = 'application/oebps-package+xml'
+MIMETYPE_NCX = 'application/x-dtbncx+xml'
+
+DEFAULT_OPF_PATH = 'OEBPS/content.opf'
+DEFAULT_NCX_PATH = 'toc.ncx'
+
+
+def open(filename, mode=None):
+    """Open an epub file and return an EpubFile object"""
+    warnings.warn('Function `epub.open` is deprecated since 0.5.0.',
+                  DeprecationWarning)
+    return open_epub(filename, mode)
+
+
+def open_epub(filename, mode=None):
+    return EpubFile(filename, mode)
+
+
+class BadEpubFile(zipfile.BadZipfile):
+    pass
+
+
+class EpubFile(zipfile.ZipFile):
+    """Represent an epub zip file, as described in version 2.0.1 of epub spec.
+
+    This class allow an access throught a low-level API to the epub real file.
+    It extends zipfile.ZipFile class and modify only a little some of its
+    behavior.
+
+    See http://idpf.org/epub/201 for more information about Epub 2.0.1.
+
+    """
+    @property
+    def content_path(self):
+        """Return the content path, ie, the path relative to OPF file.
+
+        If OPF file is located in `OEBPS/content.opf`, then `content_path` is
+        equal to `OEBPS`.
+
+        """
+        return os.path.dirname(self.opf_path).replace('\\', '/')
+
+    def __init__(self, filename, mode=None):
+        """Open the Epub zip file with mode read "r", write "w" or append "a".
+        """
+        mode = mode or 'r'
+        zipfile.ZipFile.__init__(self, filename, mode)
+        self.uid = None
+        self.opf_path = None
+        self.opf = None
+        self.toc = None
+
+        if self.mode == 'r':
+            self._init_read()
+        elif self.mode == 'w':
+            self._init_new()
+        elif self.mode == 'a':
+            if len(self.namelist()) == 0:
+                self._init_new()
+            else:
+                self._init_read()
+
+    def _init_new(self):
+        """Build an empty epub archive."""
+        # Write mimetype file: 'application/epub+zip'
+        self.writestr('mimetype', MIMETYPE_EPUB)
+        # Default path for opf
+        self.opf_path = DEFAULT_OPF_PATH
+        # Uid & Uid's id
+        uid_id = 'BookId'
+        self.uid = '%s' % uuid.uuid4()
+        # Create metadata, manifest, and spine, as minimalist as possible
+        metadata = opf.Metadata()
+        metadata.add_identifier(self.uid, uid_id, 'uid')
+        manifest = opf.Manifest()
+        manifest.add_item('ncx', 'toc.ncx', MIMETYPE_NCX)
+        spine = opf.Spine('ncx')
+        # Create Opf object
+        self.opf = opf.Opf(uid_id=uid_id,
+                           metadata=metadata, manifest=manifest, spine=spine)
+        # Create Ncx object
+        self.toc = ncx.Ncx()
+        self.toc.uid = self.uid
+
+    def _init_read(self):
+        """Get content from existing epub file"""
+        # Read container.xml to get OPF xml file path
+        xmlstring = self.read('META-INF/container.xml')
+        container_xml = minidom.parseString(xmlstring).documentElement
+
+        for element in container_xml.getElementsByTagName('rootfile'):
+            if element.getAttribute('media-type') == MIMETYPE_OPF:
+                # Only take the first full-path available
+                self.opf_path = element.getAttribute('full-path')
+                break
+
+        # Read OPF xml file
+        xml_string = self.read(self.opf_path)
+        self.opf = opf.parse_opf(xml_string)
+        uids = [x for x in self.opf.metadata.identifiers
+                      if x[1] == self.opf.uid_id]
+        if uids:
+            self.uid = uids[0]
+        else:
+            self.uid = None
+            warnings.warn('The ePub does not define any uid', SyntaxWarning)
+
+        item_toc = self.get_item(self.opf.spine.toc)
+
+        # Inspect NCX toc file
+        self.toc = None
+        if item_toc is not None:
+            self.toc = ncx.parse_toc(self.read_item(item_toc))
+        else:
+            warnings.warn('The ePub does not define any NCX file',
+                          SyntaxWarning)
+            self.toc = ncx.Ncx()
+            self.toc.uid = self.uid
+
+    def close(self):
+        if self.fp is None:
+            return
+        if self.mode in ('w', 'a'):
+            self._write_close()
+        zipfile.ZipFile.close(self)
+
+    def remove_paths(self, paths):
+        """Remove files from the archive
+
+        Warning: This will be slow, it needs to recreate from scratch the
+        complete archive.
+
+        This method (well, the whole behavior of "write epub file") needs
+        a rework in a future version.
+
+        """
+        with tempfile.NamedTemporaryFile('rb', delete=False) as temp:
+            with zipfile.ZipFile(temp.name, 'w') as new_zip:
+                for item in self.infolist():
+                    if item.filename not in paths:
+                        new_zip.writestr(item, self.read(item.filename))
+            zipfile.ZipFile.close(self)
+            shutil.move(temp.name, self.filename)
+            zipfile.ZipFile.__init__(self, self.filename, self.mode)
+
+    def _write_close(self):
+        """Handle writes when closing epub.
+
+        Both new file mode (w) and append file mode (a), some files must be
+        generated: container, OPF, and NCX.
+
+        """
+        item_toc = self.get_item(self.opf.spine.toc)
+
+        # Remove the old files
+        to_remove = ['META-INF/container.xml', self.opf_path]
+        if item_toc:
+            to_remove.append(
+                # Replace \ by /, no matter what OS's separator could be
+                os.path.join(self.content_path,
+                             item_toc.href).replace('\\', '/')
+            )
+
+        self.remove_paths(to_remove)
+
+        # Write META-INF/container.xml
+        self.writestr('META-INF/container.xml',
+                      self._build_container().encode('utf-8'))
+        # Write OPF File
+        self.writestr(self.opf_path,
+                      self.opf.as_xml_document().toxml().encode('utf-8'))
+        # Write NCX File if exist
+        if item_toc:
+            toc_path = os.path.join(
+                self.content_path, item_toc.href
+            ).replace('\\', '/')
+            toc_content = self.toc.as_xml_document().toxml().encode('utf-8')
+
+            self.writestr(toc_path, toc_content)
+
+    def _build_container(self):
+        """Build a simple XML container as in epub 2.0.1 specification."""
+        template = """<?xml version="1.0" encoding="UTF-8"?>
+    <container version="1.0"
+               xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
+        <rootfiles>
+             <rootfile full-path="%s"
+                       media-type="application/oebps-package+xml"/>
+        </rootfiles>
+    </container>"""
+        return template % self.opf_path
+
+    def add_item(self, filename, manifest_item,
+                 append_to_spine=False, is_linear=True):
+        """Add a file to epub.
+
+        A manifest item must be provide to describe it.
+
+        This function will raise a RuntimeError if epub is already closed. It
+        will raise an IOError if epub is open in read-only (`r` mode).
+
+        Optional: you can use `append_to_spine` flag (default=False) to append
+        item to spine, and use `is_linear` (default=True) to specify if it is
+        linear or not.
+
+        """
+        self.check_mode_write()
+        self.opf.manifest.append(manifest_item)
+
+        write_path = os.path.join(
+            self.content_path, manifest_item.href
+        ).replace('\\', '/')
+
+        self.write(filename, write_path)
+
+        if append_to_spine:
+            self.opf.spine.add_itemref(manifest_item.identifier, is_linear)
+
+    def check_mode_write(self):
+        """Raise error if epub file is not writable.
+
+        Raise RuntimeError if file is already closed.
+
+        Raise IOError if file is opened read-only.
+
+        """
+        if not self.fp:
+            raise RuntimeError(
+                  'Attempt to write to EPUB file that was already closed')
+
+        if self.mode == 'r':
+            raise IOError(
+                  'Attempt to write to EPUB file that was open as read-only.')
+
+    # extract method is  zipfile.ZipFile.extract(member[, path[, pwd]])
+
+    def extract_item(self, item, to_path=None):
+        """Extract an item from its href in epub to `to_path` location.
+        """
+        path = item if not hasattr(item, 'href') else item.href
+        member_path = os.path.join(self.content_path, path).replace('\\', '/')
+
+        return  self.extract(member=member_path, path=to_path)
+
+    def get_item(self, identifier):
+        """Get an item from manifest through its "id" attribute.
+
+        Return an EpubManifestItem if found, else None.
+
+        """
+        return self.opf.manifest.get(identifier, None)
+
+    def get_item_by_href(self, href):
+        """Get an item from manifest through its "href" attribute.
+
+        Return an EpubManifestItem if found, else None.
+
+        """
+        found = [x for x in self.opf.manifest.values() if x.href == href]
+        size = len(found)
+        if size == 1:
+            return found[0]
+        elif size > 1:
+            raise LookupError('Multiple items are found with this href.')
+        else:
+            return None
+
+    # read method is zipfile.ZipFile.read(path)
+
+    def read_item(self, item):
+        """Read a file from the epub zipfile container.
+
+        "item" parameter can be the relative path to the opf file or an
+        EpubManifestItem object.
+
+        Html fragments are not acceptable : the path must be exactly the same
+        as indicated in the opf file.
+
+        """
+        path = item
+        if hasattr(item, 'href'):
+            path = item.href
+
+        return self.read(
+            # Replace \ by /, as ZipFile always uses / as path separator.
+            os.path.join(self.content_path, path).replace('\\', '/')
+        )
+
+
+class Book(object):
+    """This class is an attempt to expose a simpler object model than EpubFile.
+
+    WARNING: Work in progress. Use with caution.
+
+    """
+
+    def __init__(self, epub_file):
+        self.epub_file = epub_file
+
+    @property
+    def creators(self):
+        return self.epub_file.opf.metadata.creators
+
+    @property
+    def description(self):
+        return self.epub_file.opf.metadata.description
+
+    @property
+    def isbn(self):
+        return self.epub_file.opf.metadata.get_isbn()
+
+    @property
+    def publisher(self):
+        return self.epub_file.opf.metadata.publisher
+
+    @property
+    def contributors(self):
+        return self.epub_file.opf.metadata.contributors
+
+    @property
+    def dates(self):
+        return self.epub_file.opf.metadata.dates
+
+    @property
+    def dc_type(self):
+        return self.epub_file.opf.metadata.dc_type
+
+    @property
+    def dc_format(self):
+        return self.epub_file.opf.metadata.format
+
+    @property
+    def identifiers(self):
+        return self.epub_file.opf.metadata.identifiers
+
+    @property
+    def source(self):
+        return self.epub_file.opf.metadata.source
+
+    @property
+    def languages(self):
+        return self.epub_file.opf.metadata.languages
+
+    @property
+    def relation(self):
+        return self.epub_file.opf.metadata.relation
+
+    @property
+    def coverage(self):
+        return self.epub_file.opf.metadata.coverage
+
+    @property
+    def right(self):
+        return self.epub_file.opf.metadata.right
+
+    @property
+    def metas(self):
+        return self.epub_file.opf.metadata.metas
+
+    @property
+    def subjects(self):
+        return self.epub_file.opf.metadata.subjects
+
+    @property
+    def titles(self):
+        return self.epub_file.opf.metadata.titles
+
+    @property
+    def chapters(self):
+        """
+        Return a list of linear chapter from spine.
+        """
+        return [BookChapter(self, identifier)
+                for identifier, linear in self.epub_file.opf.spine.itemrefs
+                if linear]
+
+    @property
+    def extra_chapters(self):
+        """
+        Return a list of non-linear chapter from spine.
+        """
+        return [BookChapter(self, identifier)
+                for identifier, linear in self.epub_file.opf.spine.itemrefs
+                if not linear]
+
+
+class BookChapter(object):
+
+    @property
+    def identifier(self):
+        return self._manifest_item.identifier
+
+    def __init__(self, book, identifier, fragment=None):
+        self._book = book
+        self._manifest_item = self._book.epub_file.get_item(identifier)
+        self._fragment = fragment
+
+    def read(self):
+        return self._book.epub_file.read_item(self._manifest_item)