aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/docx/oxml/coreprops.py
blob: 8ba9ff42e0a107c035477ebbe5842108adbfc028 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
"""Custom element classes for core properties-related XML elements."""

from __future__ import annotations

import datetime as dt
import re
from typing import TYPE_CHECKING, Any, Callable

from docx.oxml.ns import nsdecls, qn
from docx.oxml.parser import parse_xml
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne

if TYPE_CHECKING:
    from lxml.etree import _Element as etree_Element  # pyright: ignore[reportPrivateUsage]


class CT_CoreProperties(BaseOxmlElement):
    """`<cp:coreProperties>` element, the root element of the Core Properties part.

    Stored as `/docProps/core.xml`. Implements many of the Dublin Core document metadata
    elements. String elements resolve to an empty string ("") if the element is not
    present in the XML. String elements are limited in length to 255 unicode characters.

    Date/time elements are read as timezone-aware UTC |datetime| values, or |None| when
    the element is absent, empty, or not parseable. They are written in the form
    `yyyy-mm-ddThh:mm:ssZ`.
    """

    get_or_add_revision: Callable[[], etree_Element]

    category = ZeroOrOne("cp:category", successors=())
    contentStatus = ZeroOrOne("cp:contentStatus", successors=())
    created = ZeroOrOne("dcterms:created", successors=())
    creator = ZeroOrOne("dc:creator", successors=())
    description = ZeroOrOne("dc:description", successors=())
    identifier = ZeroOrOne("dc:identifier", successors=())
    keywords = ZeroOrOne("cp:keywords", successors=())
    language = ZeroOrOne("dc:language", successors=())
    lastModifiedBy = ZeroOrOne("cp:lastModifiedBy", successors=())
    lastPrinted = ZeroOrOne("cp:lastPrinted", successors=())
    modified = ZeroOrOne("dcterms:modified", successors=())
    revision: etree_Element | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "cp:revision", successors=()
    )
    subject = ZeroOrOne("dc:subject", successors=())
    title = ZeroOrOne("dc:title", successors=())
    version = ZeroOrOne("cp:version", successors=())

    _coreProperties_tmpl = "<cp:coreProperties %s/>\n" % nsdecls("cp", "dc", "dcterms")

    @classmethod
    def new(cls):
        """Return a new `<cp:coreProperties>` element."""
        xml = cls._coreProperties_tmpl
        coreProperties = parse_xml(xml)
        return coreProperties

    @property
    def author_text(self) -> str:
        """The text in the `dc:creator` child element."""
        return self._text_of_element("creator")

    @author_text.setter
    def author_text(self, value: str):
        self._set_element_text("creator", value)

    @property
    def category_text(self) -> str:
        """The text in the `cp:category` child element."""
        return self._text_of_element("category")

    @category_text.setter
    def category_text(self, value: str):
        self._set_element_text("category", value)

    @property
    def comments_text(self) -> str:
        """The text in the `dc:description` child element."""
        return self._text_of_element("description")

    @comments_text.setter
    def comments_text(self, value: str):
        self._set_element_text("description", value)

    @property
    def contentStatus_text(self) -> str:
        """The text in the `cp:contentStatus` child element."""
        return self._text_of_element("contentStatus")

    @contentStatus_text.setter
    def contentStatus_text(self, value: str):
        self._set_element_text("contentStatus", value)

    @property
    def created_datetime(self) -> dt.datetime | None:
        """Date/time value of the `dcterms:created` child element, |None| if unavailable."""
        return self._datetime_of_element("created")

    @created_datetime.setter
    def created_datetime(self, value: dt.datetime):
        self._set_element_datetime("created", value)

    @property
    def identifier_text(self) -> str:
        """The text in the `dc:identifier` child element."""
        return self._text_of_element("identifier")

    @identifier_text.setter
    def identifier_text(self, value: str):
        self._set_element_text("identifier", value)

    @property
    def keywords_text(self) -> str:
        """The text in the `cp:keywords` child element."""
        return self._text_of_element("keywords")

    @keywords_text.setter
    def keywords_text(self, value: str):
        self._set_element_text("keywords", value)

    @property
    def language_text(self) -> str:
        """The text in the `dc:language` child element."""
        return self._text_of_element("language")

    @language_text.setter
    def language_text(self, value: str):
        self._set_element_text("language", value)

    @property
    def lastModifiedBy_text(self) -> str:
        """The text in the `cp:lastModifiedBy` child element."""
        return self._text_of_element("lastModifiedBy")

    @lastModifiedBy_text.setter
    def lastModifiedBy_text(self, value: str):
        self._set_element_text("lastModifiedBy", value)

    @property
    def lastPrinted_datetime(self) -> dt.datetime | None:
        """Date/time value of the `cp:lastPrinted` child element, |None| if unavailable."""
        return self._datetime_of_element("lastPrinted")

    @lastPrinted_datetime.setter
    def lastPrinted_datetime(self, value: dt.datetime):
        self._set_element_datetime("lastPrinted", value)

    @property
    def modified_datetime(self) -> dt.datetime | None:
        """Date/time value of the `dcterms:modified` child element, |None| if unavailable."""
        return self._datetime_of_element("modified")

    @modified_datetime.setter
    def modified_datetime(self, value: dt.datetime):
        self._set_element_datetime("modified", value)

    @property
    def revision_number(self) -> int:
        """Integer value of revision property.

        Resolves to 0 when the `cp:revision` element is not present, when its text is not
        an integer, or when that integer is negative.
        """
        revision = self.revision
        if revision is None:
            return 0
        try:
            number = int(str(revision.text))
        except ValueError:
            # non-integer revision strings also resolve to 0
            return 0
        # as do negative integers
        return max(number, 0)

    @revision_number.setter
    def revision_number(self, value: int):
        """Set revision property to string value of integer `value`.

        Raises |ValueError| if `value` is not an int or is less than 1.
        """
        if not isinstance(value, int) or value < 1:  # pyright: ignore[reportUnnecessaryIsInstance]
            tmpl = "revision property requires positive int, got '%s'"
            raise ValueError(tmpl % value)
        revision = self.get_or_add_revision()
        revision.text = str(value)

    @property
    def subject_text(self) -> str:
        """The text in the `dc:subject` child element."""
        return self._text_of_element("subject")

    @subject_text.setter
    def subject_text(self, value: str):
        self._set_element_text("subject", value)

    @property
    def title_text(self) -> str:
        """The text in the `dc:title` child element."""
        return self._text_of_element("title")

    @title_text.setter
    def title_text(self, value: str):
        self._set_element_text("title", value)

    @property
    def version_text(self) -> str:
        """The text in the `cp:version` child element."""
        return self._text_of_element("version")

    @version_text.setter
    def version_text(self, value: str):
        self._set_element_text("version", value)

    def _datetime_of_element(self, property_name: str) -> dt.datetime | None:
        """Return the date/time stored in the child element named `property_name`.

        |None| is returned when the element is not present, when it has no text, or when
        its text cannot be parsed as a W3CDTF date/time string.
        """
        element = getattr(self, property_name)
        if element is None:
            return None
        datetime_str = element.text
        if datetime_str is None:
            # an empty element (e.g. `<dcterms:created/>`) has no text to parse; slicing
            # `None` would raise TypeError rather than the ValueError handled below
            return None
        try:
            return self._parse_W3CDTF_to_datetime(datetime_str)
        except ValueError:
            # invalid datetime strings are ignored
            return None

    def _get_or_add(self, prop_name: str) -> BaseOxmlElement:
        """Return element returned by "get_or_add_" method for `prop_name`."""
        get_or_add_method_name = "get_or_add_%s" % prop_name
        get_or_add_method = getattr(self, get_or_add_method_name)
        element = get_or_add_method()
        return element

    @classmethod
    def _offset_dt(cls, dt_: dt.datetime, offset_str: str) -> dt.datetime:
        """A |datetime| instance offset from `dt_` by timezone offset in `offset_str`.

        `offset_str` is like `"-07:00"`. The offset is removed from `dt_`, so a "+" offset
        moves the result earlier and a "-" offset moves it later.

        Raises |ValueError| if `offset_str` does not start with a sign followed by
        `hh:mm`.
        """
        match = cls._offset_pattern.match(offset_str)
        if match is None:
            raise ValueError("'%s' is not a valid offset string" % offset_str)
        sign, hours_str, minutes_str = match.groups()
        # the sign is inverted because the offset is being backed out of a local time
        sign_factor = -1 if sign == "+" else 1
        hours = int(hours_str) * sign_factor
        minutes = int(minutes_str) * sign_factor
        td = dt.timedelta(hours=hours, minutes=minutes)
        return dt_ + td

    _offset_pattern = re.compile(r"([+-])(\d\d):(\d\d)")

    @classmethod
    def _parse_W3CDTF_to_datetime(cls, w3cdtf_str: str) -> dt.datetime:
        """Return a timezone-aware UTC |datetime| parsed from `w3cdtf_str`.

        Only the first 19 characters are given to `strptime`. Whatever follows is applied
        as a numeric timezone offset when it is exactly six characters long (like
        `"-08:00"`) and is otherwise ignored (e.g. a trailing `"Z"`).

        Raises |ValueError| if the leading part matches none of the supported templates or
        a six-character suffix is not a valid offset.
        """
        # valid W3CDTF date cases:
        # yyyy e.g. "2003"
        # yyyy-mm e.g. "2003-12"
        # yyyy-mm-dd e.g. "2003-12-31"
        # UTC timezone e.g. "2003-12-31T10:14:55Z"
        # numeric timezone e.g. "2003-12-31T10:14:55-08:00"
        templates = (
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
            "%Y-%m",
            "%Y",
        )
        # strptime isn't smart enough to parse literal timezone offsets like
        # "-07:30", so we have to do it ourselves
        parseable_part = w3cdtf_str[:19]
        offset_str = w3cdtf_str[19:]
        dt_ = None
        for tmpl in templates:
            try:
                dt_ = dt.datetime.strptime(parseable_part, tmpl)
            except ValueError:
                continue
            # at most one template can match a given string, so stop at the first hit
            break
        if dt_ is None:
            tmpl = "could not parse W3CDTF datetime string '%s'"
            raise ValueError(tmpl % w3cdtf_str)
        if len(offset_str) == 6:
            dt_ = cls._offset_dt(dt_, offset_str)
        return dt_.replace(tzinfo=dt.timezone.utc)

    def _set_element_datetime(self, prop_name: str, value: dt.datetime):
        """Set date/time value of child element having `prop_name` to `value`.

        A timezone-aware `value` is converted to UTC before it is written, because the
        stored string always carries a "Z" suffix. A naive `value` is written unchanged.

        Raises |ValueError| if `value` is not a |datetime| instance.
        """
        if not isinstance(value, dt.datetime):  # pyright: ignore[reportUnnecessaryIsInstance]
            tmpl = "property requires <type 'datetime.datetime'> object, got %s"
            raise ValueError(tmpl % type(value))
        if value.utcoffset() is not None:
            # without this an aware non-UTC value would have its local wall-clock time
            # labelled as UTC by the literal "Z" in the format string below
            value = value.astimezone(dt.timezone.utc)
        element = self._get_or_add(prop_name)
        dt_str = value.strftime("%Y-%m-%dT%H:%M:%SZ")
        element.text = dt_str
        if prop_name in ("created", "modified"):
            # These two require an explicit "xsi:type="dcterms:W3CDTF""
            # attribute. The first and last line are a hack required to add
            # the xsi namespace to the root element rather than each child
            # element in which it is referenced
            self.set(qn("xsi:foo"), "bar")
            element.set(qn("xsi:type"), "dcterms:W3CDTF")
            del self.attrib[qn("xsi:foo")]

    def _set_element_text(self, prop_name: str, value: Any) -> None:
        """Set string value of `prop_name` property to `value`.

        A non-string `value` is converted with `str()` first.

        Raises |ValueError| if the resulting string is longer than 255 characters.
        """
        if not isinstance(value, str):
            value = str(value)

        if len(value) > 255:
            tmpl = "exceeded 255 char limit for property, got:\n\n'%s'"
            raise ValueError(tmpl % value)
        element = self._get_or_add(prop_name)
        element.text = value

    def _text_of_element(self, property_name: str) -> str:
        """The text in the element matching `property_name`.

        The empty string if the element is not present or contains no text.
        """
        element = getattr(self, property_name)
        if element is None:
            return ""
        if element.text is None:
            return ""
        return element.text