aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/orgparse/node.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/orgparse/node.py')
-rw-r--r--.venv/lib/python3.12/site-packages/orgparse/node.py1459
1 files changed, 1459 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/orgparse/node.py b/.venv/lib/python3.12/site-packages/orgparse/node.py
new file mode 100644
index 00000000..7ed1cdba
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/orgparse/node.py
@@ -0,0 +1,1459 @@
+import re
+import itertools
+from typing import List, Iterable, Iterator, Optional, Union, Tuple, cast, Dict, Set, Sequence, Any
+
+from .date import OrgDate, OrgDateClock, OrgDateRepeatedTask, parse_sdc, OrgDateScheduled, OrgDateDeadline, OrgDateClosed
+from .inline import to_plain_text
+from .extra import to_rich_text, Rich
+
+
+def lines_to_chunks(lines: Iterable[str]) -> Iterable[List[str]]:
+ chunk: List[str] = []
+ for l in lines:
+ if RE_NODE_HEADER.search(l):
+ yield chunk
+ chunk = []
+ chunk.append(l)
+ yield chunk
+
+RE_NODE_HEADER = re.compile(r"^\*+ ")
+
+
+def parse_heading_level(heading):
+ """
+ Get star-stripped heading and its level
+
+ >>> parse_heading_level('* Heading')
+ ('Heading', 1)
+ >>> parse_heading_level('******** Heading')
+ ('Heading', 8)
+ >>> parse_heading_level('*') # None since no space after star
+ >>> parse_heading_level('*bold*') # None
+ >>> parse_heading_level('not heading') # None
+
+ """
+ match = RE_HEADING_STARS.search(heading)
+ if match:
+ return (match.group(2), len(match.group(1)))
+
+RE_HEADING_STARS = re.compile(r'^(\*+)\s+(.*?)\s*$')
+
+
+def parse_heading_tags(heading: str) -> Tuple[str, List[str]]:
+ """
+ Get first tags and heading without tags
+
+ >>> parse_heading_tags('HEADING')
+ ('HEADING', [])
+ >>> parse_heading_tags('HEADING :TAG1:TAG2:')
+ ('HEADING', ['TAG1', 'TAG2'])
+ >>> parse_heading_tags('HEADING: this is still heading :TAG1:TAG2:')
+ ('HEADING: this is still heading', ['TAG1', 'TAG2'])
+ >>> parse_heading_tags('HEADING :@tag:_tag_:')
+ ('HEADING', ['@tag', '_tag_'])
+
+ Here is the spec of tags from Org Mode manual:
+
+ Tags are normal words containing letters, numbers, ``_``, and
+ ``@``. Tags must be preceded and followed by a single colon,
+ e.g., ``:work:``.
+
+ -- (info "(org) Tags")
+
+ """
+ match = RE_HEADING_TAGS.search(heading)
+ if match:
+ heading = match.group(1)
+ tagstr = match.group(2)
+ tags = tagstr.split(':')
+ else:
+ tags = []
+ return (heading, tags)
+
+# Tags are normal words containing letters, numbers, '_', and '@'. https://orgmode.org/manual/Tags.html
+RE_HEADING_TAGS = re.compile(r'(.*?)\s*:([\w@:]+):\s*$')
+
+
+def parse_heading_todos(heading: str, todo_candidates: List[str]) -> Tuple[str, Optional[str]]:
+ """
+ Get TODO keyword and heading without TODO keyword.
+
+ >>> todos = ['TODO', 'DONE']
+ >>> parse_heading_todos('Normal heading', todos)
+ ('Normal heading', None)
+ >>> parse_heading_todos('TODO Heading', todos)
+ ('Heading', 'TODO')
+
+ """
+ for todo in todo_candidates:
+ if heading == todo:
+ return ('', todo)
+ if heading.startswith(todo + ' '):
+ return (heading[len(todo) + 1:], todo)
+ return (heading, None)
+
+
+def parse_heading_priority(heading):
+ """
+ Get priority and heading without priority field.
+
+ >>> parse_heading_priority('HEADING')
+ ('HEADING', None)
+ >>> parse_heading_priority('[#A] HEADING')
+ ('HEADING', 'A')
+ >>> parse_heading_priority('[#0] HEADING')
+ ('HEADING', '0')
+ >>> parse_heading_priority('[#A]')
+ ('', 'A')
+
+ """
+ match = RE_HEADING_PRIORITY.search(heading)
+ if match:
+ return (match.group(2), match.group(1))
+ else:
+ return (heading, None)
+
+RE_HEADING_PRIORITY = re.compile(r'^\s*\[#([A-Z0-9])\] ?(.*)$')
+
+PropertyValue = Union[str, int, float]
+def parse_property(line: str) -> Tuple[Optional[str], Optional[PropertyValue]]:
+ """
+ Get property from given string.
+
+ >>> parse_property(':Some_property: some value')
+ ('Some_property', 'some value')
+ >>> parse_property(':Effort: 1:10')
+ ('Effort', 70)
+
+ """
+ prop_key = None
+ prop_val: Optional[Union[str, int, float]] = None
+ match = RE_PROP.search(line)
+ if match:
+ prop_key = match.group(1)
+ prop_val = match.group(2)
+ if prop_key == 'Effort':
+ prop_val = parse_duration_to_minutes(prop_val)
+ return (prop_key, prop_val)
+
+RE_PROP = re.compile(r'^\s*:(.*?):\s*(.*?)\s*$')
+
+def parse_duration_to_minutes(duration: str) -> Union[float, int]:
+ """
+ Parse duration minutes from given string.
+ Convert to integer if number has no decimal points
+
+ >>> parse_duration_to_minutes('3:12')
+ 192
+ >>> parse_duration_to_minutes('1:23:45')
+ 83.75
+ >>> parse_duration_to_minutes('1y 3d 3h 4min')
+ 530464
+ >>> parse_duration_to_minutes('1d3h5min')
+ 1625
+ >>> parse_duration_to_minutes('3d 13:35')
+ 5135
+ >>> parse_duration_to_minutes('2.35h')
+ 141
+ >>> parse_duration_to_minutes('10')
+ 10
+ >>> parse_duration_to_minutes('10.')
+ 10
+ >>> parse_duration_to_minutes('1 h')
+ 60
+ >>> parse_duration_to_minutes('')
+ 0
+ """
+
+ minutes = parse_duration_to_minutes_float(duration)
+ return int(minutes) if minutes.is_integer() else minutes
+
+def parse_duration_to_minutes_float(duration: str) -> float:
+ """
+ Parse duration minutes from given string.
+ The following code is fully compatible with the 'org-duration-to-minutes' function in org mode:
+ https://github.com/emacs-mirror/emacs/blob/master/lisp/org/org-duration.el
+
+ >>> parse_duration_to_minutes_float('3:12')
+ 192.0
+ >>> parse_duration_to_minutes_float('1:23:45')
+ 83.75
+ >>> parse_duration_to_minutes_float('1y 3d 3h 4min')
+ 530464.0
+ >>> parse_duration_to_minutes_float('1d3h5min')
+ 1625.0
+ >>> parse_duration_to_minutes_float('3d 13:35')
+ 5135.0
+ >>> parse_duration_to_minutes_float('2.35h')
+ 141.0
+ >>> parse_duration_to_minutes_float('10')
+ 10.0
+ >>> parse_duration_to_minutes_float('10.')
+ 10.0
+ >>> parse_duration_to_minutes_float('1 h')
+ 60.0
+ >>> parse_duration_to_minutes_float('')
+ 0.0
+ """
+
+ match: Optional[Any]
+ if duration == "":
+ return 0.0
+ if isinstance(duration, float):
+ return float(duration)
+ if RE_ORG_DURATION_H_MM.fullmatch(duration):
+ hours, minutes, *seconds_ = map(float, duration.split(":"))
+ seconds = seconds_[0] if seconds_ else 0
+ return seconds / 60.0 + minutes + 60 * hours
+ if RE_ORG_DURATION_FULL.fullmatch(duration):
+ minutes = 0
+ for match in RE_ORG_DURATION_UNIT.finditer(duration):
+ value = float(match.group(1))
+ unit = match.group(2)
+ minutes += value * ORG_DURATION_UNITS[unit]
+ return float(minutes)
+ match = RE_ORG_DURATION_MIXED.fullmatch(duration)
+ if match:
+ units_part = match.groupdict()['A']
+ hms_part = match.groupdict()['B']
+ return parse_duration_to_minutes_float(units_part) + parse_duration_to_minutes_float(hms_part)
+ if RE_FLOAT.fullmatch(duration):
+ return float(duration)
+ raise ValueError("Invalid duration format %s" % duration)
+
+# Conversion factor to minutes for a duration.
+ORG_DURATION_UNITS = {
+ "min": 1,
+ "h": 60,
+ "d": 60 * 24,
+ "w": 60 * 24 * 7,
+ "m": 60 * 24 * 30,
+ "y": 60 * 24 * 365.25,
+}
+# Regexp matching for all units.
+ORG_DURATION_UNITS_RE = r'(%s)' % r'|'.join(ORG_DURATION_UNITS.keys())
+# Regexp matching a duration expressed with H:MM or H:MM:SS format.
+# Hours can use any number of digits.
+ORG_DURATION_H_MM_RE = r'[ \t]*[0-9]+(?::[0-9]{2}){1,2}[ \t]*'
+RE_ORG_DURATION_H_MM = re.compile(ORG_DURATION_H_MM_RE)
+# Regexp matching a duration with an unit.
+# Allowed units are defined in ORG_DURATION_UNITS.
+# Match group 1 contains the bare number.
+# Match group 2 contains the unit.
+ORG_DURATION_UNIT_RE = r'([0-9]+(?:[.][0-9]*)?)[ \t]*' + ORG_DURATION_UNITS_RE
+RE_ORG_DURATION_UNIT = re.compile(ORG_DURATION_UNIT_RE)
+# Regexp matching a duration expressed with units.
+# Allowed units are defined in ORG_DURATION_UNITS.
+ORG_DURATION_FULL_RE = r'(?:[ \t]*%s)+[ \t]*' % ORG_DURATION_UNIT_RE
+RE_ORG_DURATION_FULL = re.compile(ORG_DURATION_FULL_RE)
+# Regexp matching a duration expressed with units and H:MM or H:MM:SS format.
+# Allowed units are defined in ORG_DURATION_UNITS.
+# Match group A contains units part.
+# Match group B contains H:MM or H:MM:SS part.
+ORG_DURATION_MIXED_RE = r'(?P<A>([ \t]*%s)+)[ \t]*(?P<B>[0-9]+(?::[0-9][0-9]){1,2})[ \t]*' % ORG_DURATION_UNIT_RE
+RE_ORG_DURATION_MIXED = re.compile(ORG_DURATION_MIXED_RE)
+# Regexp matching float numbers.
+RE_FLOAT = re.compile(r'[0-9]+([.][0-9]*)?')
+
+def parse_comment(line: str): # -> Optional[Tuple[str, Sequence[str]]]: # todo wtf?? it says 'ABCMeta isn't subscriptable??'
+ """
+ Parse special comment such as ``#+SEQ_TODO``
+
+ >>> parse_comment('#+SEQ_TODO: TODO | DONE')
+ ('SEQ_TODO', ['TODO | DONE'])
+ >>> parse_comment('# not a special comment') # None
+
+ >>> parse_comment('#+FILETAGS: :tag1:tag2:')
+ ('FILETAGS', ['tag1', 'tag2'])
+ """
+ match = re.match(r'\s*#\+', line)
+ if match:
+ end = match.end(0)
+ comment = line[end:].split(':', maxsplit=1)
+ if len(comment) >= 2:
+ key = comment[0]
+ value = comment[1].strip()
+ if key.upper() == 'FILETAGS':
+ # just legacy behaviour; it seems like filetags is the only one that separated by ':'
+ # see https://orgmode.org/org.html#In_002dbuffer-Settings
+ return (key, [c.strip() for c in value.split(':') if len(c.strip()) > 0])
+ else:
+ return (key, [value])
+ return None
+
+
+def parse_seq_todo(line):
+ """
+ Parse value part of SEQ_TODO/TODO/TYP_TODO comment.
+
+ >>> parse_seq_todo('TODO | DONE')
+ (['TODO'], ['DONE'])
+ >>> parse_seq_todo(' Fred Sara Lucy Mike | DONE ')
+ (['Fred', 'Sara', 'Lucy', 'Mike'], ['DONE'])
+ >>> parse_seq_todo('| CANCELED')
+ ([], ['CANCELED'])
+ >>> parse_seq_todo('REPORT(r) BUG(b) KNOWNCAUSE(k) | FIXED(f)')
+ (['REPORT', 'BUG', 'KNOWNCAUSE'], ['FIXED'])
+
+ See also:
+
+ * (info "(org) Per-file keywords")
+ * (info "(org) Fast access to TODO states")
+
+ """
+ todo_done = line.split('|', 1)
+ if len(todo_done) == 2:
+ (todos, dones) = todo_done
+ else:
+ (todos, dones) = (line, '')
+ strip_fast_access_key = lambda x: x.split('(', 1)[0]
+ return (list(map(strip_fast_access_key, todos.split())),
+ list(map(strip_fast_access_key, dones.split())))
+
+
+class OrgEnv(object):
+
+ """
+ Information global to the file (e.g, TODO keywords).
+ """
+
+ def __init__(self, todos=['TODO'], dones=['DONE'],
+ filename='<undefined>'):
+ self._todos = list(todos)
+ self._dones = list(dones)
+ self._todo_not_specified_in_comment = True
+ self._filename = filename
+ self._nodes = []
+
+ @property
+ def nodes(self):
+ """
+ A list of org nodes.
+
+ >>> OrgEnv().nodes # default is empty (of course)
+ []
+
+ >>> from orgparse import loads
+ >>> loads('''
+ ... * Heading 1
+ ... ** Heading 2
+ ... *** Heading 3
+ ... ''').env.nodes # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
+ [<orgparse.node.OrgRootNode object at 0x...>,
+ <orgparse.node.OrgNode object at 0x...>,
+ <orgparse.node.OrgNode object at 0x...>,
+ <orgparse.node.OrgNode object at 0x...>]
+
+ """
+ return self._nodes
+
+ def add_todo_keys(self, todos, dones):
+ if self._todo_not_specified_in_comment:
+ self._todos = []
+ self._dones = []
+ self._todo_not_specified_in_comment = False
+ self._todos.extend(todos)
+ self._dones.extend(dones)
+
+ @property
+ def todo_keys(self):
+ """
+ TODO keywords defined for this document (file).
+
+ >>> env = OrgEnv()
+ >>> env.todo_keys
+ ['TODO']
+
+ """
+ return self._todos
+
+ @property
+ def done_keys(self):
+ """
+ DONE keywords defined for this document (file).
+
+ >>> env = OrgEnv()
+ >>> env.done_keys
+ ['DONE']
+
+ """
+ return self._dones
+
+ @property
+ def all_todo_keys(self):
+ """
+ All TODO keywords (including DONEs).
+
+ >>> env = OrgEnv()
+ >>> env.all_todo_keys
+ ['TODO', 'DONE']
+
+ """
+ return self._todos + self._dones
+
+ @property
+ def filename(self):
+ """
+ Return a path to the source file or similar information.
+
+ If the org objects are not loaded from a file, this value
+ will be a string of the form ``<SOME_TEXT>``.
+
+ :rtype: str
+
+ """
+ return self._filename
+
+ # parser
+
+ def from_chunks(self, chunks):
+ yield OrgRootNode.from_chunk(self, next(chunks))
+ for chunk in chunks:
+ yield OrgNode.from_chunk(self, chunk)
+
+
+class OrgBaseNode(Sequence):
+
+ """
+ Base class for :class:`OrgRootNode` and :class:`OrgNode`
+
+ .. attribute:: env
+
+ An instance of :class:`OrgEnv`.
+ All nodes in a same file shares same instance.
+
+ :class:`OrgBaseNode` is an iterable object:
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Heading 1
+ ... ** Heading 2
+ ... *** Heading 3
+ ... * Heading 4
+ ... ''')
+ >>> for node in root:
+ ... print(node)
+ <BLANKLINE>
+ * Heading 1
+ ** Heading 2
+ *** Heading 3
+ * Heading 4
+
+ Note that the first blank line is due to the root node, as
+ iteration contains the object itself. To skip that, use
+ slice access ``[1:]``:
+
+ >>> for node in root[1:]:
+ ... print(node)
+ * Heading 1
+ ** Heading 2
+ *** Heading 3
+ * Heading 4
+
+ It also supports sequence protocol.
+
+ >>> print(root[1])
+ * Heading 1
+ >>> root[0] is root # index 0 means itself
+ True
+ >>> len(root) # remember, sequence contains itself
+ 5
+
+ Note the difference between ``root[1:]`` and ``root[1]``:
+
+ >>> for node in root[1]:
+ ... print(node)
+ * Heading 1
+ ** Heading 2
+ *** Heading 3
+
+ Nodes remember the line number information (1-indexed):
+
+ >>> print(root.children[1].linenumber)
+ 5
+ """
+
+ _body_lines: List[str] # set by the child classes
+
+ def __init__(self, env, index=None) -> None:
+ """
+ Create an :class:`OrgBaseNode` object.
+
+ :type env: :class:`OrgEnv`
+ :arg env: This will be set to the :attr:`env` attribute.
+
+ """
+ self.env = env
+
+ self.linenumber = cast(int, None) # set in parse_lines
+
+ # content
+ self._lines: List[str] = []
+
+ self._properties: Dict[str, PropertyValue] = {}
+ self._timestamps: List[OrgDate] = []
+
+ # FIXME: use `index` argument to set index. (Currently it is
+ # done externally in `parse_lines`.)
+ if index is not None:
+ self._index = index
+ """
+ Index of `self` in `self.env.nodes`.
+
+ It must satisfy an equality::
+
+ self.env.nodes[self._index] is self
+
+ This value is used for quick access for iterator and
+ tree-like traversing.
+
+ """
+
+ def __iter__(self):
+ yield self
+ level = self.level
+ for node in self.env._nodes[self._index + 1:]:
+ if node.level > level:
+ yield node
+ else:
+ break
+
+ def __len__(self):
+ return sum(1 for _ in self)
+
+ def __nonzero__(self):
+ # As self.__len__ returns non-zero value always this is not
+ # needed. This function is only for performance.
+ return True
+
+ __bool__ = __nonzero__ # PY3
+
+ def __getitem__(self, key):
+ if isinstance(key, slice):
+ return itertools.islice(self, key.start, key.stop, key.step)
+ elif isinstance(key, int):
+ if key < 0:
+ key += len(self)
+ for (i, node) in enumerate(self):
+ if i == key:
+ return node
+ raise IndexError("Out of range {0}".format(key))
+ else:
+ raise TypeError("Inappropriate type {0} for {1}"
+ .format(type(key), type(self)))
+
+ # tree structure
+
+ def _find_same_level(self, iterable):
+ for node in iterable:
+ if node.level < self.level:
+ return
+ if node.level == self.level:
+ return node
+
+ @property
+ def previous_same_level(self):
+ """
+ Return previous node if exists or None otherwise.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node 1
+ ... * Node 2
+ ... ** Node 3
+ ... ''')
+ >>> (n1, n2, n3) = list(root[1:])
+ >>> n1.previous_same_level is None
+ True
+ >>> n2.previous_same_level is n1
+ True
+ >>> n3.previous_same_level is None # n2 is not at the same level
+ True
+
+ """
+ return self._find_same_level(reversed(self.env._nodes[:self._index]))
+
+ @property
+ def next_same_level(self):
+ """
+ Return next node if exists or None otherwise.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node 1
+ ... * Node 2
+ ... ** Node 3
+ ... ''')
+ >>> (n1, n2, n3) = list(root[1:])
+ >>> n1.next_same_level is n2
+ True
+ >>> n2.next_same_level is None # n3 is not at the same level
+ True
+ >>> n3.next_same_level is None
+ True
+
+ """
+ return self._find_same_level(self.env._nodes[self._index + 1:])
+
+ # FIXME: cache parent node
+ def _find_parent(self):
+ for node in reversed(self.env._nodes[:self._index]):
+ if node.level < self.level:
+ return node
+
+ def get_parent(self, max_level=None):
+ """
+ Return a parent node.
+
+ :arg int max_level:
+ In the normally structured org file, it is a level
+ of the ancestor node to return. For example,
+ ``get_parent(max_level=0)`` returns a root node.
+
+ In the general case, it specify a maximum level of the
+ desired ancestor node. If there is no ancestor node
+ whose level is equal to ``max_level``, this function
+ try to find an ancestor node which level is smaller
+ than ``max_level``.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node 1
+ ... ** Node 2
+ ... ** Node 3
+ ... ''')
+ >>> (n1, n2, n3) = list(root[1:])
+ >>> n1.get_parent() is root
+ True
+ >>> n2.get_parent() is n1
+ True
+ >>> n3.get_parent() is n1
+ True
+
+ For simplicity, accessing :attr:`parent` is alias of calling
+ :meth:`get_parent` without argument.
+
+ >>> n1.get_parent() is n1.parent
+ True
+ >>> root.parent is None
+ True
+
+ This is a little bit pathological situation -- but works.
+
+ >>> root = loads('''
+ ... * Node 1
+ ... *** Node 2
+ ... ** Node 3
+ ... ''')
+ >>> (n1, n2, n3) = list(root[1:])
+ >>> n1.get_parent() is root
+ True
+ >>> n2.get_parent() is n1
+ True
+ >>> n3.get_parent() is n1
+ True
+
+ Now let's play with `max_level`.
+
+ >>> root = loads('''
+ ... * Node 1 (level 1)
+ ... ** Node 2 (level 2)
+ ... *** Node 3 (level 3)
+ ... ''')
+ >>> (n1, n2, n3) = list(root[1:])
+ >>> n3.get_parent() is n2
+ True
+ >>> n3.get_parent(max_level=2) is n2 # same as default
+ True
+ >>> n3.get_parent(max_level=1) is n1
+ True
+ >>> n3.get_parent(max_level=0) is root
+ True
+
+ """
+ if max_level is None:
+ max_level = self.level - 1
+ parent = self._find_parent()
+ while parent.level > max_level:
+ parent = parent.get_parent()
+ return parent
+
+ @property
+ def parent(self):
+ """
+ Alias of :meth:`get_parent()` (calling without argument).
+ """
+ return self.get_parent()
+
+ # FIXME: cache children nodes
+ def _find_children(self):
+ nodeiter = iter(self.env._nodes[self._index + 1:])
+ try:
+ node = next(nodeiter)
+ except StopIteration:
+ return
+ if node.level <= self.level:
+ return
+ yield node
+ last_child_level = node.level
+ for node in nodeiter:
+ if node.level <= self.level:
+ return
+ if node.level <= last_child_level:
+ yield node
+ last_child_level = node.level
+
+ @property
+ def children(self):
+ """
+ A list of child nodes.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node 1
+ ... ** Node 2
+ ... *** Node 3
+ ... ** Node 4
+ ... ''')
+ >>> (n1, n2, n3, n4) = list(root[1:])
+ >>> (c1, c2) = n1.children
+ >>> c1 is n2
+ True
+ >>> c2 is n4
+ True
+
+ Note the difference to ``n1[1:]``, which returns the Node 3 also:
+
+ >>> (m1, m2, m3) = list(n1[1:])
+ >>> m2 is n3
+ True
+
+ """
+ return list(self._find_children())
+
+ @property
+ def root(self):
+ """
+ The root node.
+
+ >>> from orgparse import loads
+ >>> root = loads('* Node 1')
+ >>> n1 = root[1]
+ >>> n1.root is root
+ True
+
+ """
+ root = self
+ while True:
+ parent = root.get_parent()
+ if not parent:
+ return root
+ root = parent
+
+ @property
+ def properties(self) -> Dict[str, PropertyValue]:
+ """
+ Node properties as a dictionary.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node
+ ... :PROPERTIES:
+ ... :SomeProperty: value
+ ... :END:
+ ... ''')
+ >>> root.children[0].properties['SomeProperty']
+ 'value'
+
+ """
+ return self._properties
+
+ def get_property(self, key, val=None) -> Optional[PropertyValue]:
+ """
+ Return property named ``key`` if exists or ``val`` otherwise.
+
+ :arg str key:
+ Key of property.
+
+ :arg val:
+ Default value to return.
+
+ """
+ return self._properties.get(key, val)
+
+ # parser
+
+ @classmethod
+ def from_chunk(cls, env, lines):
+ self = cls(env)
+ self._lines = lines
+ self._parse_comments()
+ return self
+
+ def _parse_comments(self):
+ special_comments: Dict[str, List[str]] = {}
+ for line in self._lines:
+ parsed = parse_comment(line)
+ if parsed:
+ (key, vals) = parsed
+ key = key.upper() # case insensitive, so keep as uppercase
+ special_comments.setdefault(key, []).extend(vals)
+ self._special_comments = special_comments
+ # parse TODO keys and store in OrgEnv
+ for todokey in ['TODO', 'SEQ_TODO', 'TYP_TODO']:
+ for val in special_comments.get(todokey, []):
+ self.env.add_todo_keys(*parse_seq_todo(val))
+
+ def _iparse_properties(self, ilines: Iterator[str]) -> Iterator[str]:
+ self._properties = {}
+ in_property_field = False
+ for line in ilines:
+ if in_property_field:
+ if line.find(":END:") >= 0:
+ break
+ else:
+ (key, val) = parse_property(line)
+ if key is not None and val is not None:
+ self._properties.update({key: val})
+ elif line.find(":PROPERTIES:") >= 0:
+ in_property_field = True
+ else:
+ yield line
+ for line in ilines:
+ yield line
+
+ # misc
+
+ @property
+ def level(self):
+ """
+ Level of this node.
+
+ :rtype: int
+
+ """
+ raise NotImplementedError
+
+ def _get_tags(self, inher=False) -> Set[str]:
+ """
+ Return tags
+
+ :arg bool inher:
+ Mix with tags of all ancestor nodes if ``True``.
+
+ :rtype: set
+
+ """
+ return set()
+
+ @property
+ def tags(self) -> Set[str]:
+ """
+ Tags of this and parent's node.
+
+ >>> from orgparse import loads
+ >>> n2 = loads('''
+ ... * Node 1 :TAG1:
+ ... ** Node 2 :TAG2:
+ ... ''')[2]
+ >>> n2.tags == set(['TAG1', 'TAG2'])
+ True
+
+ """
+ return self._get_tags(inher=True)
+
+ @property
+ def shallow_tags(self) -> Set[str]:
+ """
+ Tags defined for this node (don't look-up parent nodes).
+
+ >>> from orgparse import loads
+ >>> n2 = loads('''
+ ... * Node 1 :TAG1:
+ ... ** Node 2 :TAG2:
+ ... ''')[2]
+ >>> n2.shallow_tags == set(['TAG2'])
+ True
+
+ """
+ return self._get_tags(inher=False)
+
+ @staticmethod
+ def _get_text(text, format='plain'):
+ if format == 'plain':
+ return to_plain_text(text)
+ elif format == 'raw':
+ return text
+ elif format == 'rich':
+ return to_rich_text(text)
+ else:
+ raise ValueError('format={0} is not supported.'.format(format))
+
+ def get_body(self, format='plain') -> str:
+ """
+ Return a string of body text.
+
+ See also: :meth:`get_heading`.
+
+ """
+ return self._get_text(
+ '\n'.join(self._body_lines), format) if self._lines else ''
+
+ @property
+ def body(self) -> str:
+ """Alias of ``.get_body(format='plain')``."""
+ return self.get_body()
+
+ @property
+ def body_rich(self) -> Iterator[Rich]:
+ r = self.get_body(format='rich')
+ return cast(Iterator[Rich], r) # meh..
+
+ @property
+ def heading(self) -> str:
+ raise NotImplementedError
+
+ def is_root(self):
+ """
+ Return ``True`` when it is a root node.
+
+ >>> from orgparse import loads
+ >>> root = loads('* Node 1')
+ >>> root.is_root()
+ True
+ >>> n1 = root[1]
+ >>> n1.is_root()
+ False
+
+ """
+ return False
+
+ def get_timestamps(self, active=False, inactive=False,
+ range=False, point=False):
+ """
+ Return a list of timestamps in the body text.
+
+ :type active: bool
+ :arg active: Include active type timestamps.
+ :type inactive: bool
+ :arg inactive: Include inactive type timestamps.
+ :type range: bool
+ :arg range: Include timestamps which has end date.
+ :type point: bool
+ :arg point: Include timestamps which has no end date.
+
+ :rtype: list of :class:`orgparse.date.OrgDate` subclasses
+
+
+ Consider the following org node:
+
+ >>> from orgparse import loads
+ >>> node = loads('''
+ ... * Node
+ ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun>
+ ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
+ ... Some inactive timestamp [2012-02-23 Thu] in body text.
+ ... Some active timestamp <2012-02-24 Fri> in body text.
+ ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
+ ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
+ ... ''').children[0]
+
+ The default flags are all off, so it does not return anything.
+
+ >>> node.get_timestamps()
+ []
+
+ You can fetch appropriate timestamps using keyword arguments.
+
+ >>> node.get_timestamps(inactive=True, point=True)
+ [OrgDate((2012, 2, 23), None, False)]
+ >>> node.get_timestamps(active=True, point=True)
+ [OrgDate((2012, 2, 24))]
+ >>> node.get_timestamps(inactive=True, range=True)
+ [OrgDate((2012, 2, 25), (2012, 2, 27), False)]
+ >>> node.get_timestamps(active=True, range=True)
+ [OrgDate((2012, 2, 26), (2012, 2, 28))]
+
+ This is more complex example. Only active timestamps,
+ regardless of range/point type.
+
+ >>> node.get_timestamps(active=True, point=True, range=True)
+ [OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))]
+
+ """
+ return [
+ ts for ts in self._timestamps if
+ (((active and ts.is_active()) or
+ (inactive and not ts.is_active())) and
+ ((range and ts.has_end()) or
+ (point and not ts.has_end())))]
+
+ @property
+ def datelist(self):
+ """
+ Alias of ``.get_timestamps(active=True, inactive=True, point=True)``.
+
+ :rtype: list of :class:`orgparse.date.OrgDate` subclasses
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node with point dates <2012-02-25 Sat>
+ ... CLOSED: [2012-02-25 Sat 21:15]
+ ... Some inactive timestamp [2012-02-26 Sun] in body text.
+ ... Some active timestamp <2012-02-27 Mon> in body text.
+ ... ''')
+ >>> root.children[0].datelist # doctest: +NORMALIZE_WHITESPACE
+ [OrgDate((2012, 2, 25)),
+ OrgDate((2012, 2, 26), None, False),
+ OrgDate((2012, 2, 27))]
+
+ """
+ return self.get_timestamps(active=True, inactive=True, point=True)
+
+ @property
+ def rangelist(self):
+ """
+ Alias of ``.get_timestamps(active=True, inactive=True, range=True)``.
+
+ :rtype: list of :class:`orgparse.date.OrgDate` subclasses
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node with range dates <2012-02-25 Sat>--<2012-02-28 Tue>
+ ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
+ ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
+ ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
+ ... Some time interval <2012-02-27 Mon 11:23-12:10>.
+ ... ''')
+ >>> root.children[0].rangelist # doctest: +NORMALIZE_WHITESPACE
+ [OrgDate((2012, 2, 25), (2012, 2, 28)),
+ OrgDate((2012, 2, 25), (2012, 2, 27), False),
+ OrgDate((2012, 2, 26), (2012, 2, 28)),
+ OrgDate((2012, 2, 27, 11, 23, 0), (2012, 2, 27, 12, 10, 0))]
+
+ """
+ return self.get_timestamps(active=True, inactive=True, range=True)
+
+ def __str__(self) -> str:
+ return "\n".join(self._lines)
+
+ # todo hmm, not sure if it really belongs here and not to OrgRootNode?
+ def get_file_property_list(self, property):
+ """
+ Return a list of the selected property
+ """
+ vals = self._special_comments.get(property.upper(), None)
+ return [] if vals is None else vals
+
+ def get_file_property(self, property):
+ """
+ Return a single element of the selected property or None if it doesn't exist
+ """
+ vals = self._special_comments.get(property.upper(), None)
+ if vals is None:
+ return None
+ elif len(vals) == 1:
+ return vals[0]
+ else:
+ raise RuntimeError('Multiple values for property {}: {}'.format(property, vals))
+
+
+class OrgRootNode(OrgBaseNode):
+
+ """
+ Node to represent a file. Its body contains all lines before the first
+ headline
+
+ See :class:`OrgBaseNode` for other available functions.
+ """
+
+ @property
+ def heading(self) -> str:
+ return ''
+
+ def _get_tags(self, inher=False) -> Set[str]:
+ filetags = self.get_file_property_list('FILETAGS')
+ return set(filetags)
+
+ @property
+ def level(self):
+ return 0
+
+ def get_parent(self, max_level=None):
+ return None
+
+ def is_root(self):
+ return True
+
+ # parsers
+
+ def _parse_pre(self):
+ """Call parsers which must be called before tree structuring"""
+ ilines: Iterator[str] = iter(self._lines)
+ ilines = self._iparse_properties(ilines)
+ ilines = self._iparse_timestamps(ilines)
+ self._body_lines = list(ilines)
+
+ def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
+ self._timestamps = []
+ for line in ilines:
+ self._timestamps.extend(OrgDate.list_from_str(line))
+ yield line
+
+
+class OrgNode(OrgBaseNode):
+
+ """
+ Node to represent normal org node
+
+ See :class:`OrgBaseNode` for other available functions.
+
+ """
+
+ def __init__(self, *args, **kwds) -> None:
+ super(OrgNode, self).__init__(*args, **kwds)
+ # fixme instead of casts, should organize code in such a way that they aren't necessary
+ self._heading = cast(str, None)
+ self._level = None
+ self._tags = cast(List[str], None)
+ self._todo: Optional[str] = None
+ self._priority = None
+ self._scheduled = OrgDateScheduled(None)
+ self._deadline = OrgDateDeadline(None)
+ self._closed = OrgDateClosed(None)
+ self._clocklist: List[OrgDateClock] = []
+ self._body_lines: List[str] = []
+ self._repeated_tasks: List[OrgDateRepeatedTask] = []
+
+ # parser
+
+ def _parse_pre(self):
+ """Call parsers which must be called before tree structuring"""
+ self._parse_heading()
+ # FIXME: make the following parsers "lazy"
+ ilines: Iterator[str] = iter(self._lines)
+ try:
+ next(ilines) # skip heading
+ except StopIteration:
+ return
+ ilines = self._iparse_sdc(ilines)
+ ilines = self._iparse_clock(ilines)
+ ilines = self._iparse_properties(ilines)
+ ilines = self._iparse_repeated_tasks(ilines)
+ ilines = self._iparse_timestamps(ilines)
+ self._body_lines = list(ilines)
+
+ def _parse_heading(self) -> None:
+ heading = self._lines[0]
+ (heading, self._level) = parse_heading_level(heading)
+ (heading, self._tags) = parse_heading_tags(heading)
+ (heading, self._todo) = parse_heading_todos(
+ heading, self.env.all_todo_keys)
+ (heading, self._priority) = parse_heading_priority(heading)
+ self._heading = heading
+
+ # The following ``_iparse_*`` methods are simple generator based
+ # parser. See ``_parse_pre`` for how it is used. The principle
+ # is simple: these methods get an iterator and returns an iterator.
+ # If the item returned by the input iterator must be dedicated to
+ # the parser, do not yield the item or yield it as-is otherwise.
+
+ def _iparse_sdc(self, ilines: Iterator[str]) -> Iterator[str]:
+ """
+ Parse SCHEDULED, DEADLINE and CLOSED time tamps.
+
+ They are assumed be in the first line.
+
+ """
+ try:
+ line = next(ilines)
+ except StopIteration:
+ return
+ (self._scheduled, self._deadline, self._closed) = parse_sdc(line)
+
+ if not (self._scheduled or
+ self._deadline or
+ self._closed):
+ yield line # when none of them were found
+
+ for line in ilines:
+ yield line
+
+ def _iparse_clock(self, ilines: Iterator[str]) -> Iterator[str]:
+ self._clocklist = []
+ for line in ilines:
+ cl = OrgDateClock.from_str(line)
+ if cl:
+ self._clocklist.append(cl)
+ else:
+ yield line
+
+ def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
+ self._timestamps = []
+ self._timestamps.extend(OrgDate.list_from_str(self._heading))
+ for l in ilines:
+ self._timestamps.extend(OrgDate.list_from_str(l))
+ yield l
+
+ def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]:
+ self._repeated_tasks = []
+ for line in ilines:
+ match = self._repeated_tasks_re.search(line)
+ if match:
+ # FIXME: move this parsing to OrgDateRepeatedTask.from_str
+ mdict = match.groupdict()
+ done_state = mdict['done']
+ todo_state = mdict['todo']
+ date = OrgDate.from_str(mdict['date'])
+ self._repeated_tasks.append(
+ OrgDateRepeatedTask(date.start, todo_state, done_state))
+ else:
+ yield line
+
+ _repeated_tasks_re = re.compile(
+ r'''
+ \s*- \s+
+ State \s+ "(?P<done> [^"]+)" \s+
+ from \s+ "(?P<todo> [^"]+)" \s+
+ \[ (?P<date> [^\]]+) \]''',
+ re.VERBOSE)
+
+ def get_heading(self, format='plain'):
+ """
+ Return a string of head text without tags and TODO keywords.
+
+ >>> from orgparse import loads
+ >>> node = loads('* TODO Node 1').children[0]
+ >>> node.get_heading()
+ 'Node 1'
+
+ It strips off inline markup by default (``format='plain'``).
+ You can get the original raw string by specifying
+ ``format='raw'``.
+
+ >>> node = loads('* [[link][Node 1]]').children[0]
+ >>> node.get_heading()
+ 'Node 1'
+ >>> node.get_heading(format='raw')
+ '[[link][Node 1]]'
+
+ """
+ return self._get_text(self._heading, format)
+
+ @property
+ def heading(self) -> str:
+ """Alias of ``.get_heading(format='plain')``."""
+ return self.get_heading()
+
+ @property
+ def level(self):
+ return self._level
+ """
+ Level attribute of this node. Top level node is level 1.
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node 1
+ ... ** Node 2
+ ... ''')
+ >>> (n1, n2) = root.children
+ >>> root.level
+ 0
+ >>> n1.level
+ 1
+ >>> n2.level
+ 2
+
+ """
+
+ @property
+ def priority(self):
+ """
+ Priority attribute of this node. It is None if undefined.
+
+ >>> from orgparse import loads
+ >>> (n1, n2) = loads('''
+ ... * [#A] Node 1
+ ... * Node 2
+ ... ''').children
+ >>> n1.priority
+ 'A'
+ >>> n2.priority is None
+ True
+
+ """
+ return self._priority
+
+ def _get_tags(self, inher=False) -> Set[str]:
+ tags = set(self._tags)
+ if inher:
+ parent = self.get_parent()
+ if parent:
+ return tags | parent._get_tags(inher=True)
+ return tags
+
+ @property
+ def todo(self) -> Optional[str]:
+ """
+ A TODO keyword of this node if exists or None otherwise.
+
+ >>> from orgparse import loads
+ >>> root = loads('* TODO Node 1')
+ >>> root.children[0].todo
+ 'TODO'
+
+ """
+ return self._todo
+
+ @property
+ def scheduled(self):
+ """
+ Return scheduled timestamp
+
+ :rtype: a subclass of :class:`orgparse.date.OrgDate`
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node
+ ... SCHEDULED: <2012-02-26 Sun>
+ ... ''')
+ >>> root.children[0].scheduled
+ OrgDateScheduled((2012, 2, 26))
+
+ """
+ return self._scheduled
+
+ @property
+ def deadline(self):
+ """
+ Return deadline timestamp.
+
+ :rtype: a subclass of :class:`orgparse.date.OrgDate`
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node
+ ... DEADLINE: <2012-02-26 Sun>
+ ... ''')
+ >>> root.children[0].deadline
+ OrgDateDeadline((2012, 2, 26))
+
+ """
+ return self._deadline
+
+ @property
+ def closed(self):
+ """
+ Return timestamp of closed time.
+
+ :rtype: a subclass of :class:`orgparse.date.OrgDate`
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node
+ ... CLOSED: [2012-02-26 Sun 21:15]
+ ... ''')
+ >>> root.children[0].closed
+ OrgDateClosed((2012, 2, 26, 21, 15, 0))
+
+ """
+ return self._closed
+
+ @property
+ def clock(self):
+ """
+ Return a list of clocked timestamps
+
+ :rtype: a list of a subclass of :class:`orgparse.date.OrgDate`
+
+ >>> from orgparse import loads
+ >>> root = loads('''
+ ... * Node
+ ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
+ ... ''')
+ >>> root.children[0].clock
+ [OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))]
+
+ """
+ return self._clocklist
+
+ def has_date(self):
+ """
+ Return ``True`` if it has any kind of timestamp
+ """
+ return (self.scheduled or
+ self.deadline or
+ self.datelist or
+ self.rangelist)
+
+ @property
+ def repeated_tasks(self):
+ """
+ Get repeated tasks marked DONE in an entry having repeater.
+
+ :rtype: list of :class:`orgparse.date.OrgDateRepeatedTask`
+
+ >>> from orgparse import loads
+ >>> node = loads('''
+ ... * TODO Pay the rent
+ ... DEADLINE: <2005-10-01 Sat +1m>
+ ... - State "DONE" from "TODO" [2005-09-01 Thu 16:10]
+ ... - State "DONE" from "TODO" [2005-08-01 Mon 19:44]
+ ... - State "DONE" from "TODO" [2005-07-01 Fri 17:27]
+ ... ''').children[0]
+ >>> node.repeated_tasks # doctest: +NORMALIZE_WHITESPACE
+ [OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
+ OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
+ OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]
+ >>> node.repeated_tasks[0].before
+ 'TODO'
+ >>> node.repeated_tasks[0].after
+ 'DONE'
+
+ Repeated tasks in ``:LOGBOOK:`` can be fetched by the same code.
+
+ >>> node = loads('''
+ ... * TODO Pay the rent
+ ... DEADLINE: <2005-10-01 Sat +1m>
+ ... :LOGBOOK:
+ ... - State "DONE" from "TODO" [2005-09-01 Thu 16:10]
+ ... - State "DONE" from "TODO" [2005-08-01 Mon 19:44]
+ ... - State "DONE" from "TODO" [2005-07-01 Fri 17:27]
+ ... :END:
+ ... ''').children[0]
+ >>> node.repeated_tasks # doctest: +NORMALIZE_WHITESPACE
+ [OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
+ OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
+ OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]
+
+ See: `(info "(org) Repeated tasks")
+ <http://orgmode.org/manual/Repeated-tasks.html>`_
+
+ """
+ return self._repeated_tasks
+
+
+def parse_lines(lines: Iterable[str], filename, env=None) -> OrgNode:
+ if not env:
+ env = OrgEnv(filename=filename)
+ elif env.filename != filename:
+ raise ValueError('If env is specified, filename must match')
+
+ # parse into node of list (environment will be parsed)
+ ch1, ch2 = itertools.tee(lines_to_chunks(lines))
+ linenos = itertools.accumulate(itertools.chain([0], (len(c) for c in ch1)))
+ nodes = env.from_chunks(ch2)
+ nodelist = []
+ for lineno, node in zip(linenos, nodes):
+ lineno += 1 # in text editors lines are 1-indexed
+ node.linenumber = lineno
+ nodelist.append(node)
+ # parse headings (level, TODO, TAGs, and heading)
+ nodelist[0]._index = 0
+ # parse the root node
+ nodelist[0]._parse_pre()
+ for (i, node) in enumerate(nodelist[1:], 1): # nodes except root node
+ node._index = i
+ node._parse_pre()
+ env._nodes = nodelist
+ return nodelist[0] # root