import re
import itertools
from typing import List, Iterable, Iterator, Optional, Union, Tuple, cast, Dict, Set, Sequence, Any
from .date import OrgDate, OrgDateClock, OrgDateRepeatedTask, parse_sdc, OrgDateScheduled, OrgDateDeadline, OrgDateClosed
from .inline import to_plain_text
from .extra import to_rich_text, Rich
def lines_to_chunks(lines: Iterable[str]) -> Iterable[List[str]]:
chunk: List[str] = []
for l in lines:
if RE_NODE_HEADER.search(l):
yield chunk
chunk = []
chunk.append(l)
yield chunk
RE_NODE_HEADER = re.compile(r"^\*+ ")
def parse_heading_level(heading):
"""
Get star-stripped heading and its level
>>> parse_heading_level('* Heading')
('Heading', 1)
>>> parse_heading_level('******** Heading')
('Heading', 8)
>>> parse_heading_level('*') # None since no space after star
>>> parse_heading_level('*bold*') # None
>>> parse_heading_level('not heading') # None
"""
match = RE_HEADING_STARS.search(heading)
if match:
return (match.group(2), len(match.group(1)))
RE_HEADING_STARS = re.compile(r'^(\*+)\s+(.*?)\s*$')
def parse_heading_tags(heading: str) -> Tuple[str, List[str]]:
"""
Get first tags and heading without tags
>>> parse_heading_tags('HEADING')
('HEADING', [])
>>> parse_heading_tags('HEADING :TAG1:TAG2:')
('HEADING', ['TAG1', 'TAG2'])
>>> parse_heading_tags('HEADING: this is still heading :TAG1:TAG2:')
('HEADING: this is still heading', ['TAG1', 'TAG2'])
>>> parse_heading_tags('HEADING :@tag:_tag_:')
('HEADING', ['@tag', '_tag_'])
Here is the spec of tags from Org Mode manual:
Tags are normal words containing letters, numbers, ``_``, and
``@``. Tags must be preceded and followed by a single colon,
e.g., ``:work:``.
-- (info "(org) Tags")
"""
match = RE_HEADING_TAGS.search(heading)
if match:
heading = match.group(1)
tagstr = match.group(2)
tags = tagstr.split(':')
else:
tags = []
return (heading, tags)
# Tags are normal words containing letters, numbers, '_', and '@'. https://orgmode.org/manual/Tags.html
RE_HEADING_TAGS = re.compile(r'(.*?)\s*:([\w@:]+):\s*$')
def parse_heading_todos(heading: str, todo_candidates: List[str]) -> Tuple[str, Optional[str]]:
"""
Get TODO keyword and heading without TODO keyword.
>>> todos = ['TODO', 'DONE']
>>> parse_heading_todos('Normal heading', todos)
('Normal heading', None)
>>> parse_heading_todos('TODO Heading', todos)
('Heading', 'TODO')
"""
for todo in todo_candidates:
if heading == todo:
return ('', todo)
if heading.startswith(todo + ' '):
return (heading[len(todo) + 1:], todo)
return (heading, None)
def parse_heading_priority(heading):
"""
Get priority and heading without priority field.
>>> parse_heading_priority('HEADING')
('HEADING', None)
>>> parse_heading_priority('[#A] HEADING')
('HEADING', 'A')
>>> parse_heading_priority('[#0] HEADING')
('HEADING', '0')
>>> parse_heading_priority('[#A]')
('', 'A')
"""
match = RE_HEADING_PRIORITY.search(heading)
if match:
return (match.group(2), match.group(1))
else:
return (heading, None)
RE_HEADING_PRIORITY = re.compile(r'^\s*\[#([A-Z0-9])\] ?(.*)$')
PropertyValue = Union[str, int, float]
def parse_property(line: str) -> Tuple[Optional[str], Optional[PropertyValue]]:
"""
Get property from given string.
>>> parse_property(':Some_property: some value')
('Some_property', 'some value')
>>> parse_property(':Effort: 1:10')
('Effort', 70)
"""
prop_key = None
prop_val: Optional[Union[str, int, float]] = None
match = RE_PROP.search(line)
if match:
prop_key = match.group(1)
prop_val = match.group(2)
if prop_key == 'Effort':
prop_val = parse_duration_to_minutes(prop_val)
return (prop_key, prop_val)
RE_PROP = re.compile(r'^\s*:(.*?):\s*(.*?)\s*$')
def parse_duration_to_minutes(duration: str) -> Union[float, int]:
"""
Parse duration minutes from given string.
Convert to integer if number has no decimal points
>>> parse_duration_to_minutes('3:12')
192
>>> parse_duration_to_minutes('1:23:45')
83.75
>>> parse_duration_to_minutes('1y 3d 3h 4min')
530464
>>> parse_duration_to_minutes('1d3h5min')
1625
>>> parse_duration_to_minutes('3d 13:35')
5135
>>> parse_duration_to_minutes('2.35h')
141
>>> parse_duration_to_minutes('10')
10
>>> parse_duration_to_minutes('10.')
10
>>> parse_duration_to_minutes('1 h')
60
>>> parse_duration_to_minutes('')
0
"""
minutes = parse_duration_to_minutes_float(duration)
return int(minutes) if minutes.is_integer() else minutes
def parse_duration_to_minutes_float(duration: str) -> float:
"""
Parse duration minutes from given string.
The following code is fully compatible with the 'org-duration-to-minutes' function in org mode:
https://github.com/emacs-mirror/emacs/blob/master/lisp/org/org-duration.el
>>> parse_duration_to_minutes_float('3:12')
192.0
>>> parse_duration_to_minutes_float('1:23:45')
83.75
>>> parse_duration_to_minutes_float('1y 3d 3h 4min')
530464.0
>>> parse_duration_to_minutes_float('1d3h5min')
1625.0
>>> parse_duration_to_minutes_float('3d 13:35')
5135.0
>>> parse_duration_to_minutes_float('2.35h')
141.0
>>> parse_duration_to_minutes_float('10')
10.0
>>> parse_duration_to_minutes_float('10.')
10.0
>>> parse_duration_to_minutes_float('1 h')
60.0
>>> parse_duration_to_minutes_float('')
0.0
"""
match: Optional[Any]
if duration == "":
return 0.0
if isinstance(duration, float):
return float(duration)
if RE_ORG_DURATION_H_MM.fullmatch(duration):
hours, minutes, *seconds_ = map(float, duration.split(":"))
seconds = seconds_[0] if seconds_ else 0
return seconds / 60.0 + minutes + 60 * hours
if RE_ORG_DURATION_FULL.fullmatch(duration):
minutes = 0
for match in RE_ORG_DURATION_UNIT.finditer(duration):
value = float(match.group(1))
unit = match.group(2)
minutes += value * ORG_DURATION_UNITS[unit]
return float(minutes)
match = RE_ORG_DURATION_MIXED.fullmatch(duration)
if match:
units_part = match.groupdict()['A']
hms_part = match.groupdict()['B']
return parse_duration_to_minutes_float(units_part) + parse_duration_to_minutes_float(hms_part)
if RE_FLOAT.fullmatch(duration):
return float(duration)
raise ValueError("Invalid duration format %s" % duration)
# Conversion factor to minutes for a duration.
ORG_DURATION_UNITS = {
"min": 1,
"h": 60,
"d": 60 * 24,
"w": 60 * 24 * 7,
"m": 60 * 24 * 30,
"y": 60 * 24 * 365.25,
}
# Regexp matching for all units.
ORG_DURATION_UNITS_RE = r'(%s)' % r'|'.join(ORG_DURATION_UNITS.keys())
# Regexp matching a duration expressed with H:MM or H:MM:SS format.
# Hours can use any number of digits.
ORG_DURATION_H_MM_RE = r'[ \t]*[0-9]+(?::[0-9]{2}){1,2}[ \t]*'
RE_ORG_DURATION_H_MM = re.compile(ORG_DURATION_H_MM_RE)
# Regexp matching a duration with an unit.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group 1 contains the bare number.
# Match group 2 contains the unit.
ORG_DURATION_UNIT_RE = r'([0-9]+(?:[.][0-9]*)?)[ \t]*' + ORG_DURATION_UNITS_RE
RE_ORG_DURATION_UNIT = re.compile(ORG_DURATION_UNIT_RE)
# Regexp matching a duration expressed with units.
# Allowed units are defined in ORG_DURATION_UNITS.
ORG_DURATION_FULL_RE = r'(?:[ \t]*%s)+[ \t]*' % ORG_DURATION_UNIT_RE
RE_ORG_DURATION_FULL = re.compile(ORG_DURATION_FULL_RE)
# Regexp matching a duration expressed with units and H:MM or H:MM:SS format.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group A contains units part.
# Match group B contains H:MM or H:MM:SS part.
ORG_DURATION_MIXED_RE = r'(?P<A>([ \t]*%s)+)[ \t]*(?P<B>[0-9]+(?::[0-9][0-9]){1,2})[ \t]*' % ORG_DURATION_UNIT_RE
RE_ORG_DURATION_MIXED = re.compile(ORG_DURATION_MIXED_RE)
# Regexp matching float numbers.
RE_FLOAT = re.compile(r'[0-9]+([.][0-9]*)?')
def parse_comment(line: str): # -> Optional[Tuple[str, Sequence[str]]]: # todo wtf?? it says 'ABCMeta isn't subscriptable??'
"""
Parse special comment such as ``#+SEQ_TODO``
>>> parse_comment('#+SEQ_TODO: TODO | DONE')
('SEQ_TODO', ['TODO | DONE'])
>>> parse_comment('# not a special comment') # None
>>> parse_comment('#+FILETAGS: :tag1:tag2:')
('FILETAGS', ['tag1', 'tag2'])
"""
match = re.match(r'\s*#\+', line)
if match:
end = match.end(0)
comment = line[end:].split(':', maxsplit=1)
if len(comment) >= 2:
key = comment[0]
value = comment[1].strip()
if key.upper() == 'FILETAGS':
# just legacy behaviour; it seems like filetags is the only one that separated by ':'
# see https://orgmode.org/org.html#In_002dbuffer-Settings
return (key, [c.strip() for c in value.split(':') if len(c.strip()) > 0])
else:
return (key, [value])
return None
def parse_seq_todo(line):
"""
Parse value part of SEQ_TODO/TODO/TYP_TODO comment.
>>> parse_seq_todo('TODO | DONE')
(['TODO'], ['DONE'])
>>> parse_seq_todo(' Fred Sara Lucy Mike | DONE ')
(['Fred', 'Sara', 'Lucy', 'Mike'], ['DONE'])
>>> parse_seq_todo('| CANCELED')
([], ['CANCELED'])
>>> parse_seq_todo('REPORT(r) BUG(b) KNOWNCAUSE(k) | FIXED(f)')
(['REPORT', 'BUG', 'KNOWNCAUSE'], ['FIXED'])
See also:
* (info "(org) Per-file keywords")
* (info "(org) Fast access to TODO states")
"""
todo_done = line.split('|', 1)
if len(todo_done) == 2:
(todos, dones) = todo_done
else:
(todos, dones) = (line, '')
strip_fast_access_key = lambda x: x.split('(', 1)[0]
return (list(map(strip_fast_access_key, todos.split())),
list(map(strip_fast_access_key, dones.split())))
class OrgEnv(object):
"""
Information global to the file (e.g, TODO keywords).
"""
def __init__(self, todos=['TODO'], dones=['DONE'],
filename='<undefined>'):
self._todos = list(todos)
self._dones = list(dones)
self._todo_not_specified_in_comment = True
self._filename = filename
self._nodes = []
@property
def nodes(self):
"""
A list of org nodes.
>>> OrgEnv().nodes # default is empty (of course)
[]
>>> from orgparse import loads
>>> loads('''
... * Heading 1
... ** Heading 2
... *** Heading 3
... ''').env.nodes # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
[<orgparse.node.OrgRootNode object at 0x...>,
<orgparse.node.OrgNode object at 0x...>,
<orgparse.node.OrgNode object at 0x...>,
<orgparse.node.OrgNode object at 0x...>]
"""
return self._nodes
def add_todo_keys(self, todos, dones):
if self._todo_not_specified_in_comment:
self._todos = []
self._dones = []
self._todo_not_specified_in_comment = False
self._todos.extend(todos)
self._dones.extend(dones)
@property
def todo_keys(self):
"""
TODO keywords defined for this document (file).
>>> env = OrgEnv()
>>> env.todo_keys
['TODO']
"""
return self._todos
@property
def done_keys(self):
"""
DONE keywords defined for this document (file).
>>> env = OrgEnv()
>>> env.done_keys
['DONE']
"""
return self._dones
@property
def all_todo_keys(self):
"""
All TODO keywords (including DONEs).
>>> env = OrgEnv()
>>> env.all_todo_keys
['TODO', 'DONE']
"""
return self._todos + self._dones
@property
def filename(self):
"""
Return a path to the source file or similar information.
If the org objects are not loaded from a file, this value
will be a string of the form ``<SOME_TEXT>``.
:rtype: str
"""
return self._filename
# parser
def from_chunks(self, chunks):
yield OrgRootNode.from_chunk(self, next(chunks))
for chunk in chunks:
yield OrgNode.from_chunk(self, chunk)
class OrgBaseNode(Sequence):
"""
Base class for :class:`OrgRootNode` and :class:`OrgNode`
.. attribute:: env
An instance of :class:`OrgEnv`.
All nodes in a same file shares same instance.
:class:`OrgBaseNode` is an iterable object:
>>> from orgparse import loads
>>> root = loads('''
... * Heading 1
... ** Heading 2
... *** Heading 3
... * Heading 4
... ''')
>>> for node in root:
... print(node)
<BLANKLINE>
* Heading 1
** Heading 2
*** Heading 3
* Heading 4
Note that the first blank line is due to the root node, as
iteration contains the object itself. To skip that, use
slice access ``[1:]``:
>>> for node in root[1:]:
... print(node)
* Heading 1
** Heading 2
*** Heading 3
* Heading 4
It also supports sequence protocol.
>>> print(root[1])
* Heading 1
>>> root[0] is root # index 0 means itself
True
>>> len(root) # remember, sequence contains itself
5
Note the difference between ``root[1:]`` and ``root[1]``:
>>> for node in root[1]:
... print(node)
* Heading 1
** Heading 2
*** Heading 3
Nodes remember the line number information (1-indexed):
>>> print(root.children[1].linenumber)
5
"""
_body_lines: List[str] # set by the child classes
def __init__(self, env, index=None) -> None:
"""
Create an :class:`OrgBaseNode` object.
:type env: :class:`OrgEnv`
:arg env: This will be set to the :attr:`env` attribute.
"""
self.env = env
self.linenumber = cast(int, None) # set in parse_lines
# content
self._lines: List[str] = []
self._properties: Dict[str, PropertyValue] = {}
self._timestamps: List[OrgDate] = []
# FIXME: use `index` argument to set index. (Currently it is
# done externally in `parse_lines`.)
if index is not None:
self._index = index
"""
Index of `self` in `self.env.nodes`.
It must satisfy an equality::
self.env.nodes[self._index] is self
This value is used for quick access for iterator and
tree-like traversing.
"""
def __iter__(self):
yield self
level = self.level
for node in self.env._nodes[self._index + 1:]:
if node.level > level:
yield node
else:
break
def __len__(self):
return sum(1 for _ in self)
def __nonzero__(self):
# As self.__len__ returns non-zero value always this is not
# needed. This function is only for performance.
return True
__bool__ = __nonzero__ # PY3
def __getitem__(self, key):
if isinstance(key, slice):
return itertools.islice(self, key.start, key.stop, key.step)
elif isinstance(key, int):
if key < 0:
key += len(self)
for (i, node) in enumerate(self):
if i == key:
return node
raise IndexError("Out of range {0}".format(key))
else:
raise TypeError("Inappropriate type {0} for {1}"
.format(type(key), type(self)))
# tree structure
def _find_same_level(self, iterable):
for node in iterable:
if node.level < self.level:
return
if node.level == self.level:
return node
@property
def previous_same_level(self):
"""
Return previous node if exists or None otherwise.
>>> from orgparse import loads
>>> root = loads('''
... * Node 1
... * Node 2
... ** Node 3
... ''')
>>> (n1, n2, n3) = list(root[1:])
>>> n1.previous_same_level is None
True
>>> n2.previous_same_level is n1
True
>>> n3.previous_same_level is None # n2 is not at the same level
True
"""
return self._find_same_level(reversed(self.env._nodes[:self._index]))
@property
def next_same_level(self):
"""
Return next node if exists or None otherwise.
>>> from orgparse import loads
>>> root = loads('''
... * Node 1
... * Node 2
... ** Node 3
... ''')
>>> (n1, n2, n3) = list(root[1:])
>>> n1.next_same_level is n2
True
>>> n2.next_same_level is None # n3 is not at the same level
True
>>> n3.next_same_level is None
True
"""
return self._find_same_level(self.env._nodes[self._index + 1:])
# FIXME: cache parent node
def _find_parent(self):
for node in reversed(self.env._nodes[:self._index]):
if node.level < self.level:
return node
def get_parent(self, max_level=None):
"""
Return a parent node.
:arg int max_level:
In the normally structured org file, it is a level
of the ancestor node to return. For example,
``get_parent(max_level=0)`` returns a root node.
In the general case, it specify a maximum level of the
desired ancestor node. If there is no ancestor node
whose level is equal to ``max_level``, this function
try to find an ancestor node which level is smaller
than ``max_level``.
>>> from orgparse import loads
>>> root = loads('''
... * Node 1
... ** Node 2
... ** Node 3
... ''')
>>> (n1, n2, n3) = list(root[1:])
>>> n1.get_parent() is root
True
>>> n2.get_parent() is n1
True
>>> n3.get_parent() is n1
True
For simplicity, accessing :attr:`parent` is alias of calling
:meth:`get_parent` without argument.
>>> n1.get_parent() is n1.parent
True
>>> root.parent is None
True
This is a little bit pathological situation -- but works.
>>> root = loads('''
... * Node 1
... *** Node 2
... ** Node 3
... ''')
>>> (n1, n2, n3) = list(root[1:])
>>> n1.get_parent() is root
True
>>> n2.get_parent() is n1
True
>>> n3.get_parent() is n1
True
Now let's play with `max_level`.
>>> root = loads('''
... * Node 1 (level 1)
... ** Node 2 (level 2)
... *** Node 3 (level 3)
... ''')
>>> (n1, n2, n3) = list(root[1:])
>>> n3.get_parent() is n2
True
>>> n3.get_parent(max_level=2) is n2 # same as default
True
>>> n3.get_parent(max_level=1) is n1
True
>>> n3.get_parent(max_level=0) is root
True
"""
if max_level is None:
max_level = self.level - 1
parent = self._find_parent()
while parent.level > max_level:
parent = parent.get_parent()
return parent
@property
def parent(self):
"""
Alias of :meth:`get_parent()` (calling without argument).
"""
return self.get_parent()
# FIXME: cache children nodes
def _find_children(self):
nodeiter = iter(self.env._nodes[self._index + 1:])
try:
node = next(nodeiter)
except StopIteration:
return
if node.level <= self.level:
return
yield node
last_child_level = node.level
for node in nodeiter:
if node.level <= self.level:
return
if node.level <= last_child_level:
yield node
last_child_level = node.level
@property
def children(self):
"""
A list of child nodes.
>>> from orgparse import loads
>>> root = loads('''
... * Node 1
... ** Node 2
... *** Node 3
... ** Node 4
... ''')
>>> (n1, n2, n3, n4) = list(root[1:])
>>> (c1, c2) = n1.children
>>> c1 is n2
True
>>> c2 is n4
True
Note the difference to ``n1[1:]``, which returns the Node 3 also:
>>> (m1, m2, m3) = list(n1[1:])
>>> m2 is n3
True
"""
return list(self._find_children())
@property
def root(self):
"""
The root node.
>>> from orgparse import loads
>>> root = loads('* Node 1')
>>> n1 = root[1]
>>> n1.root is root
True
"""
root = self
while True:
parent = root.get_parent()
if not parent:
return root
root = parent
@property
def properties(self) -> Dict[str, PropertyValue]:
"""
Node properties as a dictionary.
>>> from orgparse import loads
>>> root = loads('''
... * Node
... :PROPERTIES:
... :SomeProperty: value
... :END:
... ''')
>>> root.children[0].properties['SomeProperty']
'value'
"""
return self._properties
def get_property(self, key, val=None) -> Optional[PropertyValue]:
"""
Return property named ``key`` if exists or ``val`` otherwise.
:arg str key:
Key of property.
:arg val:
Default value to return.
"""
return self._properties.get(key, val)
# parser
@classmethod
def from_chunk(cls, env, lines):
self = cls(env)
self._lines = lines
self._parse_comments()
return self
def _parse_comments(self):
special_comments: Dict[str, List[str]] = {}
for line in self._lines:
parsed = parse_comment(line)
if parsed:
(key, vals) = parsed
key = key.upper() # case insensitive, so keep as uppercase
special_comments.setdefault(key, []).extend(vals)
self._special_comments = special_comments
# parse TODO keys and store in OrgEnv
for todokey in ['TODO', 'SEQ_TODO', 'TYP_TODO']:
for val in special_comments.get(todokey, []):
self.env.add_todo_keys(*parse_seq_todo(val))
def _iparse_properties(self, ilines: Iterator[str]) -> Iterator[str]:
self._properties = {}
in_property_field = False
for line in ilines:
if in_property_field:
if line.find(":END:") >= 0:
break
else:
(key, val) = parse_property(line)
if key is not None and val is not None:
self._properties.update({key: val})
elif line.find(":PROPERTIES:") >= 0:
in_property_field = True
else:
yield line
for line in ilines:
yield line
# misc
@property
def level(self):
"""
Level of this node.
:rtype: int
"""
raise NotImplementedError
def _get_tags(self, inher=False) -> Set[str]:
"""
Return tags
:arg bool inher:
Mix with tags of all ancestor nodes if ``True``.
:rtype: set
"""
return set()
@property
def tags(self) -> Set[str]:
"""
Tags of this and parent's node.
>>> from orgparse import loads
>>> n2 = loads('''
... * Node 1 :TAG1:
... ** Node 2 :TAG2:
... ''')[2]
>>> n2.tags == set(['TAG1', 'TAG2'])
True
"""
return self._get_tags(inher=True)
@property
def shallow_tags(self) -> Set[str]:
"""
Tags defined for this node (don't look-up parent nodes).
>>> from orgparse import loads
>>> n2 = loads('''
... * Node 1 :TAG1:
... ** Node 2 :TAG2:
... ''')[2]
>>> n2.shallow_tags == set(['TAG2'])
True
"""
return self._get_tags(inher=False)
@staticmethod
def _get_text(text, format='plain'):
if format == 'plain':
return to_plain_text(text)
elif format == 'raw':
return text
elif format == 'rich':
return to_rich_text(text)
else:
raise ValueError('format={0} is not supported.'.format(format))
def get_body(self, format='plain') -> str:
"""
Return a string of body text.
See also: :meth:`get_heading`.
"""
return self._get_text(
'\n'.join(self._body_lines), format) if self._lines else ''
@property
def body(self) -> str:
"""Alias of ``.get_body(format='plain')``."""
return self.get_body()
@property
def body_rich(self) -> Iterator[Rich]:
r = self.get_body(format='rich')
return cast(Iterator[Rich], r) # meh..
@property
def heading(self) -> str:
raise NotImplementedError
def is_root(self):
"""
Return ``True`` when it is a root node.
>>> from orgparse import loads
>>> root = loads('* Node 1')
>>> root.is_root()
True
>>> n1 = root[1]
>>> n1.is_root()
False
"""
return False
def get_timestamps(self, active=False, inactive=False,
range=False, point=False):
"""
Return a list of timestamps in the body text.
:type active: bool
:arg active: Include active type timestamps.
:type inactive: bool
:arg inactive: Include inactive type timestamps.
:type range: bool
:arg range: Include timestamps which has end date.
:type point: bool
:arg point: Include timestamps which has no end date.
:rtype: list of :class:`orgparse.date.OrgDate` subclasses
Consider the following org node:
>>> from orgparse import loads
>>> node = loads('''
... * Node
... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun>
... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
... Some inactive timestamp [2012-02-23 Thu] in body text.
... Some active timestamp <2012-02-24 Fri> in body text.
... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
... ''').children[0]
The default flags are all off, so it does not return anything.
>>> node.get_timestamps()
[]
You can fetch appropriate timestamps using keyword arguments.
>>> node.get_timestamps(inactive=True, point=True)
[OrgDate((2012, 2, 23), None, False)]
>>> node.get_timestamps(active=True, point=True)
[OrgDate((2012, 2, 24))]
>>> node.get_timestamps(inactive=True, range=True)
[OrgDate((2012, 2, 25), (2012, 2, 27), False)]
>>> node.get_timestamps(active=True, range=True)
[OrgDate((2012, 2, 26), (2012, 2, 28))]
This is more complex example. Only active timestamps,
regardless of range/point type.
>>> node.get_timestamps(active=True, point=True, range=True)
[OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))]
"""
return [
ts for ts in self._timestamps if
(((active and ts.is_active()) or
(inactive and not ts.is_active())) and
((range and ts.has_end()) or
(point and not ts.has_end())))]
@property
def datelist(self):
"""
Alias of ``.get_timestamps(active=True, inactive=True, point=True)``.
:rtype: list of :class:`orgparse.date.OrgDate` subclasses
>>> from orgparse import loads
>>> root = loads('''
... * Node with point dates <2012-02-25 Sat>
... CLOSED: [2012-02-25 Sat 21:15]
... Some inactive timestamp [2012-02-26 Sun] in body text.
... Some active timestamp <2012-02-27 Mon> in body text.
... ''')
>>> root.children[0].datelist # doctest: +NORMALIZE_WHITESPACE
[OrgDate((2012, 2, 25)),
OrgDate((2012, 2, 26), None, False),
OrgDate((2012, 2, 27))]
"""
return self.get_timestamps(active=True, inactive=True, point=True)
@property
def rangelist(self):
"""
Alias of ``.get_timestamps(active=True, inactive=True, range=True)``.
:rtype: list of :class:`orgparse.date.OrgDate` subclasses
>>> from orgparse import loads
>>> root = loads('''
... * Node with range dates <2012-02-25 Sat>--<2012-02-28 Tue>
... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
... Some time interval <2012-02-27 Mon 11:23-12:10>.
... ''')
>>> root.children[0].rangelist # doctest: +NORMALIZE_WHITESPACE
[OrgDate((2012, 2, 25), (2012, 2, 28)),
OrgDate((2012, 2, 25), (2012, 2, 27), False),
OrgDate((2012, 2, 26), (2012, 2, 28)),
OrgDate((2012, 2, 27, 11, 23, 0), (2012, 2, 27, 12, 10, 0))]
"""
return self.get_timestamps(active=True, inactive=True, range=True)
def __str__(self) -> str:
return "\n".join(self._lines)
# todo hmm, not sure if it really belongs here and not to OrgRootNode?
def get_file_property_list(self, property):
"""
Return a list of the selected property
"""
vals = self._special_comments.get(property.upper(), None)
return [] if vals is None else vals
def get_file_property(self, property):
"""
Return a single element of the selected property or None if it doesn't exist
"""
vals = self._special_comments.get(property.upper(), None)
if vals is None:
return None
elif len(vals) == 1:
return vals[0]
else:
raise RuntimeError('Multiple values for property {}: {}'.format(property, vals))
class OrgRootNode(OrgBaseNode):
"""
Node to represent a file. Its body contains all lines before the first
headline
See :class:`OrgBaseNode` for other available functions.
"""
@property
def heading(self) -> str:
return ''
def _get_tags(self, inher=False) -> Set[str]:
filetags = self.get_file_property_list('FILETAGS')
return set(filetags)
@property
def level(self):
return 0
def get_parent(self, max_level=None):
return None
def is_root(self):
return True
# parsers
def _parse_pre(self):
"""Call parsers which must be called before tree structuring"""
ilines: Iterator[str] = iter(self._lines)
ilines = self._iparse_properties(ilines)
ilines = self._iparse_timestamps(ilines)
self._body_lines = list(ilines)
def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
self._timestamps = []
for line in ilines:
self._timestamps.extend(OrgDate.list_from_str(line))
yield line
class OrgNode(OrgBaseNode):
"""
Node to represent normal org node
See :class:`OrgBaseNode` for other available functions.
"""
def __init__(self, *args, **kwds) -> None:
super(OrgNode, self).__init__(*args, **kwds)
# fixme instead of casts, should organize code in such a way that they aren't necessary
self._heading = cast(str, None)
self._level = None
self._tags = cast(List[str], None)
self._todo: Optional[str] = None
self._priority = None
self._scheduled = OrgDateScheduled(None)
self._deadline = OrgDateDeadline(None)
self._closed = OrgDateClosed(None)
self._clocklist: List[OrgDateClock] = []
self._body_lines: List[str] = []
self._repeated_tasks: List[OrgDateRepeatedTask] = []
# parser
def _parse_pre(self):
"""Call parsers which must be called before tree structuring"""
self._parse_heading()
# FIXME: make the following parsers "lazy"
ilines: Iterator[str] = iter(self._lines)
try:
next(ilines) # skip heading
except StopIteration:
return
ilines = self._iparse_sdc(ilines)
ilines = self._iparse_clock(ilines)
ilines = self._iparse_properties(ilines)
ilines = self._iparse_repeated_tasks(ilines)
ilines = self._iparse_timestamps(ilines)
self._body_lines = list(ilines)
def _parse_heading(self) -> None:
heading = self._lines[0]
(heading, self._level) = parse_heading_level(heading)
(heading, self._tags) = parse_heading_tags(heading)
(heading, self._todo) = parse_heading_todos(
heading, self.env.all_todo_keys)
(heading, self._priority) = parse_heading_priority(heading)
self._heading = heading
# The following ``_iparse_*`` methods are simple generator based
# parser. See ``_parse_pre`` for how it is used. The principle
# is simple: these methods get an iterator and returns an iterator.
# If the item returned by the input iterator must be dedicated to
# the parser, do not yield the item or yield it as-is otherwise.
def _iparse_sdc(self, ilines: Iterator[str]) -> Iterator[str]:
"""
Parse SCHEDULED, DEADLINE and CLOSED time tamps.
They are assumed be in the first line.
"""
try:
line = next(ilines)
except StopIteration:
return
(self._scheduled, self._deadline, self._closed) = parse_sdc(line)
if not (self._scheduled or
self._deadline or
self._closed):
yield line # when none of them were found
for line in ilines:
yield line
def _iparse_clock(self, ilines: Iterator[str]) -> Iterator[str]:
self._clocklist = []
for line in ilines:
cl = OrgDateClock.from_str(line)
if cl:
self._clocklist.append(cl)
else:
yield line
def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
self._timestamps = []
self._timestamps.extend(OrgDate.list_from_str(self._heading))
for l in ilines:
self._timestamps.extend(OrgDate.list_from_str(l))
yield l
def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]:
self._repeated_tasks = []
for line in ilines:
match = self._repeated_tasks_re.search(line)
if match:
# FIXME: move this parsing to OrgDateRepeatedTask.from_str
mdict = match.groupdict()
done_state = mdict['done']
todo_state = mdict['todo']
date = OrgDate.from_str(mdict['date'])
self._repeated_tasks.append(
OrgDateRepeatedTask(date.start, todo_state, done_state))
else:
yield line
_repeated_tasks_re = re.compile(
r'''
\s*- \s+
State \s+ "(?P<done> [^"]+)" \s+
from \s+ "(?P<todo> [^"]+)" \s+
\[ (?P<date> [^\]]+) \]''',
re.VERBOSE)
def get_heading(self, format='plain'):
"""
Return a string of head text without tags and TODO keywords.
>>> from orgparse import loads
>>> node = loads('* TODO Node 1').children[0]
>>> node.get_heading()
'Node 1'
It strips off inline markup by default (``format='plain'``).
You can get the original raw string by specifying
``format='raw'``.
>>> node = loads('* [[link][Node 1]]').children[0]
>>> node.get_heading()
'Node 1'
>>> node.get_heading(format='raw')
'[[link][Node 1]]'
"""
return self._get_text(self._heading, format)
@property
def heading(self) -> str:
"""Alias of ``.get_heading(format='plain')``."""
return self.get_heading()
@property
def level(self):
return self._level
"""
Level attribute of this node. Top level node is level 1.
>>> from orgparse import loads
>>> root = loads('''
... * Node 1
... ** Node 2
... ''')
>>> (n1, n2) = root.children
>>> root.level
0
>>> n1.level
1
>>> n2.level
2
"""
@property
def priority(self):
"""
Priority attribute of this node. It is None if undefined.
>>> from orgparse import loads
>>> (n1, n2) = loads('''
... * [#A] Node 1
... * Node 2
... ''').children
>>> n1.priority
'A'
>>> n2.priority is None
True
"""
return self._priority
def _get_tags(self, inher=False) -> Set[str]:
tags = set(self._tags)
if inher:
parent = self.get_parent()
if parent:
return tags | parent._get_tags(inher=True)
return tags
@property
def todo(self) -> Optional[str]:
"""
A TODO keyword of this node if exists or None otherwise.
>>> from orgparse import loads
>>> root = loads('* TODO Node 1')
>>> root.children[0].todo
'TODO'
"""
return self._todo
@property
def scheduled(self):
"""
Return scheduled timestamp
:rtype: a subclass of :class:`orgparse.date.OrgDate`
>>> from orgparse import loads
>>> root = loads('''
... * Node
... SCHEDULED: <2012-02-26 Sun>
... ''')
>>> root.children[0].scheduled
OrgDateScheduled((2012, 2, 26))
"""
return self._scheduled
@property
def deadline(self):
"""
Return deadline timestamp.
:rtype: a subclass of :class:`orgparse.date.OrgDate`
>>> from orgparse import loads
>>> root = loads('''
... * Node
... DEADLINE: <2012-02-26 Sun>
... ''')
>>> root.children[0].deadline
OrgDateDeadline((2012, 2, 26))
"""
return self._deadline
@property
def closed(self):
"""
Return timestamp of closed time.
:rtype: a subclass of :class:`orgparse.date.OrgDate`
>>> from orgparse import loads
>>> root = loads('''
... * Node
... CLOSED: [2012-02-26 Sun 21:15]
... ''')
>>> root.children[0].closed
OrgDateClosed((2012, 2, 26, 21, 15, 0))
"""
return self._closed
@property
def clock(self):
"""
Return a list of clocked timestamps
:rtype: a list of a subclass of :class:`orgparse.date.OrgDate`
>>> from orgparse import loads
>>> root = loads('''
... * Node
... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
... ''')
>>> root.children[0].clock
[OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))]
"""
return self._clocklist
def has_date(self):
"""
Return ``True`` if it has any kind of timestamp
"""
return (self.scheduled or
self.deadline or
self.datelist or
self.rangelist)
@property
def repeated_tasks(self):
"""
Get repeated tasks marked DONE in an entry having repeater.
:rtype: list of :class:`orgparse.date.OrgDateRepeatedTask`
>>> from orgparse import loads
>>> node = loads('''
... * TODO Pay the rent
... DEADLINE: <2005-10-01 Sat +1m>
... - State "DONE" from "TODO" [2005-09-01 Thu 16:10]
... - State "DONE" from "TODO" [2005-08-01 Mon 19:44]
... - State "DONE" from "TODO" [2005-07-01 Fri 17:27]
... ''').children[0]
>>> node.repeated_tasks # doctest: +NORMALIZE_WHITESPACE
[OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]
>>> node.repeated_tasks[0].before
'TODO'
>>> node.repeated_tasks[0].after
'DONE'
Repeated tasks in ``:LOGBOOK:`` can be fetched by the same code.
>>> node = loads('''
... * TODO Pay the rent
... DEADLINE: <2005-10-01 Sat +1m>
... :LOGBOOK:
... - State "DONE" from "TODO" [2005-09-01 Thu 16:10]
... - State "DONE" from "TODO" [2005-08-01 Mon 19:44]
... - State "DONE" from "TODO" [2005-07-01 Fri 17:27]
... :END:
... ''').children[0]
>>> node.repeated_tasks # doctest: +NORMALIZE_WHITESPACE
[OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]
See: `(info "(org) Repeated tasks")
<http://orgmode.org/manual/Repeated-tasks.html>`_
"""
return self._repeated_tasks
def parse_lines(lines: Iterable[str], filename, env=None) -> OrgNode:
if not env:
env = OrgEnv(filename=filename)
elif env.filename != filename:
raise ValueError('If env is specified, filename must match')
# parse into node of list (environment will be parsed)
ch1, ch2 = itertools.tee(lines_to_chunks(lines))
linenos = itertools.accumulate(itertools.chain([0], (len(c) for c in ch1)))
nodes = env.from_chunks(ch2)
nodelist = []
for lineno, node in zip(linenos, nodes):
lineno += 1 # in text editors lines are 1-indexed
node.linenumber = lineno
nodelist.append(node)
# parse headings (level, TODO, TAGs, and heading)
nodelist[0]._index = 0
# parse the root node
nodelist[0]._parse_pre()
for (i, node) in enumerate(nodelist[1:], 1): # nodes except root node
node._index = i
node._parse_pre()
env._nodes = nodelist
return nodelist[0] # root