import re
import itertools
from typing import List, Iterable, Iterator, Optional, Union, Tuple, cast, Dict, Set, Sequence, Any

from .date import OrgDate, OrgDateClock, OrgDateRepeatedTask, parse_sdc, OrgDateScheduled, OrgDateDeadline, OrgDateClosed
from .inline import to_plain_text
from .extra import to_rich_text, Rich


def lines_to_chunks(lines: Iterable[str]) -> Iterable[List[str]]:
    """
    Group lines into chunks, opening a new chunk at every heading line.

    The first chunk holds whatever precedes the first heading and is
    yielded even when it is empty; every later chunk starts with the
    heading line that opened it.
    """
    pending: List[str] = []
    for line in lines:
        starts_node = RE_NODE_HEADER.search(line) is not None
        if starts_node:
            # Hand over what was collected so far and start afresh.
            yield pending
            pending = []
        pending.append(line)
    yield pending


RE_NODE_HEADER = re.compile(r"^\*+ ")


def parse_heading_level(heading):
    """
    Split a heading line into its star-stripped text and its level.

    Returns ``(text, level)``, or ``None`` when the line is not a heading.

    >>> parse_heading_level('* Heading')
    ('Heading', 1)
    >>> parse_heading_level('******** Heading')
    ('Heading', 8)
    >>> parse_heading_level('*') # None since no space after star
    >>> parse_heading_level('*bold*') # None
    >>> parse_heading_level('not heading')  # None

    """
    found = RE_HEADING_STARS.search(heading)
    if found is None:
        return None
    stars, title = found.group(1, 2)
    return (title, len(stars))


RE_HEADING_STARS = re.compile(r'^(\*+)\s+(.*?)\s*$')


def parse_heading_tags(heading: str) -> Tuple[str, List[str]]:
    """
    Separate the trailing tag list from a heading.

    Returns ``(heading_without_tags, tags)``; ``tags`` is empty when the
    heading carries no trailing ``:tag:`` group.

    >>> parse_heading_tags('HEADING')
    ('HEADING', [])
    >>> parse_heading_tags('HEADING :TAG1:TAG2:')
    ('HEADING', ['TAG1', 'TAG2'])
    >>> parse_heading_tags('HEADING: this is still heading :TAG1:TAG2:')
    ('HEADING: this is still heading', ['TAG1', 'TAG2'])
    >>> parse_heading_tags('HEADING :@tag:_tag_:')
    ('HEADING', ['@tag', '_tag_'])

    Here is the spec of tags from Org Mode manual:

      Tags are normal words containing letters, numbers, ``_``, and
      ``@``.  Tags must be preceded and followed by a single colon,
      e.g., ``:work:``.

      -- (info "(org) Tags")

    """
    found = RE_HEADING_TAGS.search(heading)
    if found is None:
        return (heading, [])
    title, tagstr = found.groups()
    return (title, tagstr.split(':'))


# Tags are normal words containing letters, numbers, '_', and '@'.
# https://orgmode.org/manual/Tags.html
RE_HEADING_TAGS = re.compile(r'(.*?)\s*:([\w@:]+):\s*$')


def parse_heading_todos(heading: str, todo_candidates: List[str]) -> Tuple[str, Optional[str]]:
    """
    Get TODO keyword and heading without TODO keyword.

    The first candidate that equals the heading, or that prefixes it
    followed by a single space, wins.

    >>> todos = ['TODO', 'DONE']
    >>> parse_heading_todos('Normal heading', todos)
    ('Normal heading', None)
    >>> parse_heading_todos('TODO Heading', todos)
    ('Heading', 'TODO')

    """
    for todo in todo_candidates:
        if heading == todo:
            return ('', todo)
        if heading.startswith(todo + ' '):
            return (heading[len(todo) + 1:], todo)
    return (heading, None)


def parse_heading_priority(heading):
    """
    Get priority and heading without priority field.

    >>> parse_heading_priority('HEADING')
    ('HEADING', None)
    >>> parse_heading_priority('[#A] HEADING')
    ('HEADING', 'A')
    >>> parse_heading_priority('[#0] HEADING')
    ('HEADING', '0')
    >>> parse_heading_priority('[#A]')
    ('', 'A')

    """
    match = RE_HEADING_PRIORITY.search(heading)
    if match:
        return (match.group(2), match.group(1))
    return (heading, None)


RE_HEADING_PRIORITY = re.compile(r'^\s*\[#([A-Z0-9])\] ?(.*)$')

PropertyValue = Union[str, int, float]


def parse_property(line: str) -> Tuple[Optional[str], Optional[PropertyValue]]:
    """
    Get property from given string.

    Returns ``(key, value)``, or ``(None, None)`` when the line is not a
    property line.  The ``Effort`` property is converted to minutes.

    >>> parse_property(':Some_property: some value')
    ('Some_property', 'some value')
    >>> parse_property(':Effort: 1:10')
    ('Effort', 70)

    """
    prop_key = None
    prop_val: Optional[Union[str, int, float]] = None
    match = RE_PROP.search(line)
    if match:
        prop_key = match.group(1)
        prop_val = match.group(2)
        if prop_key == 'Effort':
            prop_val = parse_duration_to_minutes(prop_val)
    return (prop_key, prop_val)


RE_PROP = re.compile(r'^\s*:(.*?):\s*(.*?)\s*$')


def parse_duration_to_minutes(duration: str) -> Union[float, int]:
    """
    Parse duration minutes from given string.
    Convert to integer if number has no decimal points

    >>> parse_duration_to_minutes('3:12')
    192
    >>> parse_duration_to_minutes('1:23:45')
    83.75
    >>> parse_duration_to_minutes('1y 3d 3h 4min')
    530464
    >>> parse_duration_to_minutes('1d3h5min')
    1625
    >>> parse_duration_to_minutes('3d 13:35')
    5135
    >>> parse_duration_to_minutes('2.35h')
    141
    >>> parse_duration_to_minutes('10')
    10
    >>> parse_duration_to_minutes('10.')
    10
    >>> parse_duration_to_minutes('1 h')
    60
    >>> parse_duration_to_minutes('')
    0
    """
    minutes = parse_duration_to_minutes_float(duration)
    return int(minutes) if minutes.is_integer() else minutes


def parse_duration_to_minutes_float(duration: str) -> float:
    """
    Parse duration minutes from given string.
    The following code is fully compatible with the 'org-duration-to-minutes' function in org mode:
    https://github.com/emacs-mirror/emacs/blob/master/lisp/org/org-duration.el

    :raises ValueError: when ``duration`` matches none of the supported
        formats (H:MM[:SS], unit list, unit list followed by H:MM[:SS],
        or a bare number).

    >>> parse_duration_to_minutes_float('3:12')
    192.0
    >>> parse_duration_to_minutes_float('1:23:45')
    83.75
    >>> parse_duration_to_minutes_float('1y 3d 3h 4min')
    530464.0
    >>> parse_duration_to_minutes_float('1d3h5min')
    1625.0
    >>> parse_duration_to_minutes_float('3d 13:35')
    5135.0
    >>> parse_duration_to_minutes_float('2.35h')
    141.0
    >>> parse_duration_to_minutes_float('10')
    10.0
    >>> parse_duration_to_minutes_float('10.')
    10.0
    >>> parse_duration_to_minutes_float('1 h')
    60.0
    >>> parse_duration_to_minutes_float('')
    0.0
    """
    match: Optional[Any]
    if duration == "":
        return 0.0
    if isinstance(duration, float):
        return float(duration)
    if RE_ORG_DURATION_H_MM.fullmatch(duration):
        # H:MM or H:MM:SS -- the seconds field is optional.
        hours, minutes, *seconds_ = map(float, duration.split(":"))
        seconds = seconds_[0] if seconds_ else 0
        return seconds / 60.0 + minutes + 60 * hours
    if RE_ORG_DURATION_FULL.fullmatch(duration):
        # One or more "<number><unit>" items; sum them up.
        total = 0.0
        for match in RE_ORG_DURATION_UNIT.finditer(duration):
            value = float(match.group(1))
            unit = match.group(2)
            total += value * ORG_DURATION_UNITS[unit]
        return float(total)
    match = RE_ORG_DURATION_MIXED.fullmatch(duration)
    if match:
        # Units part (group A) followed by an H:MM[:SS] part (group B).
        units_part = match.groupdict()['A']
        hms_part = match.groupdict()['B']
        return parse_duration_to_minutes_float(units_part) + parse_duration_to_minutes_float(hms_part)
    if RE_FLOAT.fullmatch(duration):
        return float(duration)
    raise ValueError("Invalid duration format %s" % duration)


# Conversion factor to minutes for a duration.
ORG_DURATION_UNITS = {
    "min": 1,
    "h": 60,
    "d": 60 * 24,
    "w": 60 * 24 * 7,
    "m": 60 * 24 * 30,
    "y": 60 * 24 * 365.25,
}
# Regexp matching for all units.
# ("min" precedes "m" in the dict, so the alternation tries it first.)
ORG_DURATION_UNITS_RE = r'(%s)' % r'|'.join(ORG_DURATION_UNITS.keys())
# Regexp matching a duration expressed with H:MM or H:MM:SS format.
# Hours can use any number of digits.
ORG_DURATION_H_MM_RE = r'[ \t]*[0-9]+(?::[0-9]{2}){1,2}[ \t]*'
RE_ORG_DURATION_H_MM = re.compile(ORG_DURATION_H_MM_RE)
# Regexp matching a duration with an unit.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group 1 contains the bare number.
# Match group 2 contains the unit.
ORG_DURATION_UNIT_RE = r'([0-9]+(?:[.][0-9]*)?)[ \t]*' + ORG_DURATION_UNITS_RE
RE_ORG_DURATION_UNIT = re.compile(ORG_DURATION_UNIT_RE)
# Regexp matching a duration expressed with units.
# Allowed units are defined in ORG_DURATION_UNITS.
ORG_DURATION_FULL_RE = r'(?:[ \t]*%s)+[ \t]*' % ORG_DURATION_UNIT_RE
RE_ORG_DURATION_FULL = re.compile(ORG_DURATION_FULL_RE)
# Regexp matching a duration expressed with units and H:MM or H:MM:SS format.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group A contains units part.
# Match group B contains H:MM or H:MM:SS part.
ORG_DURATION_MIXED_RE = r'(?P<A>([ \t]*%s)+)[ \t]*(?P<B>[0-9]+(?::[0-9][0-9]){1,2})[ \t]*' % ORG_DURATION_UNIT_RE
RE_ORG_DURATION_MIXED = re.compile(ORG_DURATION_MIXED_RE)
# Regexp matching float numbers.
RE_FLOAT = re.compile(r'[0-9]+([.][0-9]*)?')


def parse_comment(line: str) -> Optional[Tuple[str, List[str]]]:
    """
    Parse special comment such as ``#+SEQ_TODO``

    Returns ``(key, values)`` for a ``#+KEY: value`` line and ``None``
    for anything else.

    >>> parse_comment('#+SEQ_TODO: TODO | DONE')
    ('SEQ_TODO', ['TODO | DONE'])
    >>> parse_comment('# not a special comment')  # None

    >>> parse_comment('#+FILETAGS: :tag1:tag2:')
    ('FILETAGS', ['tag1', 'tag2'])
    """
    match = re.match(r'\s*#\+', line)
    if match:
        end = match.end(0)
        comment = line[end:].split(':', maxsplit=1)
        if len(comment) >= 2:
            key = comment[0]
            value = comment[1].strip()
            if key.upper() == 'FILETAGS':
                # just legacy behaviour; it seems like filetags is the only one that separated by ':'
                # see https://orgmode.org/org.html#In_002dbuffer-Settings
                return (key, [c.strip() for c in value.split(':') if len(c.strip()) > 0])
            else:
                return (key, [value])
    return None


def parse_seq_todo(line):
    """
    Parse value part of SEQ_TODO/TODO/TYP_TODO comment.

    Returns ``(todo_keywords, done_keywords)`` with any fast-access key
    suffix such as ``(r)`` removed.

    >>> parse_seq_todo('TODO | DONE')
    (['TODO'], ['DONE'])
    >>> parse_seq_todo(' Fred  Sara   Lucy Mike  |  DONE  ')
    (['Fred', 'Sara', 'Lucy', 'Mike'], ['DONE'])
    >>> parse_seq_todo('| CANCELED')
    ([], ['CANCELED'])
    >>> parse_seq_todo('REPORT(r) BUG(b) KNOWNCAUSE(k) | FIXED(f)')
    (['REPORT', 'BUG', 'KNOWNCAUSE'], ['FIXED'])

    See also:

    * (info "(org) Per-file keywords")
    * (info "(org) Fast access to TODO states")

    """
    todo_done = line.split('|', 1)
    if len(todo_done) == 2:
        (todos, dones) = todo_done
    else:
        (todos, dones) = (line, '')
    strip_fast_access_key = lambda x: x.split('(', 1)[0]
    return (list(map(strip_fast_access_key, todos.split())),
            list(map(strip_fast_access_key, dones.split())))


class OrgEnv(object):

    """
    Information global to the file (e.g, TODO keywords).
    """

    def __init__(self, todos=('TODO',), dones=('DONE',), filename=''):
        # Immutable defaults; the values are copied so every instance
        # owns its own keyword lists.
        self._todos = list(todos)
        self._dones = list(dones)
        # Cleared by the first add_todo_keys() call, which also drops
        # the default keywords.
        self._todo_not_specified_in_comment = True
        self._filename = filename
        self._nodes = []

    @property
    def nodes(self):
        """
        A list of org nodes.

        >>> OrgEnv().nodes   # default is empty (of course)
        []

        >>> from orgparse import loads
        >>> loads('''
        ... * Heading 1
        ... ** Heading 2
        ... *** Heading 3
        ... ''').env.nodes      # doctest: +ELLIPSIS  +NORMALIZE_WHITESPACE
        [<...OrgRootNode object at 0x...>,
         <...OrgNode object at 0x...>,
         <...OrgNode object at 0x...>,
         <...OrgNode object at 0x...>]

        """
        return self._nodes

    def add_todo_keys(self, todos, dones):
        """
        Register TODO/DONE keywords found in the file.

        The first call replaces the default keywords; later calls extend
        the lists.
        """
        if self._todo_not_specified_in_comment:
            self._todos = []
            self._dones = []
            self._todo_not_specified_in_comment = False
        self._todos.extend(todos)
        self._dones.extend(dones)

    @property
    def todo_keys(self):
        """
        TODO keywords defined for this document (file).

        >>> env = OrgEnv()
        >>> env.todo_keys
        ['TODO']

        """
        return self._todos

    @property
    def done_keys(self):
        """
        DONE keywords defined for this document (file).

        >>> env = OrgEnv()
        >>> env.done_keys
        ['DONE']

        """
        return self._dones

    @property
    def all_todo_keys(self):
        """
        All TODO keywords (including DONEs).

        >>> env = OrgEnv()
        >>> env.all_todo_keys
        ['TODO', 'DONE']

        """
        return self._todos + self._dones

    @property
    def filename(self):
        """
        Return a path to the source file or similar information.

        If the org objects are not loaded from a file, this value
        is whatever placeholder string the caller supplied instead of
        a real path.

        :rtype: str

        """
        return self._filename

    # parser

    def from_chunks(self, chunks):
        """
        Yield one node per chunk: a root node for the first chunk, then
        an :class:`OrgNode` for each of the remaining chunks.
        """
        yield OrgRootNode.from_chunk(self, next(chunks))
        for chunk in chunks:
            yield OrgNode.from_chunk(self, chunk)


class OrgBaseNode(Sequence):

    """
    Base class for :class:`OrgRootNode` and :class:`OrgNode`

    .. attribute:: env

       An instance of :class:`OrgEnv`.
       All nodes in a same file shares same instance.

    :class:`OrgBaseNode` is an iterable object:

    >>> from orgparse import loads
    >>> root = loads('''
    ... * Heading 1
    ... ** Heading 2
    ... *** Heading 3
    ... * Heading 4
    ... ''')
    >>> for node in root:
    ...     print(node)
    <BLANKLINE>
    * Heading 1
    ** Heading 2
    *** Heading 3
    * Heading 4

    Note that the first blank line is due to the root node, as
    iteration contains the object itself.  To skip that, use
    slice access ``[1:]``:

    >>> for node in root[1:]:
    ...     print(node)
    * Heading 1
    ** Heading 2
    *** Heading 3
    * Heading 4

    It also supports sequence protocol.

    >>> print(root[1])
    * Heading 1
    >>> root[0] is root  # index 0 means itself
    True
    >>> len(root)   # remember, sequence contains itself
    5

    Note the difference between ``root[1:]`` and ``root[1]``:

    >>> for node in root[1]:
    ...     print(node)
    * Heading 1
    ** Heading 2
    *** Heading 3

    Nodes remember the line number information (1-indexed):

    >>> print(root.children[1].linenumber)
    5
    """

    _body_lines: List[str]  # set by the child classes

    def __init__(self, env, index=None) -> None:
        """
        Create an :class:`OrgBaseNode` object.

        :type env: :class:`OrgEnv`
        :arg  env: This will be set to the :attr:`env` attribute.

        """
        self.env = env

        self.linenumber = cast(int, None)  # set in parse_lines

        # content
        self._lines: List[str] = []

        self._properties: Dict[str, PropertyValue] = {}
        self._timestamps: List[OrgDate] = []

        # FIXME: use `index` argument to set index.  (Currently it is
        # done externally in `parse_lines`.)
        if index is not None:
            # Index of `self` in `self.env.nodes`.
            #
            # It must satisfy an equality::
            #
            #     self.env.nodes[self._index] is self
            #
            # This value is used for quick access for iterator and
            # tree-like traversing.
            self._index = index

    def __iter__(self):
        # Yield this node, then every following node that is nested
        # deeper than it (i.e. its whole subtree, in file order).
        yield self
        level = self.level
        for node in self.env._nodes[self._index + 1:]:
            if node.level > level:
                yield node
            else:
                break

    def __len__(self):
        return sum(1 for _ in self)

    def __nonzero__(self):
        # As self.__len__ returns non-zero value always this is not
        # needed.  This function is only for performance.
        return True

    __bool__ = __nonzero__  # PY3

    def __getitem__(self, key):
        if isinstance(key, slice):
            return itertools.islice(self, key.start, key.stop, key.step)
        elif isinstance(key, int):
            if key < 0:
                key += len(self)
            for (i, node) in enumerate(self):
                if i == key:
                    return node
            raise IndexError("Out of range {0}".format(key))
        else:
            raise TypeError("Inappropriate type {0} for {1}"
                            .format(type(key), type(self)))

    # tree structure

    def _find_same_level(self, iterable):
        # First node at this node's level, or None as soon as a
        # shallower node is reached (or the iterable is exhausted).
        for node in iterable:
            if node.level < self.level:
                return
            if node.level == self.level:
                return node

    @property
    def previous_same_level(self):
        """
        Return previous node if exists or None otherwise.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node 1
        ... * Node 2
        ... ** Node 3
        ... ''')
        >>> (n1, n2, n3) = list(root[1:])
        >>> n1.previous_same_level is None
        True
        >>> n2.previous_same_level is n1
        True
        >>> n3.previous_same_level is None  # n2 is not at the same level
        True

        """
        return self._find_same_level(reversed(self.env._nodes[:self._index]))

    @property
    def next_same_level(self):
        """
        Return next node if exists or None otherwise.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node 1
        ... * Node 2
        ... ** Node 3
        ... ''')
        >>> (n1, n2, n3) = list(root[1:])
        >>> n1.next_same_level is n2
        True
        >>> n2.next_same_level is None  # n3 is not at the same level
        True
        >>> n3.next_same_level is None
        True

        """
        return self._find_same_level(self.env._nodes[self._index + 1:])

    # FIXME: cache parent node
    def _find_parent(self):
        # Nearest preceding node with a smaller level.
        for node in reversed(self.env._nodes[:self._index]):
            if node.level < self.level:
                return node

    def get_parent(self, max_level=None):
        """
        Return a parent node.

        :arg int max_level:
            In the normally structured org file, it is a level
            of the ancestor node to return.  For example,
            ``get_parent(max_level=0)`` returns a root node.

            In the general case, it specify a maximum level of the
            desired ancestor node.  If there is no ancestor node
            whose level is equal to ``max_level``, this function
            try to find an ancestor node which level is smaller
            than ``max_level``.  ``None`` is returned when no
            ancestor satisfies ``max_level``.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node 1
        ... ** Node 2
        ... ** Node 3
        ... ''')
        >>> (n1, n2, n3) = list(root[1:])
        >>> n1.get_parent() is root
        True
        >>> n2.get_parent() is n1
        True
        >>> n3.get_parent() is n1
        True

        For simplicity, accessing :attr:`parent` is alias of calling
        :meth:`get_parent` without argument.

        >>> n1.get_parent() is n1.parent
        True
        >>> root.parent is None
        True

        This is a little bit pathological situation -- but works.

        >>> root = loads('''
        ... * Node 1
        ... *** Node 2
        ... ** Node 3
        ... ''')
        >>> (n1, n2, n3) = list(root[1:])
        >>> n1.get_parent() is root
        True
        >>> n2.get_parent() is n1
        True
        >>> n3.get_parent() is n1
        True

        Now let's play with `max_level`.

        >>> root = loads('''
        ... * Node 1 (level 1)
        ... ** Node 2 (level 2)
        ... *** Node 3 (level 3)
        ... ''')
        >>> (n1, n2, n3) = list(root[1:])
        >>> n3.get_parent() is n2
        True
        >>> n3.get_parent(max_level=2) is n2  # same as default
        True
        >>> n3.get_parent(max_level=1) is n1
        True
        >>> n3.get_parent(max_level=0) is root
        True

        """
        if max_level is None:
            max_level = self.level - 1
        parent = self._find_parent()
        # Climb until the ancestor is shallow enough.  The None check
        # stops the walk above the root instead of failing on
        # ``None.level`` when max_level is below the root's level.
        while parent is not None and parent.level > max_level:
            parent = parent.get_parent()
        return parent

    @property
    def parent(self):
        """
        Alias of :meth:`get_parent()` (calling without argument).
        """
        return self.get_parent()

    # FIXME: cache children nodes
    def _find_children(self):
        nodeiter = iter(self.env._nodes[self._index + 1:])
        try:
            node = next(nodeiter)
        except StopIteration:
            return
        if node.level <= self.level:
            return
        yield node
        # Track the shallowest child level seen so far: a later node is
        # a direct child only if it is not nested below that level.
        last_child_level = node.level
        for node in nodeiter:
            if node.level <= self.level:
                return
            if node.level <= last_child_level:
                yield node
                last_child_level = node.level

    @property
    def children(self):
        """
        A list of child nodes.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node 1
        ... ** Node 2
        ... *** Node 3
        ... ** Node 4
        ... ''')
        >>> (n1, n2, n3, n4) = list(root[1:])
        >>> (c1, c2) = n1.children
        >>> c1 is n2
        True
        >>> c2 is n4
        True

        Note the difference to ``n1[1:]``, which returns the Node 3 also:

        >>> (m1, m2, m3) = list(n1[1:])
        >>> m2 is n3
        True

        """
        return list(self._find_children())

    @property
    def root(self):
        """
        The root node.

        >>> from orgparse import loads
        >>> root = loads('* Node 1')
        >>> n1 = root[1]
        >>> n1.root is root
        True

        """
        root = self
        while True:
            parent = root.get_parent()
            if not parent:
                return root
            root = parent

    @property
    def properties(self) -> Dict[str, PropertyValue]:
        """
        Node properties as a dictionary.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node
        ...   :PROPERTIES:
        ...   :SomeProperty: value
        ...   :END:
        ... ''')
        >>> root.children[0].properties['SomeProperty']
        'value'

        """
        return self._properties

    def get_property(self, key, val=None) -> Optional[PropertyValue]:
        """
        Return property named ``key`` if exists or ``val`` otherwise.

        :arg str key:
            Key of property.

        :arg val:
            Default value to return.

        """
        return self._properties.get(key, val)

    # parser

    @classmethod
    def from_chunk(cls, env, lines):
        """
        Build a node from the lines of one chunk and record any special
        comments found in it.
        """
        self = cls(env)
        self._lines = lines
        self._parse_comments()
        return self

    def _parse_comments(self):
        special_comments: Dict[str, List[str]] = {}
        for line in self._lines:
            parsed = parse_comment(line)
            if parsed:
                (key, vals) = parsed
                key = key.upper()  # case insensitive, so keep as uppercase
                special_comments.setdefault(key, []).extend(vals)
        self._special_comments = special_comments
        # parse TODO keys and store in OrgEnv
        for todokey in ['TODO', 'SEQ_TODO', 'TYP_TODO']:
            for val in special_comments.get(todokey, []):
                self.env.add_todo_keys(*parse_seq_todo(val))

    def _iparse_properties(self, ilines: Iterator[str]) -> Iterator[str]:
        # Consume the first ":PROPERTIES:" ... ":END:" drawer, storing
        # its entries; every other line is passed through untouched.
        self._properties = {}
        in_property_field = False
        for line in ilines:
            if in_property_field:
                if ":END:" in line:
                    break
                (key, val) = parse_property(line)
                if key is not None and val is not None:
                    self._properties[key] = val
            elif ":PROPERTIES:" in line:
                in_property_field = True
            else:
                yield line
        # Everything after the drawer is body text.
        yield from ilines

    # misc

    @property
    def level(self):
        """
        Level of this node.

        :rtype: int

        """
        raise NotImplementedError

    def _get_tags(self, inher=False) -> Set[str]:
        """
        Return tags

        :arg bool inher:
            Mix with tags of all ancestor nodes if ``True``.

        :rtype: set

        """
        return set()

    @property
    def tags(self) -> Set[str]:
        """
        Tags of this and parent's node.

        >>> from orgparse import loads
        >>> n2 = loads('''
        ... * Node 1    :TAG1:
        ... ** Node 2   :TAG2:
        ... ''')[2]
        >>> n2.tags == set(['TAG1', 'TAG2'])
        True

        """
        return self._get_tags(inher=True)

    @property
    def shallow_tags(self) -> Set[str]:
        """
        Tags defined for this node (don't look-up parent nodes).

        >>> from orgparse import loads
        >>> n2 = loads('''
        ... * Node 1    :TAG1:
        ... ** Node 2   :TAG2:
        ... ''')[2]
        >>> n2.shallow_tags == set(['TAG2'])
        True

        """
        return self._get_tags(inher=False)

    @staticmethod
    def _get_text(text, format='plain'):
        # Render ``text`` in the requested format; unknown formats are
        # rejected with ValueError.
        if format == 'plain':
            return to_plain_text(text)
        elif format == 'raw':
            return text
        elif format == 'rich':
            return to_rich_text(text)
        else:
            raise ValueError('format={0} is not supported.'.format(format))

    def get_body(self, format='plain') -> str:
        """
        Return a string of body text.

        See also: :meth:`get_heading`.

        """
        return self._get_text(
            '\n'.join(self._body_lines), format) if self._lines else ''

    @property
    def body(self) -> str:
        """Alias of ``.get_body(format='plain')``."""
        return self.get_body()

    @property
    def body_rich(self) -> 'Iterator[Rich]':
        """Alias of ``.get_body(format='rich')``."""
        r = self.get_body(format='rich')
        return cast(Iterator[Rich], r)  # meh..

    @property
    def heading(self) -> str:
        raise NotImplementedError

    def is_root(self):
        """
        Return ``True`` when it is a root node.

        >>> from orgparse import loads
        >>> root = loads('* Node 1')
        >>> root.is_root()
        True
        >>> n1 = root[1]
        >>> n1.is_root()
        False

        """
        return False

    def get_timestamps(self, active=False, inactive=False,
                       range=False, point=False):
        """
        Return a list of timestamps in the body text.

        :type   active: bool
        :arg    active: Include active type timestamps.
        :type inactive: bool
        :arg  inactive: Include inactive type timestamps.
        :type    range: bool
        :arg     range: Include timestamps which has end date.
        :type    point: bool
        :arg     point: Include timestamps which has no end date.

        :rtype: list of :class:`orgparse.date.OrgDate` subclasses


        Consider the following org node:

        >>> from orgparse import loads
        >>> node = loads('''
        ... * Node
        ...   CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun>
        ...   CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] =>  0:05
        ...   Some inactive timestamp [2012-02-23 Thu] in body text.
        ...   Some active timestamp <2012-02-24 Fri> in body text.
        ...   Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
        ...   Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
        ... ''').children[0]

        The default flags are all off, so it does not return anything.

        >>> node.get_timestamps()
        []

        You can fetch appropriate timestamps using keyword arguments.

        >>> node.get_timestamps(inactive=True, point=True)
        [OrgDate((2012, 2, 23), None, False)]
        >>> node.get_timestamps(active=True, point=True)
        [OrgDate((2012, 2, 24))]
        >>> node.get_timestamps(inactive=True, range=True)
        [OrgDate((2012, 2, 25), (2012, 2, 27), False)]
        >>> node.get_timestamps(active=True, range=True)
        [OrgDate((2012, 2, 26), (2012, 2, 28))]

        This is more complex example.  Only active timestamps,
        regardless of range/point type.

        >>> node.get_timestamps(active=True, point=True, range=True)
        [OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))]

        """
        return [
            ts for ts in self._timestamps if
            (((active and ts.is_active()) or
              (inactive and not ts.is_active())) and
             ((range and ts.has_end()) or
              (point and not ts.has_end())))]

    @property
    def datelist(self):
        """
        Alias of ``.get_timestamps(active=True, inactive=True, point=True)``.

        :rtype: list of :class:`orgparse.date.OrgDate` subclasses

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node with point dates <2012-02-25 Sat>
        ...   CLOSED: [2012-02-25 Sat 21:15]
        ...   Some inactive timestamp [2012-02-26 Sun] in body text.
        ...   Some active timestamp <2012-02-27 Mon> in body text.
        ... ''')
        >>> root.children[0].datelist      # doctest: +NORMALIZE_WHITESPACE
        [OrgDate((2012, 2, 25)),
         OrgDate((2012, 2, 26), None, False),
         OrgDate((2012, 2, 27))]

        """
        return self.get_timestamps(active=True, inactive=True, point=True)

    @property
    def rangelist(self):
        """
        Alias of ``.get_timestamps(active=True, inactive=True, range=True)``.

        :rtype: list of :class:`orgparse.date.OrgDate` subclasses

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node with range dates <2012-02-25 Sat>--<2012-02-28 Tue>
        ...   CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
        ...   Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
        ...   Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
        ...   Some time interval <2012-02-27 Mon 11:23-12:10>.
        ... ''')
        >>> root.children[0].rangelist     # doctest: +NORMALIZE_WHITESPACE
        [OrgDate((2012, 2, 25), (2012, 2, 28)),
         OrgDate((2012, 2, 25), (2012, 2, 27), False),
         OrgDate((2012, 2, 26), (2012, 2, 28)),
         OrgDate((2012, 2, 27, 11, 23, 0), (2012, 2, 27, 12, 10, 0))]

        """
        return self.get_timestamps(active=True, inactive=True, range=True)

    def __str__(self) -> str:
        return "\n".join(self._lines)

    # todo hmm, not sure if it really belongs here and not to OrgRootNode?
    def get_file_property_list(self, property):
        """
        Return a list of the selected property
        """
        vals = self._special_comments.get(property.upper(), None)
        return [] if vals is None else vals

    def get_file_property(self, property):
        """
        Return a single element of the selected property or None if it doesn't exist

        :raises RuntimeError: when the property has more than one value.
        """
        vals = self._special_comments.get(property.upper(), None)
        if vals is None:
            return None
        elif len(vals) == 1:
            return vals[0]
        else:
            raise RuntimeError('Multiple values for property {}: {}'.format(property, vals))


class OrgRootNode(OrgBaseNode):

    """
    Node to represent a file. Its body contains all lines before the first
    headline

    See :class:`OrgBaseNode` for other available functions.
    """

    @property
    def heading(self) -> str:
        return ''

    def _get_tags(self, inher=False) -> Set[str]:
        # The root node's tags are the file-level FILETAGS.
        filetags = self.get_file_property_list('FILETAGS')
        return set(filetags)

    @property
    def level(self):
        return 0

    def get_parent(self, max_level=None):
        return None

    def is_root(self):
        return True

    # parsers

    def _parse_pre(self):
        """Call parsers which must be called before tree structuring"""
        ilines: Iterator[str] = iter(self._lines)
        ilines = self._iparse_properties(ilines)
        ilines = self._iparse_timestamps(ilines)
        self._body_lines = list(ilines)

    def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
        self._timestamps = []
        for line in ilines:
            self._timestamps.extend(OrgDate.list_from_str(line))
            yield line


class OrgNode(OrgBaseNode):

    """
    Node to represent normal org node

    See :class:`OrgBaseNode` for other available functions.

    """

    def __init__(self, *args, **kwds) -> None:
        super(OrgNode, self).__init__(*args, **kwds)
        # fixme instead of casts, should organize code in such a way that they aren't necessary
        self._heading = cast(str, None)
        self._level = None
        self._tags = cast(List[str], None)
        self._todo: Optional[str] = None
        self._priority = None
        self._scheduled = OrgDateScheduled(None)
        self._deadline = OrgDateDeadline(None)
        self._closed = OrgDateClosed(None)
        self._clocklist: List[OrgDateClock] = []
        self._body_lines: List[str] = []
        self._repeated_tasks: List[OrgDateRepeatedTask] = []

    # parser

    def _parse_pre(self):
        """Call parsers which must be called before tree structuring"""
        self._parse_heading()
        # FIXME: make the following parsers "lazy"
        ilines: Iterator[str] = iter(self._lines)
        try:
            next(ilines)            # skip heading
        except StopIteration:
            return
        ilines = self._iparse_sdc(ilines)
        ilines = self._iparse_clock(ilines)
        ilines = self._iparse_properties(ilines)
        ilines = self._iparse_repeated_tasks(ilines)
        ilines = self._iparse_timestamps(ilines)
        self._body_lines = list(ilines)

    def _parse_heading(self) -> None:
        # Peel the heading apart in order: stars/level, trailing tags,
        # TODO keyword, then the priority cookie.
        heading = self._lines[0]
        (heading, self._level) = parse_heading_level(heading)
        (heading, self._tags) = parse_heading_tags(heading)
        (heading, self._todo) = parse_heading_todos(
            heading, self.env.all_todo_keys)
        (heading, self._priority) = parse_heading_priority(heading)
        self._heading = heading

    # The following ``_iparse_*`` methods are simple generator based
    # parser.  See ``_parse_pre`` for how it is used.  The principle
    # is simple: these methods get an iterator and returns an iterator.
    # If the item returned by the input iterator must be dedicated to
    # the parser, do not yield the item or yield it as-is otherwise.

    def _iparse_sdc(self, ilines: Iterator[str]) -> Iterator[str]:
        """
        Parse SCHEDULED, DEADLINE and CLOSED time stamps.

        They are assumed be in the first line.

        """
        try:
            line = next(ilines)
        except StopIteration:
            return
        (self._scheduled, self._deadline, self._closed) = parse_sdc(line)

        if not (self._scheduled or
                self._deadline or
                self._closed):
            yield line  # when none of them were found

        for line in ilines:
            yield line

    def _iparse_clock(self, ilines: Iterator[str]) -> Iterator[str]:
        # CLOCK lines are collected and removed from the body.
        self._clocklist = []
        for line in ilines:
            cl = OrgDateClock.from_str(line)
            if cl:
                self._clocklist.append(cl)
            else:
                yield line

    def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]:
        # Timestamps are gathered from the heading first, then from
        # every remaining line; the lines themselves stay in the body.
        self._timestamps = []
        self._timestamps.extend(OrgDate.list_from_str(self._heading))
        for l in ilines:
            self._timestamps.extend(OrgDate.list_from_str(l))
            yield l

    def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]:
        # State-change log lines are collected and removed from the body.
        self._repeated_tasks = []
        for line in ilines:
            match = self._repeated_tasks_re.search(line)
            if match:
                # FIXME: move this parsing to OrgDateRepeatedTask.from_str
                mdict = match.groupdict()
                done_state = mdict['done']
                todo_state = mdict['todo']
                date = OrgDate.from_str(mdict['date'])
                self._repeated_tasks.append(
                    OrgDateRepeatedTask(date.start, todo_state, done_state))
            else:
                yield line

    # Matches a log line such as:
    #   - State "DONE"  from "TODO"  [2005-09-01 Thu 16:10]
    _repeated_tasks_re = re.compile(
        r'''
        \s*- \s+
        State \s+ "(?P<done> [^"]+)" \s+
        from  \s+ "(?P<todo> [^"]+)" \s+
        \[ (?P<date> [^\]]+) \]''',
        re.VERBOSE)

    def get_heading(self, format='plain'):
        """
        Return a string of head text without tags and TODO keywords.

        >>> from orgparse import loads
        >>> node = loads('* TODO Node 1').children[0]
        >>> node.get_heading()
        'Node 1'

        It strips off inline markup by default (``format='plain'``).
        You can get the original raw string by specifying
        ``format='raw'``.

        >>> node = loads('* [[link][Node 1]]').children[0]
        >>> node.get_heading()
        'Node 1'
        >>> node.get_heading(format='raw')
        '[[link][Node 1]]'

        """
        return self._get_text(self._heading, format)

    @property
    def heading(self) -> str:
        """Alias of ``.get_heading(format='plain')``."""
        return self.get_heading()

    @property
    def level(self):
        """
        Level attribute of this node.  Top level node is level 1.

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node 1
        ... ** Node 2
        ... ''')
        >>> (n1, n2) = root.children
        >>> root.level
        0
        >>> n1.level
        1
        >>> n2.level
        2

        """
        return self._level

    @property
    def priority(self):
        """
        Priority attribute of this node.  It is None if undefined.

        >>> from orgparse import loads
        >>> (n1, n2) = loads('''
        ... * [#A] Node 1
        ... * Node 2
        ... ''').children
        >>> n1.priority
        'A'
        >>> n2.priority is None
        True

        """
        return self._priority

    def _get_tags(self, inher=False) -> Set[str]:
        tags = set(self._tags)
        if inher:
            parent = self.get_parent()
            if parent:
                return tags | parent._get_tags(inher=True)
        return tags

    @property
    def todo(self) -> Optional[str]:
        """
        A TODO keyword of this node if exists or None otherwise.

        >>> from orgparse import loads
        >>> root = loads('* TODO Node 1')
        >>> root.children[0].todo
        'TODO'

        """
        return self._todo

    @property
    def scheduled(self):
        """
        Return scheduled timestamp

        :rtype: a subclass of :class:`orgparse.date.OrgDate`

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node
        ...   SCHEDULED: <2012-02-26 Sun>
        ... ''')
        >>> root.children[0].scheduled
        OrgDateScheduled((2012, 2, 26))

        """
        return self._scheduled

    @property
    def deadline(self):
        """
        Return deadline timestamp.

        :rtype: a subclass of :class:`orgparse.date.OrgDate`

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node
        ...   DEADLINE: <2012-02-26 Sun>
        ... ''')
        >>> root.children[0].deadline
        OrgDateDeadline((2012, 2, 26))

        """
        return self._deadline

    @property
    def closed(self):
        """
        Return timestamp of closed time.

        :rtype: a subclass of :class:`orgparse.date.OrgDate`

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node
        ...   CLOSED: [2012-02-26 Sun 21:15]
        ... ''')
        >>> root.children[0].closed
        OrgDateClosed((2012, 2, 26, 21, 15, 0))

        """
        return self._closed

    @property
    def clock(self):
        """
        Return a list of clocked timestamps

        :rtype: a list of a subclass of :class:`orgparse.date.OrgDate`

        >>> from orgparse import loads
        >>> root = loads('''
        ... * Node
        ...   CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] =>  0:05
        ... ''')
        >>> root.children[0].clock
        [OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))]

        """
        return self._clocklist

    def has_date(self):
        """
        Return ``True`` if it has any kind of timestamp
        """
        return (self.scheduled or
                self.deadline or
                self.datelist or
                self.rangelist)

    @property
    def repeated_tasks(self):
        """
        Get repeated tasks marked DONE in an entry having repeater.

        :rtype: list of :class:`orgparse.date.OrgDateRepeatedTask`

        >>> from orgparse import loads
        >>> node = loads('''
        ... * TODO Pay the rent
        ...   DEADLINE: <2005-10-01 Sat +1m>
        ...   - State "DONE"  from "TODO"  [2005-09-01 Thu 16:10]
        ...   - State "DONE"  from "TODO"  [2005-08-01 Mon 19:44]
        ...   - State "DONE"  from "TODO"  [2005-07-01 Fri 17:27]
        ... ''').children[0]
        >>> node.repeated_tasks            # doctest: +NORMALIZE_WHITESPACE
        [OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
         OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
         OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]
        >>> node.repeated_tasks[0].before
        'TODO'
        >>> node.repeated_tasks[0].after
        'DONE'

        Repeated tasks in ``:LOGBOOK:`` can be fetched by the same code.

        >>> node = loads('''
        ... * TODO Pay the rent
        ...   DEADLINE: <2005-10-01 Sat +1m>
        ...   :LOGBOOK:
        ...   - State "DONE"  from "TODO"  [2005-09-01 Thu 16:10]
        ...   - State "DONE"  from "TODO"  [2005-08-01 Mon 19:44]
        ...   - State "DONE"  from "TODO"  [2005-07-01 Fri 17:27]
        ...   :END:
        ... ''').children[0]
        >>> node.repeated_tasks            # doctest: +NORMALIZE_WHITESPACE
        [OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'),
         OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'),
         OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE')]

        See: `(info "(org) Repeated tasks")
        <https://orgmode.org/manual/Repeated-tasks.html>`_

        """
        return self._repeated_tasks


def parse_lines(lines: Iterable[str], filename, env=None) -> OrgNode:
    """
    Parse org lines into a node tree and return its root node.

    :arg lines: Lines of the org document.
    :arg filename: Stored on the environment; must equal
        ``env.filename`` when ``env`` is given.
    :arg env: Optional :class:`OrgEnv` to reuse; a fresh one is created
        when omitted.
    :raises ValueError: when ``env`` is given and its filename differs
        from ``filename``.
    """
    # NOTE(review): the value returned is the first node, which
    # OrgEnv.from_chunks builds as an OrgRootNode; the OrgNode
    # annotation is kept as-is for existing callers.
    if not env:
        env = OrgEnv(filename=filename)
    elif env.filename != filename:
        raise ValueError('If env is specified, filename must match')

    # parse into node of list (environment will be parsed)
    ch1, ch2 = itertools.tee(lines_to_chunks(lines))
    # Running total of chunk lengths gives each chunk's 0-based start line.
    linenos = itertools.accumulate(itertools.chain([0], (len(c) for c in ch1)))
    nodes = env.from_chunks(ch2)
    nodelist = []
    for lineno, node in zip(linenos, nodes):
        lineno += 1  # in text editors lines are 1-indexed
        node.linenumber = lineno
        nodelist.append(node)
    # parse headings (level, TODO, TAGs, and heading)
    nodelist[0]._index = 0
    # parse the root node
    nodelist[0]._parse_pre()
    for (i, node) in enumerate(nodelist[1:], 1):   # nodes except root node
        node._index = i
        node._parse_pre()
    env._nodes = nodelist
    return nodelist[0]  # root