# .venv/lib/python3.12/site-packages/orgparse/extra.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

import re
from typing import List, Sequence, Dict, Iterator, Iterable, Union, Optional, Type


# Matches an org-mode table rule line such as ``|---+---|``: optional leading
# whitespace, a pipe, runs of dashes joined by ``+``, and a closing pipe.
RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|')
# Matches the start of a table data row: optional leading whitespace, a pipe,
# at least one non-pipe character, and another pipe.
# The cell group is deliberately NOT wrapped in a second ``+``: the nested
# quantifier ``([^|]+)+`` accepts exactly the same strings but backtracks
# exponentially on a line that opens with ``|`` and never closes it.
RE_TABLE_ROW = re.compile(r'\s*\|([^|]+)\|')
# When true, surrounding whitespace is stripped from every parsed table cell.
STRIP_CELL_WHITESPACE = True


# A single table row: one string per cell.
Row = Sequence[str]

class Table:
    '''
    A single org-mode table, held as its raw text lines.

    Lines matching RE_TABLE_SEPARATOR are treated as rules that split the
    table into blocks; every other line is parsed into a row of cells.
    '''

    def __init__(self, lines: List[str]) -> None:
        self._lines = lines

    @property
    def blocks(self) -> Iterator[Sequence[Row]]:
        '''
        Yield the rows grouped into blocks delimited by separator lines.

        Separators seen before any block has been yielded do not produce an
        empty block. Once a block has been yielded, every separator closes
        the current block, even when that block is empty.
        '''
        pending: List[Row] = []
        nothing_yielded = True
        for row in self._pre_rows():
            if row is not None:
                pending.append(row)
                continue
            # A separator: close the current block unless it is a leading rule.
            if pending or not nothing_yielded:
                yield pending
                nothing_yielded = False
            # Rebind instead of clearing so an already yielded list stays intact.
            pending = []
        if pending:
            yield pending

    def __iter__(self) -> Iterator[Row]:
        '''Iterate over the data rows, the same as ``rows``.'''
        return self.rows

    @property
    def rows(self) -> Iterator[Row]:
        '''Yield every data row in order, skipping separator lines.'''
        for row in self._pre_rows():
            if row is None:
                continue
            yield row

    def _pre_rows(self) -> Iterator[Optional[Row]]:
        '''Yield ``None`` for each separator line and a list of cells otherwise.'''
        for raw in self._lines:
            if RE_TABLE_SEPARATOR.match(raw):
                yield None
                continue
            # Drop outer whitespace and the outer pipes, then split on the inner ones.
            cells = raw.strip().strip('|').split('|')
            yield [cell.strip() for cell in cells] if STRIP_CELL_WHITESPACE else cells
        # TODO use iparse helper?

    @property
    def as_dicts(self) -> 'AsDictHelper':
        '''
        Return a helper that iterates the body rows as dicts keyed by column name.

        The table must consist of exactly two blocks: a heading block with a
        single row, which supplies the column names, and a body block.

        Raises RuntimeError when the table does not have exactly two blocks or
        the heading block does not have exactly one row. Duplicate column
        names trip an assertion.
        '''
        parts = list(self.blocks)
        if len(parts) != 2:
            raise RuntimeError('Need two-block table to non-ambiguously guess column names')
        heading, body = parts
        if len(heading) != 1:
            raise RuntimeError(f'Need single row heading to guess column names, got: {heading}')
        (columns,) = heading
        assert len(set(columns)) == len(columns), f'Duplicate column names: {columns}'
        return AsDictHelper(columns=columns, rows=body)


class AsDictHelper:
    '''
    Iterable view over table rows as dicts keyed by column name.

    ``columns`` holds the column names; ``rows`` holds the data rows whose
    cells are paired with those names by position.
    '''

    def __init__(self, columns: Sequence[str], rows: Sequence[Row]) -> None:
        self.columns = columns
        self._rows = rows

    def __iter__(self) -> Iterator[Dict[str, str]]:
        '''Yield one ``{column: cell}`` dict per stored row.'''
        for cells in self._rows:
            # zip() stops at the shorter side, so surplus cells or names are dropped.
            yield dict(zip(self.columns, cells))


class Gap:
    '''Placeholder for a stretch of text that is not a recognised rich element.'''
    # TODO: later, add indices etc.


Rich = Union[Table, Gap]
def to_rich_text(text: str) -> Iterator[Rich]:
    '''
    Split org-mode text into 'rich' elements (tables, lists, etc.) separated by gaps.

    Consecutive lines that look like table rows or table separators are
    collected into one Table; every other run of lines becomes a single Gap.

    NOTE: do not rely on the number of items this function yields,
    it may change in the future as more element types are supported.

    Only tables are recognised at the moment.
    '''
    def build(kind: Type[Rich], chunk: List[str]) -> Rich:
        # Turn a finished run of lines into the element matching its kind.
        if kind is Gap:
            return Gap()
        if kind is Table:
            return Table(chunk)  # type: ignore
        raise RuntimeError(f'Unexpected type {kind}')

    pending: List[str] = []
    current: Type[Rich] = Gap
    for line in text.splitlines(keepends=True):
        looks_like_table = RE_TABLE_ROW.match(line) or RE_TABLE_SEPARATOR.match(line)
        line_kind: Type[Rich] = Table if looks_like_table else Gap
        if line_kind is not current:
            # The kind changed: flush the run collected so far, if there is one.
            if pending:
                yield build(current, pending)
                pending = []
            current = line_kind
        pending.append(line)
    if pending:
        yield build(current, pending)