aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/orgparse/extra.py
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/orgparse/extra.py
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/orgparse/extra.py')
-rw-r--r--.venv/lib/python3.12/site-packages/orgparse/extra.py116
1 files changed, 116 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/orgparse/extra.py b/.venv/lib/python3.12/site-packages/orgparse/extra.py
new file mode 100644
index 00000000..cd51abaf
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/orgparse/extra.py
@@ -0,0 +1,116 @@
+import re
+from typing import List, Sequence, Dict, Iterator, Iterable, Union, Optional, Type
+
+
# Matches an org-mode table separator line such as ``|---+---|``
# (optionally preceded by whitespace).
RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|')

# Matches the start of an org-mode table row: optional whitespace, a pipe,
# one or more non-pipe characters, and another pipe.
# NOTE: the cell group deliberately has a single quantifier. A nested
# quantifier such as ``([^|]+)+`` accepts exactly the same strings, but
# backtracks exponentially on a line that opens with '|' and never contains
# a second one.
RE_TABLE_ROW = re.compile(r'\s*\|([^|]+)\|')

# When true, Table strips surrounding whitespace from every parsed cell.
STRIP_CELL_WHITESPACE = True


# A parsed table row: one string per cell.
Row = Sequence[str]
+
class Table:
    '''
    An org-mode table backed by its raw source lines.

    The lines are parsed lazily each time the table is iterated; separator
    lines (matched by RE_TABLE_SEPARATOR) split the table into blocks.
    '''

    def __init__(self, lines: List[str]) -> None:
        self._lines = lines

    @property
    def blocks(self) -> Iterator[Sequence[Row]]:
        '''
        Yield groups of consecutive rows, split at separator lines.

        A separator seen before any row and before any other separator does
        not produce an empty group. Every later separator closes the current
        group, even when that group is empty. Rows trailing the last
        separator are yielded as a final group if there are any.
        '''
        group: List[Row] = []
        first = True
        for row in self._pre_rows():
            if row is None:
                if not first or group:
                    yield group
                first = False
                group = []
            else:
                group.append(row)
        if group:
            yield group

    def __iter__(self) -> Iterator[Row]:
        return self.rows

    @property
    def rows(self) -> Iterator[Row]:
        '''Yield every data row, skipping separator lines.'''
        for row in self._pre_rows():
            if row is not None:
                yield row

    def _pre_rows(self) -> Iterator[Optional[Row]]:
        '''
        Yield one item per source line: None for a separator line,
        otherwise the list of cells of that row.
        '''
        for line in self._lines:
            if RE_TABLE_SEPARATOR.match(line):
                yield None
                continue
            body = line.strip()
            # Remove exactly one delimiter per side. Stripping *all* edge
            # pipes (str.strip('|')) would swallow unpadded empty cells at
            # the row edges, e.g. '| a ||' would lose its second column.
            if body.startswith('|'):
                body = body[1:]
            if body.endswith('|'):
                body = body[:-1]
            cells = body.split('|')
            if STRIP_CELL_WHITESPACE:
                cells = [cell.strip() for cell in cells]
            yield cells
            # TODO use iparse helper?

    @property
    def as_dicts(self) -> 'AsDictHelper':
        '''
        Return a helper that iterates the table body as dicts keyed by the
        header row.

        The table must consist of exactly two blocks: a single-row header
        block followed by the body block.

        Raises:
            RuntimeError: if the table does not have exactly two blocks, or
                the header block does not have exactly one row.
            AssertionError: if the header contains duplicate column names.
        '''
        bl = list(self.blocks)
        if len(bl) != 2:
            raise RuntimeError('Need two-block table to non-ambiguously guess column names')
        hrows = bl[0]
        if len(hrows) != 1:
            raise RuntimeError(f'Need single row heading to guess column names, got: {hrows}')
        columns = hrows[0]
        # Raised explicitly (rather than via an assert statement) so the
        # check still runs when Python is started with -O.
        if len(set(columns)) != len(columns):
            raise AssertionError(f'Duplicate column names: {columns}')
        return AsDictHelper(
            columns=columns,
            rows=bl[1],
        )
+
+
class AsDictHelper:
    '''
    Iterable view of table rows as dicts keyed by column name.

    ``columns`` is exposed as a public attribute. Each row is paired with
    the column names positionally via zip, so pairing stops at the shorter
    of the two sequences.
    '''

    def __init__(self, columns: Sequence[str], rows: Sequence[Row]) -> None:
        self.columns = columns
        self._rows = rows

    def __iter__(self) -> Iterator[Dict[str, str]]:
        for row in self._rows:
            yield dict(zip(self.columns, row))
+
+
class Gap:
    '''Marker for a stretch of text that is not a recognised rich element.'''
    # todo later, add indices etc
+
+
Rich = Union[Table, Gap]


def to_rich_text(text: str) -> Iterator[Rich]:
    '''
    Convert an org-mode text into a 'rich' text, e.g. tables/lists/etc, interleaved by gaps.
    NOTE: you shouldn't rely on the number of items returned by this function,
    it might change in the future when more types are supported.

    At the moment only tables are supported.
    '''
    def build(kind: Type[Rich], chunk: List[str]) -> Rich:
        # Turn a finished run of same-kind lines into its rich element.
        if kind is Gap:
            return Gap()
        if kind is Table:
            return Table(chunk)
        raise RuntimeError(f'Unexpected type {kind}')

    pending: List[str] = []
    kind: Type[Rich] = Gap

    for line in text.splitlines(keepends=True):
        looks_like_table = bool(RE_TABLE_ROW.match(line) or RE_TABLE_SEPARATOR.match(line))
        line_kind: Type[Rich] = Table if looks_like_table else Gap
        if line_kind is not kind:
            # The kind changed: flush the run collected so far (if any)
            # under the *previous* kind before switching over.
            if pending:
                item = build(kind, pending)
                pending = []
                yield item
            kind = line_kind
        pending.append(line)

    if pending:
        yield build(kind, pending)