1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
import json
from typing import AsyncGenerator
from r2r.base.abstractions.document import DataType
from r2r.base.parsers.base_parser import AsyncParser
class JSONParser(AsyncParser[DataType]):
    """A parser that turns JSON documents into indented plain text."""

    async def ingest(self, data: DataType) -> AsyncGenerator[str, None]:
        """Ingest JSON data and yield a formatted text representation.

        Args:
            data: The raw JSON document. ``bytes`` input is decoded as UTF-8
                before parsing; anything else is handed to ``json.loads``
                unchanged.

        Yields:
            Exactly one string: the text rendering of the whole document.

        Raises:
            UnicodeDecodeError: If ``data`` is bytes that are not valid UTF-8.
            json.JSONDecodeError: If the payload is not valid JSON.
        """
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        yield self._parse_json(json.loads(data))

    def _parse_json(self, data: dict) -> str:
        """Render parsed JSON as indented ``key: value`` text.

        Keys whose value is ``None`` (JSON ``null``) are dropped from every
        object in the document, including objects nested inside arrays, and
        the remaining structure is flattened into newline-separated lines.

        Args:
            data: The parsed JSON value. Normally a dict, but lists and
                scalars (which ``json.loads`` can also return) are handled.

        Returns:
            The text representation of ``data``.
        """

        def remove_objects_with_null(obj):
            """Return a copy of ``obj`` with null-valued dict keys removed."""
            if isinstance(obj, dict):
                return {
                    key: remove_objects_with_null(value)
                    for key, value in obj.items()
                    if value is not None
                }
            if isinstance(obj, list):
                # Recurse through arrays so objects inside them are cleaned
                # too; null *items* keep their position in the list.
                return [remove_objects_with_null(item) for item in obj]
            return obj

        def format_json_as_text(obj, indent=0):
            """Format ``obj`` as text, indenting nested levels by two."""
            indent_str = " " * indent

            if isinstance(obj, dict):
                lines = []
                for key, value in obj.items():
                    if isinstance(value, (dict, list)):
                        # Containers go on the lines below their key.
                        nested = format_json_as_text(value, indent + 2)
                        lines.append(f"{indent_str}{key}:\n{nested}")
                    else:
                        lines.append(f"{indent_str}{key}: {value}")
                return "\n".join(lines)

            if isinstance(obj, list):
                # Items carry their own indentation from the recursive call.
                return "\n".join(
                    format_json_as_text(item, indent + 2) for item in obj
                )

            return f"{indent_str}{obj}"

        return format_json_as_text(remove_objects_with_null(data))
|