aboutsummaryrefslogtreecommitdiff
path: root/R2R/tests/test_config.py
blob: 5e60833c906eec8de76bbbd9b9d9eb786de8694a (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import asyncio
import json
from unittest.mock import Mock, mock_open, patch

import pytest

from r2r import DocumentType, R2RConfig


@pytest.fixture(scope="session", autouse=True)
def event_loop_policy():
    """Install the stock asyncio event-loop policy once for the whole session."""
    default_policy = asyncio.DefaultEventLoopPolicy()
    asyncio.set_event_loop_policy(default_policy)


@pytest.fixture(scope="function")
def event_loop():
    """Hand each test a brand-new event loop and tear it down afterwards."""
    policy = asyncio.get_event_loop_policy()
    fresh_loop = policy.new_event_loop()
    yield fresh_loop
    # Teardown: close the loop and clear the thread's current-loop binding.
    fresh_loop.close()
    asyncio.set_event_loop(None)


@pytest.fixture(scope="session", autouse=True)
async def cleanup_tasks():
    """At session end, cancel and drain every task still pending on the loop."""
    yield
    me = asyncio.current_task()
    leftovers = [t for t in asyncio.all_tasks() if t is not me]
    for leftover in leftovers:
        leftover.cancel()
        try:
            await leftover
        except asyncio.CancelledError:
            # Cancellation is the expected outcome here, not a failure.
            pass


@pytest.fixture
def mock_bad_file():
    """Patch open() so any read yields an empty JSON object (no config sections)."""
    empty_payload = json.dumps({})
    patched_open = patch("builtins.open", mock_open(read_data=empty_payload))
    with patched_open as opener:
        yield opener


@pytest.fixture
def mock_file():
    """Patch open() so any read yields a fully-populated R2R config payload."""
    config_payload = {
        "app": {"max_file_size_in_mb": 128},
        "embedding": {
            "provider": "example_provider",
            "base_model": "model",
            "base_dimension": 128,
            "batch_size": 16,
            "text_splitter": "default",
        },
        "kg": {
            "provider": "None",
            "batch_size": 1,
            "text_splitter": {
                "type": "recursive_character",
                "chunk_size": 2048,
                "chunk_overlap": 0,
            },
        },
        "eval": {"llm": {"provider": "local"}},
        "ingestion": {"excluded_parsers": {}},
        "completions": {"provider": "lm_provider"},
        "logging": {
            "provider": "local",
            "log_table": "logs",
            "log_info_table": "log_info",
        },
        "prompt": {"provider": "prompt_provider"},
        "vector_database": {"provider": "vector_db"},
    }
    serialized = json.dumps(config_payload)
    with patch("builtins.open", mock_open(read_data=serialized)) as opener:
        yield opener


@pytest.mark.asyncio
async def test_r2r_config_loading_required_keys(mock_bad_file):
    """An empty config file must be rejected with a KeyError."""
    with pytest.raises(KeyError):
        _ = R2RConfig.from_json("config.json")


@pytest.mark.asyncio
async def test_r2r_config_loading(mock_file):
    """A valid config file must surface the embedding provider from the mock."""
    loaded = R2RConfig.from_json("config.json")
    expected = "example_provider"
    assert (
        loaded.embedding.provider == expected
    ), "Provider should match the mock data"


@pytest.fixture
def mock_redis_client():
    """A bare Mock standing in for a Redis client; records set/get calls."""
    return Mock()


def test_r2r_config_serialization(mock_file, mock_redis_client):
    """save_to_redis must call set() exactly once with the serialized config."""
    config = R2RConfig.from_json("config.json")
    config.save_to_redis(mock_redis_client, "test_key")
    mock_redis_client.set.assert_called_once()
    # Second positional argument of set(key, value) holds the JSON payload.
    payload = mock_redis_client.set.call_args[0][1]
    round_tripped = json.loads(payload)
    assert round_tripped["app"]["max_file_size_in_mb"] == 128


def test_r2r_config_deserialization(mock_file, mock_redis_client):
    """load_from_redis must rebuild the config and map parser names to enums."""
    stored_config = {
        "app": {"max_file_size_in_mb": 128},
        "embedding": {
            "provider": "example_provider",
            "base_model": "model",
            "base_dimension": 128,
            "batch_size": 16,
            "text_splitter": "default",
        },
        "kg": {
            "provider": "None",
            "batch_size": 1,
            "text_splitter": {
                "type": "recursive_character",
                "chunk_size": 2048,
                "chunk_overlap": 0,
            },
        },
        "eval": {"llm": {"provider": "local"}},
        "ingestion": {"excluded_parsers": ["pdf"]},
        "completions": {"provider": "lm_provider"},
        "logging": {
            "provider": "local",
            "log_table": "logs",
            "log_info_table": "log_info",
        },
        "prompt": {"provider": "prompt_provider"},
        "vector_database": {"provider": "vector_db"},
    }
    mock_redis_client.get.return_value = json.dumps(stored_config)
    restored = R2RConfig.load_from_redis(mock_redis_client, "test_key")
    assert restored.app["max_file_size_in_mb"] == 128
    # "pdf" in the stored JSON should come back as the DocumentType.PDF enum.
    assert DocumentType.PDF in restored.ingestion["excluded_parsers"]


def test_r2r_config_missing_section():
    """A config containing only the embedding section must raise KeyError."""
    partial_config = {
        "embedding": {
            "provider": "example_provider",
            "base_model": "model",
            "base_dimension": 128,
            "batch_size": 16,
            "text_splitter": "default",
        }
    }
    serialized = json.dumps(partial_config)
    with patch("builtins.open", mock_open(read_data=serialized)), pytest.raises(
        KeyError
    ):
        R2RConfig.from_json("config.json")


def test_r2r_config_missing_required_key():
    """A config whose embedding section lacks 'provider' must raise KeyError."""
    config_without_provider = {
        "app": {"max_file_size_in_mb": 128},
        "embedding": {
            "base_model": "model",
            "base_dimension": 128,
            "batch_size": 16,
            "text_splitter": "default",
        },
        "kg": {
            "provider": "None",
            "batch_size": 1,
            "text_splitter": {
                "type": "recursive_character",
                "chunk_size": 2048,
                "chunk_overlap": 0,
            },
        },
        "completions": {"provider": "lm_provider"},
        "logging": {
            "provider": "local",
            "log_table": "logs",
            "log_info_table": "log_info",
        },
        "prompt": {"provider": "prompt_provider"},
        "vector_database": {"provider": "vector_db"},
    }
    serialized = json.dumps(config_without_provider)
    with patch("builtins.open", mock_open(read_data=serialized)), pytest.raises(
        KeyError
    ):
        R2RConfig.from_json("config.json")