aboutsummaryrefslogtreecommitdiff
path: root/R2R/r2r/cli/cli.py
blob: 4ef38b1ebc0051f7e55e7fec1a0553f3d00d2864 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
import json
import os
import subprocess
import time
import uuid

import click
from dotenv import load_dotenv

from r2r.main.execution import R2RExecutionWrapper


class JsonParamType(click.ParamType):
    """Click parameter type that decodes a JSON string into a Python object."""

    name = "json"

    def convert(self, value, param, ctx):
        """Decode ``value`` as JSON, or fail with a usage error.

        Values that are not text are returned unchanged: a custom Click type
        has to accept values that are already of the target type (for
        example a default supplied in code), and ``json.loads`` would raise
        an uncaught ``TypeError`` on them.
        """
        if not isinstance(value, (str, bytes, bytearray)):
            return value
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            self.fail(f"'{value}' is not a valid JSON string", param, ctx)


# Module-level instance of the JSON parameter type.
JSON = JsonParamType()


@click.group()
@click.option(
    "--config-path", default=None, help="Path to the configuration file"
)
@click.option(
    "--config-name", default=None, help="Name of the configuration to use"
)
@click.option("--client-mode", default=True, help="Run in client mode")
@click.option(
    "--base-url",
    default="http://localhost:8000",
    help="Base URL for client mode",
)
@click.pass_context
def cli(ctx, config_path, config_name, client_mode, base_url):
    """R2R CLI for all core operations."""
    # A configuration is selected either by path or by name, never both.
    if config_path and config_name:
        raise click.UsageError(
            "Cannot specify both config_path and config_name"
        )

    # Convert relative config path to absolute path
    if config_path:
        config_path = os.path.abspath(config_path)

    if ctx.invoked_subcommand == "serve":
        # `serve` constructs its own wrapper with client_mode=False, so it
        # only needs the raw settings as keyword arguments.
        ctx.obj = {
            "config_path": config_path,
            "config_name": config_name,
            "base_url": base_url,
        }
        return

    # Every other subcommand receives a ready-to-use execution wrapper.
    ctx.obj = R2RExecutionWrapper(
        config_path,
        config_name,
        client_mode,
        base_url,
    )


@cli.command()
@click.option("--host", default="0.0.0.0", help="Host to run the server on")
@click.option("--port", default=8000, help="Port to run the server on")
@click.option("--docker", is_flag=True, help="Run using Docker")
@click.option(
    "--docker-ext-neo4j",
    is_flag=True,
    help="Run using Docker with external Neo4j",
)
@click.option("--project-name", default="r2r", help="Project name for Docker")
@click.pass_obj
def serve(obj, host, port, docker, docker_ext_neo4j, project_name):
    """Start the R2R server."""
    # `obj` is the settings dict that the `cli` group stores for this
    # subcommand (config_path, config_name, base_url).

    # Load environment variables from .env file if it exists
    load_dotenv()

    if not docker:
        # Run the server in this process.
        wrapper = R2RExecutionWrapper(**obj, client_mode=False)
        wrapper.serve(host, port)
        return

    # Export the configuration selection through the environment so the
    # docker-compose child process inherits it.
    if config_path := obj.get("config_path"):
        os.environ["CONFIG_PATH"] = config_path
    else:
        os.environ["CONFIG_NAME"] = obj.get("config_name") or "default"

    os.environ["OLLAMA_API_BASE"] = "http://host.docker.internal:11434"

    # Check if compose files exist in the package directory
    package_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", ".."
    )
    compose_yaml = os.path.join(package_dir, "compose.yaml")
    compose_neo4j_yaml = os.path.join(package_dir, "compose.neo4j.yaml")

    if not (
        os.path.exists(compose_yaml) and os.path.exists(compose_neo4j_yaml)
    ):
        click.echo(
            "Error: Docker Compose files not found in the package directory."
        )
        return

    # Build the command as an argument list (no shell involved) so paths
    # containing spaces and user-supplied values are passed through verbatim.
    docker_command = ["docker-compose", "-f", compose_yaml]
    if docker_ext_neo4j:
        docker_command += ["-f", compose_neo4j_yaml]
    if host != "0.0.0.0" or port != 8000:
        # NOTE(review): `--build-arg` is documented as an option of
        # `docker-compose build`, not as a global option; the original
        # placement is preserved here - confirm it works as intended.
        docker_command += [
            "--build-arg",
            f"HOST={host}",
            "--build-arg",
            f"PORT={port}",
        ]
    docker_command += ["--project-name", project_name, "up", "-d"]

    try:
        completed = subprocess.run(docker_command)
    except FileNotFoundError:
        click.echo("Error: `docker-compose` executable not found on PATH.")
        return

    if completed.returncode != 0:
        click.echo(
            f"Error: docker-compose exited with status {completed.returncode}."
        )


@cli.command()
@click.option(
    "--volumes",
    is_flag=True,
    help="Remove named volumes declared in the `volumes` section of the Compose file",
)
@click.option(
    "--remove-orphans",
    is_flag=True,
    help="Remove containers for services not defined in the Compose file",
)
@click.option("--project-name", default="r2r", help="Project name for Docker")
@click.pass_context
def docker_down(ctx, volumes, remove_orphans, project_name):
    """Bring down the Docker Compose setup and attempt to remove the network if necessary."""
    package_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", ".."
    )
    compose_yaml = os.path.join(package_dir, "compose.yaml")
    compose_neo4j_yaml = os.path.join(package_dir, "compose.neo4j.yaml")

    if not (
        os.path.exists(compose_yaml) and os.path.exists(compose_neo4j_yaml)
    ):
        click.echo(
            "Error: Docker Compose files not found in the package directory."
        )
        return

    # Argument list instead of a shell string: no quoting problems and no
    # shell interpretation of the project name.
    docker_command = [
        "docker-compose",
        "-f",
        compose_yaml,
        "-f",
        compose_neo4j_yaml,
        "--project-name",
        project_name,
        "down",
    ]
    # `--volumes` and `--remove-orphans` are options of the `down`
    # subcommand, so they have to come after it.
    if volumes:
        docker_command.append("--volumes")
    if remove_orphans:
        docker_command.append("--remove-orphans")

    click.echo("Bringing down Docker Compose setup...")
    try:
        down_status = subprocess.run(docker_command).returncode
    except FileNotFoundError:
        click.echo("Error: `docker-compose` executable not found on PATH.")
        # Treat as a failure and still attempt the network cleanup below.
        down_status = 1

    if down_status == 0:
        click.echo("Docker Compose setup has been successfully brought down.")
        return

    click.echo(
        "An error occurred while bringing down the Docker Compose setup. Attempting to remove the network..."
    )

    # Get the list of networks
    try:
        networks = (
            subprocess.check_output(
                ["docker", "network", "ls", "--format", "{{.Name}}"]
            )
            .decode()
            .split()
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        click.echo("Could not list Docker networks.")
        return

    # Find the project's network; the lookup follows --project-name
    # (the default project name yields the previous "r2r_" prefix).
    network_prefix = f"{project_name}_"
    r2r_network = next(
        (
            network
            for network in networks
            if network.startswith(network_prefix) and "network" in network
        ),
        None,
    )

    if r2r_network is None:
        click.echo("Could not find the r2r network to remove.")
        return

    # One initial attempt plus one retry; only announce and wait for a
    # retry when another attempt will actually be made.
    max_attempts = 2
    for attempt in range(1, max_attempts + 1):
        remove_result = subprocess.run(
            ["docker", "network", "rm", r2r_network]
        ).returncode

        if remove_result == 0:
            click.echo(f"Successfully removed network: {r2r_network}")
            return

        if attempt < max_attempts:
            click.echo(
                f"Failed to remove network: {r2r_network}. Retrying in 5 seconds..."
            )
            time.sleep(5)
        else:
            click.echo(f"Failed to remove network: {r2r_network}.")

    click.echo(
        "Failed to remove the network after multiple attempts. Please try the following steps:"
    )
    click.echo(
        "1. Run 'docker ps' to check for any running containers using this network."
    )
    click.echo(
        "2. Stop any running containers with 'docker stop <container_id>'."
    )
    click.echo(
        f"3. Try removing the network manually with 'docker network rm {r2r_network}'."
    )
    click.echo(
        "4. If the above steps don't work, you may need to restart the Docker daemon."
    )


@cli.command()
@click.argument("file-paths", nargs=-1)
@click.option(
    "--document-ids", multiple=True, help="Document IDs for ingestion"
)
@click.option("--metadatas", multiple=True, help="Metadatas for ingestion")
@click.option(
    "--versions",
    multiple=True,
    help="Starting version for ingested files (e.g. `v1`)",
)
@click.pass_obj
def ingest_files(obj, file_paths, document_ids, metadatas, versions):
    """Ingest files into R2R."""
    started = time.time()

    # Options that were not given arrive as empty tuples; forward those as
    # None and everything else as a list.
    document_ids = list(document_ids) if document_ids else None
    metadatas = list(metadatas) if metadatas else None
    versions = list(versions) if versions else None

    response = obj.ingest_files(
        list(file_paths), document_ids, metadatas, versions
    )
    elapsed = time.time() - started

    click.echo(f"Time taken to ingest files: {elapsed:.2f} seconds")
    click.echo(response)


@cli.command()
@click.argument("file-paths", nargs=-1)
@click.option(
    "--document-ids", multiple=True, help="Document IDs for ingestion"
)
@click.option("--metadatas", multiple=True, help="Metadatas for ingestion")
@click.pass_obj
def update_files(obj, file_paths, document_ids, metadatas):
    """Update existing files in R2R."""
    t0 = time.time()

    # Default to None if no metadatas were provided. Document IDs are always
    # forwarded as a list, even when empty.
    metadatas = list(metadatas) if metadatas else None

    response = obj.update_files(
        list(file_paths), list(document_ids), metadatas
    )
    t1 = time.time()
    click.echo(f"Time taken to update files: {t1 - t0:.2f} seconds")
    click.echo(response)


@cli.command()
@click.option(
    "--query", prompt="Enter your search query", help="The search query"
)
@click.option(
    "--use-vector-search/--no-use-vector-search",
    default=True,
    help="Use vector search",
)
@click.option(
    "--search-filters", type=JsonParamType(), help="Search filters as JSON"
)
@click.option(
    "--search-limit", default=10, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
    "--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-agent-model", default=None, help="Model for KG agent")
@click.pass_obj
def search(
    obj,
    query,
    use_vector_search,
    search_filters,
    search_limit,
    do_hybrid_search,
    use_kg_search,
    kg_agent_model,
):
    """Perform a search query."""
    # Vector search is on by default; `--no-use-vector-search` turns it off
    # (a plain flag defaulting to True could never be disabled).

    # Only override the KG agent's model when one was requested.
    kg_agent_generation_config = {}
    if kg_agent_model:
        kg_agent_generation_config["model"] = kg_agent_model

    t0 = time.time()

    results = obj.search(
        query,
        use_vector_search,
        search_filters,
        search_limit,
        do_hybrid_search,
        use_kg_search,
        kg_agent_generation_config,
    )

    # Unwrap the payload when the response is a dict with a "results" key.
    if isinstance(results, dict) and "results" in results:
        results = results["results"]

    if "vector_search_results" in results:
        click.echo("Vector search results:")
        for result in results["vector_search_results"]:
            click.echo(result)
    if "kg_search_results" in results and results["kg_search_results"]:
        # click.echo's second positional parameter is the output *file*, so
        # the header and the results are printed with separate calls.
        click.echo("KG search results:")
        click.echo(results["kg_search_results"])

    t1 = time.time()
    click.echo(f"Time taken to search: {t1 - t0:.2f} seconds")


@cli.command()
@click.option("--query", prompt="Enter your query", help="The query for RAG")
@click.option(
    "--use-vector-search/--no-use-vector-search",
    default=True,
    help="Use vector search",
)
@click.option(
    "--search-filters", type=JsonParamType(), help="Search filters as JSON"
)
@click.option(
    "--search-limit", default=10, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
    "--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-agent-model", default=None, help="Model for KG agent")
@click.option("--stream", is_flag=True, help="Stream the RAG response")
@click.option("--rag-model", default=None, help="Model for RAG")
@click.pass_obj
def rag(
    obj,
    query,
    use_vector_search,
    search_filters,
    search_limit,
    do_hybrid_search,
    use_kg_search,
    kg_agent_model,
    stream,
    rag_model,
):
    """Perform a RAG query."""
    # Vector search is on by default; `--no-use-vector-search` turns it off
    # (a plain flag defaulting to True could never be disabled).

    # Model overrides are only included when explicitly requested.
    kg_agent_generation_config = {}
    if kg_agent_model:
        kg_agent_generation_config["model"] = kg_agent_model

    rag_generation_config = {"stream": stream}
    if rag_model:
        rag_generation_config["model"] = rag_model

    t0 = time.time()

    response = obj.rag(
        query,
        use_vector_search,
        search_filters,
        search_limit,
        do_hybrid_search,
        use_kg_search,
        kg_agent_generation_config,
        stream,
        rag_generation_config,
    )

    if stream:
        # Print chunks as they arrive, then finish the line.
        for chunk in response:
            click.echo(chunk, nl=False)
        click.echo()
    elif obj.client_mode:
        # In client mode the response is read by key.
        click.echo(f"Search Results:\n{response['search_results']}")
        click.echo(f"Completion:\n{response['completion']}")
    else:
        # Otherwise the response is read by attribute.
        click.echo(f"Search Results:\n{response.search_results}")
        click.echo(f"Completion:\n{response.completion}")

    t1 = time.time()
    click.echo(f"Time taken for RAG: {t1 - t0:.2f} seconds")


@cli.command()
@click.option("--keys", multiple=True, help="Keys for deletion")
@click.option("--values", multiple=True, help="Values for deletion")
@click.pass_obj
def delete(obj, keys, values):
    """Delete documents based on keys and values."""
    # Keys and values are forwarded as two lists, so reject a call where
    # their counts differ.
    key_count, value_count = len(keys), len(values)
    if key_count != value_count:
        raise click.UsageError("Number of keys must match number of values")

    started = time.time()
    response = obj.delete(list(keys), list(values))
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken for deletion: {elapsed:.2f} seconds")


@cli.command()
@click.option("--log-type-filter", help="Filter for log types")
@click.pass_obj
def logs(obj, log_type_filter):
    """Retrieve logs with optional type filter."""
    started = time.time()
    # The filter is None when the option is omitted.
    response = obj.logs(log_type_filter)
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken to retrieve logs: {elapsed:.2f} seconds")


@cli.command()
@click.option("--document-ids", multiple=True, help="Document IDs to overview")
@click.option("--user-ids", multiple=True, help="User IDs to overview")
@click.pass_obj
def documents_overview(obj, document_ids, user_ids):
    """Get an overview of documents."""
    # An option that was not given becomes None; otherwise pass a list.
    document_ids = list(document_ids) or None
    user_ids = list(user_ids) or None

    started = time.time()
    response = obj.documents_overview(document_ids, user_ids)
    elapsed = time.time() - started

    # One output line per returned document.
    for document in response:
        click.echo(document)
    click.echo(f"Time taken to get document overview: {elapsed:.2f} seconds")


@cli.command()
@click.argument("document_id")
@click.pass_obj
def document_chunks(obj, document_id):
    """Get chunks of a specific document."""
    started = time.time()
    response = obj.document_chunks(document_id)
    elapsed = time.time() - started

    # One output line per returned chunk.
    for chunk in response:
        click.echo(chunk)
    click.echo(f"Time taken to get document chunks: {elapsed:.2f} seconds")


@cli.command()
@click.pass_obj
def app_settings(obj):
    """Retrieve application settings."""
    started = time.time()
    response = obj.app_settings()
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken to get app settings: {elapsed:.2f} seconds")


@cli.command()
@click.option("--user-ids", multiple=True, help="User IDs to overview")
@click.pass_obj
def users_overview(obj, user_ids):
    """Get an overview of users."""
    # Parse the IDs up front so a malformed value is reported as a normal
    # usage error instead of an unhandled ValueError traceback.
    try:
        user_ids = [uuid.UUID(user_id) for user_id in user_ids] or None
    except ValueError as exc:
        raise click.BadParameter(
            f"invalid user ID: {exc}", param_hint="--user-ids"
        ) from exc

    t0 = time.time()
    response = obj.users_overview(user_ids)
    t1 = time.time()

    # One output line per returned user.
    for user in response:
        click.echo(user)
    click.echo(f"Time taken to get users overview: {t1 - t0:.2f} seconds")


@cli.command()
@click.option(
    "--filters", type=JsonParamType(), help="Filters for analytics as JSON"
)
@click.option(
    "--analysis-types", type=JsonParamType(), help="Analysis types as JSON"
)
@click.pass_obj
def analytics(obj, filters, analysis_types):
    """Retrieve analytics data."""
    # Both options are decoded from JSON by their parameter type before
    # this function runs.
    started = time.time()
    response = obj.analytics(filters, analysis_types)
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken to get analytics: {elapsed:.2f} seconds")


@cli.command()
@click.option(
    "--limit", default=100, help="Limit the number of relationships returned"
)
@click.pass_obj
def inspect_knowledge_graph(obj, limit):
    """Print relationships from the knowledge graph."""
    started = time.time()
    response = obj.inspect_knowledge_graph(limit)
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken to print relationships: {elapsed:.2f} seconds")


@cli.command()
@click.option(
    "--no-media",
    default=True,
    help="Exclude media files from ingestion",
)
@click.option("--option", default=0, help="Which file to ingest?")
@click.pass_obj
def ingest_sample_file(obj, no_media, option):
    """Ingest a single sample file into R2R."""
    # The docstring doubles as this command's help text.
    t0 = time.time()
    response = obj.ingest_sample_file(no_media=no_media, option=option)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to ingest sample: {t1 - t0:.2f} seconds")


@cli.command()
@click.option(
    "--no-media",
    default=True,
    help="Exclude media files from ingestion",
)
@click.pass_obj
def ingest_sample_files(obj, no_media):
    """Ingest all sample files into R2R."""
    started = time.time()
    response = obj.ingest_sample_files(no_media=no_media)
    elapsed = time.time() - started

    click.echo(response)
    click.echo(f"Time taken to ingest sample files: {elapsed:.2f} seconds")


@cli.command()
@click.pass_obj
def health(obj):
    """Check the health of the server."""
    t0 = time.time()
    response = obj.health()
    t1 = time.time()

    click.echo(response)
    # The timing line names this command's own operation.
    click.echo(f"Time taken to check health: {t1 - t0:.2f} seconds")


@cli.command()
def version():
    """Print the version of R2R."""
    # Imported under an alias so the helper does not shadow this command's
    # own name inside the function body.
    from importlib.metadata import version as installed_version

    click.echo(installed_version("r2r"))


def main():
    """Entry point: invoke the `cli` command group."""
    cli()


# Run the CLI when this file is executed directly as a script.
if __name__ == "__main__":
    main()