Diffstat (limited to '.venv/lib/python3.12/site-packages/r2r')
-rw-r--r--  .venv/lib/python3.12/site-packages/r2r/__init__.py |  19
-rw-r--r--  .venv/lib/python3.12/site-packages/r2r/mcp.py      | 147
-rw-r--r--  .venv/lib/python3.12/site-packages/r2r/r2r.toml    | 122
-rw-r--r--  .venv/lib/python3.12/site-packages/r2r/serve.py    | 146
4 files changed, 434 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/r2r/__init__.py b/.venv/lib/python3.12/site-packages/r2r/__init__.py
new file mode 100644
index 00000000..0ef83aa4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/__init__.py
@@ -0,0 +1,19 @@
+from importlib import metadata
+
+from sdk.async_client import R2RAsyncClient
+from sdk.sync_client import R2RClient
+from shared import *
+from shared import __all__ as shared_all
+
+__version__ = metadata.version("r2r")
+
+__all__ = [
+    "R2RAsyncClient",
+    "R2RClient",
+    "__version__",
+    "R2RException",
+] + shared_all
+
+
+def get_version():
+    return __version__
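
A quick consumption sketch for the exports above (hedged: the R2R_API_URL variable and the default port come from mcp.py's header comment and serve.py below, not from __init__.py itself):

    import os

    from r2r import R2RClient, get_version

    print(get_version())  # version string resolved via importlib.metadata

    # mcp.py below constructs the client with no arguments and takes the
    # endpoint from the environment (assumed default shown here).
    os.environ.setdefault("R2R_API_URL", "http://localhost:7272")
    client = R2RClient()
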
diff --git a/.venv/lib/python3.12/site-packages/r2r/mcp.py b/.venv/lib/python3.12/site-packages/r2r/mcp.py
new file mode 100644
index 00000000..33490ea1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/mcp.py
@@ -0,0 +1,147 @@
+# Add to your local machine with `mcp install r2r/mcp.py -v R2R_API_URL=http://localhost:7272` or so.
+from r2r import R2RClient
+
+
+def id_to_shorthand(id: str) -> str:
+    return str(id)[:7]
+
+
+def format_search_results_for_llm(
+    results,
+) -> str:
+    """
+    Format chunk, graph, web, and document results as plain text:
+      - for each chunk / graph / web / doc in `results`,
+      - label the entry with its shortened source ID,
+      - then append the fields relevant to that result type.
+    """
+    lines = []
+
+    # Shortened IDs (rather than a running counter) let an LLM cite a
+    # source like [abc1234] and let us map that citation back to the
+    # full result object afterwards.
+
+    # 1) Chunk search
+    if results.chunk_search_results:
+        lines.append("Vector Search Results:")
+        for c in results.chunk_search_results:
+            lines.append(f"Source ID [{id_to_shorthand(c.id)}]:")
+            lines.append(c.text or "")  # or c.text[:200] to truncate
+
+    # 2) Graph search
+    if results.graph_search_results:
+        lines.append("Graph Search Results:")
+        for g in results.graph_search_results:
+            lines.append(f"Source ID [{id_to_shorthand(g.id)}]:")
+            if hasattr(g.content, "summary"):
+                lines.append(f"Community Name: {g.content.name}")
+                lines.append(f"ID: {g.content.id}")
+                lines.append(f"Summary: {g.content.summary}")
+                # etc. ...
+            elif hasattr(g.content, "name") and hasattr(
+                g.content, "description"
+            ):
+                lines.append(f"Entity Name: {g.content.name}")
+                lines.append(f"Description: {g.content.description}")
+            elif (
+                hasattr(g.content, "subject")
+                and hasattr(g.content, "predicate")
+                and hasattr(g.content, "object")
+            ):
+                lines.append(
+                    f"Relationship: {g.content.subject}-{g.content.predicate}-{g.content.object}"
+                )
+            # Add metadata if needed
+
+    # 3) Web search
+    if results.web_search_results:
+        lines.append("Web Search Results:")
+        for w in results.web_search_results:
+            lines.append(f"Source ID [{id_to_shorthand(w.id)}]:")
+            lines.append(f"Title: {w.title}")
+            lines.append(f"Link: {w.link}")
+            lines.append(f"Snippet: {w.snippet}")
+
+    # 4) Local context docs
+    if results.document_search_results:
+        lines.append("Local Context Documents:")
+        for doc_result in results.document_search_results:
+            doc_title = doc_result.title or "Untitled Document"
+            doc_id = doc_result.id
+            summary = doc_result.summary
+
+            lines.append(f"Full Document ID: {doc_id}")
+            lines.append(f"Shortened Document ID: {id_to_shorthand(doc_id)}")
+            lines.append(f"Document Title: {doc_title}")
+            if summary:
+                lines.append(f"Summary: {summary}")
+
+            if doc_result.chunks:
+                # Then each chunk inside:
+                for chunk in doc_result.chunks:
+                    lines.append(
+                        f"\nChunk ID {id_to_shorthand(chunk['id'])}:\n{chunk['text']}"
+                    )
+
+    result = "\n".join(lines)
+    return result
+
+
+# Create a FastMCP server
+
+try:
+    from mcp.server.fastmcp import FastMCP
+
+    mcp = FastMCP("R2R Retrieval System")
+except Exception as e:
+    raise ImportError(
+        "MCP is not installed. Please run `pip install mcp`"
+    ) from e
+
+
+# Search tool
+@mcp.tool()
+async def search(query: str) -> str:
+    """
+    Perform a search against the R2R knowledge base
+
+    Args:
+        query: The question to answer using the knowledge base
+
+    Returns:
+        Formatted chunk, graph, web, and document results relevant to the query
+    """
+    client = R2RClient()
+
+    # Call the search endpoint
+    search_response = client.retrieval.search(
+        query=query,
+    )
+    return format_search_results_for_llm(search_response.results)
+
+
+# RAG query tool
+@mcp.tool()
+async def rag(query: str) -> str:
+    """
+    Perform a Retrieval-Augmented Generation query
+
+    Args:
+        query: The question to answer using the knowledge base
+
+    Returns:
+        A response generated based on relevant context from the knowledge base
+    """
+    client = R2RClient()
+
+    # Call the RAG endpoint
+    rag_response = client.retrieval.rag(
+        query=query,
+    )
+
+    return rag_response.results.generated_answer  # type: ignore
+
+
+# Run the server if executed directly
+if __name__ == "__main__":
+    mcp.run()
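
Because `search` and `rag` are ordinary async functions under the `@mcp.tool()` decorator, they can be smoke-tested in-process before registering the server with `mcp install r2r/mcp.py -v R2R_API_URL=http://localhost:7272`. A minimal sketch, assuming FastMCP's decorator returns the wrapped function unchanged and a live R2R API is reachable (the query string is purely illustrative):

    import asyncio

    from r2r.mcp import rag, search

    async def smoke_test() -> None:
        # Each tool constructs its own R2RClient and queries the running API.
        print(await search("What does automatic_deduplication do?"))
        print(await rag("What does automatic_deduplication do?"))

    asyncio.run(smoke_test())
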
diff --git a/.venv/lib/python3.12/site-packages/r2r/r2r.toml b/.venv/lib/python3.12/site-packages/r2r/r2r.toml
new file mode 100644
index 00000000..a07893ab
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/r2r.toml
@@ -0,0 +1,122 @@
+[app]
+# app settings are globally available like `r2r_config.agent.app`
+# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
+default_max_documents_per_user = 10_000
+default_max_chunks_per_user = 10_000_000
+default_max_collections_per_user = 5_000
+
+# Set the default max upload size to 200 GB for local testing
+default_max_upload_size = 214748364800
+
+# LLM used for internal operations, like deriving conversation names
+fast_llm = "openai/gpt-4o-mini"
+
+# LLM used for user-facing output, like RAG replies
+quality_llm = "openai/gpt-4o"
+
+# LLM used for ingesting visual inputs
+vlm = "openai/gpt-4o"
+
+# LLM used for transcription
+audio_lm = "openai/whisper-1"
+
+# Reasoning model, used for `research` agent
+reasoning_llm = "openai/o3-mini"
+# Planning model, used for `research` agent
+planning_llm = "anthropic/claude-3-7-sonnet-20250219"
+
+
+[agent]
+rag_agent_static_prompt = "static_rag_agent"
+rag_agent_dynamic_prompt = "dynamic_rag_agent"
+# The following tools are available to the `rag` agent
+rag_tools = ["search_file_descriptions", "search_file_knowledge", "get_file_content"] # can add "web_search" | "web_scrape"
+# The following tools are available to the `research` agent
+research_tools = ["rag", "reasoning", "critique", "python_executor"]
+
+[auth]
+provider = "r2r"
+access_token_lifetime_in_minutes = 60000
+refresh_token_lifetime_in_days = 7
+require_authentication = false
+require_email_verification = false
+default_admin_email = "admin@example.com"
+default_admin_password = "change_me_immediately"
+
+[completion]
+provider = "r2r"
+concurrent_request_limit = 64
+
+  [completion.generation_config]
+  temperature = 0.1
+  top_p = 1
+  max_tokens_to_sample = 4_096
+  stream = false
+  add_generation_kwargs = { }
+
+[crypto]
+provider = "bcrypt"
+
+[database]
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+collection_summary_prompt = "collection_summary"
+
+  [database.graph_creation_settings]
+  graph_entity_description_prompt = "graph_entity_description"
+  graph_extraction_prompt = "graph_extraction"
+  entity_types = [] # if empty, all entities are extracted
+  relation_types = [] # if empty, all relations are extracted
+  automatic_deduplication = true # enable automatic deduplication of entities
+
+  [database.graph_enrichment_settings]
+  graph_communities_prompt = "graph_communities"
+
+[embedding]
+provider = "litellm"
+# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
+# For advanced applications, use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+initial_backoff = 1.0
+quantization_settings = { quantization_type = "FP32" }
+
+[completion_embedding]
+# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
+provider = "litellm"
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+
+[ingestion]
+provider = "r2r"
+chunking_strategy = "recursive"
+chunk_size = 1_024
+chunk_overlap = 512
+excluded_parsers = ["mp4"]
+automatic_extraction = true # enable automatic extraction of entities and relations
+
+  [ingestion.chunk_enrichment_settings]
+  chunk_enrichment_prompt = "chunk_enrichment"
+  enable_chunk_enrichment = false # disabled by default
+  n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment
+
+  [ingestion.extra_parsers]
+  pdf = "zerox"
+
+[logging]
+provider = "r2r"
+log_table = "logs"
+log_info_table = "log_info"
+
+[orchestration]
+provider = "simple"
+
+[email]
+provider = "console_mock" # `smtp` | `sendgrid` supported
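
serve.py below resolves this file via `R2RConfig.load(config_name, config_path)`. A sketch of loading an edited copy directly, assuming settings are exposed attribute-style as the `[app]` comment suggests (the path is hypothetical):

    from core import R2RConfig

    # serve.py passes either a config name or a config path, never both
    # (it raises ValueError otherwise); here we load by explicit path.
    config = R2RConfig.load(None, "/etc/r2r/r2r.toml")  # hypothetical path

    # Attribute-style access per the `[app]` comment above (assumed shape).
    print(config.app.quality_llm)  # "openai/gpt-4o"
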
diff --git a/.venv/lib/python3.12/site-packages/r2r/serve.py b/.venv/lib/python3.12/site-packages/r2r/serve.py
new file mode 100644
index 00000000..7e9226bc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/serve.py
@@ -0,0 +1,146 @@
+import argparse
+import asyncio
+import logging
+import os
+import sys
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+try:
+    from core import R2RApp, R2RBuilder, R2RConfig
+    from core.utils.logging_config import configure_logging
+except ImportError as e:
+    logger.error(
+        f"Failed to start server: core dependencies not installed: {e}"
+    )
+    logger.error("To run the server, install the required dependencies:")
+    logger.error("pip install 'r2r[core]'")
+    sys.exit(1)
+
+
+async def create_app(
+    config_name: Optional[str] = None,
+    config_path: Optional[str] = None,
+    full: bool = False,
+) -> "R2RApp":
+    """
+    Creates and returns an R2R application instance based on the provided
+    or environment-sourced configuration.
+    """
+    # If arguments not passed, fall back to environment variables
+    config_name = config_name or os.getenv("R2R_CONFIG_NAME")
+    config_path = config_path or os.getenv("R2R_CONFIG_PATH")
+
+    if config_path and config_name:
+        raise ValueError(
+            f"Cannot specify both config_path and config_name, got {config_path} and {config_name}"
+        )
+
+    if not config_path and not config_name:
+        # If neither is specified nor set in environment,
+        # default to 'full' if --full is True, else 'default'
+        config_name = "full" if full else "default"
+
+    try:
+        r2r_instance = await R2RBuilder(
+            config=R2RConfig.load(config_name, config_path)
+        ).build()
+
+        # Start orchestration worker
+        await r2r_instance.orchestration_provider.start_worker()
+        return r2r_instance
+    except ImportError as e:
+        logger.error(f"Failed to initialize R2R: {e}")
+        logger.error(
+            "Please check your configuration and installed dependencies"
+        )
+        sys.exit(1)
+
+
+def run_server(
+    host: Optional[str] = None,
+    port: Optional[int] = None,
+    config_name: Optional[str] = None,
+    config_path: Optional[str] = None,
+    full: bool = False,
+):
+    """
+    Runs the R2R server with the provided or environment-based settings.
+    """
+    # Overwrite environment variables if arguments are explicitly passed
+    if host is not None:
+        os.environ["R2R_HOST"] = host
+    if port is not None:
+        os.environ["R2R_PORT"] = str(port)
+    if config_path is not None:
+        os.environ["R2R_CONFIG_PATH"] = config_path
+    if config_name is not None:
+        os.environ["R2R_CONFIG_NAME"] = config_name
+
+    # Fallback to environment or defaults if necessary
+    final_host = os.getenv("R2R_HOST", "0.0.0.0")
+    final_port = int(os.getenv("R2R_PORT", "7272"))
+
+    try:
+        configure_logging()
+    except Exception as e:
+        logger.error(f"Failed to configure logging: {e}")
+
+    try:
+
+        async def start():
+            app = await create_app(config_name, config_path, full)
+            await app.serve(final_host, final_port)
+
+        asyncio.run(start())
+    except Exception as e:
+        logger.error(f"Failed to start R2R server: {e}")
+        sys.exit(1)
+
+
+def main():
+    """
+    Parse command-line arguments and then run the server.
+    """
+    parser = argparse.ArgumentParser(description="Run the R2R server.")
+    parser.add_argument(
+        "--host",
+        default=None,
+        help="Host to bind to. Overrides R2R_HOST env if provided.",
+    )
+    parser.add_argument(
+        "--port",
+        default=None,
+        type=int,
+        help="Port to bind to. Overrides R2R_PORT env if provided.",
+    )
+    parser.add_argument(
+        "--config-path",
+        default=None,
+        help="Path to the configuration file. Overrides R2R_CONFIG_PATH env if provided.",
+    )
+    parser.add_argument(
+        "--config-name",
+        default=None,
+        help="Name of the configuration. Overrides R2R_CONFIG_NAME env if provided.",
+    )
+    parser.add_argument(
+        "--full",
+        action="store_true",
+        help="Use the 'full' config if neither config-path nor config-name is specified.",
+    )
+
+    args = parser.parse_args()
+
+    run_server(
+        host=args.host,
+        port=args.port,
+        config_name=args.config_name,
+        config_path=args.config_path,
+        full=args.full,
+    )
+
+
+if __name__ == "__main__":
+    main()
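
To start the server from Python rather than a shell, `run_server` can be called directly; this mirrors an invocation like `python -m r2r.serve --host 0.0.0.0 --port 7272 --full` (the module entry-point path is an assumption) and requires the `core` dependencies noted in the import guard:

    from r2r.serve import run_server

    # Explicit arguments overwrite R2R_HOST / R2R_PORT in the environment;
    # with neither config-path nor config-name given, full=True selects the
    # built-in "full" configuration.
    run_server(host="0.0.0.0", port=7272, full=True)
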