Diffstat (limited to '.venv/lib/python3.12/site-packages/r2r')
 .venv/lib/python3.12/site-packages/r2r/__init__.py |  19 +
 .venv/lib/python3.12/site-packages/r2r/mcp.py      | 150 +
 .venv/lib/python3.12/site-packages/r2r/r2r.toml    | 122 +
 .venv/lib/python3.12/site-packages/r2r/serve.py    | 146 +
 4 files changed, 437 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/r2r/__init__.py b/.venv/lib/python3.12/site-packages/r2r/__init__.py
new file mode 100644
index 00000000..0ef83aa4
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/__init__.py
@@ -0,0 +1,19 @@
+from importlib import metadata
+
+from sdk.async_client import R2RAsyncClient
+from sdk.sync_client import R2RClient
+from shared import *
+from shared import __all__ as shared_all
+
+__version__ = metadata.version("r2r")
+
+__all__ = [
+ "R2RAsyncClient",
+ "R2RClient",
+ "__version__",
+ "R2RException",
+] + shared_all
+
+
+def get_version():
+ return __version__
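
The module re-exports the sync and async SDK clients plus everything in `shared`. A minimal usage sketch (assuming a local R2R server on the default port; the positional `base_url` argument and the `retrieval.search` call follow the public R2R SDK, as also used in mcp.py below):

    from r2r import R2RClient, get_version

    print(get_version())  # version string resolved from installed package metadata

    # Point the client at a running R2R server (default port 7272).
    client = R2RClient("http://localhost:7272")
    results = client.retrieval.search(query="What is R2R?")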
diff --git a/.venv/lib/python3.12/site-packages/r2r/mcp.py b/.venv/lib/python3.12/site-packages/r2r/mcp.py
new file mode 100644
index 00000000..33490ea1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/mcp.py
@@ -0,0 +1,150 @@
+# Install on your local machine with `mcp install r2r/mcp.py -v R2R_API_URL=http://localhost:7272`.
+from r2r import R2RClient
+
+
+def id_to_shorthand(id: str) -> str:
+ return str(id)[:7]
+
+
+def format_search_results_for_llm(
+    results,
+) -> str:
+    """
+    Format chunk, graph, web, and document search results into a single
+    plain-text block suitable for use as LLM context.
+
+    Each result is labeled with a shortened source ID so the model can
+    cite it in its answer.
+    """
+    lines = []
+
+ # 1) Chunk search
+ if results.chunk_search_results:
+ lines.append("Vector Search Results:")
+ for c in results.chunk_search_results:
+ lines.append(f"Source ID [{id_to_shorthand(c.id)}]:")
+ lines.append(c.text or "") # or c.text[:200] to truncate
+
+ # 2) Graph search
+ if results.graph_search_results:
+ lines.append("Graph Search Results:")
+ for g in results.graph_search_results:
+ lines.append(f"Source ID [{id_to_shorthand(g.id)}]:")
+ if hasattr(g.content, "summary"):
+ lines.append(f"Community Name: {g.content.name}")
+ lines.append(f"ID: {g.content.id}")
+ lines.append(f"Summary: {g.content.summary}")
+ elif hasattr(g.content, "name") and hasattr(
+ g.content, "description"
+ ):
+ lines.append(f"Entity Name: {g.content.name}")
+ lines.append(f"Description: {g.content.description}")
+ elif (
+ hasattr(g.content, "subject")
+ and hasattr(g.content, "predicate")
+ and hasattr(g.content, "object")
+ ):
+ lines.append(
+ f"Relationship: {g.content.subject}-{g.content.predicate}-{g.content.object}"
+ )
+ # Add metadata if needed
+
+ # 3) Web search
+ if results.web_search_results:
+ lines.append("Web Search Results:")
+ for w in results.web_search_results:
+ lines.append(f"Source ID [{id_to_shorthand(w.id)}]:")
+ lines.append(f"Title: {w.title}")
+ lines.append(f"Link: {w.link}")
+ lines.append(f"Snippet: {w.snippet}")
+
+ # 4) Local context docs
+ if results.document_search_results:
+ lines.append("Local Context Documents:")
+ for doc_result in results.document_search_results:
+ doc_title = doc_result.title or "Untitled Document"
+ doc_id = doc_result.id
+ summary = doc_result.summary
+
+ lines.append(f"Full Document ID: {doc_id}")
+ lines.append(f"Shortened Document ID: {id_to_shorthand(doc_id)}")
+ lines.append(f"Document Title: {doc_title}")
+ if summary:
+ lines.append(f"Summary: {summary}")
+
+ if doc_result.chunks:
+ # Then each chunk inside:
+ for chunk in doc_result.chunks:
+ lines.append(
+ f"\nChunk ID {id_to_shorthand(chunk['id'])}:\n{chunk['text']}"
+ )
+
+ result = "\n".join(lines)
+ return result
+
+
+# Create a FastMCP server
+
+try:
+    from mcp.server.fastmcp import FastMCP
+except ImportError as e:
+    raise ImportError(
+        "MCP is not installed. Please run `pip install mcp`"
+    ) from e
+
+mcp = FastMCP("R2R Retrieval System")
+
+
+# Search tool
+@mcp.tool()
+async def search(query: str) -> str:
+    """
+    Perform a search against the R2R knowledge base.
+
+    Args:
+        query: The question to answer using the knowledge base
+
+    Returns:
+        Relevant context retrieved from the knowledge base, formatted for an LLM
+    """
+ client = R2RClient()
+
+    # Call the search endpoint
+ search_response = client.retrieval.search(
+ query=query,
+ )
+ return format_search_results_for_llm(search_response.results)
+
+
+# RAG query tool
+@mcp.tool()
+async def rag(query: str) -> str:
+ """
+ Perform a Retrieval-Augmented Generation query
+
+ Args:
+ query: The question to answer using the knowledge base
+
+ Returns:
+ A response generated based on relevant context from the knowledge base
+ """
+ client = R2RClient()
+
+ # Call the RAG endpoint
+ rag_response = client.retrieval.rag(
+ query=query,
+ )
+
+ return rag_response.results.generated_answer # type: ignore
+
+
+# Run the server if executed directly
+if __name__ == "__main__":
+ mcp.run()
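
Once registered with `mcp install`, both tools are invoked by an MCP client over the MCP transport. For a quick local smoke test you can also call the decorated coroutines directly, since FastMCP's `@tool()` decorator returns the original function (a sketch; assumes a running R2R server and `R2R_API_URL` set as in the header comment):

    import asyncio

    from r2r.mcp import rag, search

    async def main():
        print(await search("What is retrieval-augmented generation?"))
        print(await rag("What is retrieval-augmented generation?"))

    asyncio.run(main())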
diff --git a/.venv/lib/python3.12/site-packages/r2r/r2r.toml b/.venv/lib/python3.12/site-packages/r2r/r2r.toml
new file mode 100644
index 00000000..a07893ab
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/r2r.toml
@@ -0,0 +1,122 @@
+[app]
+# app settings are globally available, e.g. `r2r_config.agent.app`
+# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var
+default_max_documents_per_user = 10_000
+default_max_chunks_per_user = 10_000_000
+default_max_collections_per_user = 5_000
+
+# Set the default max upload size to 200 GB for local testing
+default_max_upload_size = 214748364800
+
+# LLM used for internal operations, like deriving conversation names
+fast_llm = "openai/gpt-4o-mini"
+
+# LLM used for user-facing output, like RAG replies
+quality_llm = "openai/gpt-4o"
+
+# LLM used for ingesting visual inputs
+vlm = "openai/gpt-4o"
+
+# LLM used for transcription
+audio_lm = "openai/whisper-1"
+
+# Reasoning model, used for `research` agent
+reasoning_llm = "openai/o3-mini"
+# Planning model, used for `research` agent
+planning_llm = "anthropic/claude-3-7-sonnet-20250219"
+
+
+[agent]
+rag_agent_static_prompt = "static_rag_agent"
+rag_agent_dynamic_prompt = "dynamic_rag_agent"
+# The following tools are available to the `rag` agent
+rag_tools = ["search_file_descriptions", "search_file_knowledge", "get_file_content"] # can add "web_search" | "web_scrape"
+# The following tools are available to the `research` agent
+research_tools = ["rag", "reasoning", "critique", "python_executor"]
+
+[auth]
+provider = "r2r"
+access_token_lifetime_in_minutes = 60000
+refresh_token_lifetime_in_days = 7
+require_authentication = false
+require_email_verification = false
+default_admin_email = "admin@example.com"
+default_admin_password = "change_me_immediately"
+
+[completion]
+provider = "r2r"
+concurrent_request_limit = 64
+
+ [completion.generation_config]
+ temperature = 0.1
+ top_p = 1
+ max_tokens_to_sample = 4_096
+ stream = false
+ add_generation_kwargs = { }
+
+[crypto]
+provider = "bcrypt"
+
+[database]
+default_collection_name = "Default"
+default_collection_description = "Your default collection."
+collection_summary_prompt = "collection_summary"
+
+ [database.graph_creation_settings]
+ graph_entity_description_prompt = "graph_entity_description"
+ graph_extraction_prompt = "graph_extraction"
+ entity_types = [] # if empty, all entities are extracted
+ relation_types = [] # if empty, all relations are extracted
+ automatic_deduplication = true # enable automatic deduplication of entities
+
+ [database.graph_enrichment_settings]
+ graph_communities_prompt = "graph_communities"
+
+[embedding]
+provider = "litellm"
+# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512`
+# For advanced applications, use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+# rerank_model = "huggingface/mixedbread-ai/mxbai-rerank-large-v1" # reranking model
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+initial_backoff = 1.0
+quantization_settings = { quantization_type = "FP32" }
+
+[completion_embedding]
+# Generally this should be the same as the embedding config, but advanced users may want to run with a different provider to reduce latency
+provider = "litellm"
+base_model = "openai/text-embedding-3-small"
+base_dimension = 512
+batch_size = 128
+add_title_as_prefix = false
+concurrent_request_limit = 256
+
+[ingestion]
+provider = "r2r"
+chunking_strategy = "recursive"
+chunk_size = 1_024
+chunk_overlap = 512
+excluded_parsers = ["mp4"]
+automatic_extraction = true # enable automatic extraction of entities and relations
+
+ [ingestion.chunk_enrichment_settings]
+ chunk_enrichment_prompt = "chunk_enrichment"
+ enable_chunk_enrichment = false # disabled by default
+ n_chunks = 2 # the number of chunks (both preceding and succeeding) to use in enrichment
+
+ [ingestion.extra_parsers]
+ pdf = "zerox"
+
+[logging]
+provider = "r2r"
+log_table = "logs"
+log_info_table = "log_info"
+
+[orchestration]
+provider = "simple"
+
+[email]
+provider = "console_mock" # `smtp` | `sendgrid` supported
diff --git a/.venv/lib/python3.12/site-packages/r2r/serve.py b/.venv/lib/python3.12/site-packages/r2r/serve.py
new file mode 100644
index 00000000..7e9226bc
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/r2r/serve.py
@@ -0,0 +1,146 @@
+import argparse
+import asyncio
+import logging
+import os
+import sys
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+try:
+ from core import R2RApp, R2RBuilder, R2RConfig
+ from core.utils.logging_config import configure_logging
+except ImportError as e:
+ logger.error(
+ f"Failed to start server: core dependencies not installed: {e}"
+ )
+ logger.error("To run the server, install the required dependencies:")
+ logger.error("pip install 'r2r[core]'")
+ sys.exit(1)
+
+
+async def create_app(
+ config_name: Optional[str] = None,
+ config_path: Optional[str] = None,
+ full: bool = False,
+) -> "R2RApp":
+ """
+ Creates and returns an R2R application instance based on the provided
+ or environment-sourced configuration.
+ """
+ # If arguments not passed, fall back to environment variables
+ config_name = config_name or os.getenv("R2R_CONFIG_NAME")
+ config_path = config_path or os.getenv("R2R_CONFIG_PATH")
+
+ if config_path and config_name:
+ raise ValueError(
+ f"Cannot specify both config_path and config_name, got {config_path} and {config_name}"
+ )
+
+ if not config_path and not config_name:
+ # If neither is specified nor set in environment,
+ # default to 'full' if --full is True, else 'default'
+ config_name = "full" if full else "default"
+
+ try:
+ r2r_instance = await R2RBuilder(
+ config=R2RConfig.load(config_name, config_path)
+ ).build()
+
+ # Start orchestration worker
+ await r2r_instance.orchestration_provider.start_worker()
+ return r2r_instance
+ except ImportError as e:
+ logger.error(f"Failed to initialize R2R: {e}")
+ logger.error(
+ "Please check your configuration and installed dependencies"
+ )
+ sys.exit(1)
+
+
+def run_server(
+ host: Optional[str] = None,
+ port: Optional[int] = None,
+ config_name: Optional[str] = None,
+ config_path: Optional[str] = None,
+ full: bool = False,
+):
+ """
+ Runs the R2R server with the provided or environment-based settings.
+ """
+ # Overwrite environment variables if arguments are explicitly passed
+ if host is not None:
+ os.environ["R2R_HOST"] = host
+ if port is not None:
+ os.environ["R2R_PORT"] = str(port)
+ if config_path is not None:
+ os.environ["R2R_CONFIG_PATH"] = config_path
+ if config_name is not None:
+ os.environ["R2R_CONFIG_NAME"] = config_name
+
+ # Fallback to environment or defaults if necessary
+ final_host = os.getenv("R2R_HOST", "0.0.0.0")
+ final_port = int(os.getenv("R2R_PORT", "7272"))
+
+ try:
+ configure_logging()
+ except Exception as e:
+ logger.error(f"Failed to configure logging: {e}")
+
+ try:
+
+ async def start():
+ app = await create_app(config_name, config_path, full)
+ await app.serve(final_host, final_port)
+
+ asyncio.run(start())
+ except Exception as e:
+ logger.error(f"Failed to start R2R server: {e}")
+ sys.exit(1)
+
+
+def main():
+ """
+ Parse command-line arguments and then run the server.
+ """
+ parser = argparse.ArgumentParser(description="Run the R2R server.")
+ parser.add_argument(
+ "--host",
+ default=None,
+ help="Host to bind to. Overrides R2R_HOST env if provided.",
+ )
+ parser.add_argument(
+ "--port",
+ default=None,
+ type=int,
+ help="Port to bind to. Overrides R2R_PORT env if provided.",
+ )
+ parser.add_argument(
+ "--config-path",
+ default=None,
+ help="Path to the configuration file. Overrides R2R_CONFIG_PATH env if provided.",
+ )
+ parser.add_argument(
+ "--config-name",
+ default=None,
+ help="Name of the configuration. Overrides R2R_CONFIG_NAME env if provided.",
+ )
+ parser.add_argument(
+ "--full",
+ action="store_true",
+ help="Use the 'full' config if neither config-path nor config-name is specified.",
+ )
+
+ args = parser.parse_args()
+
+ run_server(
+ host=args.host,
+ port=args.port,
+ config_name=args.config_name,
+ config_path=args.config_path,
+ full=args.full,
+ )
+
+
+if __name__ == "__main__":
+ main()
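
For completeness, a sketch of starting the server programmatically with the same knobs the CLI flags expose (assumes the `r2r[core]` dependencies are installed):

    from r2r.serve import run_server

    # Equivalent to: python -m r2r.serve --port 7272 --config-name default
    run_server(host="0.0.0.0", port=7272, config_name="default")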