diff options
Diffstat (limited to 'R2R')
194 files changed, 26088 insertions, 0 deletions
diff --git a/R2R/.dockerignore b/R2R/.dockerignore new file mode 100755 index 00000000..c4730648 --- /dev/null +++ b/R2R/.dockerignore @@ -0,0 +1,20 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env +pip-log.txt +pip-delete-this-directory.txt +.tox +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.mypy_cache +.pytest_cache +.hypothesis diff --git a/R2R/.flake8 b/R2R/.flake8 new file mode 100755 index 00000000..5491a9b7 --- /dev/null +++ b/R2R/.flake8 @@ -0,0 +1,3 @@ +[flake8] +exclude = playground/*,web/*,chat/*,docs/*,r2r/examples/,r2r/vecs,r2r/vecs/*,tests +ignore = E501, E722, W503, E203, F541, W293, W291, E266, F601, F403, F405 diff --git a/R2R/.gitattributes b/R2R/.gitattributes new file mode 100755 index 00000000..c8199463 --- /dev/null +++ b/R2R/.gitattributes @@ -0,0 +1,2 @@ +*.html linguist-documentation +*.ipynb linguist-documentation diff --git a/R2R/.github/.codecov.yml b/R2R/.github/.codecov.yml new file mode 100755 index 00000000..8fd648c4 --- /dev/null +++ b/R2R/.github/.codecov.yml @@ -0,0 +1,5 @@ +coverage: + status: + project: + default: + threshold: 100% diff --git a/R2R/.github/ISSUE_TEMPLATE/bug_report.md b/R2R/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100755 index 00000000..dd84ea78 --- /dev/null +++ b/R2R/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 
22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/R2R/.github/ISSUE_TEMPLATE/custom.md b/R2R/.github/ISSUE_TEMPLATE/custom.md new file mode 100755 index 00000000..b894315f --- /dev/null +++ b/R2R/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,8 @@ +--- +name: Custom issue template +about: Describe this issue template's purpose here. +title: '' +labels: '' +assignees: '' + +--- diff --git a/R2R/.github/ISSUE_TEMPLATE/feature_request.md b/R2R/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100755 index 00000000..bbcbbe7d --- /dev/null +++ b/R2R/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/R2R/.github/workflows/build-main.yml b/R2R/.github/workflows/build-main.yml new file mode 100755 index 00000000..b91de2c3 --- /dev/null +++ b/R2R/.github/workflows/build-main.yml @@ -0,0 +1,83 @@ +name: Build and Publish Docker Image + +on: + workflow_dispatch: + inputs: + version: + description: 'Version to publish (leave empty to use default versioning)' + required: false + type: string + +jobs: + build-and-publish: + runs-on: ubuntu-latest + permissions: + packages: write + contents: read + id-token: write + actions: write + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Google Auth + uses: 'google-github-actions/auth@v2' + with: + credentials_json: '${{ secrets.GCP_SA_KEY }}' + + - name: Set up Cloud SDK + uses: 'google-github-actions/setup-gcloud@v2' + + - name: Configure SDK + run: 'gcloud auth configure-docker us-east1-docker.pkg.dev' + + - name: Docker Auth + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Determine version to use + id: version + run: | + if [ -n "${{ github.event.inputs.version }}" ]; then + echo "RELEASE_VERSION=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT + else + echo "RELEASE_VERSION=main" >> $GITHUB_OUTPUT + fi + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: | + emrgntcmplxty/r2r + us-east1-docker.pkg.dev/alert-rush-397022/sciphi-r2r/r2r + tags: | + type=raw,value=${{ steps.version.outputs.RELEASE_VERSION }} + type=raw,value=latest + + - name: Build and Push Docker Image + uses: docker/build-push-action@v5 + with: + context: . 
+ file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + + - name: Trigger Dependent Repositories + if: success() + run: | + curl -X POST \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token ${{ secrets.PERSONAL_ACCESS_TOKEN }}" \ + https://api.github.com/repos/SciPhi-AI/R2R-basic-rag-template/dispatches \ + -d '{"event_type": "rebuild", "client_payload": {"r2r_tag": "${{ steps.version.outputs.RELEASE_VERSION }}"}}' diff --git a/R2R/.github/workflows/build-release.yml b/R2R/.github/workflows/build-release.yml new file mode 100755 index 00000000..996f7334 --- /dev/null +++ b/R2R/.github/workflows/build-release.yml @@ -0,0 +1,43 @@ +name: Links + +on: + push: + branches: + - main + pull_request: + branches: + - main + schedule: + - cron: "00 18 * * *" + +jobs: + linkChecker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Link Checker + id: lychee + uses: lycheeverse/lychee-action@v1 + continue-on-error: true + with: + args: "--output ./docs/lychee/out.md --config ./docs/.lycheerc" + + - name: Get Lychee Exit Code + id: get-exit-code + run: echo "lychee_exit_code=$?" >> $GITHUB_ENV + + - name: Create Custom Report + if: failure() + run: | + echo "Lychee doesn't play nice with relative imports for .mdx files. Add a link to the .lycheeignore if needed!" 
> ./docs/lychee/custom_report.md + echo "" >> ./docs/lychee/custom_report.md + cat ./docs/lychee/out.md >> ./docs/lychee/custom_report.md + + - name: Create Issue From File + if: failure() + uses: peter-evans/create-issue-from-file@v4 + with: + title: Link Checker Report + content-filepath: ./docs/lychee/custom_report.md + labels: report, automated issue diff --git a/R2R/.github/workflows/ci.yml b/R2R/.github/workflows/ci.yml new file mode 100755 index 00000000..d24bf06e --- /dev/null +++ b/R2R/.github/workflows/ci.yml @@ -0,0 +1,85 @@ +name: CI + +on: + push: + branches: + - main + - '**/feature/*' + pull_request: + branches: + - main + +jobs: + pre-commit: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12.4' # Specify your Python version here + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install poetry + poetry install + + - name: Install pre-commit + run: poetry run pre-commit install + + - name: Run pre-commit + run: poetry run pre-commit run --all-files + + pytest: + runs-on: ubuntu-latest + timeout-minutes: 15 # Increased timeout to accommodate Ollama setup + + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + POSTGRES_DBNAME: ${{ secrets.POSTGRES_DBNAME }} + POSTGRES_HOST: ${{ secrets.POSTGRES_HOST }} + POSTGRES_PORT: ${{ secrets.POSTGRES_PORT }} + POSTGRES_USER: ${{ secrets.POSTGRES_USER }} + POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }} + POSTGRES_VECS_COLLECTION: ${{ secrets.POSTGRES_VECS_COLLECTION }} + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12.4' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install poetry + poetry install + + - name: Install Ollama + run: | + curl -fsSL https://ollama.com/install.sh | sudo -E sh + + - name: Start 
Ollama server + run: | + ollama serve & + sleep 5 + curl -i http://localhost:11434 + + - name: Pull Ollama model + run: | + ollama pull llama2 + + - name: Run tests + run: poetry run pytest tests/ -k "not redis and not sentence_transformer" + + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/R2R/.github/workflows/links.yml b/R2R/.github/workflows/links.yml new file mode 100755 index 00000000..75ec1758 --- /dev/null +++ b/R2R/.github/workflows/links.yml @@ -0,0 +1,39 @@ +name: Links + +on: + repository_dispatch: + workflow_dispatch: + schedule: + - cron: "00 18 * * *" + +jobs: + linkChecker: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Link Checker + id: lychee + uses: lycheeverse/lychee-action@v1 + continue-on-error: true + with: + args: "--output ./docs/lychee/out.md --config ./docs/.lycheerc" + + - name: Get Lychee Exit Code + id: get-exit-code + run: echo "lychee_exit_code=$?" >> $GITHUB_ENV + + - name: Create Custom Report + if: failure() + run: | + echo "Lychee doesn't play nice with relative imports for .mdx files. Add a link to the .lycheeignore if needed!" 
> ./docs/lychee/custom_report.md + echo "" >> ./docs/lychee/custom_report.md + cat ./docs/lychee/out.md >> ./docs/lychee/custom_report.md + + - name: Create Issue From File + if: failure() + uses: peter-evans/create-issue-from-file@v4 + with: + title: Link Checker Report + content-filepath: ./docs/lychee/custom_report.md + labels: report, automated issue diff --git a/R2R/.github/workflows/publish-to-pypi.yml b/R2R/.github/workflows/publish-to-pypi.yml new file mode 100755 index 00000000..79ac0c83 --- /dev/null +++ b/R2R/.github/workflows/publish-to-pypi.yml @@ -0,0 +1,27 @@ +name: Publish to PyPI + +on: + push: + tags: + - "*" + workflow_dispatch: # This line adds manual trigger support + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install poetry + run: pip install poetry + + - name: Build and publish + run: | + poetry build + poetry publish --username __token__ --password ${{ secrets.PYPI_API_TOKEN }} diff --git a/R2R/.gitignore b/R2R/.gitignore new file mode 100755 index 00000000..95c247fa --- /dev/null +++ b/R2R/.gitignore @@ -0,0 +1,20 @@ +.env* +.DS_Store +*.gguf +logs/ +workspace/ +uploads/ +env/ +**/__pycache__ +dump/* +.next +node_modules + +coverage.xml +.coverage + +**/*.sqlite* +**/*.sqlite3* + +r2r/examples/data/* +.aider* diff --git a/R2R/.isort.cfg b/R2R/.isort.cfg new file mode 100755 index 00000000..927aa0b3 --- /dev/null +++ b/R2R/.isort.cfg @@ -0,0 +1,10 @@ +[settings] +profile = black +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 79 +sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER +skip = .tox,__pycache__,*.pyc,venv*/*,reports,venv,env,node_modules,.env,.venv,dist,my_env diff --git a/R2R/.pre-commit-config.yaml b/R2R/.pre-commit-config.yaml new file mode 100755 index 
00000000..58fb9158 --- /dev/null +++ b/R2R/.pre-commit-config.yaml @@ -0,0 +1,32 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + - id: check-ast + - id: check-yaml + + - repo: local + hooks: + - id: isort + name: isort + entry: poetry run isort + language: system + types: [python] + args: ["--check", "--diff"] + + - id: black + name: black + entry: poetry run black + language: system + types: [python] + args: ["--check", "--diff"] + + - id: flake8 + name: flake8 + entry: poetry run flake8 + language: system + types: [python] + args: ["--config=.flake8"] diff --git a/R2R/CODE_OF_CONDUCT.md b/R2R/CODE_OF_CONDUCT.md new file mode 100755 index 00000000..56e0cb92 --- /dev/null +++ b/R2R/CODE_OF_CONDUCT.md @@ -0,0 +1,42 @@ +# Contributor Covenant Code of Conduct Summary + +TL;DR: Be nice. Be respectful. Be professional. Don't be a jerk. + +## Commitment + +We strive for a harassment-free, inclusive, and healthy community experience for all, regardless of personal characteristics or background. + +## Expected Behaviors + +- **Empathy and Kindness**: Show understanding and kindness to others. +- **Respect**: Value different viewpoints and experiences. +- **Constructive Feedback**: Offer and accept feedback graciously. +- **Accountability**: Own up to mistakes and learn from them. +- **Community Focus**: Prioritize what's best for the whole community. + +## Unacceptable Behaviors + +- **Sexualized Content**: Avoid sexual language and unwelcome sexual attention. +- **Disrespect**: No trolling, insults, or derogatory comments. +- **Harassment**: Public or private harassment is unacceptable. +- **Privacy Violations**: Do not share private information without consent. +- **Inappropriate Conduct**: Behavior not suitable for a professional setting is not allowed. 
+ +## Enforcement + +- **Leaders' Responsibility**: Leaders clarify standards and take corrective actions. +- **Scope**: Applies to all community spaces and when representing the community. +- **Reporting**: Incidents can be reported to owen@sciphi.ai. + +## Enforcement Guidelines + +- **Correction**: Private warning for unprofessional behavior. +- **Warning**: Consequences for repeated violations. +- **Temporary Ban**: For serious or sustained inappropriate behavior. +- **Permanent Ban**: For egregious violations, including harassment. + +## Attribution + +Adapted from the [Contributor Covenant version 2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html), with Community Impact Guidelines inspired by [Mozilla's code of conduct enforcement ladder](https://www.mozilla.org/en-US/about/governance/policies/participation/). + +For more details and FAQs, visit [https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq). Translations are available [here](https://www.contributor-covenant.org/translations). diff --git a/R2R/CONTRIBUTING.md b/R2R/CONTRIBUTING.md new file mode 100755 index 00000000..a5469158 --- /dev/null +++ b/R2R/CONTRIBUTING.md @@ -0,0 +1,18 @@ +# R2R Contribution Guide + +## Quick Start + +- **Pre-Discussion**: Feel free to propose your ideas via issues, [Discord](https://discord.gg/p6KqD2kjtB) if you want to get early feedback. +- **Code of Conduct**: Adhere to our [Code of Conduct](./CODE_OF_CONDUCT.md) in all interactions. +- **Pull Requests (PRs)**: Follow the PR process for contributions. + +## Pull Request Process + +1. **Dependencies**: Ensure all dependencies are necessary and documented. +2. **Documentation**: Update README.md with any changes to interfaces, including new environment variables, exposed ports, and other relevant details. +3. **Versioning**: Increment version numbers in examples and README.md following [SemVer](http://semver.org/). +4. 
**Review**: A PR can be merged after receiving approval from at least two other developers. If you lack merge permissions, request a review for merging. + +## Attribution + +This Code of Conduct adapts from the [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4/). diff --git a/R2R/Dockerfile b/R2R/Dockerfile new file mode 100755 index 00000000..ffce8344 --- /dev/null +++ b/R2R/Dockerfile @@ -0,0 +1,37 @@ +FROM python:3.10-slim AS builder + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc g++ musl-dev curl libffi-dev gfortran libopenblas-dev \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +RUN pip install --no-cache-dir poetry + +# Copy the dependencies files +COPY pyproject.toml poetry.lock* ./ + +# Install the dependencies, including gunicorn and uvicorn +RUN poetry config virtualenvs.create false \ + && poetry install --no-dev --no-root \ + && pip install --no-cache-dir gunicorn uvicorn + +# Create the final image +FROM python:3.10-slim + +WORKDIR /app + +# Copy the installed packages from the builder +COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# Copy the application and config +COPY r2r /app/r2r +COPY config.json /app/config.json + +# Expose the port +EXPOSE 8000 + +# Run the application +CMD ["uvicorn", "r2r.main.app_entry:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/R2R/LICENSE.md b/R2R/LICENSE.md new file mode 100755 index 00000000..ca0f8c51 --- /dev/null +++ b/R2R/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 EmergentAGI Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/R2R/MANIFEST.md b/R2R/MANIFEST.md new file mode 100755 index 00000000..cb752d09 --- /dev/null +++ b/R2R/MANIFEST.md @@ -0,0 +1,3 @@ +# The R2R Manifest + +We will do our best to build useful AI tools for developers _(before AGI)_. 
diff --git a/R2R/README.md b/R2R/README.md new file mode 100755 index 00000000..903afba8 --- /dev/null +++ b/R2R/README.md @@ -0,0 +1,317 @@ +<p align="left"> + <a href="https://r2r-docs.sciphi.ai"><img src="https://img.shields.io/badge/docs.sciphi.ai-3F16E4" alt="Docs"></a> + <a href="https://discord.gg/p6KqD2kjtB"><img src="https://img.shields.io/discord/1120774652915105934?style=social&logo=discord" alt="Discord"></a> + <a href="https://github.com/SciPhi-AI"><img src="https://img.shields.io/github/stars/SciPhi-AI/R2R" alt="Github Stars"></a> + <a href="https://github.com/SciPhi-AI/R2R/pulse"><img src="https://img.shields.io/github/commit-activity/w/SciPhi-AI/R2R" alt="Commits-per-week"></a> + <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-purple.svg" alt="License: MIT"></a> +</p> + +<img src="./assets/r2r.png" alt="R2R Answer Engine"> +<h3 align="center"> +The ultimate open source RAG answer engine +</h3> + +# About +R2R was designed to bridge the gap between local LLM experimentation and scalable, production-ready Retrieval-Augmented Generation (RAG). R2R provides a comprehensive and SOTA RAG system for developers, built around a RESTful API for ease of use. + +For a more complete view of R2R, check out the [full documentation](https://r2r-docs.sciphi.ai/). + +## Key Features +- **📁 Multimodal Support**: Ingest files ranging from `.txt`, `.pdf`, `.json` to `.png`, `.mp3`, and more. +- **🔍 Hybrid Search**: Combine semantic and keyword search with reciprocal rank fusion for enhanced relevancy. +- **🔗 Graph RAG**: Automatically extract relationships and build knowledge graphs. +- **🗂️ App Management**: Efficiently manage documents and users with rich observability and analytics. +- **🌐 Client-Server**: RESTful API support out of the box. +- **🧩 Configurable**: Provision your application using intuitive configuration files. +- **🔌 Extensible**: Develop your application further with easy builder + factory pattern. 
+- **🖥️ Dashboard**: Use the [R2R Dashboard](https://github.com/SciPhi-AI/R2R-Dashboard), an open-source React+Next.js app for a user-friendly interaction with R2R. + +## Table of Contents +1. [Install](#install) +2. [R2R Quickstart](#r2r-quickstart) +3. [R2R Dashboard](#r2r-dashboard) +4. [Community and Support](#community-and-support) +5. [Contributing](#contributing) + +# Install + +> [!NOTE] +> Windows users are advised to use Docker to run R2R. + +<details> +<summary><b>Installing with Pip</b> 🐍 </summary> + +```bash +pip install r2r + +# setup env, can freely replace `demo_vecs` +export OPENAI_API_KEY=sk-... +export POSTGRES_USER=YOUR_POSTGRES_USER +export POSTGRES_PASSWORD=YOUR_POSTGRES_PASSWORD +export POSTGRES_HOST=YOUR_POSTGRES_HOST +export POSTGRES_PORT=YOUR_POSTGRES_PORT +export POSTGRES_DBNAME=YOUR_POSTGRES_DBNAME +export POSTGRES_VECS_COLLECTION=demo_vecs +``` +</details> + +<details open> +<summary><b>Installing with Docker</b> 🐳</summary> +Docker allows users to get started with R2R seamlessly—providing R2R, the R2R Dashboard, and a pgvector database all in one place. + +First, clone the R2R repository: +```bash +git clone https://github.com/SciPhi-AI/R2R.git +cd R2R +# for R2R CLI and Python client +pip install . +``` + +Then, run the following command to start all containers: + +For hosted LLMs (e.g., OpenAI): +```bash +# Be sure to set an OpenAI API key +export OPENAI_API_KEY=sk-... +export CONFIG_NAME=default +docker-compose up -d +``` + +For local LLMs (e.g., Ollama): +```bash +export OLLAMA_API_BASE=http://host.docker.internal:11434 +export CONFIG_NAME=local_ollama +docker-compose up -d +``` + +Note: Settings relating to Postgres+pgvector can be overridden by setting the appropriate environment variables before calling `docker-compose`. 
+```bash +export POSTGRES_USER=$YOUR_POSTGRES_USER +export POSTGRES_PASSWORD=$YOUR_POSTGRES_PASSWORD +export POSTGRES_HOST=$YOUR_POSTGRES_HOST +export POSTGRES_PORT=$YOUR_POSTGRES_PORT +export POSTGRES_DBNAME=$YOUR_POSTGRES_DBNAME +export POSTGRES_VECS_COLLECTION=$MY_VECS_COLLECTION +docker-compose up -d +``` +The `POSTGRES_VECS_COLLECTION` defines the collection where all R2R related tables reside. This collection should be changed when selecting a new embedding model. + +</details> + +# Updates +Star R2R on GitHub by clicking "Star" in the upper right hand corner of the page to be instantly notified of new releases. + + +# R2R Quickstart + +## Demo Video +<div align="center"> + <a href="https://youtu.be/oZzfi_AUNqo"> + <img src="https://img.youtube.com/vi/oZzfi_AUNqo/0.jpg" alt="Watch the video"> + </a> +</div> + + +## Start the R2R server +<details open> +<summary><b>Start the R2R server in Docker</b></summary> + +Edit `r2r.env`, the environment file that defines the DB names and sets the keys to be used by R2R. Below is an example of the file contents: +```plaintext + +# Environment variables for LLM provider(s) +export OPENAI_API_KEY=sk-ajdsfioadufaiouweiru923048-910235r8fpal... +# Environment variables for the Postgres database +export POSTGRES_USER=you +export POSTGRES_PASSWORD=youpassword +export POSTGRES_HOST=yourhost +export POSTGRES_PORT=5432 +export POSTGRES_DBNAME=youcomeupwiththis +export POSTGRES_VECS_COLLECTION=you_rag_vecs +export CONFIG_OPTION=default + +export STORAGE_DIRECTORY=/home/user/code/dir_a/storit +export CODE_DIRECTORY=/home/user/code/dir_a/dir_b +``` + +Go to the R2R directory and execute the run bash script. The script will load the file r2r.env into the environment and run the docker container. + +**WATCH OUT** for the existing volumes in your docker instance. If the postgres details match an existing container, but the postgres db does not contain the proper tables, your docker composition will fail. 
+ +```bash +cd R2R +sh run.sh +docker exec -it rag_eval_devenv /bin/bash +``` + +</details> + +<details closed> +<summary><b>Serving the R2R CLI</b> ✈️ </summary> + +```bash +r2r serve --port=8000 +``` + +```plaintext Terminal Output +2024-06-26 16:54:46,998 - INFO - r2r.core.providers.vector_db_provider - Initializing VectorDBProvider with config extra_fields={} provider='pgvector' collection_name='demo_vecs'. +2024-06-26 16:54:48,054 - INFO - r2r.core.providers.embedding_provider - Initializing EmbeddingProvider with config extra_fields={'text_splitter': {'type': 'recursive_character', 'chunk_size': 512, 'chunk_overlap': 20}} provider='openai' base_model='text-embedding-3-small' base_dimension=512 rerank_model=None rerank_dimension=None rerank_transformer_type=None batch_size=128. +2024-06-26 16:54:48,639 - INFO - r2r.core.providers.llm_provider - Initializing LLM provider with config: extra_fields={} provider='litellm' +``` + +</details> + +<details> +<summary><b>Serving with Docker</b> 🐳</summary> + +Successfully completing the installation steps above results in an R2R application being served over port `8000`. + +</details> + +## Ingest a file + +```bash +r2r ingest +# can be called with additional argument, +# e.g. `r2r ingest /path/to/your_file_1 /path/to/your_file_2 ...` +``` + +```plaintext +{'results': {'processed_documents': ["File '.../aristotle.txt' processed successfully."], 'skipped_documents': []}} +``` + + +## Perform a search + + +```bash +r2r search --query="who was aristotle?" --do-hybrid-search +``` + +```plaintext +{'results': {'vector_search_results': [ + { + 'id': '7ed3a01c-88dc-5a58-a68b-6e5d9f292df2', + 'score': 0.780314067545999, + 'metadata': { + 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', + 'title': 'aristotle.txt', + 'version': 'v0', + 'chunk_order': 0, + 'document_id': 'c9bdbac7-0ea3-5c9e-b590-018bd09b127b', + 'extraction_id': '472d6921-b4cd-5514-bf62-90b05c9102cb', + ... +``` + +## Perform RAG + + +```bash +r2r rag --query="who was aristotle?" --do-hybrid-search +``` + +```plaintext + +Search Results: +{'vector_search_results': [ + {'id': '7ed3a01c-88dc-5a58-a68b-6e5d9f292df2', + 'score': 0.7802911996841491, + 'metadata': {'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic schoo + ... +Completion: +{'results': [ + { + 'id': 'chatcmpl-9eXL6sKWlUkP3f6QBnXvEiKkWKBK4', + 'choices': [ + { + 'finish_reason': 'stop', + 'index': 0, + 'logprobs': None, + 'message': { + 'content': "Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose writings covered a broad range of subjects including the natural sciences, + ... +``` + + + +## Stream a RAG Response + + +```bash +r2r rag --query="who was aristotle?" --stream --do-hybrid-search +``` + +```plaintext +<search>"{\"id\":\"004ae2e3-c042-50f2-8c03-d4c282651fba\",\"score\":0.7803140675 ...</search> +<completion>Aristotle was an Ancient Greek philosopher and polymath who lived from 384 to 322 BC [1]. 
He was born in Stagira, Chalcidi....</completion> +``` + +# Hello r2r + +Building with R2R is easy - see the `hello_r2r` example below: + +```python + +from r2r import Document, GenerationConfig, R2R + +app = R2R() # You may pass a custom configuration to `R2R` + +app.ingest_documents( + [ + Document( + type="txt", + data="John is a person that works at Google.", + metadata={}, + ) + ] +) + +rag_results = app.rag( + "Who is john", GenerationConfig(model="gpt-3.5-turbo", temperature=0.0) +) +print(f"Search Results:\n{rag_results.search_results}") +print(f"Completion:\n{rag_results.completion}") + +# RAG Results: +# Search Results: +# AggregateSearchResult(vector_search_results=[VectorSearchResult(id=2d71e689-0a0e-5491-a50b-4ecb9494c832, score=0.6848798582029441, metadata={'text': 'John is a person that works at Google.', 'version': 'v0', 'chunk_order': 0, 'document_id': 'ed76b6ee-dd80-5172-9263-919d493b439a', 'extraction_id': '1ba494d7-cb2f-5f0e-9f64-76c31da11381', 'associatedQuery': 'Who is john'})], kg_search_results=None) +# Completion: +# ChatCompletion(id='chatcmpl-9g0HnjGjyWDLADe7E2EvLWa35cMkB', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='John is a person that works at Google [1].', role='assistant', function_call=None, tool_calls=None))], created=1719797903, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=11, prompt_tokens=145, total_tokens=156)) +``` + +# R2R Dashboard + +Interact with R2R using our [open-source React+Next.js dashboard](https://github.com/SciPhi-AI/R2R-Dashboard). Check out the [Dashboard Cookbook](https://r2r-docs.sciphi.ai/cookbooks/dashboard) to get started! 
+ +# Community and Support + +- [Discord](https://discord.gg/p6KqD2kjtB): Chat live with maintainers and community members +- [Github Issues](https://github.com/SciPhi-AI/R2R/issues): Report bugs and request features + +**Explore our [R2R Docs](https://r2r-docs.sciphi.ai/) for tutorials and cookbooks on various R2R features and integrations, including:** + +### RAG Cookbooks +- [Multiple LLMs](https://r2r-docs.sciphi.ai/cookbooks/multiple-llms): A simple cookbook showing how R2R supports multiple LLMs. +- [Hybrid Search](https://r2r-docs.sciphi.ai/cookbooks/hybrid-search): A brief introduction to running hybrid search with R2R. +- [Multimodal RAG](https://r2r-docs.sciphi.ai/cookbooks/multimodal): A cookbook on multimodal RAG with R2R. +- [Knowledge Graphs](https://r2r-docs.sciphi.ai/cookbooks/knowledge-graph): A walkthrough of automatic knowledge graph generation with R2R. +- [Local RAG](https://r2r-docs.sciphi.ai/cookbooks/local-rag): A quick cookbook demonstration of how to run R2R with local LLMs. +- [Reranking](https://r2r-docs.sciphi.ai/cookbooks/rerank-search): A short guide on how to apply reranking to R2R results. + +### App Features +- [Client-Server](https://r2r-docs.sciphi.ai/cookbooks/client-server): An extension of the basic `R2R Quickstart` with client-server interactions. +- [Document Management](https://r2r-docs.sciphi.ai/cookbooks/document-management): A cookbook showing how to manage your documents with R2R. +- [Analytics & Observability](https://r2r-docs.sciphi.ai/cookbooks/observablity): A cookbook showing R2Rs end to end logging and analytics. +- [Dashboard](https://r2r-docs.sciphi.ai/cookbooks/dashboard): A how-to guide on connecting with the R2R Dashboard. + +# Contributing + +We welcome contributions of all sizes! Here's how you can help: + +- Open a PR for new features, improvements, or better documentation. 
+- Submit a [feature request](https://github.com/SciPhi-AI/R2R/issues/new?assignees=&labels=&projects=&template=feature_request.md&title=) or [bug report](https://github.com/SciPhi-AI/R2R/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=) + +### Our Contributors +<a href="https://github.com/SciPhi-AI/R2R/graphs/contributors"> + <img src="https://contrib.rocks/image?repo=SciPhi-AI/R2R" /> +</a> diff --git a/R2R/SECURITY.md b/R2R/SECURITY.md new file mode 100755 index 00000000..68a2f2fd --- /dev/null +++ b/R2R/SECURITY.md @@ -0,0 +1,39 @@ + +# Security Policy + +At R2R, we take the security of our project and its users seriously. We appreciate the contributions of security researchers and developers in helping us identify and address potential vulnerabilities. + +## Reporting a Vulnerability + +If you discover a potential security vulnerability in R2R, please follow these steps to report it: + +1. Create a new issue on the GitHub repository using the "Vulnerability Disclosure" issue template. +2. Set the issue as "confidential" if you are unsure whether the issue is a potential vulnerability or not. It is easier to make a confidential issue public than to remediate an issue that should have been confidential. +3. Label the issue with the `security` label at a minimum. Additional labels may be applied by the security team and other project maintainers to assist with the triage process. +4. Provide a detailed description of the vulnerability, including steps to reproduce, potential impact, and any other relevant information. +5. If the issue contains sensitive information or user-specific data, such as private repository contents, assign the `keep confidential` label to the issue. If possible, avoid including such information directly in the issue and instead provide links to resources that are only accessible to the project maintainers. 
+ +## Vulnerability Handling Process + +Once a vulnerability is reported, the R2R security team will follow these steps: + +1. Acknowledge receipt of the vulnerability report within 48 hours. +2. Assess the severity and impact of the vulnerability. +3. Develop a fix or mitigation plan for the vulnerability. +4. Notify the reporter about the progress and estimated timeline for the fix. +5. Once the fix is ready, release a new version of R2R that addresses the vulnerability. +6. Publicly disclose the vulnerability and the fix after a reasonable period to allow users to update their installations. + +## Scope + +This security policy applies to the R2R codebase and its dependencies. It does not cover vulnerabilities in the underlying operating systems, hardware, or third-party libraries used by R2R. + +## Recognition + +We greatly appreciate the efforts of security researchers and developers who responsibly disclose vulnerabilities to us. With your permission, we will acknowledge your contribution in the release notes and any public disclosures related to the vulnerability. + +## Contact + +If you have any questions or concerns regarding the security of R2R, please contact the project maintainers at [security@r2r.com](mailto:security@r2r.com). + +Thank you for helping us keep R2R and its users secure! 
diff --git a/R2R/assets/quickstart.gif b/R2R/assets/quickstart.gif Binary files differnew file mode 100755 index 00000000..16e44b5c --- /dev/null +++ b/R2R/assets/quickstart.gif diff --git a/R2R/assets/r2r.png b/R2R/assets/r2r.png Binary files differnew file mode 100755 index 00000000..2bfda75f --- /dev/null +++ b/R2R/assets/r2r.png diff --git a/R2R/compose.neo4j.yaml b/R2R/compose.neo4j.yaml new file mode 100755 index 00000000..f3d709ca --- /dev/null +++ b/R2R/compose.neo4j.yaml @@ -0,0 +1,42 @@ + +networks: + r2r-network: + name: r2r-network + +services: + r2r: + depends_on: + neo4j: + condition: service_healthy + + neo4j: + image: neo4j:5.21.0 + ports: + - "7474:7474" # HTTP + - "7687:7687" # Bolt + environment: + - NEO4J_AUTH=${NEO4J_AUTH:-neo4j/ineedastrongerpassword} + - NEO4J_dbms_memory_pagecache_size=${NEO4J_PAGECACHE_SIZE:-512M} + - NEO4J_dbms_memory_heap_max__size=${NEO4J_HEAP_SIZE:-512M} + - NEO4J_apoc_export_file_enabled=true + - NEO4J_apoc_import_file_enabled=true + - NEO4J_apoc_import_file_use__neo4j__config=true + - NEO4JLABS_PLUGINS=["apoc"] + - NEO4J_dbms_security_procedures_unrestricted=apoc.* + - NEO4J_dbms_security_procedures_allowlist=apoc.* + volumes: + - neo4j_data:/data + - neo4j_logs:/logs + - neo4j_plugins:/plugins + networks: + - r2r-network + healthcheck: + test: ["CMD", "neo4j", "status"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + neo4j_data: + neo4j_logs: + neo4j_plugins: diff --git a/R2R/compose.og.yaml b/R2R/compose.og.yaml new file mode 100755 index 00000000..05c04b3f --- /dev/null +++ b/R2R/compose.og.yaml @@ -0,0 +1,129 @@ +x-depends-on: + neo4j: &neo4j-dependency + condition: service_healthy + +networks: + r2r-network: + name: r2r-network + driver: bridge + attachable: true + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 + labels: + - "com.docker.compose.recreate=always" + + +services: + r2r: + image: emrgntcmplxty/r2r:main + ports: + - "8000:8000" + environment: + - PYTHONUNBUFFERED=1 + - 
POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres} + - POSTGRES_HOST=postgres + - POSTGRES_PORT=5432 + - POSTGRES_DBNAME=${POSTGRES_DBNAME:-postgres} + - POSTGRES_VECS_COLLECTION=${POSTGRES_VECS_COLLECTION:-${CONFIG_NAME:-vecs}} + - NEO4J_USER=${NEO4J_USER:-neo4j} + - NEO4J_PASSWORD=${NEO4J_PASSWORD:-ineedastrongerpassword} + - NEO4J_URL=${NEO4J_URL:-bolt://neo4j:7687} + - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434} + - CONFIG_NAME=${CONFIG_NAME:-} + - CONFIG_PATH=${CONFIG_PATH:-} + - CLIENT_MODE=${CLIENT_MODE:-false} + depends_on: + - postgres + networks: + - r2r-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health"] + interval: 10s + timeout: 5s + retries: 5 + restart: on-failure + volumes: + - ${CONFIG_PATH:-/}:${CONFIG_PATH:-/app/config} + labels: + - "traefik.enable=true" + - "traefik.http.routers.r2r.rule=PathPrefix(`/api`)" + - "traefik.http.services.r2r.loadbalancer.server.port=8000" + - "traefik.http.middlewares.r2r-strip-prefix.stripprefix.prefixes=/api" + - "traefik.http.middlewares.r2r-add-v1.addprefix.prefix=/v1" + - "traefik.http.routers.r2r.middlewares=r2r-strip-prefix,r2r-add-v1,r2r-headers" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Origin=*" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Methods=GET,POST,OPTIONS" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Headers=DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" + - "traefik.http.middlewares.r2r-headers.headers.customresponseheaders.Access-Control-Expose-Headers=Content-Length,Content-Range" + + r2r-dashboard: + image: emrgntcmplxty/r2r-dashboard:latest + environment: + - NEXT_PUBLIC_API_URL=http://traefik:80/api + depends_on: + 
- r2r + networks: + - r2r-network + labels: + - "traefik.enable=true" + - "traefik.http.routers.dashboard.rule=PathPrefix(`/`)" + - "traefik.http.services.dashboard.loadbalancer.server.port=3000" + + postgres: + image: pgvector/pgvector:pg16 + environment: + POSTGRES_USER: ${POSTGRES_USER:-postgres} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} + POSTGRES_DB: ${POSTGRES_DBNAME:-postgres} + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - r2r-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres}"] + interval: 5s + timeout: 5s + retries: 5 + restart: on-failure + + traefik: + image: traefik:v2.9 + command: + - "--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.address=:80" + - "--accesslog=true" + - "--accesslog.filepath=/var/log/traefik/access.log" + ports: + - "88:80" + - "8080:8080" # Traefik dashboard + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - r2r-network + + rag_eval_devenv: + container_name: rag_eval_devenv + #image: paper_eval_img:0.4 + image: r2r_rag:latest + tty: true + volumes: + - ${CODE_DIRECTORY}:/code + networks: + - r2r-network + +volumes: + postgres_data: + the_code: + driver: local + driver_opts: + o: bind + type: none + device: ${STORAGE_DIRECTORY} + #device: /home/shebes/Coding/gn-ai/gnqa diff --git a/R2R/compose.postgres.yaml b/R2R/compose.postgres.yaml new file mode 100644 index 00000000..9b487fa6 --- /dev/null +++ b/R2R/compose.postgres.yaml @@ -0,0 +1,62 @@ +x-depends-on: + neo4j: &neo4j-dependency + condition: service_healthy + +networks: + r2r-network: + name: r2r-network + driver: bridge + attachable: true + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 + labels: + - "com.docker.compose.recreate=always" + +services: + postgres: + image: pgvector/pgvector:pg16 + container_name: postgres + environment: + - POSTGRES_USER=gnqa + - POSTGRES_PASSWORD=gnqa + - 
POSTGRES_HOST=gnqa + volumes: + - postgres_pg_data:/var/lib/postgresql/data + networks: + - r2r-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U gnqa"] + interval: 5s + timeout: 5s + retries: 5 + restart: on-failure + + traefik: + image: traefik:v2.9 + container_name: traefik + command: + - "--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.address=:80" + - "--accesslog=true" + - "--accesslog.filepath=/var/log/traefik/access.log" + ports: + - "88:80" + - "8080:8080" # Traefik dashboard + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - r2r-network + +volumes: + postgres_data: + postgres_pg_data: + the_code: + driver: local + driver_opts: + o: bind + type: none + device: /home/shebes/Research/code/gn/_R2R_old diff --git a/R2R/compose.yaml b/R2R/compose.yaml new file mode 100755 index 00000000..20539c6e --- /dev/null +++ b/R2R/compose.yaml @@ -0,0 +1,129 @@ +x-depends-on: + neo4j: &neo4j-dependency + condition: service_healthy + +networks: + r2r-network: + name: r2r-network + driver: bridge + attachable: true + ipam: + driver: default + config: + - subnet: 172.28.0.0/16 + labels: + - "com.docker.compose.recreate=always" + + +services: + r2r: + image: emrgntcmplxty/r2r:main + container_name: r2r + ports: + - "8000:8000" + environment: + - PYTHONUNBUFFERED=1 + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_HOST=postgres + - POSTGRES_PORT=5432 + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - POSTGRES_VECS_COLLECTION=${POSTGRES_VECS_COLLECTION} + - NEO4J_USER=${NEO4J_USER:-neo4j} + - NEO4J_PASSWORD=${NEO4J_PASSWORD:-ineedastrongerpassword} + - NEO4J_URL=${NEO4J_URL:-bolt://neo4j:7687} + - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_API_BASE=${OLLAMA_API_BASE:-http://host.docker.internal:11434} + - CONFIG_NAME=${CONFIG_NAME:-} + - CONFIG_PATH=${CONFIG_PATH:-} + - 
CLIENT_MODE=${CLIENT_MODE:-false} + depends_on: + - postgres + networks: + - r2r-network + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/v1/health"] + interval: 10s + timeout: 5s + retries: 5 + restart: on-failure + volumes: + - ${CONFIG_PATH:-/}:${CONFIG_PATH:-/app/config} + labels: + - "traefik.enable=true" + - "traefik.http.routers.r2r.rule=PathPrefix(`/api`)" + - "traefik.http.services.r2r.loadbalancer.server.port=8000" + - "traefik.http.middlewares.r2r-strip-prefix.stripprefix.prefixes=/api" + - "traefik.http.middlewares.r2r-add-v1.addprefix.prefix=/v1" + - "traefik.http.routers.r2r.middlewares=r2r-strip-prefix,r2r-add-v1,r2r-headers" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Origin=*" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Methods=GET,POST,OPTIONS" + - "traefik.http.middlewares.r2r-headers.headers.customrequestheaders.Access-Control-Allow-Headers=DNT,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization" + - "traefik.http.middlewares.r2r-headers.headers.customresponseheaders.Access-Control-Expose-Headers=Content-Length,Content-Range" + + r2r-dashboard: + image: emrgntcmplxty/r2r-dashboard:latest + container_name: r2r-dashboard + environment: + - NEXT_PUBLIC_API_URL=http://traefik:80/api + depends_on: + - r2r + networks: + - r2r-network + labels: + - "traefik.enable=true" + - "traefik.http.routers.dashboard.rule=PathPrefix(`/`)" + - "traefik.http.services.dashboard.loadbalancer.server.port=3000" + + postgres: + image: pgvector/pgvector:pg16 + container_name: pg16 + env_file: + - ./env/postgres.env + volumes: + - postgres_data:/var/lib/postgresql/data + networks: + - r2r-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"] + interval: 5s + timeout: 5s + retries: 5 + restart: on-failure + + traefik: + image: traefik:v2.9 + container_name: traefik + command: + - 
"--api.insecure=true" + - "--providers.docker=true" + - "--providers.docker.exposedbydefault=false" + - "--entrypoints.web.address=:80" + - "--accesslog=true" + - "--accesslog.filepath=/var/log/traefik/access.log" + ports: + - "88:80" + - "8080:8080" # Traefik dashboard + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - r2r-network + + rag_eval_devenv: + container_name: rag_eval_devenv + image: r2r_rag:latest + tty: true + volumes: + - ${CODE_DIRECTORY}:/code + networks: + - r2r-network + +volumes: + postgres_data: + the_code: + driver: local + driver_opts: + o: bind + type: none + device: ${STORAGE_DIRECTORY} diff --git a/R2R/config.json b/R2R/config.json new file mode 100755 index 00000000..52af89b0 --- /dev/null +++ b/R2R/config.json @@ -0,0 +1,59 @@ +{ + "app": { + "max_logs_per_request": 100, + "max_file_size_in_mb": 32 + }, + "completions": { + "provider": "litellm", + "generation_config": { + "model": "gpt-4o", + "temperature": 0.1, + "top_p": 1.0, + "top_k": 100, + "max_tokens_to_sample": 1024, + "stream": false, + "functions": null, + "skip_special_tokens": false, + "stop_token": null, + "num_beams": 1, + "do_sample": true, + "generate_with_chat": false, + "add_generation_kwargs": {}, + "api_base": null + } + }, + "embedding": { + "provider": "openai", + "base_model": "text-embedding-3-small", + "base_dimension": 512, + "batch_size": 128, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 512, + "chunk_overlap": 20 + }, + "rerank_model": "None" + }, + "kg": { + "provider": "None" + }, + "eval": { + "provider": "None" + }, + "ingestion":{ + "excluded_parsers": [ + "mp4" + ] + }, + "logging": { + "provider": "local", + "log_table": "logs", + "log_info_table": "log_info" + }, + "prompt": { + "provider": "local" + }, + "vector_database": { + "provider": "pgvector" + } +} diff --git a/R2R/ingesting/README.md b/R2R/ingesting/README.md new file mode 100644 index 00000000..4a3951f6 --- /dev/null +++ 
b/R2R/ingesting/README.md @@ -0,0 +1,21 @@ + +<p align="left"> + <a href="https://r2r-docs.sciphi.ai"><img src="https://img.shields.io/badge/docs.sciphi.ai-3F16E4" alt="Docs"></a> + <a href="https://discord.gg/p6KqD2kjtB"><img src="https://img.shields.io/discord/1120774652915105934?style=social&logo=discord" alt="Discord"></a> + <a href="https://github.com/SciPhi-AI"><img src="https://img.shields.io/github/stars/SciPhi-AI/R2R" alt="Github Stars"></a> + <a href="https://github.com/SciPhi-AI/R2R/pulse"><img src="https://img.shields.io/github/commit-activity/w/SciPhi-AI/R2R" alt="Commits-per-week"></a> + <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-purple.svg" alt="License: MIT"></a> +</p> + +<img src="../assets/r2r.png" alt="R2R Answer Engine"> +<h3 align="center"> +Ingesting a library +</h3> + +# About +R2R was designed to bridge the gap between local LLM experimentation and scalable, production-ready Retrieval-Augmented Generation (RAG). R2R provides a comprehensive and SOTA RAG system for developers, built around a RESTful API for ease of use. + +ingest_my_data.py in the directory [[ingesting]] shows a process you can use to place multiple PDF files into the RAG's context. +Because there are limits to how much data can be uploaded and processed by different LLM providers, there are wait times coded into the ingest process. + +In the code the references are split across many directories, because the upload limit was around 35 megabytes and the references span many years.
\ No newline at end of file diff --git a/R2R/ingesting/aging_pt1.json b/R2R/ingesting/aging_pt1.json new file mode 100755 index 00000000..27678257 --- /dev/null +++ b/R2R/ingesting/aging_pt1.json @@ -0,0 +1 @@ +{'processed_documents': ["Document '2001 - Human Genome and Diseases.pdf' processed successfully.", "Document '2001 - Demography in the age of genomics.pdf' processed successfully.", "Document '2002 - Roles of the Werner syndrome protein in pathways required.pdf' processed successfully.", "Document '2003 - A Method for Detecting Recent Selection in the Human Genome.pdf' processed successfully.", "Document '2004 - Human blood genomics distinct profiles for gender, age.pdf' processed successfully.", "Document '1996 - Livestock Genomics Comes of Age.pdf' processed successfully.", "Document '2003 - The application of functional genomics.pdf' processed successfully.", "Document '2004 - Genomic instability, aging.pdf' processed successfully.", "Document '2002 - Influence of age, sex, and strength training.pdf' processed successfully.", "Document '2001 - Statistical tests of selective neutrality.pdf' processed successfully.", "Document '1997 - Genetic and functional changes in mitochondria associated with aging.pdf' processed successfully.", "Document '2003 - From basepairs to birdsongs phylogenetic data.pdf' processed successfully.", "Document '2004 - Comparative analysis of processed pseudogenes.pdf' processed successfully.", "Document '2002 - Genome Dynamics in Aging Mice.pdf' processed successfully.", "Document '2004 - Cytonuclear coevolution the genomics.pdf' processed successfully.", "Document '1999 - Functional integrity of mitochondrial genomes in human plateletsand autopsied brain tissues from elderly patients withAlzheimer’s disease.pdf' processed successfully.", "Document '2002 - Large genome rearrangements as a primary cause of aging.pdf' processed successfully.", "Document '2002 - Retroelement Distributions in the Human.pdf' processed successfully.", 
"Document '2001 - Marked differences in unilateral isolated retinoblastomas.pdf' processed successfully.", "Document '2004 - The emergence of epidemiology.pdf' processed successfully.", "Document '2003 - Functional Genomics of Ageing.pdf' processed successfully.", "Document '1999 - Functional Genomics and Rat Models.pdf' processed successfully.", "Document '2004 - A genome scan for diabetic nephropathy in African Americans.pdf' processed successfully.", "Document '2004 - Endosymbiotic gene transfer.pdf' processed successfully.", "Document '2003 - Gene expression profile of aging in human muscle.pdf' processed successfully.", "Document '2002 - Mitochondrial dysfunction leads to telomere attrition.pdf' processed successfully.", "Document '2004 - Proinflammatory phenotype of coronary arteries promotes endothelial.pdf' processed successfully.", "Document '2003 - Age-related changes in the transcriptional profile of mouse RPE choroid.pdf' processed successfully.", "Document '1996 - Isolation and characterization of genomic and cDNA clones encoding.pdf' processed successfully.", "Document '1999 - Molecular Biology of Aging.pdf' processed successfully.", "Document '2004 - RNA-interference-based functional genomics in mammalian cells.pdf' processed successfully.", "Document '1982 - Is Cell Aging Caused by Respiration-Dependent Injury to the Mitochondrial Genome.pdf' processed successfully.", "Document '2004 - Comparing genomic expression patterns across species.pdf' processed successfully.", "Document '1989 - Genetic instability and aging theories, facts, and future perspectives.pdf' processed successfully.", "Document '2004 - Evolutionary history of Oryza sativa LTR retrotransposons.pdf' processed successfully.", "Document '2004 - Genome-Wide Scan for a Healthy Aging.pdf' processed successfully.", "Document '2000 - A Major Gene Affecting Age-Related Hearing Loss Is Common.pdf' processed successfully.", "Document '2001 - Fungal virulence studies come of age.pdf' processed 
successfully.", "Document '1999 - The caenorhabditis elegans genome.pdf' processed successfully.", "Document '2001 - Plant Systematics in the Age of Genomics.pdf' processed successfully.", "Document '2001 - Vitamins minerals and genomic stability in humans.pdf' processed successfully.", "Document '2000 - Genome-wide study of aging and oxidative stress.pdf' processed successfully.", "Document '2001 - Genome maintenance mechanisms.pdf' processed successfully.", "Document '2002 - Signatures of Domain Shuffling.pdf' processed successfully.", "Document '2004 - Age-Related Macular Degeneration A High-Resolution Genome Scan.pdf' processed successfully.", "Document '1987 - Genomic 5-Methyldeoxycytidine.pdf' processed successfully.", "Document '2002 - Parkinson’s Disease and Apolipoprotein E Possible.pdf' processed successfully.", "Document '2002 - Functional genomics the coming.pdf' processed successfully.", "Document '2003 - Mitochondrial DNA modifies cognition in interaction.pdf' processed successfully.", "Document '2001 - Methylation meets genomics.pdf' processed successfully.", "Document '2003 - Life-long reduction in MnSOD activity results.pdf' processed successfully.", "Document '2004 - A Transcriptional Profile of Aging.pdf' processed successfully.", "Document '1999 - Rothmund–Thomson Syndrome Responsible Gene, RECQL4.pdf' processed successfully.", "Document '2003 - Caloric restriction promotes genomic stability by induction.pdf' processed successfully.", "Document '2003 - Epigenetic regulation of gene expression how the genome integrates intrinsic and environmental signals.pdf' processed successfully.", "Document '2004 - Genome-wide RNA interference screen identifies.pdf' processed successfully.", "Document '2000 - A full genome scan for age-related maculopathy.pdf' processed successfully.", "Document '1997 - Bioinformatics in a post-genomics age.pdf' processed successfully.", "Document '1990 - Extrachromosomal circular DNAs and genomic sequence plasticity.pdf' 
processed successfully.", "Document '2004 - Nutritional genomics.pdf' processed successfully.", "Document '2004 - Microbial Genomics and the Periodic Table.pdf' processed successfully.", "Document '1997 - Age associated alterations of the mitochondrial genome.pdf' processed successfully.", "Document '2002 - Genome-Wide Transcript Profiles in Aging.pdf' processed successfully.", "Document '2003 - Genetics, genes, genomics and g.pdf' processed successfully.", "Document '1999 - Cell-by-cell scanning of whole mitochondrial genomes.pdf' processed successfully.", "Document '2004 - Prevalence and correlates of orthostatic.pdf' processed successfully.", "Document '2000 - From life to death – the struggle between chemistry and biology during.pdf' processed successfully.", "Document '1998 - The bottleneck mitochondrial imperatives in oogenesis and ovarian.pdf' processed successfully.", "Document '1993 - Genomic Damage and Its Repair.pdf' processed successfully.", "Document '2001 - Mitochondrial genome instability in human cancers.pdf' processed successfully.", "Document '2001 - The genetics of aging.pdf' processed successfully.", "Document '2003 - Lifelong voluntary exercise in the mouse prevents.pdf' processed successfully.", "Document '2004 - Age-associated alteration of gene expression.pdf' processed successfully.", "Document '1999 - Qualitative assessment of Genotoxicity.pdf' processed successfully.", "Document '2001 - Genomic profiling of short- and long-term caloric.pdf' processed successfully.", "Document '2004 - Ageing, repetitive genomes and DNA.pdf' processed successfully.", "Document '2004 - Genomic DNA methylation of juvenile and mature Acacia mangium.pdf' processed successfully.", "Document '2003 - Welcome to the Genomic Era.pdf' processed successfully.", "Document '2003 - Whole-genome screening indicates a possible burst of formation.pdf' processed successfully.", "Document '2003 - Population Screening in the Age.pdf' processed successfully.", "Document '1997 - 
Tumour-suppressor genes evolving definitions in the genomic age.pdf' processed successfully.", "Document '2003 - Results of a high-resolution genome screen.pdf' processed successfully.", "Document '2003 - A Whole-Genome Screen of a Quantitative Trait of Age-Related.pdf' processed successfully.", "Document '1997 - Multi-organ characterization of mitochondrial.pdf' processed successfully.", "Document '2003 - Risk Factors for Hodgkin’s Lymphoma by EBV.pdf' processed successfully.", "Document '2002 - Human mitochondrial DNA with large deletions.pdf' processed successfully.", "Document '2002 - Genomic DNA methylation–demethylation during aging.pdf' processed successfully.", "Document '2003 - Age-related impairment of the transcriptional.pdf' processed successfully.", "Document '1995 - Rearranged Mitochondrial Genomes Are Present in Human.pdf' processed successfully.", "Document '2002 - Population Genomics Ageing.pdf' processed successfully.", "Document '2003 - The Dawning era of polymer.pdf' processed successfully.", "Document '2001 - A genome-wide scan for linkage to human.pdf' processed successfully.", "Document '2003 - Molecular mechanisms of reduced adrenergic signaling.pdf' processed successfully.", "Document '2002 - Telomere dysfunction provokes regional amplification.pdf' processed successfully.", "Document '2004 - Mitochondrial Genome Single Nucleotide.pdf' processed successfully.", "Document '1991 - Detection of Epstein-Barr virus genomes in Hodgkin's disease relation to age.pdf' processed successfully.", "Document '1998 - Microarrays biotechnology’s discovery.pdf' processed successfully.", "Document '2003 - The age of the Arabidopsis thaliana genome duplication.pdf' processed successfully.", "Document '2004 - Whole Genome Scan for Obstructive Sleep Apnea.pdf' processed successfully."], 'failed_documents': ["Document '2003 - Telomeres in the chicken genome stability and chromosome ends.pdf': Error code: 429 - {'error': {'message': 'Rate limit reached for 
text-embedding-3-small in organization org-p90FbCDZwTfuyHzyjGoHvDaR on tokens per min (TPM): Limit 1000000, Used 996746, Requested 12291. Please try again in 542ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}", "Document '1983 - Mitochondrial DNA in Mortal and Immortal Human Cell.pdf': Error code: 429 - {'error': {'message': 'Rate limit reached for text-embedding-3-small in organization org-p90FbCDZwTfuyHzyjGoHvDaR on tokens per min (TPM): Limit 1000000, Used 996385, Requested 12801. Please try again in 551ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}", "Document '2004 - Additive regulation of hepatic gene expression by dwarfism.pdf': Request timed out.", "Document '2004 - A New Resource for Cereal Genomics 22K Barley.pdf': Request timed out.", "Document '1998 - Molecular archaeology of the Escherichia coli genome.pdf': Request timed out.", "Document '2003 - Imaging genomics.pdf': Request timed out.", "Document '2001 - Genomics food and nutrition.pdf': Request timed out.", "Document '2003 - Molecular profiling in the age.pdf': Request timed out."], 'skipped_documents': []} diff --git a/R2R/ingesting/cmds.txt b/R2R/ingesting/cmds.txt new file mode 100755 index 00000000..9745b929 --- /dev/null +++ b/R2R/ingesting/cmds.txt @@ -0,0 +1 @@ +Time taken to ingest files: 541.67 secondsngest-files ~/SolomonVentures/GeneNetwork/docRequesting...N/Gene\ Network.org\ PDFs/*.pdf
\ No newline at end of file diff --git a/R2R/ingesting/ingest_my_data.py b/R2R/ingesting/ingest_my_data.py new file mode 100755 index 00000000..cb0fa551 --- /dev/null +++ b/R2R/ingesting/ingest_my_data.py @@ -0,0 +1,99 @@ +from os import listdir +from os.path import isfile, join +import time +import datetime +import configparser +from r2r import R2RClient + +cfg = configparser.ConfigParser() +cfg.read('_config.cfg') + +client = R2RClient("http://localhost:7272") + +#print("The status of the client is {0}".format(client.health())) + +# should be read from a configuration file +main_file_dir = 'full path' + +data_dir = cfg['PDF_DIR'] + +file_paths = [ + data_dir+cfg['genetics']['diabetes'], + data_dir+cfg['genetics']['aging'] +] + +print("The defined file paths {0}".format(file_paths)) + +""" +file_paths = [ + main_file_dir+'pt02/b/', + main_file_dir+'pt03/c/', + main_file_dir+'pt03/a/', + main_file_dir+'pt03/b/', + main_file_dir+'pt03/c/', + main_file_dir+'pt04/a/', + main_file_dir+'pt04/b/', + main_file_dir+'pt04/c/', + main_file_dir+'pt05/a/', + main_file_dir+'pt05/b/', + main_file_dir+'pt05/c/', + main_file_dir+'pt06/a/', + main_file_dir+'pt06/b/', + main_file_dir+'pt06/c/', + main_file_dir+'pt07/a/', + main_file_dir+'pt07/b/', + main_file_dir+'pt07/c/', + main_file_dir+'pt08/a/', + main_file_dir+'pt08/b/', + main_file_dir+'pt08/c/', + main_file_dir+'pt09/a/', + main_file_dir+'pt09/b/', + main_file_dir+'pt09/c/', + main_file_dir+'pt10/a/', + main_file_dir+'pt10/b/', + main_file_dir+'pt10/c/', + main_file_dir+'pt11/a/', + main_file_dir+'pt11/b/', + main_file_dir+'pt11/c/', + main_file_dir+'pt12/a/', + main_file_dir+'pt12/b/', + main_file_dir+'pt12/c/', + main_file_dir+'pt13/a/', + main_file_dir+'pt13/b/', + main_file_dir+'pt13/c/', + main_file_dir+'pt14/a/', + main_file_dir+'pt14/b/', + main_file_dir+'pt14/c/', + main_file_dir+'pt14/d/' + ] + + +ndx = 0 +for file_list in file_paths: + the_pdfs = [ join(file_list, f) for f in listdir(file_list) if 
isfile(join(file_list, f))] + ''' + # subroutine to test list content + for the_pdf in the_pdfs: + print('{0} -> {1}'.format(ndx, the_pdf)) + ndx += 1 + ''' + print(datetime.datetime.now()) + begin_ingesting = datetime.datetime.now() + sleeptime = 30 + try: + ingest_response = client.ingest_files( + file_paths=the_pdfs + ) + except: + ingest_response = "Nothing ingested from {0}".format(file_list) + sleeptime = 1 + + end_ingesting = datetime.datetime.now() + + # show results of ingesting documents + print("Entry [{0}]\t{1} {2}\n\t\t{3}".format(ndx, file_list, (end_ingesting-begin_ingesting), ingest_response)) + + # brace against pinging API too quickly + time.sleep(sleeptime) + ndx += 1 +"""
\ No newline at end of file diff --git a/R2R/poetry.lock b/R2R/poetry.lock new file mode 100755 index 00000000..39eb6d04 --- /dev/null +++ b/R2R/poetry.lock @@ -0,0 +1,3942 @@ +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. + +[[package]] +name = "aiohttp" +version = "3.9.5" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"}, + {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"}, + {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"}, + {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"}, + {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"}, + {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"}, + {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"}, + {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"}, + {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"}, + {file = 
"aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"}, + {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"}, + {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"}, + {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"}, + {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"}, + {file = 
"aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"}, + {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"}, + {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"}, + {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"}, + {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"}, + {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"}, + {file = 
"aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"}, + {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"}, + {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"}, + {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"}, + {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"}, + {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"}, + 
{file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"}, + {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"}, + {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"}, + {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"}, + {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = 
"sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"}, + {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"}, + {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +attrs = ">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "brotlicffi"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "aiosqlite" +version = "0.20.0" +description = "asyncio bridge to the standard sqlite3 module" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiosqlite-0.20.0-py3-none-any.whl", hash = "sha256:36a1deaca0cac40ebe32aac9977a6e2bbc7f5189f23f4a54d5908986729e5bd6"}, + {file = "aiosqlite-0.20.0.tar.gz", hash = "sha256:6d35c8c256637f4672f843c31021464090805bf925385ac39473fb16eaaca3d7"}, +] + +[package.dependencies] +typing_extensions = ">=4.0" + +[package.extras] +dev = ["attribution (==1.7.0)", "black (==24.2.0)", "coverage[toml] (==7.4.1)", "flake8 (==7.0.0)", "flake8-bugbear (==24.2.6)", "flit (==3.9.0)", "mypy (==1.8.0)", "ufmt (==2.3.0)", "usort (==1.0.8.post1)"] +docs = ["sphinx (==7.2.6)", "sphinx-mdinclude (==0.5.3)"] + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with 
typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "anyio" +version = "4.4.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, + {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, +] + +[package.dependencies] +exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" +typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "async-timeout" +version = "4.0.3" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, + {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, +] + +[[package]] +name = "asyncpg" +version = "0.29.0" +description = "An asyncio PostgreSQL driver" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "asyncpg-0.29.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:72fd0ef9f00aeed37179c62282a3d14262dbbafb74ec0ba16e1b1864d8a12169"}, + {file = "asyncpg-0.29.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:52e8f8f9ff6e21f9b39ca9f8e3e33a5fcdceaf5667a8c5c32bee158e313be385"}, + {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e6823a7012be8b68301342ba33b4740e5a166f6bbda0aee32bc01638491a22"}, + {file = "asyncpg-0.29.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:746e80d83ad5d5464cfbf94315eb6744222ab00aa4e522b704322fb182b83610"}, + {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ff8e8109cd6a46ff852a5e6bab8b0a047d7ea42fcb7ca5ae6eaae97d8eacf397"}, + {file = "asyncpg-0.29.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:97eb024685b1d7e72b1972863de527c11ff87960837919dac6e34754768098eb"}, + {file = "asyncpg-0.29.0-cp310-cp310-win32.whl", hash = "sha256:5bbb7f2cafd8d1fa3e65431833de2642f4b2124be61a449fa064e1a08d27e449"}, + {file = "asyncpg-0.29.0-cp310-cp310-win_amd64.whl", hash = "sha256:76c3ac6530904838a4b650b2880f8e7af938ee049e769ec2fba7cd66469d7772"}, + {file = "asyncpg-0.29.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4900ee08e85af01adb207519bb4e14b1cae8fd21e0ccf80fac6aa60b6da37b4"}, + {file = "asyncpg-0.29.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a65c1dcd820d5aea7c7d82a3fdcb70e096f8f70d1a8bf93eb458e49bfad036ac"}, + {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b52e46f165585fd6af4863f268566668407c76b2c72d366bb8b522fa66f1870"}, + {file = "asyncpg-0.29.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc600ee8ef3dd38b8d67421359779f8ccec30b463e7aec7ed481c8346decf99f"}, + {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:039a261af4f38f949095e1e780bae84a25ffe3e370175193174eb08d3cecab23"}, + {file = "asyncpg-0.29.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:6feaf2d8f9138d190e5ec4390c1715c3e87b37715cd69b2c3dfca616134efd2b"}, + {file = "asyncpg-0.29.0-cp311-cp311-win32.whl", hash = "sha256:1e186427c88225ef730555f5fdda6c1812daa884064bfe6bc462fd3a71c4b675"}, + {file = "asyncpg-0.29.0-cp311-cp311-win_amd64.whl", hash = "sha256:cfe73ffae35f518cfd6e4e5f5abb2618ceb5ef02a2365ce64f132601000587d3"}, + {file = "asyncpg-0.29.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6011b0dc29886ab424dc042bf9eeb507670a3b40aece3439944006aafe023178"}, + {file = "asyncpg-0.29.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b544ffc66b039d5ec5a7454667f855f7fec08e0dfaf5a5490dfafbb7abbd2cfb"}, + {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d84156d5fb530b06c493f9e7635aa18f518fa1d1395ef240d211cb563c4e2364"}, + {file = "asyncpg-0.29.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54858bc25b49d1114178d65a88e48ad50cb2b6f3e475caa0f0c092d5f527c106"}, + {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bde17a1861cf10d5afce80a36fca736a86769ab3579532c03e45f83ba8a09c59"}, + {file = "asyncpg-0.29.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:37a2ec1b9ff88d8773d3eb6d3784dc7e3fee7756a5317b67f923172a4748a175"}, + {file = "asyncpg-0.29.0-cp312-cp312-win32.whl", hash = "sha256:bb1292d9fad43112a85e98ecdc2e051602bce97c199920586be83254d9dafc02"}, + {file = "asyncpg-0.29.0-cp312-cp312-win_amd64.whl", hash = "sha256:2245be8ec5047a605e0b454c894e54bf2ec787ac04b1cb7e0d3c67aa1e32f0fe"}, + {file = "asyncpg-0.29.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0009a300cae37b8c525e5b449233d59cd9868fd35431abc470a3e364d2b85cb9"}, + {file = "asyncpg-0.29.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cad1324dbb33f3ca0cd2074d5114354ed3be2b94d48ddfd88af75ebda7c43cc"}, + {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:012d01df61e009015944ac7543d6ee30c2dc1eb2f6b10b62a3f598beb6531548"}, + {file = "asyncpg-0.29.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000c996c53c04770798053e1730d34e30cb645ad95a63265aec82da9093d88e7"}, + {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e0bfe9c4d3429706cf70d3249089de14d6a01192d617e9093a8e941fea8ee775"}, + {file = "asyncpg-0.29.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:642a36eb41b6313ffa328e8a5c5c2b5bea6ee138546c9c3cf1bffaad8ee36dd9"}, + {file = "asyncpg-0.29.0-cp38-cp38-win32.whl", hash = "sha256:a921372bbd0aa3a5822dd0409da61b4cd50df89ae85150149f8c119f23e8c408"}, + {file = "asyncpg-0.29.0-cp38-cp38-win_amd64.whl", hash = "sha256:103aad2b92d1506700cbf51cd8bb5441e7e72e87a7b3a2ca4e32c840f051a6a3"}, + {file = "asyncpg-0.29.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5340dd515d7e52f4c11ada32171d87c05570479dc01dc66d03ee3e150fb695da"}, + {file = "asyncpg-0.29.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e17b52c6cf83e170d3d865571ba574577ab8e533e7361a2b8ce6157d02c665d3"}, + {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f100d23f273555f4b19b74a96840aa27b85e99ba4b1f18d4ebff0734e78dc090"}, + {file = "asyncpg-0.29.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48e7c58b516057126b363cec8ca02b804644fd012ef8e6c7e23386b7d5e6ce83"}, + {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f9ea3f24eb4c49a615573724d88a48bd1b7821c890c2effe04f05382ed9e8810"}, + {file = "asyncpg-0.29.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8d36c7f14a22ec9e928f15f92a48207546ffe68bc412f3be718eedccdf10dc5c"}, + {file = "asyncpg-0.29.0-cp39-cp39-win32.whl", hash = "sha256:797ab8123ebaed304a1fad4d7576d5376c3a006a4100380fb9d517f0b59c1ab2"}, + {file = "asyncpg-0.29.0-cp39-cp39-win_amd64.whl", hash = "sha256:cce08a178858b426ae1aa8409b5cc171def45d4293626e7aa6510696d46decd8"}, + {file = 
"asyncpg-0.29.0.tar.gz", hash = "sha256:d1c49e1f44fffafd9a55e1a9b101590859d881d639ea2922516f5d9c512d354e"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.3", markers = "python_version < \"3.12.0\""} + +[package.extras] +docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] + +[[package]] +name = "attrs" +version = "23.2.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, + {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] + +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = 
"beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "black" +version = "24.4.2" +description = "The uncompromising code formatter." +optional = false +python-versions = ">=3.8" +files = [ + {file = "black-24.4.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce"}, + {file = "black-24.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021"}, + {file = "black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063"}, + {file = "black-24.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96"}, + {file = "black-24.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474"}, + {file = "black-24.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c"}, + {file = "black-24.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb"}, + {file = "black-24.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1"}, + {file = "black-24.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d"}, + {file = "black-24.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04"}, + {file = 
"black-24.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc"}, + {file = "black-24.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0"}, + {file = "black-24.4.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7"}, + {file = "black-24.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94"}, + {file = "black-24.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8"}, + {file = "black-24.4.2-cp38-cp38-win_amd64.whl", hash = "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c"}, + {file = "black-24.4.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1"}, + {file = "black-24.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741"}, + {file = "black-24.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e"}, + {file = "black-24.4.2-cp39-cp39-win_amd64.whl", hash = "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7"}, + {file = "black-24.4.2-py3-none-any.whl", hash = "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c"}, + {file = "black-24.4.2.tar.gz", hash = "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + 
+[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "certifi" +version = "2024.7.4" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = 
"sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = 
"charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = 
"sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "click" +version = "8.1.7" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, + {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "codecov" +version = "2.1.13" +description = "Hosted coverage reports for GitHub, Bitbucket and Gitlab" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "codecov-2.1.13-py2.py3-none-any.whl", hash = "sha256:c2ca5e51bba9ebb43644c43d0690148a55086f7f5e6fd36170858fa4206744d5"}, + {file = 
"codecov-2.1.13.tar.gz", hash = "sha256:2362b685633caeaf45b9951a9b76ce359cd3581dd515b430c6c3f5dfb4d92a8c"}, +] + +[package.dependencies] +coverage = "*" +requests = ">=2.7.9" + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "coverage" +version = "7.6.0" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "coverage-7.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dff044f661f59dace805eedb4a7404c573b6ff0cdba4a524141bc63d7be5c7fd"}, + {file = "coverage-7.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8659fd33ee9e6ca03950cfdcdf271d645cf681609153f218826dd9805ab585c"}, + {file = "coverage-7.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7792f0ab20df8071d669d929c75c97fecfa6bcab82c10ee4adb91c7a54055463"}, + {file = "coverage-7.6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4b3cd1ca7cd73d229487fa5caca9e4bc1f0bca96526b922d61053ea751fe791"}, + {file = "coverage-7.6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7e128f85c0b419907d1f38e616c4f1e9f1d1b37a7949f44df9a73d5da5cd53c"}, + {file = "coverage-7.6.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a94925102c89247530ae1dab7dc02c690942566f22e189cbd53579b0693c0783"}, + {file = "coverage-7.6.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:dcd070b5b585b50e6617e8972f3fbbee786afca71b1936ac06257f7e178f00f6"}, + {file = 
"coverage-7.6.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d50a252b23b9b4dfeefc1f663c568a221092cbaded20a05a11665d0dbec9b8fb"}, + {file = "coverage-7.6.0-cp310-cp310-win32.whl", hash = "sha256:0e7b27d04131c46e6894f23a4ae186a6a2207209a05df5b6ad4caee6d54a222c"}, + {file = "coverage-7.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:54dece71673b3187c86226c3ca793c5f891f9fc3d8aa183f2e3653da18566169"}, + {file = "coverage-7.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7b525ab52ce18c57ae232ba6f7010297a87ced82a2383b1afd238849c1ff933"}, + {file = "coverage-7.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bea27c4269234e06f621f3fac3925f56ff34bc14521484b8f66a580aacc2e7d"}, + {file = "coverage-7.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed8d1d1821ba5fc88d4a4f45387b65de52382fa3ef1f0115a4f7a20cdfab0e94"}, + {file = "coverage-7.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01c322ef2bbe15057bc4bf132b525b7e3f7206f071799eb8aa6ad1940bcf5fb1"}, + {file = "coverage-7.6.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03cafe82c1b32b770a29fd6de923625ccac3185a54a5e66606da26d105f37dac"}, + {file = "coverage-7.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0d1b923fc4a40c5832be4f35a5dab0e5ff89cddf83bb4174499e02ea089daf57"}, + {file = "coverage-7.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4b03741e70fb811d1a9a1d75355cf391f274ed85847f4b78e35459899f57af4d"}, + {file = "coverage-7.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a73d18625f6a8a1cbb11eadc1d03929f9510f4131879288e3f7922097a429f63"}, + {file = "coverage-7.6.0-cp311-cp311-win32.whl", hash = "sha256:65fa405b837060db569a61ec368b74688f429b32fa47a8929a7a2f9b47183713"}, + {file = "coverage-7.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:6379688fb4cfa921ae349c76eb1a9ab26b65f32b03d46bb0eed841fd4cb6afb1"}, + 
{file = "coverage-7.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f7db0b6ae1f96ae41afe626095149ecd1b212b424626175a6633c2999eaad45b"}, + {file = "coverage-7.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bbdf9a72403110a3bdae77948b8011f644571311c2fb35ee15f0f10a8fc082e8"}, + {file = "coverage-7.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cc44bf0315268e253bf563f3560e6c004efe38f76db03a1558274a6e04bf5d5"}, + {file = "coverage-7.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da8549d17489cd52f85a9829d0e1d91059359b3c54a26f28bec2c5d369524807"}, + {file = "coverage-7.6.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0086cd4fc71b7d485ac93ca4239c8f75732c2ae3ba83f6be1c9be59d9e2c6382"}, + {file = "coverage-7.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1fad32ee9b27350687035cb5fdf9145bc9cf0a094a9577d43e909948ebcfa27b"}, + {file = "coverage-7.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:044a0985a4f25b335882b0966625270a8d9db3d3409ddc49a4eb00b0ef5e8cee"}, + {file = "coverage-7.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:76d5f82213aa78098b9b964ea89de4617e70e0d43e97900c2778a50856dac605"}, + {file = "coverage-7.6.0-cp312-cp312-win32.whl", hash = "sha256:3c59105f8d58ce500f348c5b56163a4113a440dad6daa2294b5052a10db866da"}, + {file = "coverage-7.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca5d79cfdae420a1d52bf177de4bc2289c321d6c961ae321503b2ca59c17ae67"}, + {file = "coverage-7.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d39bd10f0ae453554798b125d2f39884290c480f56e8a02ba7a6ed552005243b"}, + {file = "coverage-7.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:beb08e8508e53a568811016e59f3234d29c2583f6b6e28572f0954a6b4f7e03d"}, + {file = "coverage-7.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b2e16f4cd2bc4d88ba30ca2d3bbf2f21f00f382cf4e1ce3b1ddc96c634bc48ca"}, + {file = "coverage-7.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6616d1c9bf1e3faea78711ee42a8b972367d82ceae233ec0ac61cc7fec09fa6b"}, + {file = "coverage-7.6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad4567d6c334c46046d1c4c20024de2a1c3abc626817ae21ae3da600f5779b44"}, + {file = "coverage-7.6.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d17c6a415d68cfe1091d3296ba5749d3d8696e42c37fca5d4860c5bf7b729f03"}, + {file = "coverage-7.6.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9146579352d7b5f6412735d0f203bbd8d00113a680b66565e205bc605ef81bc6"}, + {file = "coverage-7.6.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cdab02a0a941af190df8782aafc591ef3ad08824f97850b015c8c6a8b3877b0b"}, + {file = "coverage-7.6.0-cp38-cp38-win32.whl", hash = "sha256:df423f351b162a702c053d5dddc0fc0ef9a9e27ea3f449781ace5f906b664428"}, + {file = "coverage-7.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:f2501d60d7497fd55e391f423f965bbe9e650e9ffc3c627d5f0ac516026000b8"}, + {file = "coverage-7.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7221f9ac9dad9492cecab6f676b3eaf9185141539d5c9689d13fd6b0d7de840c"}, + {file = "coverage-7.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ddaaa91bfc4477d2871442bbf30a125e8fe6b05da8a0015507bfbf4718228ab2"}, + {file = "coverage-7.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4cbe651f3904e28f3a55d6f371203049034b4ddbce65a54527a3f189ca3b390"}, + {file = "coverage-7.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:831b476d79408ab6ccfadaaf199906c833f02fdb32c9ab907b1d4aa0713cfa3b"}, + {file = "coverage-7.6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:46c3d091059ad0b9c59d1034de74a7f36dcfa7f6d3bde782c49deb42438f2450"}, + {file = "coverage-7.6.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:4d5fae0a22dc86259dee66f2cc6c1d3e490c4a1214d7daa2a93d07491c5c04b6"}, + {file = "coverage-7.6.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:07ed352205574aad067482e53dd606926afebcb5590653121063fbf4e2175166"}, + {file = "coverage-7.6.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:49c76cdfa13015c4560702574bad67f0e15ca5a2872c6a125f6327ead2b731dd"}, + {file = "coverage-7.6.0-cp39-cp39-win32.whl", hash = "sha256:482855914928c8175735a2a59c8dc5806cf7d8f032e4820d52e845d1f731dca2"}, + {file = "coverage-7.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:543ef9179bc55edfd895154a51792b01c017c87af0ebaae092720152e19e42ca"}, + {file = "coverage-7.6.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:6fe885135c8a479d3e37a7aae61cbd3a0fb2deccb4dda3c25f92a49189f766d6"}, + {file = "coverage-7.6.0.tar.gz", hash = "sha256:289cc803fa1dc901f84701ac10c9ee873619320f2f9aff38794db4a4a0268d51"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "decorator" +version = "4.4.2" +description = "Decorators for Humans" +optional = true +python-versions = ">=2.6, !=3.0.*, !=3.1.*" +files = [ + {file = "decorator-4.4.2-py2.py3-none-any.whl", hash = "sha256:41fa54c2a0cc4ba648be4fd43cff00aedf5b9465c9bf18d64325bc225f08f760"}, + {file = "decorator-4.4.2.tar.gz", hash = "sha256:e3a62f0520172440ca0dcc823749319382e377f37f140a0b99ef45fecb84bfe7"}, +] + +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + +[[package]] +name = "et-xmlfile" +version = "1.1.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = 
"sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "fastapi" +version = "0.109.2" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"}, + {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.36.3,<0.37.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] + +[[package]] +name = "filelock" +version = "3.15.4" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] +typing = ["typing-extensions (>=4.8)"] + +[[package]] +name = "fire" +version = "0.5.0" +description = "A library for automatically generating command line interfaces." +optional = false +python-versions = "*" +files = [ + {file = "fire-0.5.0.tar.gz", hash = "sha256:a6b0d49e98c8963910021f92bba66f65ab440da2982b78eb1bbf95a0a34aacc6"}, +] + +[package.dependencies] +six = "*" +termcolor = "*" + +[[package]] +name = "flake8" +version = "6.1.0" +description = "the modular source code checker: pep8 pyflakes and co" +optional = false +python-versions = ">=3.8.1" +files = [ + {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, + {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, +] + +[package.dependencies] +mccabe = ">=0.7.0,<0.8.0" +pycodestyle = ">=2.11.0,<2.12.0" +pyflakes = ">=3.1.0,<3.2.0" + +[[package]] +name = "flupy" +version = "1.2.0" +description = "Method chaining built on generators" +optional = false +python-versions = "*" +files = [ + {file = "flupy-1.2.0.tar.gz", hash = "sha256:12487a008e9744cd35d0f6ea3cfa06f4b2b27cb138bf57d0788f5c26e57afe69"}, +] + +[package.dependencies] +typing_extensions = "*" + +[package.extras] +dev = ["black", "mypy", "pre-commit", "pylint", "pytest", 
"pytest-benchmark", "pytest-cov"] + +[[package]] +name = "frozenlist" +version = "1.4.1" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"}, + {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"}, + {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = 
"sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"}, + {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"}, + {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"}, + {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"}, + {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"}, + {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"}, + {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"}, + {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"}, + {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"}, + {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"}, + {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"}, + {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"}, + {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"}, + {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"}, + {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"}, + {file = 
"frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"}, + {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"}, + {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"}, + {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"}, + {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"}, + {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"}, + {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"}, + {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"}, + {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"}, + {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"}, + {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"}, + {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"}, +] + +[[package]] +name = "fsspec" +version = "2024.6.1" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, + {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dev = ["pre-commit", "ruff"] +doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", 
"pytest-rerunfailures", "requests"] +test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +tqdm = ["tqdm"] + +[[package]] +name = "greenlet" +version = "3.0.3" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83"}, + {file = "greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f"}, + {file = "greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb"}, + {file = "greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9"}, + {file = "greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379"}, + {file = "greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3"}, + {file = "greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d"}, + {file = "greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728"}, + {file = "greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be"}, + {file = 
"greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230"}, + {file = "greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305"}, + {file = "greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6"}, + {file = "greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2"}, + {file = "greenlet-3.0.3-cp37-cp37m-macosx_11_0_universal2.whl", hash = "sha256:5b51e85cb5ceda94e79d019ed36b35386e8c37d22f07d6a751cb659b180d5274"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:daf3cb43b7cf2ba96d614252ce1684c1bccee6b2183a01328c98d36fcd7d5cb0"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99bf650dc5d69546e076f413a87481ee1d2d09aaaaaca058c9251b6d8c14783f"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dd6e660effd852586b6a8478a1d244b8dc90ab5b1321751d2ea15deb49ed414"}, + {file = 
"greenlet-3.0.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3391d1e16e2a5a1507d83e4a8b100f4ee626e8eca43cf2cadb543de69827c4c"}, + {file = "greenlet-3.0.3-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e1f145462f1fa6e4a4ae3c0f782e580ce44d57c8f2c7aae1b6fa88c0b2efdb41"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1a7191e42732df52cb5f39d3527217e7ab73cae2cb3694d241e18f53d84ea9a7"}, + {file = "greenlet-3.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0448abc479fab28b00cb472d278828b3ccca164531daab4e970a0458786055d6"}, + {file = "greenlet-3.0.3-cp37-cp37m-win32.whl", hash = "sha256:b542be2440edc2d48547b5923c408cbe0fc94afb9f18741faa6ae970dbcb9b6d"}, + {file = "greenlet-3.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:01bc7ea167cf943b4c802068e178bbf70ae2e8c080467070d01bfa02f337ee67"}, + {file = "greenlet-3.0.3-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:1996cb9306c8595335bb157d133daf5cf9f693ef413e7673cb07e3e5871379ca"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc0f794e6ad661e321caa8d2f0a55ce01213c74722587256fb6566049a8b04"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9db1c18f0eaad2f804728c67d6c610778456e3e1cc4ab4bbd5eeb8e6053c6fc"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7170375bcc99f1a2fbd9c306f5be8764eaf3ac6b5cb968862cad4c7057756506"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b66c9c1e7ccabad3a7d037b2bcb740122a7b17a53734b7d72a344ce39882a1b"}, + {file = "greenlet-3.0.3-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:098d86f528c855ead3479afe84b49242e174ed262456c342d70fc7f972bc13c4"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:81bb9c6d52e8321f09c3d165b2a78c680506d9af285bfccbad9fb7ad5a5da3e5"}, + {file = "greenlet-3.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fd096eb7ffef17c456cfa587523c5f92321ae02427ff955bebe9e3c63bc9f0da"}, + {file = "greenlet-3.0.3-cp38-cp38-win32.whl", hash = "sha256:d46677c85c5ba00a9cb6f7a00b2bfa6f812192d2c9f7d9c4f6a55b60216712f3"}, + {file = "greenlet-3.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:419b386f84949bf0e7c73e6032e3457b82a787c1ab4a0e43732898a761cc9dbf"}, + {file = "greenlet-3.0.3-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:da70d4d51c8b306bb7a031d5cff6cc25ad253affe89b70352af5f1cb68e74b53"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:086152f8fbc5955df88382e8a75984e2bb1c892ad2e3c80a2508954e52295257"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73a9fe764d77f87f8ec26a0c85144d6a951a6c438dfe50487df5595c6373eac"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7dcbe92cc99f08c8dd11f930de4d99ef756c3591a5377d1d9cd7dd5e896da71"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1551a8195c0d4a68fac7a4325efac0d541b48def35feb49d803674ac32582f61"}, + {file = "greenlet-3.0.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:64d7675ad83578e3fc149b617a444fab8efdafc9385471f868eb5ff83e446b8b"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b37eef18ea55f2ffd8f00ff8fe7c8d3818abd3e25fb73fae2ca3b672e333a7a6"}, + {file = "greenlet-3.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:77457465d89b8263bca14759d7c1684df840b6811b2499838cc5b040a8b5b113"}, + {file = "greenlet-3.0.3-cp39-cp39-win32.whl", hash = "sha256:57e8974f23e47dac22b83436bdcf23080ade568ce77df33159e019d161ce1d1e"}, + {file = "greenlet-3.0.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:c5ee858cfe08f34712f548c3c363e807e7186f03ad7a5039ebadb29e8c6be067"}, + {file = "greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "gunicorn" +version = "21.2.0" +description = "WSGI HTTP Server for UNIX" +optional = false +python-versions = ">=3.5" +files = [ + {file = "gunicorn-21.2.0-py3-none-any.whl", hash = "sha256:3213aa5e8c24949e792bcacfc176fef362e7aac80b76c56f6b5122bf350722f0"}, + {file = "gunicorn-21.2.0.tar.gz", hash = "sha256:88ec8bff1d634f98e61b9f65bc4bf3cd918a90806c6f5c48bc5603849ec81033"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +eventlet = ["eventlet (>=0.24.1)"] +gevent = ["gevent (>=1.4.0)"] +setproctitle = ["setproctitle"] +tornado = ["tornado (>=0.2)"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "huggingface-hub" +version = "0.23.4" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, + {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=2023.5.0" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy 
(==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +hf-transfer = ["hf-transfer (>=0.1.4)"] +inference = ["aiohttp", "minijinja (>=1.0)"] +quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +tensorflow-testing = ["keras (<3.0)", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors", "torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] + +[[package]] +name = "identify" +version = "2.6.0" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.6.0-py2.py3-none-any.whl", hash = "sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, + {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, +] + +[package.extras] +license = ["ukkonen"] + +[[package]] +name = 
"idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "imageio" +version = "2.34.2" +description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." +optional = true +python-versions = ">=3.8" +files = [ + {file = "imageio-2.34.2-py3-none-any.whl", hash = "sha256:a0bb27ec9d5bab36a9f4835e51b21d2cb099e1f78451441f94687ff3404b79f8"}, + {file = "imageio-2.34.2.tar.gz", hash = "sha256:5c0c0ee8faa018a1c42f649b90395dd4d3bb6187c09053a0cd6f1fdd51bbff5e"}, +] + +[package.dependencies] +numpy = "*" +pillow = ">=8.3.2" + +[package.extras] +all-plugins = ["astropy", "av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +all-plugins-pypy = ["av", "imageio-ffmpeg", "pillow-heif", "psutil", "tifffile"] +build = ["wheel"] +dev = ["black", "flake8", "fsspec[github]", "pytest", "pytest-cov"] +docs = ["numpydoc", "pydata-sphinx-theme", "sphinx (<6)"] +ffmpeg = ["imageio-ffmpeg", "psutil"] +fits = ["astropy"] +full = ["astropy", "av", "black", "flake8", "fsspec[github]", "gdal", "imageio-ffmpeg", "itk", "numpydoc", "pillow-heif", "psutil", "pydata-sphinx-theme", "pytest", "pytest-cov", "sphinx (<6)", "tifffile", "wheel"] +gdal = ["gdal"] +itk = ["itk"] +linting = ["black", "flake8"] +pillow-heif = ["pillow-heif"] +pyav = ["av"] +test = ["fsspec[github]", "pytest", "pytest-cov"] +tifffile = ["tifffile"] + +[[package]] +name = "imageio-ffmpeg" +version = "0.5.1" +description = "FFMPEG wrapper for Python" +optional = true +python-versions = ">=3.5" +files = [ + {file = "imageio-ffmpeg-0.5.1.tar.gz", hash = 
"sha256:0ed7a9b31f560b0c9d929c5291cd430edeb9bed3ce9a497480e536dd4326484c"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:1460e84712b9d06910c1f7bb524096b0341d4b7844cea6c20e099d0a24e795b1"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-manylinux2010_x86_64.whl", hash = "sha256:5289f75c7f755b499653f3209fea4efd1430cba0e39831c381aad2d458f7a316"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7fa9132a291d5eb28c44553550deb40cbdab831f2a614e55360301a6582eb205"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-win32.whl", hash = "sha256:89efe2c79979d8174ba8476deb7f74d74c331caee3fb2b65ba2883bec0737625"}, + {file = "imageio_ffmpeg-0.5.1-py3-none-win_amd64.whl", hash = "sha256:1521e79e253bedbdd36a547e0cbd94a025ba0b558e17f08fea687d805a0e4698"}, +] + +[package.dependencies] +setuptools = "*" + +[[package]] +name = "importlib-metadata" +version = "8.0.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f"}, + {file = "importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = 
"iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "intel-openmp" +version = "2021.4.0" +description = "Intel OpenMP* Runtime Library" +optional = true +python-versions = "*" +files = [ + {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"}, + {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"}, + {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"}, + {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"}, + {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"}, +] + +[[package]] +name = "isort" +version = "5.12.0" +description = "A Python utility / library to sort Python imports." +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, + {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, +] + +[package.extras] +colors = ["colorama (>=0.4.3)"] +pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] +plugins = ["setuptools"] +requirements-deprecated-finder = ["pip-api", "pipreqs"] + +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = true +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + +[[package]] +name = "litellm" +version = "1.40.8" +description = "Library to easily interface with LLM API providers" +optional = false +python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" +files = [ + {file = "litellm-1.40.8-py3-none-any.whl", hash = "sha256:cd0c313423dad49224696c45ac02c574abcaed6666c597543c2318b3521f4320"}, + {file = "litellm-1.40.8.tar.gz", hash = "sha256:8878d2437ac50bcc6f39ded1729e2113eb5fee645fcebcd32fc241c529a21c00"}, +] + +[package.dependencies] +aiohttp = "*" +click = "*" +importlib-metadata = ">=6.8.0" +jinja2 = ">=3.1.2,<4.0.0" +openai = ">=1.27.0" +python-dotenv = ">=0.2.0" +requests = ">=2.31.0,<3.0.0" +tiktoken = ">=0.4.0" +tokenizers = "*" + +[package.extras] +extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] +proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "python-multipart 
(>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] + +[[package]] +name = "lxml" +version = "5.2.2" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +optional = false +python-versions = ">=3.6" +files = [ + {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:364d03207f3e603922d0d3932ef363d55bbf48e3647395765f9bfcbdf6d23632"}, + {file = "lxml-5.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50127c186f191b8917ea2fb8b206fbebe87fd414a6084d15568c27d0a21d60db"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74e4f025ef3db1c6da4460dd27c118d8cd136d0391da4e387a15e48e5c975147"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:981a06a3076997adf7c743dcd0d7a0415582661e2517c7d961493572e909aa1d"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aef5474d913d3b05e613906ba4090433c515e13ea49c837aca18bde190853dff"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e275ea572389e41e8b039ac076a46cb87ee6b8542df3fff26f5baab43713bca"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5b65529bb2f21ac7861a0e94fdbf5dc0daab41497d18223b46ee8515e5ad297"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bcc98f911f10278d1daf14b87d65325851a1d29153caaf146877ec37031d5f36"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:b47633251727c8fe279f34025844b3b3a3e40cd1b198356d003aa146258d13a2"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:fbc9d316552f9ef7bba39f4edfad4a734d3d6f93341232a9dddadec4f15d425f"}, + {file = "lxml-5.2.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:13e69be35391ce72712184f69000cda04fc89689429179bc4c0ae5f0b7a8c21b"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3b6a30a9ab040b3f545b697cb3adbf3696c05a3a68aad172e3fd7ca73ab3c835"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:a233bb68625a85126ac9f1fc66d24337d6e8a0f9207b688eec2e7c880f012ec0"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:dfa7c241073d8f2b8e8dbc7803c434f57dbb83ae2a3d7892dd068d99e96efe2c"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a7aca7964ac4bb07680d5c9d63b9d7028cace3e2d43175cb50bba8c5ad33316"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae4073a60ab98529ab8a72ebf429f2a8cc612619a8c04e08bed27450d52103c0"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ffb2be176fed4457e445fe540617f0252a72a8bc56208fd65a690fdb1f57660b"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e290d79a4107d7d794634ce3e985b9ae4f920380a813717adf61804904dc4393"}, + {file = "lxml-5.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96e85aa09274955bb6bd483eaf5b12abadade01010478154b0ec70284c1b1526"}, + {file = "lxml-5.2.2-cp310-cp310-win32.whl", hash = "sha256:f956196ef61369f1685d14dad80611488d8dc1ef00be57c0c5a03064005b0f30"}, + {file = "lxml-5.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:875a3f90d7eb5c5d77e529080d95140eacb3c6d13ad5b616ee8095447b1d22e7"}, + {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:45f9494613160d0405682f9eee781c7e6d1bf45f819654eb249f8f46a2c22545"}, + {file = "lxml-5.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0b3f2df149efb242cee2ffdeb6674b7f30d23c9a7af26595099afaf46ef4e88"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d28cb356f119a437cc58a13f8135ab8a4c8ece18159eb9194b0d269ec4e28083"}, + {file = 
"lxml-5.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657a972f46bbefdbba2d4f14413c0d079f9ae243bd68193cb5061b9732fa54c1"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b74b9ea10063efb77a965a8d5f4182806fbf59ed068b3c3fd6f30d2ac7bee734"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07542787f86112d46d07d4f3c4e7c760282011b354d012dc4141cc12a68cef5f"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:303f540ad2dddd35b92415b74b900c749ec2010e703ab3bfd6660979d01fd4ed"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2eb2227ce1ff998faf0cd7fe85bbf086aa41dfc5af3b1d80867ecfe75fb68df3"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:1d8a701774dfc42a2f0b8ccdfe7dbc140500d1049e0632a611985d943fcf12df"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:56793b7a1a091a7c286b5f4aa1fe4ae5d1446fe742d00cdf2ffb1077865db10d"}, + {file = "lxml-5.2.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eb00b549b13bd6d884c863554566095bf6fa9c3cecb2e7b399c4bc7904cb33b5"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a2569a1f15ae6c8c64108a2cd2b4a858fc1e13d25846be0666fc144715e32ab"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:8cf85a6e40ff1f37fe0f25719aadf443686b1ac7652593dc53c7ef9b8492b115"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d237ba6664b8e60fd90b8549a149a74fcc675272e0e95539a00522e4ca688b04"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0b3f5016e00ae7630a4b83d0868fca1e3d494c78a75b1c7252606a3a1c5fc2ad"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:23441e2b5339bc54dc949e9e675fa35efe858108404ef9aa92f0456929ef6fe8"}, + {file = 
"lxml-5.2.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:2fb0ba3e8566548d6c8e7dd82a8229ff47bd8fb8c2da237607ac8e5a1b8312e5"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:79d1fb9252e7e2cfe4de6e9a6610c7cbb99b9708e2c3e29057f487de5a9eaefa"}, + {file = "lxml-5.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6dcc3d17eac1df7859ae01202e9bb11ffa8c98949dcbeb1069c8b9a75917e01b"}, + {file = "lxml-5.2.2-cp311-cp311-win32.whl", hash = "sha256:4c30a2f83677876465f44c018830f608fa3c6a8a466eb223535035fbc16f3438"}, + {file = "lxml-5.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:49095a38eb333aaf44c06052fd2ec3b8f23e19747ca7ec6f6c954ffea6dbf7be"}, + {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7429e7faa1a60cad26ae4227f4dd0459efde239e494c7312624ce228e04f6391"}, + {file = "lxml-5.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:50ccb5d355961c0f12f6cf24b7187dbabd5433f29e15147a67995474f27d1776"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc911208b18842a3a57266d8e51fc3cfaccee90a5351b92079beed912a7914c2"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33ce9e786753743159799fdf8e92a5da351158c4bfb6f2db0bf31e7892a1feb5"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec87c44f619380878bd49ca109669c9f221d9ae6883a5bcb3616785fa8f94c97"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08ea0f606808354eb8f2dfaac095963cb25d9d28e27edcc375d7b30ab01abbf6"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75a9632f1d4f698b2e6e2e1ada40e71f369b15d69baddb8968dcc8e683839b18"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74da9f97daec6928567b48c90ea2c82a106b2d500f397eeb8941e47d30b1ca85"}, + {file = 
"lxml-5.2.2-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:0969e92af09c5687d769731e3f39ed62427cc72176cebb54b7a9d52cc4fa3b73"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:9164361769b6ca7769079f4d426a41df6164879f7f3568be9086e15baca61466"}, + {file = "lxml-5.2.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d26a618ae1766279f2660aca0081b2220aca6bd1aa06b2cf73f07383faf48927"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab67ed772c584b7ef2379797bf14b82df9aa5f7438c5b9a09624dd834c1c1aaf"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3d1e35572a56941b32c239774d7e9ad724074d37f90c7a7d499ab98761bd80cf"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:8268cbcd48c5375f46e000adb1390572c98879eb4f77910c6053d25cc3ac2c67"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e282aedd63c639c07c3857097fc0e236f984ceb4089a8b284da1c526491e3f3d"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfdc2bfe69e9adf0df4915949c22a25b39d175d599bf98e7ddf620a13678585"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4aefd911793b5d2d7a921233a54c90329bf3d4a6817dc465f12ffdfe4fc7b8fe"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:8b8df03a9e995b6211dafa63b32f9d405881518ff1ddd775db4e7b98fb545e1c"}, + {file = "lxml-5.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f11ae142f3a322d44513de1018b50f474f8f736bc3cd91d969f464b5bfef8836"}, + {file = "lxml-5.2.2-cp312-cp312-win32.whl", hash = "sha256:16a8326e51fcdffc886294c1e70b11ddccec836516a343f9ed0f82aac043c24a"}, + {file = "lxml-5.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:bbc4b80af581e18568ff07f6395c02114d05f4865c2812a1f02f2eaecf0bfd48"}, + {file = "lxml-5.2.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e3d9d13603410b72787579769469af730c38f2f25505573a5888a94b62b920f8"}, + {file = 
"lxml-5.2.2-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38b67afb0a06b8575948641c1d6d68e41b83a3abeae2ca9eed2ac59892b36706"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c689d0d5381f56de7bd6966a4541bff6e08bf8d3871bbd89a0c6ab18aa699573"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:cf2a978c795b54c539f47964ec05e35c05bd045db5ca1e8366988c7f2fe6b3ce"}, + {file = "lxml-5.2.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:739e36ef7412b2bd940f75b278749106e6d025e40027c0b94a17ef7968d55d56"}, + {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d8bbcd21769594dbba9c37d3c819e2d5847656ca99c747ddb31ac1701d0c0ed9"}, + {file = "lxml-5.2.2-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:2304d3c93f2258ccf2cf7a6ba8c761d76ef84948d87bf9664e14d203da2cd264"}, + {file = "lxml-5.2.2-cp36-cp36m-win32.whl", hash = "sha256:02437fb7308386867c8b7b0e5bc4cd4b04548b1c5d089ffb8e7b31009b961dc3"}, + {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, + {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = 
"sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, + {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, + {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, + {file = "lxml-5.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7ed07b3062b055d7a7f9d6557a251cc655eed0b3152b76de619516621c56f5d3"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f60fdd125d85bf9c279ffb8e94c78c51b3b6a37711464e1f5f31078b45002421"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a7e24cb69ee5f32e003f50e016d5fde438010c1022c96738b04fc2423e61706"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23cfafd56887eaed93d07bc4547abd5e09d837a002b791e9767765492a75883f"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19b4e485cd07b7d83e3fe3b72132e7df70bfac22b14fe4bf7a23822c3a35bff5"}, + {file = "lxml-5.2.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = 
"sha256:7ce7ad8abebe737ad6143d9d3bf94b88b93365ea30a5b81f6877ec9c0dee0a48"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e49b052b768bb74f58c7dda4e0bdf7b79d43a9204ca584ffe1fb48a6f3c84c66"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d14a0d029a4e176795cef99c056d58067c06195e0c7e2dbb293bf95c08f772a3"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:be49ad33819d7dcc28a309b86d4ed98e1a65f3075c6acd3cd4fe32103235222b"}, + {file = "lxml-5.2.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:a6d17e0370d2516d5bb9062c7b4cb731cff921fc875644c3d751ad857ba9c5b1"}, + {file = "lxml-5.2.2-cp38-cp38-win32.whl", hash = "sha256:5b8c041b6265e08eac8a724b74b655404070b636a8dd6d7a13c3adc07882ef30"}, + {file = "lxml-5.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:f61efaf4bed1cc0860e567d2ecb2363974d414f7f1f124b1df368bbf183453a6"}, + {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:fb91819461b1b56d06fa4bcf86617fac795f6a99d12239fb0c68dbeba41a0a30"}, + {file = "lxml-5.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d4ed0c7cbecde7194cd3228c044e86bf73e30a23505af852857c09c24e77ec5d"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54401c77a63cc7d6dc4b4e173bb484f28a5607f3df71484709fe037c92d4f0ed"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:625e3ef310e7fa3a761d48ca7ea1f9d8718a32b1542e727d584d82f4453d5eeb"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:519895c99c815a1a24a926d5b60627ce5ea48e9f639a5cd328bda0515ea0f10c"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c7079d5eb1c1315a858bbf180000757db8ad904a89476653232db835c3114001"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:343ab62e9ca78094f2306aefed67dcfad61c4683f87eee48ff2fd74902447726"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:cd9e78285da6c9ba2d5c769628f43ef66d96ac3085e59b10ad4f3707980710d3"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:546cf886f6242dff9ec206331209db9c8e1643ae642dea5fdbecae2453cb50fd"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:02f6a8eb6512fdc2fd4ca10a49c341c4e109aa6e9448cc4859af5b949622715a"}, + {file = "lxml-5.2.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:339ee4a4704bc724757cd5dd9dc8cf4d00980f5d3e6e06d5847c1b594ace68ab"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0a028b61a2e357ace98b1615fc03f76eb517cc028993964fe08ad514b1e8892d"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:f90e552ecbad426eab352e7b2933091f2be77115bb16f09f78404861c8322981"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:d83e2d94b69bf31ead2fa45f0acdef0757fa0458a129734f59f67f3d2eb7ef32"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a02d3c48f9bb1e10c7788d92c0c7db6f2002d024ab6e74d6f45ae33e3d0288a3"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:6d68ce8e7b2075390e8ac1e1d3a99e8b6372c694bbe612632606d1d546794207"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:453d037e09a5176d92ec0fd282e934ed26d806331a8b70ab431a81e2fbabf56d"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:3b019d4ee84b683342af793b56bb35034bd749e4cbdd3d33f7d1107790f8c472"}, + {file = "lxml-5.2.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cb3942960f0beb9f46e2a71a3aca220d1ca32feb5a398656be934320804c0df9"}, + {file = "lxml-5.2.2-cp39-cp39-win32.whl", hash = "sha256:ac6540c9fff6e3813d29d0403ee7a81897f1d8ecc09a8ff84d2eea70ede1cdbf"}, + {file = "lxml-5.2.2-cp39-cp39-win_amd64.whl", hash = 
"sha256:610b5c77428a50269f38a534057444c249976433f40f53e3b47e68349cca1425"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b537bd04d7ccd7c6350cdaaaad911f6312cbd61e6e6045542f781c7f8b2e99d2"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4820c02195d6dfb7b8508ff276752f6b2ff8b64ae5d13ebe02e7667e035000b9"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a09f6184f17a80897172863a655467da2b11151ec98ba8d7af89f17bf63dae"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76acba4c66c47d27c8365e7c10b3d8016a7da83d3191d053a58382311a8bf4e1"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b128092c927eaf485928cec0c28f6b8bead277e28acf56800e972aa2c2abd7a2"}, + {file = "lxml-5.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ae791f6bd43305aade8c0e22f816b34f3b72b6c820477aab4d18473a37e8090b"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a2f6a1bc2460e643785a2cde17293bd7a8f990884b822f7bca47bee0a82fc66b"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e8d351ff44c1638cb6e980623d517abd9f580d2e53bfcd18d8941c052a5a009"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec4bd9133420c5c52d562469c754f27c5c9e36ee06abc169612c959bd7dbb07"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:55ce6b6d803890bd3cc89975fca9de1dff39729b43b73cb15ddd933b8bc20484"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ab6a358d1286498d80fe67bd3d69fcbc7d1359b45b41e74c4a26964ca99c3f8"}, + {file = "lxml-5.2.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:06668e39e1f3c065349c51ac27ae430719d7806c026fec462e5693b08b95696b"}, + {file = 
"lxml-5.2.2-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9cd5323344d8ebb9fb5e96da5de5ad4ebab993bbf51674259dbe9d7a18049525"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89feb82ca055af0fe797a2323ec9043b26bc371365847dbe83c7fd2e2f181c34"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e481bba1e11ba585fb06db666bfc23dbe181dbafc7b25776156120bf12e0d5a6"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d6c6ea6a11ca0ff9cd0390b885984ed31157c168565702959c25e2191674a14"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:3d98de734abee23e61f6b8c2e08a88453ada7d6486dc7cdc82922a03968928db"}, + {file = "lxml-5.2.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:69ab77a1373f1e7563e0fb5a29a8440367dec051da6c7405333699d07444f511"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:34e17913c431f5ae01d8658dbf792fdc457073dcdfbb31dc0cc6ab256e664a8d"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f8757b03208c3f50097761be2dea0aba02e94f0dc7023ed73a7bb14ff11eb0"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a520b4f9974b0a0a6ed73c2154de57cdfd0c8800f4f15ab2b73238ffed0b36e"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5e097646944b66207023bc3c634827de858aebc226d5d4d6d16f0b77566ea182"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b5e4ef22ff25bfd4ede5f8fb30f7b24446345f3e79d9b7455aef2836437bc38a"}, + {file = "lxml-5.2.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ff69a9a0b4b17d78170c73abe2ab12084bdf1691550c5629ad1fe7849433f324"}, + {file = "lxml-5.2.2.tar.gz", hash = "sha256:bb2dc4898180bea79863d5487e5f9c7c34297414bad54bcd0f0852aee9cfdb87"}, +] + +[package.extras] +cssselect = 
["cssselect (>=0.7)"] +html-clean = ["lxml-html-clean"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=3.0.10)"] + +[[package]] +name = "markdown" +version = "3.6" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.8" +files = [ + {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"}, + {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = 
"MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = 
"sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "mkl" +version = "2021.4.0" +description = "Intel® oneAPI Math Kernel Library" +optional = true +python-versions = "*" +files = [ + {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"}, + {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"}, + {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"}, + {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"}, + {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"}, +] + +[package.dependencies] +intel-openmp = "==2021.*" +tbb = "==2021.*" + +[[package]] +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" +optional = false +python-versions = "*" +files = [ + {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, + {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, +] + +[[package]] +name = "moviepy" +version = "1.0.3" +description = "Video editing with Python" +optional = true +python-versions = "*" +files = [ + {file = "moviepy-1.0.3.tar.gz", hash = "sha256:2884e35d1788077db3ff89e763c5ba7bfddbd7ae9108c9bc809e7ba58fa433f5"}, +] + +[package.dependencies] +decorator = ">=4.0.2,<5.0" +imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""} +imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""} +numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""} 
+proglog = "<=1.0.0" +requests = ">=2.8.1,<3.0" +tqdm = ">=4.11.2,<5.0" + +[package.extras] +doc = ["Sphinx (>=1.5.2,<2.0)", "numpydoc (>=0.6.0,<1.0)", "pygame (>=1.9.3,<2.0)", "sphinx_rtd_theme (>=0.1.10b0,<1.0)"] +optional = ["matplotlib (>=2.0.0,<3.0)", "opencv-python (>=3.0,<4.0)", "scikit-image (>=0.13.0,<1.0)", "scikit-learn", "scipy (>=0.19.0,<1.5)", "youtube_dl"] +test = ["coverage (<5.0)", "coveralls (>=1.1,<2.0)", "pytest (>=3.0.0,<4.0)", "pytest-cov (>=2.5.1,<3.0)", "requests (>=2.8.1,<3.0)"] + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = true +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + +[[package]] +name = "multidict" +version = "6.0.5" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, + {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, + {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash 
= "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, + {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, + {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, + {file = 
"multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, + {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, + {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, + {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, + {file = 
"multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, + {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, + {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = 
"sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, + {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, + {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, + {file = 
"multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, + {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, + {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, + {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, + {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, +] + +[[package]] +name = "mypy" +version = "1.10.1" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e36f229acfe250dc660790840916eb49726c928e8ce10fbdf90715090fe4ae02"}, + {file = "mypy-1.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:51a46974340baaa4145363b9e051812a2446cf583dfaeba124af966fa44593f7"}, + {file = "mypy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:901c89c2d67bba57aaaca91ccdb659aa3a312de67f23b9dfb059727cce2e2e0a"}, + {file = "mypy-1.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0cd62192a4a32b77ceb31272d9e74d23cd88c8060c34d1d3622db3267679a5d9"}, + {file = "mypy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:a2cbc68cb9e943ac0814c13e2452d2046c2f2b23ff0278e26599224cf164e78d"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bd6f629b67bb43dc0d9211ee98b96d8dabc97b1ad38b9b25f5e4c4d7569a0c6a"}, + {file = "mypy-1.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a1bbb3a6f5ff319d2b9d40b4080d46cd639abe3516d5a62c070cf0114a457d84"}, + {file = "mypy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8edd4e9bbbc9d7b79502eb9592cab808585516ae1bcc1446eb9122656c6066f"}, + {file = "mypy-1.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6166a88b15f1759f94a46fa474c7b1b05d134b1b61fca627dd7335454cc9aa6b"}, + {file = "mypy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:5bb9cd11c01c8606a9d0b83ffa91d0b236a0e91bc4126d9ba9ce62906ada868e"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d8681909f7b44d0b7b86e653ca152d6dff0eb5eb41694e163c6092124f8246d7"}, + {file = "mypy-1.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:378c03f53f10bbdd55ca94e46ec3ba255279706a6aacaecac52ad248f98205d3"}, + {file = "mypy-1.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bacf8f3a3d7d849f40ca6caea5c055122efe70e81480c8328ad29c55c69e93e"}, + {file = "mypy-1.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:701b5f71413f1e9855566a34d6e9d12624e9e0a8818a5704d74d6b0402e66c04"}, + {file = "mypy-1.10.1-cp312-cp312-win_amd64.whl", hash = "sha256:3c4c2992f6ea46ff7fce0072642cfb62af7a2484efe69017ed8b095f7b39ef31"}, + {file = "mypy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:604282c886497645ffb87b8f35a57ec773a4a2721161e709a4422c1636ddde5c"}, + {file = 
"mypy-1.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37fd87cab83f09842653f08de066ee68f1182b9b5282e4634cdb4b407266bade"}, + {file = "mypy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8addf6313777dbb92e9564c5d32ec122bf2c6c39d683ea64de6a1fd98b90fe37"}, + {file = "mypy-1.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5cc3ca0a244eb9a5249c7c583ad9a7e881aa5d7b73c35652296ddcdb33b2b9c7"}, + {file = "mypy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:1b3a2ffce52cc4dbaeee4df762f20a2905aa171ef157b82192f2e2f368eec05d"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe85ed6836165d52ae8b88f99527d3d1b2362e0cb90b005409b8bed90e9059b3"}, + {file = "mypy-1.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2ae450d60d7d020d67ab440c6e3fae375809988119817214440033f26ddf7bf"}, + {file = "mypy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be84c06e6abd72f960ba9a71561c14137a583093ffcf9bbfaf5e613d63fa531"}, + {file = "mypy-1.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2189ff1e39db399f08205e22a797383613ce1cb0cb3b13d8bcf0170e45b96cc3"}, + {file = "mypy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:97a131ee36ac37ce9581f4220311247ab6cba896b4395b9c87af0675a13a755f"}, + {file = "mypy-1.10.1-py3-none-any.whl", hash = "sha256:71d8ac0b906354ebda8ef1673e5fde785936ac1f29ff6987c7483cfbd5a4235a"}, + {file = "mypy-1.10.1.tar.gz", hash = "sha256:1f8f492d7db9e3593ef42d4f115f04e556130f2819ad33ab84551403e97dd4c0"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "neo4j" +version = "5.22.0" +description = "Neo4j Bolt driver for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "neo4j-5.22.0-py3-none-any.whl", hash = "sha256:8146755ac93d33cee594975172c15cffb68ab158e3358bb7a73b5e0b83367006"}, + {file = "neo4j-5.22.0.tar.gz", hash = "sha256:199677239ce11fcecabce9962af515df271c1313ba110e737dd7d668fccd0c04"}, +] + +[package.dependencies] +pytz = "*" + +[package.extras] +numpy = ["numpy (>=1.7.0,<2.0.0)"] +pandas = ["numpy (>=1.7.0,<2.0.0)", "pandas (>=1.1.0,<3.0.0)"] +pyarrow = ["pyarrow (>=1.0.0)"] + +[[package]] +name = "nest-asyncio" +version = "1.6.0" +description = "Patch asyncio to allow nested event loops" +optional = false +python-versions = ">=3.5" +files = [ + {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, + {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, +] + +[[package]] +name = "networkx" +version = "3.2.1" +description = "Python package for creating and manipulating graphs and networks" +optional = true +python-versions = ">=3.9" +files = [ + {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, + {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, +] + +[package.extras] +default = ["matplotlib (>=3.5)", "numpy (>=1.22)", "pandas (>=1.4)", "scipy (>=1.9,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.4)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = 
["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.14)", "sphinx (>=7)", "sphinx-gallery (>=0.14)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + +[[package]] +name = "numpy" +version = "1.26.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, 
+ {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = 
"numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, +] + +[[package]] +name = "nvidia-cublas-cu12" +version = "12.1.3.1" +description = "CUBLAS native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, + {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.1.105" +description = "CUDA profiling tools runtime libs." +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, + {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.1.105" +description = "NVRTC native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, + {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.1.105" +description = "CUDA Runtime native Libraries" +optional = true +python-versions = ">=3" +files = [ + {file = 
"nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, + {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "8.9.2.26" +description = "cuDNN runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.0.2.54" +description = "CUFFT native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, + {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.2.106" +description = "CURAND native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, + {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.4.5.107" +description = "CUDA solver native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, + {file = 
"nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, +] + +[package.dependencies] +nvidia-cublas-cu12 = "*" +nvidia-cusparse-cu12 = "*" +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.1.0.106" +description = "CUSPARSE native runtime libraries" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, + {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, +] + +[package.dependencies] +nvidia-nvjitlink-cu12 = "*" + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.20.5" +description = "NVIDIA Collective Communication Library (NCCL) Runtime" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, + {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.5.82" +description = "Nvidia JIT LTO Library" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f9b37bc5c8cf7509665cb6ada5aaa0ce65618f2332b7d3e78e9790511f111212"}, + {file = "nvidia_nvjitlink_cu12-12.5.82-py3-none-win_amd64.whl", hash = "sha256:e782564d705ff0bf61ac3e1bf730166da66dd2fe9012f111ede5fc49b64ae697"}, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.1.105" +description = "NVIDIA Tools Extension" +optional = true +python-versions = ">=3" +files = [ + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash 
= "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, + {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, +] + +[[package]] +name = "ollama" +version = "0.2.1" +description = "The official Python client for Ollama." +optional = false +python-versions = "<4.0,>=3.8" +files = [ + {file = "ollama-0.2.1-py3-none-any.whl", hash = "sha256:b6e2414921c94f573a903d1069d682ba2fb2607070ea9e19ca4a7872f2a460ec"}, + {file = "ollama-0.2.1.tar.gz", hash = "sha256:fa316baa9a81eac3beb4affb0a17deb3008fdd6ed05b123c26306cfbe4c349b6"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" + +[[package]] +name = "openai" +version = "1.35.13" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.7.1" +files = [ + {file = "openai-1.35.13-py3-none-any.whl", hash = "sha256:36ec3e93e0d1f243f69be85c89b9221a471c3e450dfd9df16c9829e3cdf63e60"}, + {file = "openai-1.35.13.tar.gz", hash = "sha256:c684f3945608baf7d2dcc0ef3ee6f3e27e4c66f21076df0b47be45d57e6ae6e4"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.7,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] + +[[package]] +name = "opencv-python" +version = "4.10.0.84" +description = "Wrapper package for OpenCV python bindings." 
+optional = true +python-versions = ">=3.6" +files = [ + {file = "opencv-python-4.10.0.84.tar.gz", hash = "sha256:72d234e4582e9658ffea8e9cae5b63d488ad06994ef12d81dc303b17472f3526"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fc182f8f4cda51b45f01c64e4cbedfc2f00aff799debebc305d8d0210c43f251"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:71e575744f1d23f79741450254660442785f45a0797212852ee5199ef12eed98"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a332b50488e2dda866a6c5573ee192fe3583239fb26ff2f7f9ceb0bc119ea6"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ace140fc6d647fbe1c692bcb2abce768973491222c067c131d80957c595b71f"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-win32.whl", hash = "sha256:2db02bb7e50b703f0a2d50c50ced72e95c574e1e5a0bb35a8a86d0b35c98c236"}, + {file = "opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl", hash = "sha256:32dbbd94c26f611dc5cc6979e6b7aa1f55a64d6b463cc1dcd3c95505a63e48fe"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != 
\"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, +] + +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, +] + +[package.dependencies] +et-xmlfile = "*" + +[[package]] +name = "packaging" +version = "24.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "pgvector" +version = "0.1.8" +description = "pgvector support for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pgvector-0.1.8-py2.py3-none-any.whl", hash = "sha256:99dce3a6580ef73863edb9b8441937671f4e1a09383826e6b0838176cd441a96"}, +] + +[package.dependencies] +numpy = "*" + +[[package]] +name = "pillow" +version = "10.4.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = 
"pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = 
"pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = 
"pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "posthog" +version = "3.5.0" +description = "Integrate PostHog into any python application." 
+optional = false +python-versions = "*" +files = [ + {file = "posthog-3.5.0-py2.py3-none-any.whl", hash = "sha256:3c672be7ba6f95d555ea207d4486c171d06657eb34b3ce25eb043bfe7b6b5b76"}, + {file = "posthog-3.5.0.tar.gz", hash = "sha256:8f7e3b2c6e8714d0c0c542a2109b83a7549f63b7113a133ab2763a89245ef2ef"}, +] + +[package.dependencies] +backoff = ">=1.10.0" +monotonic = ">=1.5" +python-dateutil = ">2.1" +requests = ">=2.7,<3.0" +six = ">=1.5" + +[package.extras] +dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] +sentry = ["django", "sentry-sdk"] +test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] + +[[package]] +name = "pre-commit" +version = "2.21.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pre_commit-2.21.0-py2.py3-none-any.whl", hash = "sha256:e2f91727039fc39a92f58a588a25b87f936de6567eed4f0e673e0507edc75bad"}, + {file = "pre_commit-2.21.0.tar.gz", hash = "sha256:31ef31af7e474a8d8995027fefdfcf509b5c913ff31f2015b4ec4beb26a6f658"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + +[[package]] +name = "proglog" +version = "0.1.10" +description = "Log and progress bar manager for console, notebooks, web..." 
+optional = true +python-versions = "*" +files = [ + {file = "proglog-0.1.10-py3-none-any.whl", hash = "sha256:19d5da037e8c813da480b741e3fa71fb1ac0a5b02bf21c41577c7f327485ec50"}, + {file = "proglog-0.1.10.tar.gz", hash = "sha256:658c28c9c82e4caeb2f25f488fff9ceace22f8d69b15d0c1c86d64275e4ddab4"}, +] + +[package.dependencies] +tqdm = "*" + +[[package]] +name = "psycopg2-binary" +version = "2.9.9" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "psycopg2-binary-2.9.9.tar.gz", hash = "sha256:7f01846810177d829c7692f1f5ada8096762d9172af1b1a28d4ab5b77c923c1c"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c2470da5418b76232f02a2fcd2229537bb2d5a7096674ce61859c3229f2eb202"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c6af2a6d4b7ee9615cbb162b0738f6e1fd1f5c3eda7e5da17861eacf4c717ea7"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:75723c3c0fbbf34350b46a3199eb50638ab22a0228f93fb472ef4d9becc2382b"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83791a65b51ad6ee6cf0845634859d69a038ea9b03d7b26e703f94c7e93dbcf9"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0ef4854e82c09e84cc63084a9e4ccd6d9b154f1dbdd283efb92ecd0b5e2b8c84"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed1184ab8f113e8d660ce49a56390ca181f2981066acc27cf637d5c1e10ce46e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2997c458c690ec2bc6b0b7ecbafd02b029b7b4283078d3b32a852a7ce3ddd98"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b58b4710c7f4161b5e9dcbe73bb7c62d65670a87df7bcce9e1faaad43e715245"}, + {file = 
"psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0c009475ee389757e6e34611d75f6e4f05f0cf5ebb76c6037508318e1a1e0d7e"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8dbf6d1bc73f1d04ec1734bae3b4fb0ee3cb2a493d35ede9badbeb901fb40f6f"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win32.whl", hash = "sha256:3f78fd71c4f43a13d342be74ebbc0666fe1f555b8837eb113cb7416856c79682"}, + {file = "psycopg2_binary-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:876801744b0dee379e4e3c38b76fc89f88834bb15bf92ee07d94acd06ec890a0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ee825e70b1a209475622f7f7b776785bd68f34af6e7a46e2e42f27b659b5bc26"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1ea665f8ce695bcc37a90ee52de7a7980be5161375d42a0b6c6abedbf0d81f0f"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:143072318f793f53819048fdfe30c321890af0c3ec7cb1dfc9cc87aa88241de2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c332c8d69fb64979ebf76613c66b985414927a40f8defa16cf1bc028b7b0a7b0"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7fc5a5acafb7d6ccca13bfa8c90f8c51f13d8fb87d95656d3950f0158d3ce53"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977646e05232579d2e7b9c59e21dbe5261f403a88417f6a6512e70d3f8a046be"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b6356793b84728d9d50ead16ab43c187673831e9d4019013f1402c41b1db9b27"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bc7bb56d04601d443f24094e9e31ae6deec9ccb23581f75343feebaf30423359"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = 
"sha256:77853062a2c45be16fd6b8d6de2a99278ee1d985a7bd8b103e97e41c034006d2"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:78151aa3ec21dccd5cdef6c74c3e73386dcdfaf19bced944169697d7ac7482fc"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, + {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e6f98446430fdf41bd36d4faa6cb409f5140c1c2cf58ce0bbdaf16af7d3f119"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c77e3d1862452565875eb31bdb45ac62502feabbd53429fdc39a1cc341d681ba"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, + 
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8359bf4791968c5a78c56103702000105501adb557f3cf772b2c207284273984"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275ff571376626195ab95a746e6a04c7df8ea34638b99fc11160de91f2fef503"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9b5571d33660d5009a8b3c25dc1db560206e2d2f89d3df1cb32d72c0d117d52"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:420f9bbf47a02616e8554e825208cb947969451978dceb77f95ad09c37791dae"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:4154ad09dac630a0f13f37b583eae260c6aa885d67dfbccb5b02c33f31a6d420"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a148c5d507bb9b4f2030a2025c545fccb0e1ef317393eaba42e7eabd28eb6041"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win32.whl", hash = 
"sha256:68fc1f1ba168724771e38bee37d940d2865cb0f562380a1fb1ffb428b75cb692"}, + {file = "psycopg2_binary-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:281309265596e388ef483250db3640e5f414168c5a67e9c665cafce9492eda2f"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:60989127da422b74a04345096c10d416c2b41bd7bf2a380eb541059e4e999980"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:246b123cc54bb5361588acc54218c8c9fb73068bf227a4a531d8ed56fa3ca7d6"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34eccd14566f8fe14b2b95bb13b11572f7c7d5c36da61caf414d23b91fcc5d94"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18d0ef97766055fec15b5de2c06dd8e7654705ce3e5e5eed3b6651a1d2a9a152"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3f82c171b4ccd83bbaf35aa05e44e690113bd4f3b7b6cc54d2219b132f3ae55"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ead20f7913a9c1e894aebe47cccf9dc834e1618b7aa96155d2091a626e59c972"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ca49a8119c6cbd77375ae303b0cfd8c11f011abbbd64601167ecca18a87e7cdd"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:323ba25b92454adb36fa425dc5cf6f8f19f78948cbad2e7bc6cdf7b0d7982e59"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:1236ed0952fbd919c100bc839eaa4a39ebc397ed1c08a97fc45fee2a595aa1b3"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:729177eaf0aefca0994ce4cffe96ad3c75e377c7b6f4efa59ebf003b6d398716"}, + {file = "psycopg2_binary-2.9.9-cp38-cp38-win32.whl", hash = "sha256:804d99b24ad523a1fe18cc707bf741670332f7c7412e9d49cb5eab67e886b9b5"}, + {file = 
"psycopg2_binary-2.9.9-cp38-cp38-win_amd64.whl", hash = "sha256:a6cdcc3ede532f4a4b96000b6362099591ab4a3e913d70bcbac2b56c872446f7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:72dffbd8b4194858d0941062a9766f8297e8868e1dd07a7b36212aaa90f49472"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:30dcc86377618a4c8f3b72418df92e77be4254d8f89f14b8e8f57d6d43603c0f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31a34c508c003a4347d389a9e6fcc2307cc2150eb516462a7a17512130de109e"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15208be1c50b99203fe88d15695f22a5bed95ab3f84354c494bcb1d08557df67"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1873aade94b74715be2246321c8650cabf5a0d098a95bab81145ffffa4c13876"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a58c98a7e9c021f357348867f537017057c2ed7f77337fd914d0bedb35dace7"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4686818798f9194d03c9129a4d9a702d9e113a89cb03bffe08c6cf799e053291"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebdc36bea43063116f0486869652cb2ed7032dbc59fbcb4445c4862b5c1ecf7f"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:ca08decd2697fdea0aea364b370b1249d47336aec935f87b8bbfd7da5b2ee9c1"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ac05fb791acf5e1a3e39402641827780fe44d27e72567a000412c648a85ba860"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win32.whl", hash = "sha256:9dba73be7305b399924709b91682299794887cbbd88e38226ed9f6712eabee90"}, + {file = "psycopg2_binary-2.9.9-cp39-cp39-win_amd64.whl", hash = 
"sha256:f7ae5d65ccfbebdfa761585228eb4d0df3a8b15cfb53bd953e713e09fbb12957"}, +] + +[[package]] +name = "pycodestyle" +version = "2.11.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, + {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, +] + +[[package]] +name = "pydantic" +version = "2.8.2" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"}, + {file = "pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.20.1" +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.20.1" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"}, + {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3"}, + {file = 
"pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98"}, + {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a"}, + {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840"}, + {file = "pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250"}, + {file = "pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c"}, + {file = "pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312"}, + {file = "pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43"}, + {file = 
"pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1"}, + {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b"}, + {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27"}, + {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b"}, + {file = "pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a"}, + {file = "pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2"}, + {file = "pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231"}, + {file = "pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52"}, + {file = 
"pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e"}, + {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24"}, + {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1"}, + {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd"}, + {file = "pydantic_core-2.20.1-cp312-none-win32.whl", hash = "sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688"}, + {file = "pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d"}, + {file = "pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686"}, + {file = "pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19"}, + {file = 
"pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c"}, + {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83"}, + {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203"}, + {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0"}, + {file = "pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e"}, + {file = "pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20"}, + {file = "pydantic_core-2.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91"}, + {file = "pydantic_core-2.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f"}, + {file = 
"pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598"}, + {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd"}, + {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa"}, + {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987"}, + {file = "pydantic_core-2.20.1-cp38-none-win32.whl", hash = "sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a"}, + {file = "pydantic_core-2.20.1-cp38-none-win_amd64.whl", hash = "sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434"}, + {file = "pydantic_core-2.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c"}, + {file = "pydantic_core-2.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a"}, + {file = 
"pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006"}, + {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1"}, + {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09"}, + {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab"}, + {file = "pydantic_core-2.20.1-cp39-none-win32.whl", hash = "sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2"}, + {file = "pydantic_core-2.20.1-cp39-none-win_amd64.whl", hash = "sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482"}, + {file = 
"pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99"}, + {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a"}, + {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7"}, + {file = "pydantic_core-2.20.1.tar.gz", 
hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pyflakes" +version = "3.1.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, + {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, +] + +[[package]] +name = "pypdf" +version = "4.2.0" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypdf-4.2.0-py3-none-any.whl", hash = "sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1"}, + {file = "pypdf-4.2.0.tar.gz", hash = "sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1"}, +] + +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +crypto = ["PyCryptodome", "cryptography"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +image = ["Pillow (>=8.0.0)"] + +[[package]] +name = "pytest" +version = "8.2.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, + {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} 
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2.0" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.23.7" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, + {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + +[[package]] +name = "pytest-cov" +version = "5.0.0" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + +[[package]] +name = "pytest-dependency" +version = "0.6.0" +description = "Manage dependencies of tests" +optional = false +python-versions = ">=3.4" +files = [ + {file = "pytest-dependency-0.6.0.tar.gz", hash = "sha256:934b0e6a39d95995062c193f7eaeed8a8ffa06ff1bcef4b62b0dc74a708bacc1"}, +] + +[package.dependencies] +pytest = ">=3.7.0" +setuptools = "*" + +[[package]] +name = "pytest-mock" +version = "3.14.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, +] + +[package.dependencies] +pytest = ">=6.2.5" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-docx" 
+version = "1.1.2" +description = "Create, read, and update Microsoft Word .docx files." +optional = false +python-versions = ">=3.7" +files = [ + {file = "python_docx-1.1.2-py3-none-any.whl", hash = "sha256:08c20d6058916fb19853fcf080f7f42b6270d89eac9fa5f8c15f691c0017fabe"}, + {file = "python_docx-1.1.2.tar.gz", hash = "sha256:0cf1f22e95b9002addca7948e16f2cd7acdfd498047f1941ca5d293db7762efd"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +typing-extensions = ">=4.9.0" + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-multipart" +version = "0.0.9" +description = "A streaming multipart parser for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python_multipart-0.0.9-py3-none-any.whl", hash = "sha256:97ca7b8ea7b05f977dc3849c3ba99d51689822fab725c3703af7c866a0c2b215"}, + {file = "python_multipart-0.0.9.tar.gz", hash = "sha256:03f54688c663f1b7977105f021043b0793151e4cb1c1a9d4a11fc13d622c4026"}, +] + +[package.extras] +dev = ["atomicwrites (==1.4.1)", "attrs (==23.2.0)", "coverage (==7.4.1)", "hatch", "invoke (==2.2.0)", "more-itertools (==10.2.0)", "pbr (==6.0.0)", "pluggy (==1.4.0)", "py (==1.11.0)", "pytest (==8.0.0)", "pytest-cov (==4.1.0)", "pytest-timeout (==2.2.0)", "pyyaml (==6.0.1)", "ruff (==0.2.1)"] + +[[package]] +name = "python-pptx" +version = "0.6.23" +description = "Generate and manipulate Open XML PowerPoint (.pptx) files" +optional = false +python-versions = "*" +files = [ + {file = "python-pptx-0.6.23.tar.gz", hash = 
"sha256:587497ff28e779ab18dbb074f6d4052893c85dedc95ed75df319364f331fedee"}, + {file = "python_pptx-0.6.23-py3-none-any.whl", hash = "sha256:dd0527194627a2b7cc05f3ba23ecaa2d9a0d5ac9b6193a28ed1b7a716f4217d4"}, +] + +[package.dependencies] +lxml = ">=3.1.0" +Pillow = ">=3.3.2" +XlsxWriter = ">=0.5.7" + +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash 
= "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = 
"sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "redis" +version = "5.0.7" +description = "Python client for Redis database and key-value store" +optional = false +python-versions = ">=3.7" +files = [ + {file = "redis-5.0.7-py3-none-any.whl", hash = "sha256:0e479e24da960c690be5d9b96d21f7b918a98c0cf49af3b6fafaa0753f93a0db"}, + {file = "redis-5.0.7.tar.gz", hash = "sha256:8f611490b93c8109b50adc317b31bfd84fff31def3475b92e7e80bf39f48175b"}, +] + +[package.dependencies] +async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""} + +[package.extras] +hiredis = ["hiredis (>=1.0.0)"] +ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)"] + +[[package]] +name = "regex" +version = "2024.5.15" +description = "Alternative regular expression module, to replace re." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, + {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, + {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = 
"sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, + {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, + {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = "sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, + {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, + {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, + {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, + {file = 
"regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, + {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, + {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, + {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, + {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, + {file = 
"regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, + {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, + {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, + {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, + {file = "regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, + {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, + {file = 
"regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, + {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, + {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, + {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, + {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, + {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, + {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = 
"sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, + {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, + {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, + {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash = "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, + {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, +] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "safetensors" +version = "0.4.3" +description = "" +optional = true +python-versions = ">=3.7" +files = [ + {file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"}, + {file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a5319ef409e7f88686a46607cbc3c428271069d8b770076feaf913664a07ac"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:fb9c65bd82f9ef3ce4970dc19ee86be5f6f93d032159acf35e663c6bea02b237"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edb5698a7bc282089f64c96c477846950358a46ede85a1c040e0230344fdde10"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efcc860be094b8d19ac61b452ec635c7acb9afa77beb218b1d7784c6d41fe8ad"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d88b33980222085dd6001ae2cad87c6068e0991d4f5ccf44975d216db3b57376"}, + {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5fc6775529fb9f0ce2266edd3e5d3f10aab068e49f765e11f6f2a63b5367021d"}, + {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9c6ad011c1b4e3acff058d6b090f1da8e55a332fbf84695cf3100c649cc452d1"}, + {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c496c5401c1b9c46d41a7688e8ff5b0310a3b9bae31ce0f0ae870e1ea2b8caf"}, + {file = "safetensors-0.4.3-cp310-none-win32.whl", hash = "sha256:38e2a8666178224a51cca61d3cb4c88704f696eac8f72a49a598a93bbd8a4af9"}, + {file = "safetensors-0.4.3-cp310-none-win_amd64.whl", hash = "sha256:393e6e391467d1b2b829c77e47d726f3b9b93630e6a045b1d1fca67dc78bf632"}, + {file = "safetensors-0.4.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:22f3b5d65e440cec0de8edaa672efa888030802e11c09b3d6203bff60ebff05a"}, + {file = "safetensors-0.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c4fa560ebd4522adddb71dcd25d09bf211b5634003f015a4b815b7647d62ebe"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9afd5358719f1b2cf425fad638fc3c887997d6782da317096877e5b15b2ce93"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d8c5093206ef4b198600ae484230402af6713dab1bd5b8e231905d754022bec7"}, + {file = 
"safetensors-0.4.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0b2104df1579d6ba9052c0ae0e3137c9698b2d85b0645507e6fd1813b70931a"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8cf18888606dad030455d18f6c381720e57fc6a4170ee1966adb7ebc98d4d6a3"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0bf4f9d6323d9f86eef5567eabd88f070691cf031d4c0df27a40d3b4aaee755b"}, + {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:585c9ae13a205807b63bef8a37994f30c917ff800ab8a1ca9c9b5d73024f97ee"}, + {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faefeb3b81bdfb4e5a55b9bbdf3d8d8753f65506e1d67d03f5c851a6c87150e9"}, + {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:befdf0167ad626f22f6aac6163477fcefa342224a22f11fdd05abb3995c1783c"}, + {file = "safetensors-0.4.3-cp311-none-win32.whl", hash = "sha256:a7cef55929dcbef24af3eb40bedec35d82c3c2fa46338bb13ecf3c5720af8a61"}, + {file = "safetensors-0.4.3-cp311-none-win_amd64.whl", hash = "sha256:840b7ac0eff5633e1d053cc9db12fdf56b566e9403b4950b2dc85393d9b88d67"}, + {file = "safetensors-0.4.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:22d21760dc6ebae42e9c058d75aa9907d9f35e38f896e3c69ba0e7b213033856"}, + {file = "safetensors-0.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d22c1a10dff3f64d0d68abb8298a3fd88ccff79f408a3e15b3e7f637ef5c980"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1648568667f820b8c48317c7006221dc40aced1869908c187f493838a1362bc"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:446e9fe52c051aeab12aac63d1017e0f68a02a92a027b901c4f8e931b24e5397"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:fef5d70683643618244a4f5221053567ca3e77c2531e42ad48ae05fae909f542"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a1f4430cc0c9d6afa01214a4b3919d0a029637df8e09675ceef1ca3f0dfa0df"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d603846a8585b9432a0fd415db1d4c57c0f860eb4aea21f92559ff9902bae4d"}, + {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a844cdb5d7cbc22f5f16c7e2a0271170750763c4db08381b7f696dbd2c78a361"}, + {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:88887f69f7a00cf02b954cdc3034ffb383b2303bc0ab481d4716e2da51ddc10e"}, + {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee463219d9ec6c2be1d331ab13a8e0cd50d2f32240a81d498266d77d07b7e71e"}, + {file = "safetensors-0.4.3-cp312-none-win32.whl", hash = "sha256:d0dd4a1db09db2dba0f94d15addc7e7cd3a7b0d393aa4c7518c39ae7374623c3"}, + {file = "safetensors-0.4.3-cp312-none-win_amd64.whl", hash = "sha256:d14d30c25897b2bf19b6fb5ff7e26cc40006ad53fd4a88244fdf26517d852dd7"}, + {file = "safetensors-0.4.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d1456f814655b224d4bf6e7915c51ce74e389b413be791203092b7ff78c936dd"}, + {file = "safetensors-0.4.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:455d538aa1aae4a8b279344a08136d3f16334247907b18a5c3c7fa88ef0d3c46"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf476bca34e1340ee3294ef13e2c625833f83d096cfdf69a5342475602004f95"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02ef3a24face643456020536591fbd3c717c5abaa2737ec428ccbbc86dffa7a4"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7de32d0d34b6623bb56ca278f90db081f85fb9c5d327e3c18fd23ac64f465768"}, + {file = 
"safetensors-0.4.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a0deb16a1d3ea90c244ceb42d2c6c276059616be21a19ac7101aa97da448faf"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c59d51f182c729f47e841510b70b967b0752039f79f1de23bcdd86462a9b09ee"}, + {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f598b713cc1a4eb31d3b3203557ac308acf21c8f41104cdd74bf640c6e538e3"}, + {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5757e4688f20df083e233b47de43845d1adb7e17b6cf7da5f8444416fc53828d"}, + {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fe746d03ed8d193674a26105e4f0fe6c726f5bb602ffc695b409eaf02f04763d"}, + {file = "safetensors-0.4.3-cp37-none-win32.whl", hash = "sha256:0d5ffc6a80f715c30af253e0e288ad1cd97a3d0086c9c87995e5093ebc075e50"}, + {file = "safetensors-0.4.3-cp37-none-win_amd64.whl", hash = "sha256:a11c374eb63a9c16c5ed146457241182f310902bd2a9c18255781bb832b6748b"}, + {file = "safetensors-0.4.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1e31be7945f66be23f4ec1682bb47faa3df34cb89fc68527de6554d3c4258a4"}, + {file = "safetensors-0.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:03a4447c784917c9bf01d8f2ac5080bc15c41692202cd5f406afba16629e84d6"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d244bcafeb1bc06d47cfee71727e775bca88a8efda77a13e7306aae3813fa7e4"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53c4879b9c6bd7cd25d114ee0ef95420e2812e676314300624594940a8d6a91f"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74707624b81f1b7f2b93f5619d4a9f00934d5948005a03f2c1845ffbfff42212"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:0d52c958dc210265157573f81d34adf54e255bc2b59ded6218500c9b15a750eb"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f9568f380f513a60139971169c4a358b8731509cc19112369902eddb33faa4d"}, + {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d9cd8e1560dfc514b6d7859247dc6a86ad2f83151a62c577428d5102d872721"}, + {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:89f9f17b0dacb913ed87d57afbc8aad85ea42c1085bd5de2f20d83d13e9fc4b2"}, + {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1139eb436fd201c133d03c81209d39ac57e129f5e74e34bb9ab60f8d9b726270"}, + {file = "safetensors-0.4.3-cp38-none-win32.whl", hash = "sha256:d9c289f140a9ae4853fc2236a2ffc9a9f2d5eae0cb673167e0f1b8c18c0961ac"}, + {file = "safetensors-0.4.3-cp38-none-win_amd64.whl", hash = "sha256:622afd28968ef3e9786562d352659a37de4481a4070f4ebac883f98c5836563e"}, + {file = "safetensors-0.4.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8651c7299cbd8b4161a36cd6a322fa07d39cd23535b144d02f1c1972d0c62f3c"}, + {file = "safetensors-0.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e375d975159ac534c7161269de24ddcd490df2157b55c1a6eeace6cbb56903f0"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:084fc436e317f83f7071fc6a62ca1c513b2103db325cd09952914b50f51cf78f"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:41a727a7f5e6ad9f1db6951adee21bbdadc632363d79dc434876369a17de6ad6"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7dbbde64b6c534548696808a0e01276d28ea5773bc9a2dfb97a88cd3dffe3df"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bbae3b4b9d997971431c346edbfe6e41e98424a097860ee872721e176040a893"}, + {file = 
"safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01e4b22e3284cd866edeabe4f4d896229495da457229408d2e1e4810c5187121"}, + {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dd37306546b58d3043eb044c8103a02792cc024b51d1dd16bd3dd1f334cb3ed"}, + {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8815b5e1dac85fc534a97fd339e12404db557878c090f90442247e87c8aeaea"}, + {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e011cc162503c19f4b1fd63dfcddf73739c7a243a17dac09b78e57a00983ab35"}, + {file = "safetensors-0.4.3-cp39-none-win32.whl", hash = "sha256:01feb3089e5932d7e662eda77c3ecc389f97c0883c4a12b5cfdc32b589a811c3"}, + {file = "safetensors-0.4.3-cp39-none-win_amd64.whl", hash = "sha256:3f9cdca09052f585e62328c1c2923c70f46814715c795be65f0b93f57ec98a02"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1b89381517891a7bb7d1405d828b2bf5d75528299f8231e9346b8eba092227f9"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cd6fff9e56df398abc5866b19a32124815b656613c1c5ec0f9350906fd798aac"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:840caf38d86aa7014fe37ade5d0d84e23dcfbc798b8078015831996ecbc206a3"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9650713b2cfa9537a2baf7dd9fee458b24a0aaaa6cafcea8bdd5fb2b8efdc34"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4119532cd10dba04b423e0f86aecb96cfa5a602238c0aa012f70c3a40c44b50"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e066e8861eef6387b7c772344d1fe1f9a72800e04ee9a54239d460c400c72aab"}, + {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = 
"sha256:90964917f5b0fa0fa07e9a051fbef100250c04d150b7026ccbf87a34a54012e0"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c41e1893d1206aa7054029681778d9a58b3529d4c807002c156d58426c225173"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae7613a119a71a497d012ccc83775c308b9c1dab454806291427f84397d852fd"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9bac020faba7f5dc481e881b14b6425265feabb5bfc552551d21189c0eddc3"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:420a98f593ff9930f5822560d14c395ccbc57342ddff3b463bc0b3d6b1951550"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f5e6883af9a68c0028f70a4c19d5a6ab6238a379be36ad300a22318316c00cb0"}, + {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:cdd0a3b5da66e7f377474599814dbf5cbf135ff059cc73694de129b58a5e8a2c"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9bfb92f82574d9e58401d79c70c716985dc049b635fef6eecbb024c79b2c46ad"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3615a96dd2dcc30eb66d82bc76cda2565f4f7bfa89fcb0e31ba3cea8a1a9ecbb"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868ad1b6fc41209ab6bd12f63923e8baeb1a086814cb2e81a65ed3d497e0cf8f"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7ffba80aa49bd09195145a7fd233a7781173b422eeb995096f2b30591639517"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0acbe31340ab150423347e5b9cc595867d814244ac14218932a5cf1dd38eb39"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:19bbdf95de2cf64f25cd614c5236c8b06eb2cfa47cbf64311f4b5d80224623a3"}, + {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b852e47eb08475c2c1bd8131207b405793bfc20d6f45aff893d3baaad449ed14"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5d07cbca5b99babb692d76d8151bec46f461f8ad8daafbfd96b2fca40cadae65"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ab6527a20586d94291c96e00a668fa03f86189b8a9defa2cdd34a1a01acc7d5"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02318f01e332cc23ffb4f6716e05a492c5f18b1d13e343c49265149396284a44"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec4b52ce9a396260eb9731eb6aea41a7320de22ed73a1042c2230af0212758ce"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:018b691383026a2436a22b648873ed11444a364324e7088b99cd2503dd828400"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:309b10dbcab63269ecbf0e2ca10ce59223bb756ca5d431ce9c9eeabd446569da"}, + {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b277482120df46e27a58082df06a15aebda4481e30a1c21eefd0921ae7e03f65"}, + {file = "safetensors-0.4.3.tar.gz", hash = "sha256:2f85fc50c4e07a21e95c24e07460fe6f7e2859d0ce88092838352b798ce711c2"}, +] + +[package.extras] +all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] +dev = ["safetensors[all]"] +jax = ["flax (>=0.6.3)", "jax (>=0.3.25)", "jaxlib (>=0.3.25)", "safetensors[numpy]"] +mlx = ["mlx (>=0.0.9)"] +numpy = ["numpy (>=1.21.6)"] +paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"] +pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"] 
+quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"] +tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] +testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] +torch = ["safetensors[numpy]", "torch (>=1.10)"] + +[[package]] +name = "scikit-learn" +version = "1.5.1" +description = "A set of python modules for machine learning and data mining" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:781586c414f8cc58e71da4f3d7af311e0505a683e112f2f62919e3019abd3745"}, + {file = "scikit_learn-1.5.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f5b213bc29cc30a89a3130393b0e39c847a15d769d6e59539cd86b75d276b1a7"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ff4ba34c2abff5ec59c803ed1d97d61b036f659a17f55be102679e88f926fac"}, + {file = "scikit_learn-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:161808750c267b77b4a9603cf9c93579c7a74ba8486b1336034c2f1579546d21"}, + {file = "scikit_learn-1.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:10e49170691514a94bb2e03787aa921b82dbc507a4ea1f20fd95557862c98dc1"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:154297ee43c0b83af12464adeab378dee2d0a700ccd03979e2b821e7dd7cc1c2"}, + {file = "scikit_learn-1.5.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b5e865e9bd59396220de49cb4a57b17016256637c61b4c5cc81aaf16bc123bbe"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909144d50f367a513cee6090873ae582dba019cb3fca063b38054fa42704c3a4"}, + {file = "scikit_learn-1.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:689b6f74b2c880276e365fe84fe4f1befd6a774f016339c65655eaff12e10cbf"}, + {file = "scikit_learn-1.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:9a07f90846313a7639af6a019d849ff72baadfa4c74c778821ae0fad07b7275b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5944ce1faada31c55fb2ba20a5346b88e36811aab504ccafb9f0339e9f780395"}, + {file = "scikit_learn-1.5.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:0828673c5b520e879f2af6a9e99eee0eefea69a2188be1ca68a6121b809055c1"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:508907e5f81390e16d754e8815f7497e52139162fd69c4fdbd2dfa5d6cc88915"}, + {file = "scikit_learn-1.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97625f217c5c0c5d0505fa2af28ae424bd37949bb2f16ace3ff5f2f81fb4498b"}, + {file = "scikit_learn-1.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:da3f404e9e284d2b0a157e1b56b6566a34eb2798205cba35a211df3296ab7a74"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:88e0672c7ac21eb149d409c74cc29f1d611d5158175846e7a9c2427bd12b3956"}, + {file = "scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:7b073a27797a283187a4ef4ee149959defc350b46cbf63a84d8514fe16b69855"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b59e3e62d2be870e5c74af4e793293753565c7383ae82943b83383fdcf5cc5c1"}, + {file = "scikit_learn-1.5.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd8d3a19d4bd6dc5a7d4f358c8c3a60934dc058f363c34c0ac1e9e12a31421d"}, + {file = "scikit_learn-1.5.1-cp39-cp39-win_amd64.whl", hash = "sha256:5f57428de0c900a98389c4a433d4a3cf89de979b3aa24d1c1d251802aa15e44d"}, + {file = "scikit_learn-1.5.1.tar.gz", hash = "sha256:0ea5d40c0e3951df445721927448755d3fe1d80833b0b7308ebff5d2a45e6414"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" 
+threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.13.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = true +python-versions = ">=3.9" +files = [ + {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, + {file = 
"scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, + {file = "scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, + {file = "scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, + {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, + {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, + {file = 
"scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, + {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, + {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, + {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, + {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "sentence-transformers" +version = "2.7.0" +description = "Multilingual 
text embeddings" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "sentence_transformers-2.7.0-py3-none-any.whl", hash = "sha256:6a7276b05a95931581bbfa4ba49d780b2cf6904fa4a171ec7fd66c343f761c98"}, + {file = "sentence_transformers-2.7.0.tar.gz", hash = "sha256:2f7df99d1c021dded471ed2d079e9d1e4fc8e30ecb06f957be060511b36f24ea"}, +] + +[package.dependencies] +huggingface-hub = ">=0.15.1" +numpy = "*" +Pillow = "*" +scikit-learn = "*" +scipy = "*" +torch = ">=1.11.0" +tqdm = "*" +transformers = ">=4.34.0,<5.0.0" + +[package.extras] +dev = ["pre-commit", "pytest", "ruff (>=0.3.0)"] + +[[package]] +name = "setuptools" +version = "70.3.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, + {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" 
+version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[[package]] +name = "sqlalchemy" +version = "2.0.31" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f2a213c1b699d3f5768a7272de720387ae0122f1becf0901ed6eaa1abd1baf6c"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9fea3d0884e82d1e33226935dac990b967bef21315cbcc894605db3441347443"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3ad7f221d8a69d32d197e5968d798217a4feebe30144986af71ada8c548e9fa"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9f2bee229715b6366f86a95d497c347c22ddffa2c7c96143b59a2aa5cc9eebbc"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cd5b94d4819c0c89280b7c6109c7b788a576084bf0a480ae17c227b0bc41e109"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:750900a471d39a7eeba57580b11983030517a1f512c2cb287d5ad0fcf3aebd58"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-win32.whl", hash = "sha256:7bd112be780928c7f493c1a192cd8c5fc2a2a7b52b790bc5a84203fb4381c6be"}, + {file = "SQLAlchemy-2.0.31-cp310-cp310-win_amd64.whl", hash = "sha256:5a48ac4d359f058474fadc2115f78a5cdac9988d4f99eae44917f36aa1476327"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f68470edd70c3ac3b6cd5c2a22a8daf18415203ca1b036aaeb9b0fb6f54e8298"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e2c38c2a4c5c634fe6c3c58a789712719fa1bf9b9d6ff5ebfce9a9e5b89c1ca"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd15026f77420eb2b324dcb93551ad9c5f22fab2c150c286ef1dc1160f110203"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2196208432deebdfe3b22185d46b08f00ac9d7b01284e168c212919891289396"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:352b2770097f41bff6029b280c0e03b217c2dcaddc40726f8f53ed58d8a85da4"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56d51ae825d20d604583f82c9527d285e9e6d14f9a5516463d9705dab20c3740"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-win32.whl", hash = "sha256:6e2622844551945db81c26a02f27d94145b561f9d4b0c39ce7bfd2fda5776dac"}, + {file = "SQLAlchemy-2.0.31-cp311-cp311-win_amd64.whl", hash = "sha256:ccaf1b0c90435b6e430f5dd30a5aede4764942a695552eb3a4ab74ed63c5b8d3"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:3b74570d99126992d4b0f91fb87c586a574a5872651185de8297c6f90055ae42"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f77c4f042ad493cb8595e2f503c7a4fe44cd7bd59c7582fd6d78d7e7b8ec52c"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd1591329333daf94467e699e11015d9c944f44c94d2091f4ac493ced0119449"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74afabeeff415e35525bf7a4ecdab015f00e06456166a2eba7590e49f8db940e"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b9c01990d9015df2c6f818aa8f4297d42ee71c9502026bb074e713d496e26b67"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:66f63278db425838b3c2b1c596654b31939427016ba030e951b292e32b99553e"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-win32.whl", hash = "sha256:0b0f658414ee4e4b8cbcd4a9bb0fd743c5eeb81fc858ca517217a8013d282c96"}, + {file = "SQLAlchemy-2.0.31-cp312-cp312-win_amd64.whl", hash = "sha256:fa4b1af3e619b5b0b435e333f3967612db06351217c58bfb50cee5f003db2a5a"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f43e93057cf52a227eda401251c72b6fbe4756f35fa6bfebb5d73b86881e59b0"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d337bf94052856d1b330d5fcad44582a30c532a2463776e1651bd3294ee7e58b"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c06fb43a51ccdff3b4006aafee9fcf15f63f23c580675f7734245ceb6b6a9e05"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:b6e22630e89f0e8c12332b2b4c282cb01cf4da0d26795b7eae16702a608e7ca1"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:79a40771363c5e9f3a77f0e28b3302801db08040928146e6808b5b7a40749c88"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-win32.whl", 
hash = "sha256:501ff052229cb79dd4c49c402f6cb03b5a40ae4771efc8bb2bfac9f6c3d3508f"}, + {file = "SQLAlchemy-2.0.31-cp37-cp37m-win_amd64.whl", hash = "sha256:597fec37c382a5442ffd471f66ce12d07d91b281fd474289356b1a0041bdf31d"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dc6d69f8829712a4fd799d2ac8d79bdeff651c2301b081fd5d3fe697bd5b4ab9"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:23b9fbb2f5dd9e630db70fbe47d963c7779e9c81830869bd7d137c2dc1ad05fb"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a21c97efcbb9f255d5c12a96ae14da873233597dfd00a3a0c4ce5b3e5e79704"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26a6a9837589c42b16693cf7bf836f5d42218f44d198f9343dd71d3164ceeeac"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:dc251477eae03c20fae8db9c1c23ea2ebc47331bcd73927cdcaecd02af98d3c3"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:2fd17e3bb8058359fa61248c52c7b09a97cf3c820e54207a50af529876451808"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-win32.whl", hash = "sha256:c76c81c52e1e08f12f4b6a07af2b96b9b15ea67ccdd40ae17019f1c373faa227"}, + {file = "SQLAlchemy-2.0.31-cp38-cp38-win_amd64.whl", hash = "sha256:4b600e9a212ed59355813becbcf282cfda5c93678e15c25a0ef896b354423238"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b6cf796d9fcc9b37011d3f9936189b3c8074a02a4ed0c0fbbc126772c31a6d4"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:78fe11dbe37d92667c2c6e74379f75746dc947ee505555a0197cfba9a6d4f1a4"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fc47dc6185a83c8100b37acda27658fe4dbd33b7d5e7324111f6521008ab4fe"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8a41514c1a779e2aa9a19f67aaadeb5cbddf0b2b508843fcd7bafdf4c6864005"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:afb6dde6c11ea4525318e279cd93c8734b795ac8bb5dda0eedd9ebaca7fa23f1"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3f9faef422cfbb8fd53716cd14ba95e2ef655400235c3dfad1b5f467ba179c8c"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-win32.whl", hash = "sha256:fc6b14e8602f59c6ba893980bea96571dd0ed83d8ebb9c4479d9ed5425d562e9"}, + {file = "SQLAlchemy-2.0.31-cp39-cp39-win_amd64.whl", hash = "sha256:3cb8a66b167b033ec72c3812ffc8441d4e9f5f78f5e31e54dcd4c90a4ca5bebc"}, + {file = "SQLAlchemy-2.0.31-py3-none-any.whl", hash = "sha256:69f3e3c08867a8e4856e92d7afb618b95cdee18e0bc1647b77599722c9a28911"}, + {file = "SQLAlchemy-2.0.31.tar.gz", hash = "sha256:b607489dd4a54de56984a0c7656247504bd5523d9d0ba799aef59d4add009484"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] 
+postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + +[[package]] +name = "starlette" +version = "0.36.3" +description = "The little ASGI library that shines." +optional = false +python-versions = ">=3.8" +files = [ + {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, + {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] + +[[package]] +name = "sympy" +version = "1.13.0" +description = "Computer algebra system (CAS) in Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "sympy-1.13.0-py3-none-any.whl", hash = "sha256:6b0b32a4673fb91bd3cac3b55406c8e01d53ae22780be467301cc452f6680c92"}, + {file = "sympy-1.13.0.tar.gz", hash = "sha256:3b6af8f4d008b9a1a6a4268b335b984b23835f26d1d60b0526ebc71d48a25f57"}, +] + +[package.dependencies] +mpmath = ">=1.1.0,<1.4" + +[package.extras] +dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] + +[[package]] +name = "tbb" +version = "2021.13.0" +description = "Intel® oneAPI Threading Building Blocks (oneTBB)" +optional = true +python-versions = "*" +files = [ + {file = "tbb-2021.13.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:a2567725329639519d46d92a2634cf61e76601dac2f777a05686fea546c4fe4f"}, + {file = "tbb-2021.13.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:aaf667e92849adb012b8874d6393282afc318aca4407fc62f912ee30a22da46a"}, + {file = "tbb-2021.13.0-py3-none-win32.whl", hash 
= "sha256:6669d26703e9943f6164c6407bd4a237a45007e79b8d3832fe6999576eaaa9ef"}, + {file = "tbb-2021.13.0-py3-none-win_amd64.whl", hash = "sha256:3528a53e4bbe64b07a6112b4c5a00ff3c61924ee46c9c68e004a1ac7ad1f09c3"}, +] + +[[package]] +name = "termcolor" +version = "2.4.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.8" +files = [ + {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, + {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = true +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + +[[package]] +name = "tiktoken" +version = "0.5.2" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8c4e654282ef05ec1bd06ead22141a9a1687991cef2c6a81bdd1284301abc71d"}, + {file = "tiktoken-0.5.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b3134aa24319f42c27718c6967f3c1916a38a715a0fa73d33717ba121231307"}, + {file = "tiktoken-0.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6092e6e77730929c8c6a51bb0d7cfdf1b72b63c4d033d6258d1f2ee81052e9e5"}, + {file = "tiktoken-0.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72ad8ae2a747622efae75837abba59be6c15a8f31b4ac3c6156bc56ec7a8e631"}, + {file = "tiktoken-0.5.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:51cba7c8711afa0b885445f0637f0fcc366740798c40b981f08c5f984e02c9d1"}, + {file = "tiktoken-0.5.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3d8c7d2c9313f8e92e987d585ee2ba0f7c40a0de84f4805b093b634f792124f5"}, + {file = "tiktoken-0.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:692eca18c5fd8d1e0dde767f895c17686faaa102f37640e884eecb6854e7cca7"}, + {file = "tiktoken-0.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:138d173abbf1ec75863ad68ca289d4da30caa3245f3c8d4bfb274c4d629a2f77"}, + {file = "tiktoken-0.5.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7388fdd684690973fdc450b47dfd24d7f0cbe658f58a576169baef5ae4658607"}, + {file = "tiktoken-0.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a114391790113bcff670c70c24e166a841f7ea8f47ee2fe0e71e08b49d0bf2d4"}, + {file = "tiktoken-0.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca96f001e69f6859dd52926d950cfcc610480e920e576183497ab954e645e6ac"}, + {file = "tiktoken-0.5.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:15fed1dd88e30dfadcdd8e53a8927f04e1f6f81ad08a5ca824858a593ab476c7"}, + {file = "tiktoken-0.5.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:93f8e692db5756f7ea8cb0cfca34638316dcf0841fb8469de8ed7f6a015ba0b0"}, + {file = "tiktoken-0.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:bcae1c4c92df2ffc4fe9f475bf8148dbb0ee2404743168bbeb9dcc4b79dc1fdd"}, + {file = "tiktoken-0.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b76a1e17d4eb4357d00f0622d9a48ffbb23401dcf36f9716d9bd9c8e79d421aa"}, + {file = "tiktoken-0.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:01d8b171bb5df4035580bc26d4f5339a6fd58d06f069091899d4a798ea279d3e"}, + {file = "tiktoken-0.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42adf7d4fb1ed8de6e0ff2e794a6a15005f056a0d83d22d1d6755a39bffd9e7f"}, + {file = "tiktoken-0.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4c3f894dbe0adb44609f3d532b8ea10820d61fdcb288b325a458dfc60fefb7db"}, + {file = "tiktoken-0.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:58ccfddb4e62f0df974e8f7e34a667981d9bb553a811256e617731bf1d007d19"}, + {file = "tiktoken-0.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58902a8bad2de4268c2a701f1c844d22bfa3cbcc485b10e8e3e28a050179330b"}, + {file = "tiktoken-0.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:5e39257826d0647fcac403d8fa0a474b30d02ec8ffc012cfaf13083e9b5e82c5"}, + {file = "tiktoken-0.5.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8bde3b0fbf09a23072d39c1ede0e0821f759b4fa254a5f00078909158e90ae1f"}, + {file = "tiktoken-0.5.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2ddee082dcf1231ccf3a591d234935e6acf3e82ee28521fe99af9630bc8d2a60"}, + {file = "tiktoken-0.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35c057a6a4e777b5966a7540481a75a31429fc1cb4c9da87b71c8b75b5143037"}, + {file = "tiktoken-0.5.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c4a049b87e28f1dc60509f8eb7790bc8d11f9a70d99b9dd18dfdd81a084ffe6"}, + {file = "tiktoken-0.5.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5bf5ce759089f4f6521ea6ed89d8f988f7b396e9f4afb503b945f5c949c6bec2"}, + {file = "tiktoken-0.5.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0c964f554af1a96884e01188f480dad3fc224c4bbcf7af75d4b74c4b74ae0125"}, + {file = "tiktoken-0.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:368dd5726d2e8788e47ea04f32e20f72a2012a8a67af5b0b003d1e059f1d30a3"}, + {file = "tiktoken-0.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a2deef9115b8cd55536c0a02c0203512f8deb2447f41585e6d929a0b878a0dd2"}, + {file = "tiktoken-0.5.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2ed7d380195affbf886e2f8b92b14edfe13f4768ff5fc8de315adba5b773815e"}, + {file = "tiktoken-0.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c76fce01309c8140ffe15eb34ded2bb94789614b7d1d09e206838fc173776a18"}, + {file = "tiktoken-0.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60a5654d6a2e2d152637dd9a880b4482267dfc8a86ccf3ab1cec31a8c76bfae8"}, + {file = "tiktoken-0.5.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:41d4d3228e051b779245a8ddd21d4336f8975563e92375662f42d05a19bdff41"}, + {file = "tiktoken-0.5.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c1cdec2c92fcde8c17a50814b525ae6a88e8e5b02030dc120b76e11db93f13"}, + {file = "tiktoken-0.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:84ddb36faedb448a50b246e13d1b6ee3437f60b7169b723a4b2abad75e914f3e"}, + {file = "tiktoken-0.5.2.tar.gz", hash = "sha256:f54c581f134a8ea96ce2023ab221d4d4d81ab614efa0b2fbce926387deb56c80"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + +[[package]] +name = "tokenizers" +version = "0.19.1" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.19.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:952078130b3d101e05ecfc7fc3640282d74ed26bcf691400f872563fca15ac97"}, + {file = "tokenizers-0.19.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82c8b8063de6c0468f08e82c4e198763e7b97aabfe573fd4cf7b33930ca4df77"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:f03727225feaf340ceeb7e00604825addef622d551cbd46b7b775ac834c1e1c4"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:453e4422efdfc9c6b6bf2eae00d5e323f263fff62b29a8c9cd526c5003f3f642"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02e81bf089ebf0e7f4df34fa0207519f07e66d8491d963618252f2e0729e0b46"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b07c538ba956843833fee1190cf769c60dc62e1cf934ed50d77d5502194d63b1"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e28cab1582e0eec38b1f38c1c1fb2e56bce5dc180acb1724574fc5f47da2a4fe"}, + {file = "tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b01afb7193d47439f091cd8f070a1ced347ad0f9144952a30a41836902fe09e"}, + {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7fb297edec6c6841ab2e4e8f357209519188e4a59b557ea4fafcf4691d1b4c98"}, + {file = "tokenizers-0.19.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e8a3dd055e515df7054378dc9d6fa8c8c34e1f32777fb9a01fea81496b3f9d3"}, + {file = "tokenizers-0.19.1-cp310-none-win32.whl", hash = "sha256:7ff898780a155ea053f5d934925f3902be2ed1f4d916461e1a93019cc7250837"}, + {file = "tokenizers-0.19.1-cp310-none-win_amd64.whl", hash = "sha256:bea6f9947e9419c2fda21ae6c32871e3d398cba549b93f4a65a2d369662d9403"}, + {file = "tokenizers-0.19.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5c88d1481f1882c2e53e6bb06491e474e420d9ac7bdff172610c4f9ad3898059"}, + {file = "tokenizers-0.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ddf672ed719b4ed82b51499100f5417d7d9f6fb05a65e232249268f35de5ed14"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dadc509cc8a9fe460bd274c0e16ac4184d0958117cf026e0ea8b32b438171594"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfedf31824ca4915b511b03441784ff640378191918264268e6923da48104acc"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac11016d0a04aa6487b1513a3a36e7bee7eec0e5d30057c9c0408067345c48d2"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76951121890fea8330d3a0df9a954b3f2a37e3ec20e5b0530e9a0044ca2e11fe"}, + {file = 
"tokenizers-0.19.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b342d2ce8fc8d00f376af068e3274e2e8649562e3bc6ae4a67784ded6b99428d"}, + {file = "tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d16ff18907f4909dca9b076b9c2d899114dd6abceeb074eca0c93e2353f943aa"}, + {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:706a37cc5332f85f26efbe2bdc9ef8a9b372b77e4645331a405073e4b3a8c1c6"}, + {file = "tokenizers-0.19.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:16baac68651701364b0289979ecec728546133e8e8fe38f66fe48ad07996b88b"}, + {file = "tokenizers-0.19.1-cp311-none-win32.whl", hash = "sha256:9ed240c56b4403e22b9584ee37d87b8bfa14865134e3e1c3fb4b2c42fafd3256"}, + {file = "tokenizers-0.19.1-cp311-none-win_amd64.whl", hash = "sha256:ad57d59341710b94a7d9dbea13f5c1e7d76fd8d9bcd944a7a6ab0b0da6e0cc66"}, + {file = "tokenizers-0.19.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:621d670e1b1c281a1c9698ed89451395d318802ff88d1fc1accff0867a06f153"}, + {file = "tokenizers-0.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d924204a3dbe50b75630bd16f821ebda6a5f729928df30f582fb5aade90c818a"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4f3fefdc0446b1a1e6d81cd4c07088ac015665d2e812f6dbba4a06267d1a2c95"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9620b78e0b2d52ef07b0d428323fb34e8ea1219c5eac98c2596311f20f1f9266"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04ce49e82d100594715ac1b2ce87d1a36e61891a91de774755f743babcd0dd52"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5c2ff13d157afe413bf7e25789879dd463e5a4abfb529a2d8f8473d8042e28f"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:3174c76efd9d08f836bfccaca7cfec3f4d1c0a4cf3acbc7236ad577cc423c840"}, + {file = "tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9d5b6c0e7a1e979bec10ff960fae925e947aab95619a6fdb4c1d8ff3708ce3"}, + {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a179856d1caee06577220ebcfa332af046d576fb73454b8f4d4b0ba8324423ea"}, + {file = "tokenizers-0.19.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:952b80dac1a6492170f8c2429bd11fcaa14377e097d12a1dbe0ef2fb2241e16c"}, + {file = "tokenizers-0.19.1-cp312-none-win32.whl", hash = "sha256:01d62812454c188306755c94755465505836fd616f75067abcae529c35edeb57"}, + {file = "tokenizers-0.19.1-cp312-none-win_amd64.whl", hash = "sha256:b70bfbe3a82d3e3fb2a5e9b22a39f8d1740c96c68b6ace0086b39074f08ab89a"}, + {file = "tokenizers-0.19.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:bb9dfe7dae85bc6119d705a76dc068c062b8b575abe3595e3c6276480e67e3f1"}, + {file = "tokenizers-0.19.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:1f0360cbea28ea99944ac089c00de7b2e3e1c58f479fb8613b6d8d511ce98267"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:71e3ec71f0e78780851fef28c2a9babe20270404c921b756d7c532d280349214"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b82931fa619dbad979c0ee8e54dd5278acc418209cc897e42fac041f5366d626"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e8ff5b90eabdcdaa19af697885f70fe0b714ce16709cf43d4952f1f85299e73a"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e742d76ad84acbdb1a8e4694f915fe59ff6edc381c97d6dfdd054954e3478ad4"}, + {file = "tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8c5d59d7b59885eab559d5bc082b2985555a54cda04dda4c65528d90ad252ad"}, + {file = 
"tokenizers-0.19.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b2da5c32ed869bebd990c9420df49813709e953674c0722ff471a116d97b22d"}, + {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:638e43936cc8b2cbb9f9d8dde0fe5e7e30766a3318d2342999ae27f68fdc9bd6"}, + {file = "tokenizers-0.19.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:78e769eb3b2c79687d9cb0f89ef77223e8e279b75c0a968e637ca7043a84463f"}, + {file = "tokenizers-0.19.1-cp37-none-win32.whl", hash = "sha256:72791f9bb1ca78e3ae525d4782e85272c63faaef9940d92142aa3eb79f3407a3"}, + {file = "tokenizers-0.19.1-cp37-none-win_amd64.whl", hash = "sha256:f3bbb7a0c5fcb692950b041ae11067ac54826204318922da754f908d95619fbc"}, + {file = "tokenizers-0.19.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:07f9295349bbbcedae8cefdbcfa7f686aa420be8aca5d4f7d1ae6016c128c0c5"}, + {file = "tokenizers-0.19.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:10a707cc6c4b6b183ec5dbfc5c34f3064e18cf62b4a938cb41699e33a99e03c1"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6309271f57b397aa0aff0cbbe632ca9d70430839ca3178bf0f06f825924eca22"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad23d37d68cf00d54af184586d79b84075ada495e7c5c0f601f051b162112dc"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:427c4f0f3df9109314d4f75b8d1f65d9477033e67ffaec4bca53293d3aca286d"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e83a31c9cf181a0a3ef0abad2b5f6b43399faf5da7e696196ddd110d332519ee"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c27b99889bd58b7e301468c0838c5ed75e60c66df0d4db80c08f43462f82e0d3"}, + {file = "tokenizers-0.19.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:bac0b0eb952412b0b196ca7a40e7dce4ed6f6926489313414010f2e6b9ec2adf"}, + {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8a6298bde623725ca31c9035a04bf2ef63208d266acd2bed8c2cb7d2b7d53ce6"}, + {file = "tokenizers-0.19.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:08a44864e42fa6d7d76d7be4bec62c9982f6f6248b4aa42f7302aa01e0abfd26"}, + {file = "tokenizers-0.19.1-cp38-none-win32.whl", hash = "sha256:1de5bc8652252d9357a666e609cb1453d4f8e160eb1fb2830ee369dd658e8975"}, + {file = "tokenizers-0.19.1-cp38-none-win_amd64.whl", hash = "sha256:0bcce02bf1ad9882345b34d5bd25ed4949a480cf0e656bbd468f4d8986f7a3f1"}, + {file = "tokenizers-0.19.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:0b9394bd204842a2a1fd37fe29935353742be4a3460b6ccbaefa93f58a8df43d"}, + {file = "tokenizers-0.19.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4692ab92f91b87769d950ca14dbb61f8a9ef36a62f94bad6c82cc84a51f76f6a"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6258c2ef6f06259f70a682491c78561d492e885adeaf9f64f5389f78aa49a051"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85cf76561fbd01e0d9ea2d1cbe711a65400092bc52b5242b16cfd22e51f0c58"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:670b802d4d82bbbb832ddb0d41df7015b3e549714c0e77f9bed3e74d42400fbe"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85aa3ab4b03d5e99fdd31660872249df5e855334b6c333e0bc13032ff4469c4a"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbf001afbbed111a79ca47d75941e9e5361297a87d186cbfc11ed45e30b5daba"}, + {file = "tokenizers-0.19.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4c89aa46c269e4e70c4d4f9d6bc644fcc39bb409cb2a81227923404dd6f5227"}, + {file = 
"tokenizers-0.19.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:39c1ec76ea1027438fafe16ecb0fb84795e62e9d643444c1090179e63808c69d"}, + {file = "tokenizers-0.19.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c2a0d47a89b48d7daa241e004e71fb5a50533718897a4cd6235cb846d511a478"}, + {file = "tokenizers-0.19.1-cp39-none-win32.whl", hash = "sha256:61b7fe8886f2e104d4caf9218b157b106207e0f2a4905c9c7ac98890688aabeb"}, + {file = "tokenizers-0.19.1-cp39-none-win_amd64.whl", hash = "sha256:f97660f6c43efd3e0bfd3f2e3e5615bf215680bad6ee3d469df6454b8c6e8256"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3b11853f17b54c2fe47742c56d8a33bf49ce31caf531e87ac0d7d13d327c9334"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d26194ef6c13302f446d39972aaa36a1dda6450bc8949f5eb4c27f51191375bd"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e8d1ed93beda54bbd6131a2cb363a576eac746d5c26ba5b7556bc6f964425594"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca407133536f19bdec44b3da117ef0d12e43f6d4b56ac4c765f37eca501c7bda"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce05fde79d2bc2e46ac08aacbc142bead21614d937aac950be88dc79f9db9022"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:35583cd46d16f07c054efd18b5d46af4a2f070a2dd0a47914e66f3ff5efb2b1e"}, + {file = "tokenizers-0.19.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:43350270bfc16b06ad3f6f07eab21f089adb835544417afda0f83256a8bf8b75"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b4399b59d1af5645bcee2072a463318114c39b8547437a7c2d6a186a1b5a0e2d"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = 
"sha256:6852c5b2a853b8b0ddc5993cd4f33bfffdca4fcc5d52f89dd4b8eada99379285"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd266ae85c3d39df2f7e7d0e07f6c41a55e9a3123bb11f854412952deacd828"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecb2651956eea2aa0a2d099434134b1b68f1c31f9a5084d6d53f08ed43d45ff2"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b279ab506ec4445166ac476fb4d3cc383accde1ea152998509a94d82547c8e2a"}, + {file = "tokenizers-0.19.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:89183e55fb86e61d848ff83753f64cded119f5d6e1f553d14ffee3700d0a4a49"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2edbc75744235eea94d595a8b70fe279dd42f3296f76d5a86dde1d46e35f574"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:0e64bfde9a723274e9a71630c3e9494ed7b4c0f76a1faacf7fe294cd26f7ae7c"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0b5ca92bfa717759c052e345770792d02d1f43b06f9e790ca0a1db62838816f3"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f8a20266e695ec9d7a946a019c1d5ca4eddb6613d4f466888eee04f16eedb85"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c38f45d8f2a2ec0f3a20073cccb335b9f99f73b3c69483cd52ebc75369d8a1"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dd26e3afe8a7b61422df3176e06664503d3f5973b94f45d5c45987e1cb711876"}, + {file = "tokenizers-0.19.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:eddd5783a4a6309ce23432353cdb36220e25cbb779bfa9122320666508b44b88"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:56ae39d4036b753994476a1b935584071093b55c7a72e3b8288e68c313ca26e7"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:f9939ca7e58c2758c01b40324a59c034ce0cebad18e0d4563a9b1beab3018243"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c330c0eb815d212893c67a032e9dc1b38a803eccb32f3e8172c19cc69fbb439"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec11802450a2487cdf0e634b750a04cbdc1c4d066b97d94ce7dd2cb51ebb325b"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2b718f316b596f36e1dae097a7d5b91fc5b85e90bf08b01ff139bd8953b25af"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:ed69af290c2b65169f0ba9034d1dc39a5db9459b32f1dd8b5f3f32a3fcf06eab"}, + {file = "tokenizers-0.19.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f8a9c828277133af13f3859d1b6bf1c3cb6e9e1637df0e45312e6b7c2e622b1f"}, + {file = "tokenizers-0.19.1.tar.gz", hash = "sha256:ee59e6680ed0fdbe6b724cf38bd70400a0c1dd623b07ac729087270caeac88e3"}, +] + +[package.dependencies] +huggingface-hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "torch" +version = "2.3.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = true 
+python-versions = ">=3.8.0" +files = [ + {file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"}, + {file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"}, + {file = "torch-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:32b05fe0d1ada7f69c9f86c14ff69b0ef1957a5a54199bacba63d22d8fab720b"}, + {file = "torch-2.3.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:7c09a94362778428484bcf995f6004b04952106aee0ef45ff0b4bab484f5498d"}, + {file = "torch-2.3.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b2ec81b61bb094ea4a9dee1cd3f7b76a44555375719ad29f05c0ca8ef596ad39"}, + {file = "torch-2.3.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:490cc3d917d1fe0bd027057dfe9941dc1d6d8e3cae76140f5dd9a7e5bc7130ab"}, + {file = "torch-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:5802530783bd465fe66c2df99123c9a54be06da118fbd785a25ab0a88123758a"}, + {file = "torch-2.3.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:a7dd4ed388ad1f3d502bf09453d5fe596c7b121de7e0cfaca1e2017782e9bbac"}, + {file = "torch-2.3.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:a486c0b1976a118805fc7c9641d02df7afbb0c21e6b555d3bb985c9f9601b61a"}, + {file = "torch-2.3.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:224259821fe3e4c6f7edf1528e4fe4ac779c77addaa74215eb0b63a5c474d66c"}, + {file = "torch-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:e5fdccbf6f1334b2203a61a0e03821d5845f1421defe311dabeae2fc8fbeac2d"}, + {file = "torch-2.3.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:3c333dc2ebc189561514eda06e81df22bf8fb64e2384746b2cb9f04f96d1d4c8"}, + {file = "torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:07e9ba746832b8d069cacb45f312cadd8ad02b81ea527ec9766c0e7404bb3feb"}, + {file = "torch-2.3.1-cp38-cp38-manylinux2014_aarch64.whl", hash = 
"sha256:462d1c07dbf6bb5d9d2f3316fee73a24f3d12cd8dacf681ad46ef6418f7f6626"}, + {file = "torch-2.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ff60bf7ce3de1d43ad3f6969983f321a31f0a45df3690921720bcad6a8596cc4"}, + {file = "torch-2.3.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bee0bd33dc58aa8fc8a7527876e9b9a0e812ad08122054a5bff2ce5abf005b10"}, + {file = "torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:aaa872abde9a3d4f91580f6396d54888620f4a0b92e3976a6034759df4b961ad"}, + {file = "torch-2.3.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:3d7a7f7ef21a7520510553dc3938b0c57c116a7daee20736a9e25cbc0e832bdc"}, + {file = "torch-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:4777f6cefa0c2b5fa87223c213e7b6f417cf254a45e5829be4ccd1b2a4ee1011"}, + {file = "torch-2.3.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:2bb5af780c55be68fe100feb0528d2edebace1d55cb2e351de735809ba7391eb"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +jinja2 = "*" +mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""} +networkx = "*" +nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-nvrtc-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cuda-runtime-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cudnn-cu12 = {version = "8.9.2.26", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusolver-cu12 = {version = "11.4.5.107", 
markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +sympy = "*" +triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} +typing-extensions = ">=4.8.0" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.9.1)"] + +[[package]] +name = "tqdm" +version = "4.66.4" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.4-py3-none-any.whl", hash = "sha256:b75ca56b413b030bc3f00af51fd2c1a1a5eac6a0c1cca83cbb37a5c52abce644"}, + {file = "tqdm-4.66.4.tar.gz", hash = "sha256:e4d936c9de8727928f3be6079590e97d9abfe8d39a590be678eb5919ffc186bb"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "transformers" +version = "4.42.4" +description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "transformers-4.42.4-py3-none-any.whl", hash = "sha256:6d59061392d0f1da312af29c962df9017ff3c0108c681a56d1bc981004d16d24"}, + {file = "transformers-4.42.4.tar.gz", hash = "sha256:f956e25e24df851f650cb2c158b6f4352dfae9d702f04c113ed24fc36ce7ae2d"}, +] + +[package.dependencies] +filelock = "*" +huggingface-hub = ">=0.23.2,<1.0" +numpy = ">=1.17,<2.0" +packaging = ">=20.0" +pyyaml = ">=5.1" +regex = "!=2019.12.17" 
+requests = "*" +safetensors = ">=0.4.1" +tokenizers = ">=0.19,<0.20" +tqdm = ">=4.27" + +[package.extras] +accelerate = ["accelerate (>=0.21.0)"] +agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] +all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision"] +audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +benchmark = ["optimum-benchmark (>=0.2.0)"] +codecarbon = ["codecarbon (==1.2.0)"] +deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"] +deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.4.4)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic 
(>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.4.4)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.4.4)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.19,<0.20)", "urllib3 (<2.0.0)"] +dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi 
(>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.4.4)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.19,<0.20)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] +flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] +flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +ftfy = ["ftfy"] +integrations = ["optuna", "ray[tune] (>=2.7.0)", "sigopt"] +ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "rhoknp (>=1.1.0,<1.3.1)", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)"] +modelcreation = ["cookiecutter (==1.7.3)"] +natten = ["natten (>=0.14.6,<0.15.0)"] +onnx = ["onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "tf2onnx"] +onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"] +optuna = ["optuna"] +quality = ["GitPython (<3.1.19)", "datasets (!=2.5.0)", "isort (>=5.5.4)", "ruff (==0.4.4)", "urllib3 (<2.0.0)"] +ray = ["ray[tune] (>=2.7.0)"] +retrieval = ["datasets (!=2.5.0)", "faiss-cpu"] +ruff = ["ruff (==0.4.4)"] +sagemaker = ["sagemaker (>=2.31.0)"] +sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"] +serving = ["fastapi", "pydantic", "starlette", "uvicorn"] +sigopt = ["sigopt"] +sklearn = ["scikit-learn"] +speech = ["kenlm", "librosa", 
"phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk", "parameterized", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.4.4)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] +tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-cpu = ["keras (>2.9,<2.16)", "keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>2.9,<2.16)", "tensorflow-probability (<0.24)", "tensorflow-text (<2.16)", "tf2onnx"] +tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] +timm = ["timm (<=0.9.16)"] +tokenizers = ["tokenizers (>=0.19,<0.20)"] +torch = ["accelerate (>=0.21.0)", "torch"] +torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] +torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] +torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17,<2.0)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.19,<0.20)", "torch", "tqdm (>=4.27)"] +video = ["av (==9.2.0)", "decord (==0.6.0)"] +vision = ["Pillow (>=10.0.1,<=15.0)"] + +[[package]] +name = "triton" +version = "2.3.1" +description = "A language and compiler for custom Deep Learning operations" +optional = true +python-versions = "*" +files = [ + {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, + {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, + {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"}, + {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"}, + {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, + {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, +] + +[package.dependencies] +filelock = "*" + +[package.extras] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] +tutorials = ["matplotlib", "pandas", "tabulate", "torch"] + +[[package]] +name = "types-requests" +version = "2.32.0.20240712" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.32.0.20240712.tar.gz", hash = "sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358"}, + {file = "types_requests-2.32.0.20240712-py3-none-any.whl", hash = "sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3"}, +] + +[package.dependencies] +urllib3 = ">=2" + +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + +[[package]] +name = "urllib3" +version = "2.2.2" 
+description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "uvicorn" +version = "0.27.1" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.8" +files = [ + {file = "uvicorn-0.27.1-py3-none-any.whl", hash = "sha256:5c89da2f3895767472a35556e539fd59f7edbe9b1e9c0e1c99eebeadc61838e4"}, + {file = "uvicorn-0.27.1.tar.gz", hash = "sha256:3d9a267296243532db80c83a959a3400502165ade2c1338dea4e67915fd4745a"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "vecs" +version = "0.4.3" +description = "pgvector client" +optional = false +python-versions = "*" +files = [ + {file = "vecs-0.4.3.tar.gz", hash = "sha256:0a60294143aec43bd0344bb9235b6e57f8f919d102538f6b989d7b85095a31ce"}, +] + +[package.dependencies] +deprecated = "==1.2.*" +flupy = "==1.*" +pgvector = "==0.1.*" +psycopg2-binary = "==2.9.*" +sqlalchemy = "==2.*" + +[package.extras] +dev = ["numpy", "parse", "pytest", "pytest-cov"] +docs = ["mike", "mkdocs", "pygments", "pymarkdown", "pymdown-extensions"] +text-embedding = ["sentence-transformers (==2.*)"] + +[[package]] +name = "virtualenv" +version = "20.26.3" +description = "Virtual Python 
Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = 
"sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + +[[package]] +name = "xlsxwriter" +version = "3.2.0" +description = "A Python module for creating Excel XLSX files." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "XlsxWriter-3.2.0-py3-none-any.whl", hash = "sha256:ecfd5405b3e0e228219bcaf24c2ca0915e012ca9464a14048021d21a995d490e"}, + {file = "XlsxWriter-3.2.0.tar.gz", hash = "sha256:9977d0c661a72866a61f9f7a809e25ebbb0fb7036baa3b9fe74afcfca6b3cb8c"}, +] + +[[package]] +name = "yarl" +version = "1.9.4" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, + {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, + {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, + {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, + {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, + {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, + {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, + {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, + {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, + {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, + {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, + {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, + {file = 
"yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, + {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, + {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, + {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, + {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, + {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, + {file = 
"yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, + {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, + {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, + {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, + {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, + {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, + {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, + {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, + {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, + {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, + {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, + 
{file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, + {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, + {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, + {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, + {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, + {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, + {file = "yarl-1.9.4.tar.gz", hash = 
"sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.19.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"}, + {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] + +[extras] +all = ["moviepy", "opencv-python", "sentence-transformers", "tiktoken"] +ingest-movies = ["moviepy", "opencv-python"] +local-embedding = ["sentence-transformers"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.9,<3.13" +content-hash = "a11aaf6da08f70ddd083f7b3d50a1a57cda078e91b23afe97077774b3483aaac" diff --git a/R2R/pyproject.toml b/R2R/pyproject.toml new file mode 100755 index 00000000..153e2cd4 --- /dev/null +++ b/R2R/pyproject.toml @@ -0,0 +1,108 @@ +[build-system] +requires = ["poetry-core", "setuptools", "wheel"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +name = "r2r" +version = "0.2.59" +description = "SciPhi R2R" +authors = ["Owen Colegrove <owen@sciphi.ai>"] +license = "MIT" +readme = "README.md" +include = ["config.json", "compose.yaml", "compose.neo4j.yaml"] + +[tool.poetry.dependencies] +# Python Versions +python = ">=3.9,<3.13" + +# Required dependencies + +# python +pydantic = "^2.6.3" +python-multipart = "^0.0.9" + +# infrastructure +fastapi = 
"^0.109.2" +fire = "^0.5.0" +gunicorn = "^21.2.0" +requests = "^2.31.0" +types-requests = "^2.31.0" +uvicorn = "^0.27.0.post1" + +# TODO - Make some of these optional +# async db providers +aiosqlite = "^0.20.0" +asyncpg = "^0.29.0" +redis = "^5.0.4" + +# ingestion +beautifulsoup4 = "^4.12.3" +openpyxl = "^3.1.2" +markdown = "^3.6" +pypdf = "^4.2.0" +python-pptx = "^0.6.23" +python-docx = "^1.1.0" +nest-asyncio = "^1.6.0" +opencv-python = { version = "^4.10.0.82", optional = true } +moviepy = { version = "^1.0.3", optional = true } + +# embedding providers +tiktoken = {version = "^0.5.2", optional = true} +sentence-transformers = {version = "^2.7.0", optional = true} + +# vector db providers +vecs = "^0.4.0" + +# llm providers +litellm = "^1.35.18" +openai = "^1.11.1" + +# integrations +fsspec = "^2024.6.0" +posthog = "^3.5.0" +sqlalchemy = "^2.0.30" +ollama = "^0.2.1" +neo4j = "^5.21.0" + +[tool.poetry.extras] +all = ["tiktoken", "sentence-transformers", "moviepy", "opencv-python"] +local-embedding = ["sentence-transformers"] +ingest-movies = ["moviepy", "opencv-python"] + +[tool.poetry.group.dev.dependencies] +black = "^24.3.0" +codecov = "^2.1.13" +flake8 = "6.1.0" +isort = "5.12.0" +mypy = "^1.5.1" +pre-commit = "^2.9" +pytest = "^8.2.0" +pytest-asyncio = "^0.23.6" +pytest-dependency = "^0.6.0" +pytest-mock = "^3.14.0" +pytest-cov = "^5.0.0" + +[tool.poetry.scripts] +r2r = "r2r.cli.cli:main" + +[tool.black] +line-length = 79 + +[tool.mypy] +ignore_missing_imports = true +exclude = 'playground/.*|deprecated/.*|dump/.*|docs/source|vecs/*' + +[[tool.mypy.overrides]] +module = "yaml" +ignore_missing_imports = true + +[tool.pytest.ini_options] +asyncio_mode = "auto" +addopts = "--cov=r2r --cov-report=term-missing --cov-report=xml" +testpaths = [ + "tests", +] +filterwarnings = [ + "ignore::DeprecationWarning", + "ignore::pytest.PytestUnraisableExceptionWarning", +] diff --git a/R2R/r2r/__init__.py b/R2R/r2r/__init__.py new file mode 100755 index 00000000..492cc13a 
--- /dev/null +++ b/R2R/r2r/__init__.py @@ -0,0 +1,110 @@ +import logging + +# Keep '*' imports for enhanced development velocity +# corresponding flake8 error codes are F403, F405 +from .base import * +from .integrations import * +from .main import * +from .parsers import * +from .pipelines import * +from .pipes import * +from .prompts import * + +logger = logging.getLogger("r2r") +logger.setLevel(logging.INFO) + +# Create a console handler and set the level to info +ch = logging.StreamHandler() +ch.setLevel(logging.INFO) + +# Create a formatter and set it for the handler +formatter = logging.Formatter( + "%(asctime)s - %(levelname)s - %(name)s - %(message)s" +) +ch.setFormatter(formatter) + +# Add the handler to the logger +logger.addHandler(ch) + +# Optional: Prevent propagation to the root logger +logger.propagate = False + +__all__ = [ + "R2RException", + "LoggingConfig", + "LocalKVLoggingProvider", + "PostgresLoggingConfig", + "PostgresKVLoggingProvider", + "RedisLoggingConfig", + "RedisKVLoggingProvider", + "KVLoggingSingleton", + "VectorEntry", + "VectorType", + "Vector", + "VectorSearchRequest", + "VectorSearchResult", + "AsyncPipe", + "PipeType", + "AsyncState", + "Prompt", + "DataType", + "DocumentType", + "Document", + "Extraction", + "ExtractionType", + "Fragment", + "FragmentType", + "SearchPipe", + # Parsers + "AsyncParser", + "CSVParser", + "DOCXParser", + "HTMLParser", + "JSONParser", + "MDParser", + "PDFParser", + "PPTParser", + "TextParser", + "XLSXParser", + "AsyncPipeline", + # Providers + "EmbeddingConfig", + "EmbeddingProvider", + "EvalConfig", + "EvalProvider", + "LLMEvalProvider", + "PromptConfig", + "PromptProvider", + "GenerationConfig", + "LLMChatCompletion", + "LLMChatCompletionChunk", + "LLMConfig", + "LLMProvider", + "VectorDBConfig", + "VectorDBProvider", + "R2RConfig", + "TextSplitter", + "RecursiveCharacterTextSplitter", + "generate_run_id", + "generate_id_from_label", + "R2REngine", + # Pipes + "EmbeddingPipe", + "EvalPipe", + 
"ParsingPipe", + "QueryTransformPipe", + "SearchRAGPipe", + "StreamingSearchRAGPipe", + "VectorSearchPipe", + "VectorStoragePipe", + "R2RPromptProvider", + "WebSearchPipe", + "R2RBuilder", + "R2R", + "KGAgentSearchPipe", + # Prebuilts + "MultiSearchPipe", + "R2RPipeFactoryWithMultiSearch", + # Integrations + "SerperClient", +] diff --git a/R2R/r2r/base/__init__.py b/R2R/r2r/base/__init__.py new file mode 100755 index 00000000..a6794a84 --- /dev/null +++ b/R2R/r2r/base/__init__.py @@ -0,0 +1,160 @@ +from .abstractions.base import AsyncSyncMeta, UserStats, syncable +from .abstractions.document import ( + DataType, + Document, + DocumentInfo, + DocumentType, + Entity, + Extraction, + ExtractionType, + Fragment, + FragmentType, + KGExtraction, + Triple, + extract_entities, + extract_triples, +) +from .abstractions.exception import R2RDocumentProcessingError, R2RException +from .abstractions.llama_abstractions import VectorStoreQuery +from .abstractions.llm import ( + GenerationConfig, + LLMChatCompletion, + LLMChatCompletionChunk, + RAGCompletion, +) +from .abstractions.prompt import Prompt +from .abstractions.search import ( + AggregateSearchResult, + KGSearchRequest, + KGSearchResult, + KGSearchSettings, + VectorSearchRequest, + VectorSearchResult, + VectorSearchSettings, +) +from .abstractions.vector import Vector, VectorEntry, VectorType +from .logging.kv_logger import ( + KVLoggingSingleton, + LocalKVLoggingProvider, + LoggingConfig, + PostgresKVLoggingProvider, + PostgresLoggingConfig, + RedisKVLoggingProvider, + RedisLoggingConfig, +) +from .logging.log_processor import ( + AnalysisTypes, + FilterCriteria, + LogAnalytics, + LogAnalyticsConfig, + LogProcessor, +) +from .logging.run_manager import RunManager, manage_run +from .parsers import AsyncParser +from .pipeline.base_pipeline import AsyncPipeline +from .pipes.base_pipe import AsyncPipe, AsyncState, PipeType +from .providers.embedding_provider import EmbeddingConfig, EmbeddingProvider +from 
.providers.eval_provider import EvalConfig, EvalProvider +from .providers.kg_provider import KGConfig, KGProvider, update_kg_prompt +from .providers.llm_provider import LLMConfig, LLMProvider +from .providers.prompt_provider import PromptConfig, PromptProvider +from .providers.vector_db_provider import VectorDBConfig, VectorDBProvider +from .utils import ( + EntityType, + RecursiveCharacterTextSplitter, + Relation, + TextSplitter, + format_entity_types, + format_relations, + generate_id_from_label, + generate_run_id, + increment_version, + run_pipeline, + to_async_generator, +) + +__all__ = [ + # Logging + "AsyncParser", + "AnalysisTypes", + "LogAnalytics", + "LogAnalyticsConfig", + "LogProcessor", + "LoggingConfig", + "LocalKVLoggingProvider", + "PostgresLoggingConfig", + "PostgresKVLoggingProvider", + "RedisLoggingConfig", + "AsyncSyncMeta", + "syncable", + "RedisKVLoggingProvider", + "KVLoggingSingleton", + "RunManager", + "manage_run", + # Abstractions + "VectorEntry", + "VectorType", + "Vector", + "VectorSearchRequest", + "VectorSearchResult", + "VectorSearchSettings", + "KGSearchRequest", + "KGSearchResult", + "KGSearchSettings", + "AggregateSearchResult", + "AsyncPipe", + "PipeType", + "AsyncState", + "AsyncPipe", + "Prompt", + "DataType", + "DocumentType", + "Document", + "DocumentInfo", + "Extraction", + "ExtractionType", + "Fragment", + "FragmentType", + "extract_entities", + "Entity", + "extract_triples", + "R2RException", + "R2RDocumentProcessingError", + "Triple", + "KGExtraction", + "UserStats", + # Pipelines + "AsyncPipeline", + # Providers + "EmbeddingConfig", + "EmbeddingProvider", + "EvalConfig", + "EvalProvider", + "PromptConfig", + "PromptProvider", + "GenerationConfig", + "RAGCompletion", + "VectorStoreQuery", + "LLMChatCompletion", + "LLMChatCompletionChunk", + "LLMConfig", + "LLMProvider", + "VectorDBConfig", + "VectorDBProvider", + "KGProvider", + "KGConfig", + "update_kg_prompt", + # Other + "FilterCriteria", + "TextSplitter", + 
"RecursiveCharacterTextSplitter", + "to_async_generator", + "EntityType", + "Relation", + "format_entity_types", + "format_relations", + "increment_version", + "run_pipeline", + "generate_run_id", + "generate_id_from_label", +] diff --git a/R2R/r2r/base/abstractions/__init__.py b/R2R/r2r/base/abstractions/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/base/abstractions/__init__.py diff --git a/R2R/r2r/base/abstractions/base.py b/R2R/r2r/base/abstractions/base.py new file mode 100755 index 00000000..7121f6ce --- /dev/null +++ b/R2R/r2r/base/abstractions/base.py @@ -0,0 +1,93 @@ +import asyncio +import uuid +from typing import List + +from pydantic import BaseModel + + +class UserStats(BaseModel): + user_id: uuid.UUID + num_files: int + total_size_in_bytes: int + document_ids: List[uuid.UUID] + + +class AsyncSyncMeta(type): + _event_loop = None # Class-level shared event loop + + @classmethod + def get_event_loop(cls): + if cls._event_loop is None or cls._event_loop.is_closed(): + cls._event_loop = asyncio.new_event_loop() + asyncio.set_event_loop(cls._event_loop) + return cls._event_loop + + def __new__(cls, name, bases, dct): + new_cls = super().__new__(cls, name, bases, dct) + for attr_name, attr_value in dct.items(): + if asyncio.iscoroutinefunction(attr_value) and getattr( + attr_value, "_syncable", False + ): + sync_method_name = attr_name[ + 1: + ] # Remove leading 'a' for sync method + async_method = attr_value + + def make_sync_method(async_method): + def sync_wrapper(self, *args, **kwargs): + loop = cls.get_event_loop() + if not loop.is_running(): + # Setup to run the loop in a background thread if necessary + # to prevent blocking the main thread in a synchronous call environment + from threading import Thread + + result = None + exception = None + + def run(): + nonlocal result, exception + try: + asyncio.set_event_loop(loop) + result = loop.run_until_complete( + async_method(self, *args, **kwargs) + ) + except 
Exception as e: + exception = e + finally: + generation_config = kwargs.get( + "rag_generation_config", None + ) + if ( + not generation_config + or not generation_config.stream + ): + loop.run_until_complete( + loop.shutdown_asyncgens() + ) + loop.close() + + thread = Thread(target=run) + thread.start() + thread.join() + if exception: + raise exception + return result + else: + # If there's already a running loop, schedule and execute the coroutine + future = asyncio.run_coroutine_threadsafe( + async_method(self, *args, **kwargs), loop + ) + return future.result() + + return sync_wrapper + + setattr( + new_cls, sync_method_name, make_sync_method(async_method) + ) + return new_cls + + +def syncable(func): + """Decorator to mark methods for synchronous wrapper creation.""" + func._syncable = True + return func diff --git a/R2R/r2r/base/abstractions/document.py b/R2R/r2r/base/abstractions/document.py new file mode 100755 index 00000000..117db7b9 --- /dev/null +++ b/R2R/r2r/base/abstractions/document.py @@ -0,0 +1,242 @@ +"""Abstractions for documents and their extractions.""" + +import base64 +import json +import logging +import uuid +from datetime import datetime +from enum import Enum +from typing import Optional, Union + +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) + +DataType = Union[str, bytes] + + +class DocumentType(str, Enum): + """Types of documents that can be stored.""" + + CSV = "csv" + DOCX = "docx" + HTML = "html" + JSON = "json" + MD = "md" + PDF = "pdf" + PPTX = "pptx" + TXT = "txt" + XLSX = "xlsx" + GIF = "gif" + PNG = "png" + JPG = "jpg" + JPEG = "jpeg" + SVG = "svg" + MP3 = "mp3" + MP4 = "mp4" + + +class Document(BaseModel): + id: uuid.UUID = Field(default_factory=uuid.uuid4) + type: DocumentType + data: Union[str, bytes] + metadata: dict + + def __init__(self, *args, **kwargs): + data = kwargs.get("data") + if data and isinstance(data, str): + try: + # Try to decode if it's already base64 encoded + kwargs["data"] 
= base64.b64decode(data) + except: + # If it's not base64, encode it to bytes + kwargs["data"] = data.encode("utf-8") + + doc_type = kwargs.get("type") + if isinstance(doc_type, str): + kwargs["type"] = DocumentType(doc_type) + + # Generate UUID based on the hash of the data + if "id" not in kwargs: + if isinstance(kwargs["data"], bytes): + data_hash = uuid.uuid5( + uuid.NAMESPACE_DNS, kwargs["data"].decode("utf-8") + ) + else: + data_hash = uuid.uuid5(uuid.NAMESPACE_DNS, kwargs["data"]) + + kwargs["id"] = data_hash # Set the id based on the data hash + + super().__init__(*args, **kwargs) + + class Config: + arbitrary_types_allowed = True + json_encoders = { + uuid.UUID: str, + bytes: lambda v: base64.b64encode(v).decode("utf-8"), + } + + +class DocumentStatus(str, Enum): + """Status of document processing.""" + + PROCESSING = "processing" + # TODO - Extend support for `partial-failure` + # PARTIAL_FAILURE = "partial-failure" + FAILURE = "failure" + SUCCESS = "success" + + +class DocumentInfo(BaseModel): + """Base class for document information handling.""" + + document_id: uuid.UUID + version: str + size_in_bytes: int + metadata: dict + status: DocumentStatus = DocumentStatus.PROCESSING + + user_id: Optional[uuid.UUID] = None + title: Optional[str] = None + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + + def convert_to_db_entry(self): + """Prepare the document info for database entry, extracting certain fields from metadata.""" + now = datetime.now() + metadata = self.metadata + if "user_id" in metadata: + metadata["user_id"] = str(metadata["user_id"]) + + metadata["title"] = metadata.get("title", "N/A") + return { + "document_id": str(self.document_id), + "title": metadata.get("title", "N/A"), + "user_id": metadata.get("user_id", None), + "version": self.version, + "size_in_bytes": self.size_in_bytes, + "metadata": json.dumps(self.metadata), + "created_at": self.created_at or now, + "updated_at": self.updated_at or now, + 
"status": self.status, + } + + +class ExtractionType(Enum): + """Types of extractions that can be performed.""" + + TXT = "txt" + IMG = "img" + MOV = "mov" + + +class Extraction(BaseModel): + """An extraction from a document.""" + + id: uuid.UUID + type: ExtractionType = ExtractionType.TXT + data: DataType + metadata: dict + document_id: uuid.UUID + + +class FragmentType(Enum): + """A type of fragment that can be extracted from a document.""" + + TEXT = "text" + IMAGE = "image" + + +class Fragment(BaseModel): + """A fragment extracted from a document.""" + + id: uuid.UUID + type: FragmentType + data: DataType + metadata: dict + document_id: uuid.UUID + extraction_id: uuid.UUID + + +class Entity(BaseModel): + """An entity extracted from a document.""" + + category: str + subcategory: Optional[str] = None + value: str + + def __str__(self): + return ( + f"{self.category}:{self.subcategory}:{self.value}" + if self.subcategory + else f"{self.category}:{self.value}" + ) + + +class Triple(BaseModel): + """A triple extracted from a document.""" + + subject: str + predicate: str + object: str + + +def extract_entities(llm_payload: list[str]) -> dict[str, Entity]: + entities = {} + for entry in llm_payload: + try: + if "], " in entry: # Check if the entry is an entity + entry_val = entry.split("], ")[0] + "]" + entry = entry.split("], ")[1] + colon_count = entry.count(":") + + if colon_count == 1: + category, value = entry.split(":") + subcategory = None + elif colon_count >= 2: + parts = entry.split(":", 2) + category, subcategory, value = ( + parts[0], + parts[1], + parts[2], + ) + else: + raise ValueError("Unexpected entry format") + + entities[entry_val] = Entity( + category=category, subcategory=subcategory, value=value + ) + except Exception as e: + logger.error(f"Error processing entity {entry}: {e}") + continue + return entities + + +def extract_triples( + llm_payload: list[str], entities: dict[str, Entity] +) -> list[Triple]: + triples = [] + for entry in 
class R2RException(Exception):
    """Base R2R error carrying an HTTP-style status code and optional detail.

    Args:
        message: Human-readable error message (becomes ``str(exc)``).
        status_code: HTTP-style status code for API error responses.
        detail: Optional structured payload describing the failure.
    """

    def __init__(
        self, message: str, status_code: int, detail: Optional[Any] = None
    ):
        self.message = message
        self.status_code = status_code
        # Fix: `detail` was accepted but silently dropped; keep it so API
        # handlers can surface the structured payload.
        self.detail = detail
        super().__init__(self.message)


class R2RDocumentProcessingError(R2RException):
    """Raised when processing of a specific document fails (status 400)."""

    def __init__(self, error_message, document_id):
        self.document_id = document_id
        super().__init__(error_message, 400, {"document_id": document_id})
class EntityNode(LabelledNode):
    """A named entity node in a property graph."""

    # Display name; also serves as this node's id (see `id` below).
    name: str = Field(description="The name of the entity.")
    label: str = Field(default="entity", description="The label of the node.")
    properties: Dict[str, Any] = Field(default_factory=dict)

    def __str__(self) -> str:
        """Return the string representation of the node."""
        return self.name

    @property
    def id(self) -> str:
        """Get the node id.

        Double quotes are replaced with spaces — presumably to keep the id
        safe inside quoted graph-store query strings; TODO confirm against
        the concrete store implementations.
        """
        return self.name.replace('"', " ")
class Relation(BaseModel):
    """A relation (edge) connecting two entities in a graph."""

    label: str
    source_id: str  # id of the source node
    target_id: str  # id of the target node
    properties: Dict[str, Any] = Field(default_factory=dict)

    def __str__(self) -> str:
        """Return the string representation of the relation."""
        return self.label

    @property
    def id(self) -> str:
        """Get the relation id.

        NOTE(review): the id is just the label, so every relation of the
        same type shares an id — do not treat it as a unique edge key.
        """
        return self.label
more operators. + + Value uses Strict* types, as int, float and str are compatible types and were all + converted to string before. + + See: https://docs.pydantic.dev/latest/usage/types/#strict-types + """ + + key: str + value: Union[ + StrictInt, + StrictFloat, + StrictStr, + List[StrictStr], + List[StrictFloat], + List[StrictInt], + ] + operator: FilterOperator = FilterOperator.EQ + + @classmethod + def from_dict( + cls, + filter_dict: Dict, + ) -> "MetadataFilter": + """Create MetadataFilter from dictionary. + + Args: + filter_dict: Dict with key, value and operator. + + """ + return MetadataFilter.parse_obj(filter_dict) + + +# # TODO: Deprecate ExactMatchFilter and use MetadataFilter instead +# # Keep class for now so that AutoRetriever can still work with old vector stores +# class ExactMatchFilter(BaseModel): +# key: str +# value: Union[StrictInt, StrictFloat, StrictStr] + +# set ExactMatchFilter to MetadataFilter +ExactMatchFilter = MetadataFilter + + +class FilterCondition(str, Enum): + """Vector store filter conditions to combine different filters.""" + + # TODO add more conditions + AND = "and" + OR = "or" + + +class MetadataFilters(BaseModel): + """Metadata filters for vector stores.""" + + # Exact match filters and Advanced filters with operators like >, <, >=, <=, !=, etc. 
@dataclass
class VectorStoreQuery:
    """Parameters for a vector-store query (dense, sparse, hybrid, or MMR)."""

    # Dense query embedding; None for text-only / sparse queries.
    query_embedding: Optional[List[float]] = None
    similarity_top_k: int = 1  # top-k for dense search
    doc_ids: Optional[List[str]] = None  # restrict search to these documents
    node_ids: Optional[List[str]] = None  # restrict search to these nodes
    query_str: Optional[str] = None  # raw query text
    output_fields: Optional[List[str]] = None  # fields to return, if supported
    embedding_field: Optional[str] = None  # which embedding field to search

    mode: VectorStoreQueryMode = VectorStoreQueryMode.DEFAULT

    # NOTE: only for hybrid search (0 for bm25, 1 for vector search)
    alpha: Optional[float] = None

    # metadata filters
    filters: Optional[MetadataFilters] = None

    # only for mmr
    mmr_threshold: Optional[float] = None

    # NOTE: currently only used by postgres hybrid search
    sparse_top_k: Optional[int] = None
    # NOTE: return top k results from hybrid search. similarity_top_k is used for dense search top k
    hybrid_top_k: Optional[int] = None
    # --- abstract query/mutation surface; concrete stores implement these ---

    @abstractmethod
    def get(
        self,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[LabelledNode]:
        """Get nodes with matching values."""
        ...

    @abstractmethod
    def get_triplets(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[Triplet]:
        """Get triplets with matching values."""
        ...

    @abstractmethod
    def get_rel_map(
        self,
        graph_nodes: List[LabelledNode],
        depth: int = 2,
        limit: int = 30,
        ignore_rels: Optional[List[str]] = None,
    ) -> List[Triplet]:
        """Get depth-aware rel map starting from the given nodes."""
        ...

    @abstractmethod
    def upsert_nodes(self, nodes: List[LabelledNode]) -> None:
        """Upsert nodes."""
        ...

    @abstractmethod
    def upsert_relations(self, relations: List[Relation]) -> None:
        """Upsert relations."""
        ...

    @abstractmethod
    def delete(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> None:
        """Delete matching data."""
        ...

    @abstractmethod
    def structured_query(
        self, query: str, param_map: Optional[Dict[str, Any]] = None
    ) -> Any:
        """Query the graph store with statement and parameters."""
        ...

    @abstractmethod
    def vector_query(
        self, query: VectorStoreQuery, **kwargs: Any
    ) -> Tuple[List[LabelledNode], List[float]]:
        """Query the graph store with a vector store query.

        Returns matching nodes together with their similarity scores.
        """
        ...
    # def persist(
    #     self, persist_path: str, fs: Optional[fsspec.AbstractFileSystem] = None
    # ) -> None:
    #     """Persist the graph store to a file."""
    #     return

    def get_schema(self, refresh: bool = False) -> Any:
        """Get the schema of the graph store (default: no schema)."""
        return None

    def get_schema_str(self, refresh: bool = False) -> str:
        """Get the schema of the graph store as a string."""
        return str(self.get_schema(refresh=refresh))

    ### ----- Async Methods ----- ###
    # Default async variants simply delegate to the synchronous methods;
    # stores with a real async client should override them.

    async def aget(
        self,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[LabelledNode]:
        """Asynchronously get nodes with matching values."""
        return self.get(properties, ids)

    async def aget_triplets(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[Triplet]:
        """Asynchronously get triplets with matching values."""
        return self.get_triplets(entity_names, relation_names, properties, ids)

    async def aget_rel_map(
        self,
        graph_nodes: List[LabelledNode],
        depth: int = 2,
        limit: int = 30,
        ignore_rels: Optional[List[str]] = None,
    ) -> List[Triplet]:
        """Asynchronously get depth-aware rel map."""
        return self.get_rel_map(graph_nodes, depth, limit, ignore_rels)

    async def aupsert_nodes(self, nodes: List[LabelledNode]) -> None:
        """Asynchronously add nodes."""
        return self.upsert_nodes(nodes)

    async def aupsert_relations(self, relations: List[Relation]) -> None:
        """Asynchronously add relations."""
        return self.upsert_relations(relations)

    async def adelete(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> None:
        """Asynchronously delete matching data."""
        return self.delete(entity_names, relation_names, properties, ids)
# Lists with LIST_LIMIT or more elements are stripped from sanitized output.
LIST_LIMIT = 128


def clean_string_values(text: str) -> str:
    """Collapse newlines and carriage returns in *text* to single spaces."""
    return text.replace("\n", " ").replace("\r", " ")


def value_sanitize(d: Any) -> Any:
    """Recursively sanitize a dict/list result for use in an LLM context.

    Removes list values with ``LIST_LIMIT`` (128) or more elements — these
    are usually embeddings or other bulk data that occupy significant
    context space and add noise/cost without helping answer generation.
    Dicts and short lists are sanitized recursively; scalars pass through
    unchanged. Returns ``None`` for an oversized list so a recursive caller
    can drop it.
    """
    if isinstance(d, dict):
        new_dict = {}
        for key, value in d.items():
            if isinstance(value, dict):
                sanitized_value = value_sanitize(value)
                # Defensive: the dict branch never yields None, but keep the
                # guard so a future change to the recursion stays safe.
                if sanitized_value is not None:
                    new_dict[key] = sanitized_value
            elif isinstance(value, list):
                if len(value) < LIST_LIMIT:
                    sanitized_value = value_sanitize(value)
                    if sanitized_value is not None:
                        new_dict[key] = sanitized_value
                # Do not include the key if the list is oversized.
            else:
                new_dict[key] = value
        return new_dict
    elif isinstance(d, list):
        if len(d) >= LIST_LIMIT:
            return None  # oversized list: signal caller to drop it
        # Fix: the original called value_sanitize(item) twice per element
        # (once in the filter, once for the value); bind it once instead.
        return [
            sanitized
            for item in d
            if (sanitized := value_sanitize(item)) is not None
        ]
    else:
        return d
None, + } + + model: str = Field( + default_factory=lambda: GenerationConfig._defaults["model"] + ) + temperature: float = Field( + default_factory=lambda: GenerationConfig._defaults["temperature"] + ) + top_p: float = Field( + default_factory=lambda: GenerationConfig._defaults["top_p"] + ) + top_k: int = Field( + default_factory=lambda: GenerationConfig._defaults["top_k"] + ) + max_tokens_to_sample: int = Field( + default_factory=lambda: GenerationConfig._defaults[ + "max_tokens_to_sample" + ] + ) + stream: bool = Field( + default_factory=lambda: GenerationConfig._defaults["stream"] + ) + functions: Optional[list[dict]] = Field( + default_factory=lambda: GenerationConfig._defaults["functions"] + ) + skip_special_tokens: bool = Field( + default_factory=lambda: GenerationConfig._defaults[ + "skip_special_tokens" + ] + ) + stop_token: Optional[str] = Field( + default_factory=lambda: GenerationConfig._defaults["stop_token"] + ) + num_beams: int = Field( + default_factory=lambda: GenerationConfig._defaults["num_beams"] + ) + do_sample: bool = Field( + default_factory=lambda: GenerationConfig._defaults["do_sample"] + ) + generate_with_chat: bool = Field( + default_factory=lambda: GenerationConfig._defaults[ + "generate_with_chat" + ] + ) + add_generation_kwargs: Optional[dict] = Field( + default_factory=lambda: GenerationConfig._defaults[ + "add_generation_kwargs" + ] + ) + api_base: Optional[str] = Field( + default_factory=lambda: GenerationConfig._defaults["api_base"] + ) + + @classmethod + def set_default(cls, **kwargs): + for key, value in kwargs.items(): + if key in cls._defaults: + cls._defaults[key] = value + else: + raise AttributeError( + f"No default attribute '{key}' in GenerationConfig" + ) + + def __init__(self, **data): + model = data.pop("model", None) + if model is not None: + super().__init__(model=model, **data) + else: + super().__init__(**data) diff --git a/R2R/r2r/base/abstractions/prompt.py b/R2R/r2r/base/abstractions/prompt.py new file mode 100755 
class Prompt(BaseModel):
    """A prompt template that validates its inputs before formatting."""

    name: str
    template: str
    # Maps input name -> type name ("str", "int", ...); see _convert_type.
    input_types: dict[str, str]

    def format_prompt(self, inputs: dict[str, Any]) -> str:
        """Validate *inputs* against ``input_types`` and fill the template.

        Raises:
            ValueError: if a declared input is missing.
            TypeError: if an input has the wrong type.
        """
        self._validate_inputs(inputs)
        return self.template.format(**inputs)

    def _validate_inputs(self, inputs: dict[str, Any]) -> None:
        """Check that every declared input is present and correctly typed."""
        for var, expected_type_name in self.input_types.items():
            expected_type = self._convert_type(expected_type_name)
            if var not in inputs:
                raise ValueError(f"Missing input: {var}")
            if not isinstance(inputs[var], expected_type):
                raise TypeError(
                    f"Input '{var}' must be of type {expected_type.__name__}, got {type(inputs[var]).__name__} instead."
                )

    def _convert_type(self, type_name: str) -> type:
        """Map a type name from ``input_types`` to a Python type.

        Generalized beyond the original {"int", "str"}: float/bool/list/dict
        are now recognized instead of being silently validated as str.
        Unknown names still fall back to str for backward compatibility.
        """
        type_mapping = {
            "int": int,
            "str": str,
            "float": float,
            "bool": bool,
            "list": list,
            "dict": dict,
        }
        return type_mapping.get(type_name, str)
class VectorType(Enum):
    """Supported vector layouts."""

    FIXED = "FIXED"  # fixed number of elements


class Vector:
    """A vector with the option to fix the number of elements."""

    def __init__(
        self,
        data: list[float],
        type: VectorType = VectorType.FIXED,
        length: int = -1,
    ):
        """Create a vector.

        Args:
            data: the vector elements.
            type: layout; only FIXED is defined.
            length: expected element count; a non-positive value (the
                default) disables the length check.

        Raises:
            ValueError: if a positive length is given and data's size differs.
        """
        self.data = data
        self.type = type
        self.length = length

        if (
            self.type == VectorType.FIXED
            and length > 0
            and len(data) != length
        ):
            raise ValueError(f"Vector must be exactly {length} elements long.")

    def __repr__(self) -> str:
        return (
            f"Vector(data={self.data}, type={self.type}, length={self.length})"
        )


class VectorEntry:
    """A vector entry that can be stored directly in supported vector databases."""

    def __init__(self, id: UUID, vector: Vector, metadata: dict[str, Any]):
        """Create a new VectorEntry object."""
        self.vector = vector
        self.id = id
        self.metadata = metadata

    def to_serializable(self) -> dict:
        """Return a JSON-serializable dict representation of the entry.

        Fixes vs original: the return annotation said ``-> str`` although a
        dict was returned, and ``self.metadata`` was mutated in place while
        stringifying UUID values — a fresh dict is built instead so the
        stored metadata is left untouched.
        """
        return {
            "id": str(self.id),
            "vector": self.vector.data,
            "metadata": {
                key: str(value) if isinstance(value, UUID) else value
                for key, value in self.metadata.items()
            },
        }

    def __str__(self) -> str:
        """Return a string representation of the VectorEntry."""
        return f"VectorEntry(id={self.id}, vector={self.vector}, metadata={self.metadata})"

    def __repr__(self) -> str:
        """Return an unambiguous string representation of the VectorEntry."""
        return f"VectorEntry(id={self.id}, vector={self.vector}, metadata={self.metadata})"
class LoggingConfig(ProviderConfig):
    """Configuration for the KV logging providers.

    ``provider`` selects the backend; the table names are shared by all
    backends (the Redis provider reuses them as key prefixes).
    """

    provider: str = "local"
    log_table: str = "logs"
    log_info_table: str = "logs_pipeline_info"
    # Path to the local sqlite file; falls back to $LOCAL_DB_PATH when unset.
    logging_path: Optional[str] = None

    def validate(self) -> None:
        """No cross-field validation is required for logging configs."""
        pass

    @property
    def supported_providers(self) -> list[str]:
        """Backends this config may select."""
        return ["local", "postgres", "redis"]
+ ) + + async def init(self): + self.conn = await self.aiosqlite.connect(self.logging_path) + await self.conn.execute( + f""" + CREATE TABLE IF NOT EXISTS {self.log_table} ( + timestamp DATETIME, + log_id TEXT, + key TEXT, + value TEXT + ) + """ + ) + await self.conn.execute( + f""" + CREATE TABLE IF NOT EXISTS {self.log_info_table} ( + timestamp DATETIME, + log_id TEXT UNIQUE, + log_type TEXT + ) + """ + ) + await self.conn.commit() + + async def __aenter__(self): + if self.conn is None: + await self.init() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.close() + + async def close(self): + if self.conn: + await self.conn.close() + self.conn = None + + async def log( + self, + log_id: uuid.UUID, + key: str, + value: str, + is_info_log=False, + ): + collection = self.log_info_table if is_info_log else self.log_table + + if is_info_log: + if "type" not in key: + raise ValueError("Info log keys must contain the text 'type'") + await self.conn.execute( + f"INSERT INTO {collection} (timestamp, log_id, log_type) VALUES (datetime('now'), ?, ?)", + (str(log_id), value), + ) + else: + await self.conn.execute( + f"INSERT INTO {collection} (timestamp, log_id, key, value) VALUES (datetime('now'), ?, ?, ?)", + (str(log_id), key, value), + ) + await self.conn.commit() + + async def get_run_info( + self, limit: int = 10, log_type_filter: Optional[str] = None + ) -> list[RunInfo]: + cursor = await self.conn.cursor() + query = f'SELECT log_id, log_type FROM "{self.log_info_table}"' + conditions = [] + params = [] + if log_type_filter: + conditions.append("log_type = ?") + params.append(log_type_filter) + if conditions: + query += " WHERE " + " AND ".join(conditions) + query += " ORDER BY timestamp DESC LIMIT ?" 
+ params.append(limit) + await cursor.execute(query, params) + rows = await cursor.fetchall() + return [ + RunInfo(run_id=uuid.UUID(row[0]), log_type=row[1]) for row in rows + ] + + async def get_logs( + self, run_ids: list[uuid.UUID], limit_per_run: int = 10 + ) -> list: + if not run_ids: + raise ValueError("No run ids provided.") + cursor = await self.conn.cursor() + placeholders = ",".join(["?" for _ in run_ids]) + query = f""" + SELECT * + FROM ( + SELECT *, ROW_NUMBER() OVER (PARTITION BY log_id ORDER BY timestamp DESC) as rn + FROM {self.log_table} + WHERE log_id IN ({placeholders}) + ) + WHERE rn <= ? + ORDER BY timestamp DESC + """ + params = [str(ele) for ele in run_ids] + [limit_per_run] + await cursor.execute(query, params) + rows = await cursor.fetchall() + new_rows = [] + for row in rows: + new_rows.append( + (row[0], uuid.UUID(row[1]), row[2], row[3], row[4]) + ) + return [ + {desc[0]: row[i] for i, desc in enumerate(cursor.description)} + for row in new_rows + ] + + +class PostgresLoggingConfig(LoggingConfig): + provider: str = "postgres" + log_table: str = "logs" + log_info_table: str = "logs_pipeline_info" + + def validate(self) -> None: + required_env_vars = [ + "POSTGRES_DBNAME", + "POSTGRES_USER", + "POSTGRES_PASSWORD", + "POSTGRES_HOST", + "POSTGRES_PORT", + ] + for var in required_env_vars: + if not os.getenv(var): + raise ValueError(f"Environment variable {var} is not set.") + + @property + def supported_providers(self) -> list[str]: + return ["postgres"] + + +class PostgresKVLoggingProvider(KVLoggingProvider): + def __init__(self, config: PostgresLoggingConfig): + self.log_table = config.log_table + self.log_info_table = config.log_info_table + self.config = config + self.pool = None + if not os.getenv("POSTGRES_DBNAME"): + raise ValueError( + "Please set the environment variable POSTGRES_DBNAME." + ) + if not os.getenv("POSTGRES_USER"): + raise ValueError( + "Please set the environment variable POSTGRES_USER." 
    async def init(self):
        """Create the asyncpg pool and ensure the log tables exist.

        Connection settings are read from POSTGRES_* environment variables
        (presence is validated in __init__).
        """
        self.pool = await asyncpg.create_pool(
            database=os.getenv("POSTGRES_DBNAME"),
            user=os.getenv("POSTGRES_USER"),
            password=os.getenv("POSTGRES_PASSWORD"),
            host=os.getenv("POSTGRES_HOST"),
            port=os.getenv("POSTGRES_PORT"),
            statement_cache_size=0,  # Disable statement caching
            # NOTE(review): disabling the cache is typically for
            # pgbouncer/transaction-pooling compatibility — confirm.
        )
        async with self.pool.acquire() as conn:
            # log_id is intentionally NOT unique here: one row per key/value.
            await conn.execute(
                f"""
                CREATE TABLE IF NOT EXISTS "{self.log_table}" (
                    timestamp TIMESTAMPTZ,
                    log_id UUID,
                    key TEXT,
                    value TEXT
                )
                """
            )
            # One run-info row per run, hence the UNIQUE constraint.
            await conn.execute(
                f"""
                CREATE TABLE IF NOT EXISTS "{self.log_info_table}" (
                    timestamp TIMESTAMPTZ,
                    log_id UUID UNIQUE,
                    log_type TEXT
                )
                """
            )
+ ) + async with self.pool.acquire() as conn: + await self.pool.execute( + f'INSERT INTO "{collection}" (timestamp, log_id, log_type) VALUES (NOW(), $1, $2)', + log_id, + value, + ) + else: + async with self.pool.acquire() as conn: + await conn.execute( + f'INSERT INTO "{collection}" (timestamp, log_id, key, value) VALUES (NOW(), $1, $2, $3)', + log_id, + key, + value, + ) + + async def get_run_info( + self, limit: int = 10, log_type_filter: Optional[str] = None + ) -> list[RunInfo]: + query = f"SELECT log_id, log_type FROM {self.log_info_table}" + conditions = [] + params = [] + if log_type_filter: + conditions.append("log_type = $1") + params.append(log_type_filter) + if conditions: + query += " WHERE " + " AND ".join(conditions) + query += " ORDER BY timestamp DESC LIMIT $2" + params.append(limit) + async with self.pool.acquire() as conn: + rows = await conn.fetch(query, *params) + return [ + RunInfo(run_id=row["log_id"], log_type=row["log_type"]) + for row in rows + ] + + async def get_logs( + self, run_ids: list[uuid.UUID], limit_per_run: int = 10 + ) -> list: + if not run_ids: + raise ValueError("No run ids provided.") + + placeholders = ",".join([f"${i + 1}" for i in range(len(run_ids))]) + query = f""" + SELECT * FROM ( + SELECT *, ROW_NUMBER() OVER (PARTITION BY log_id ORDER BY timestamp DESC) as rn + FROM "{self.log_table}" + WHERE log_id::text IN ({placeholders}) + ) sub + WHERE sub.rn <= ${len(run_ids) + 1} + ORDER BY sub.timestamp DESC + """ + params = [str(run_id) for run_id in run_ids] + [limit_per_run] + async with self.pool.acquire() as conn: + rows = await conn.fetch(query, *params) + return [{key: row[key] for key in row.keys()} for row in rows] + + +class RedisLoggingConfig(LoggingConfig): + provider: str = "redis" + log_table: str = "logs" + log_info_table: str = "logs_pipeline_info" + + def validate(self) -> None: + required_env_vars = ["REDIS_CLUSTER_IP", "REDIS_CLUSTER_PORT"] + for var in required_env_vars: + if not os.getenv(var): + raise 
ValueError(f"Environment variable {var} is not set.") + + @property + def supported_providers(self) -> list[str]: + return ["redis"] + + +class RedisKVLoggingProvider(KVLoggingProvider): + def __init__(self, config: RedisLoggingConfig): + logger.info( + f"Initializing RedisKVLoggingProvider with config: {config}" + ) + + if not all( + [ + os.getenv("REDIS_CLUSTER_IP"), + os.getenv("REDIS_CLUSTER_PORT"), + ] + ): + raise ValueError( + "Please set the environment variables REDIS_CLUSTER_IP and REDIS_CLUSTER_PORT to run `LoggingDatabaseConnection` with `redis`." + ) + try: + from redis.asyncio import Redis + except ImportError: + raise ValueError( + "Error, `redis` is not installed. Please install it using `pip install redis`." + ) + + cluster_ip = os.getenv("REDIS_CLUSTER_IP") + port = os.getenv("REDIS_CLUSTER_PORT") + self.redis = Redis(host=cluster_ip, port=port, decode_responses=True) + self.log_key = config.log_table + self.log_info_key = config.log_info_table + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() + + async def close(self): + await self.redis.close() + + async def log( + self, + log_id: uuid.UUID, + key: str, + value: str, + is_info_log=False, + ): + timestamp = datetime.now().timestamp() + log_entry = { + "timestamp": timestamp, + "log_id": str(log_id), + "key": key, + "value": value, + } + if is_info_log: + if "type" not in key: + raise ValueError("Metadata keys must contain the text 'type'") + log_entry["log_type"] = value + await self.redis.hset( + self.log_info_key, str(log_id), json.dumps(log_entry) + ) + await self.redis.zadd( + f"{self.log_info_key}_sorted", {str(log_id): timestamp} + ) + else: + await self.redis.lpush( + f"{self.log_key}:{str(log_id)}", json.dumps(log_entry) + ) + + async def get_run_info( + self, limit: int = 10, log_type_filter: Optional[str] = None + ) -> list[RunInfo]: + run_info_list = [] + start = 0 + count_per_batch = 100 # Adjust batch size as needed + 
+ while len(run_info_list) < limit: + log_ids = await self.redis.zrevrange( + f"{self.log_info_key}_sorted", + start, + start + count_per_batch - 1, + ) + if not log_ids: + break # No more log IDs to process + + start += count_per_batch + + for log_id in log_ids: + log_entry = json.loads( + await self.redis.hget(self.log_info_key, log_id) + ) + if log_type_filter: + if log_entry["log_type"] == log_type_filter: + run_info_list.append( + RunInfo( + run_id=uuid.UUID(log_entry["log_id"]), + log_type=log_entry["log_type"], + ) + ) + else: + run_info_list.append( + RunInfo( + run_id=uuid.UUID(log_entry["log_id"]), + log_type=log_entry["log_type"], + ) + ) + + if len(run_info_list) >= limit: + break + + return run_info_list[:limit] + + async def get_logs( + self, run_ids: list[uuid.UUID], limit_per_run: int = 10 + ) -> list: + logs = [] + for run_id in run_ids: + raw_logs = await self.redis.lrange( + f"{self.log_key}:{str(run_id)}", 0, limit_per_run - 1 + ) + for raw_log in raw_logs: + json_log = json.loads(raw_log) + json_log["log_id"] = uuid.UUID(json_log["log_id"]) + logs.append(json_log) + return logs + + +class KVLoggingSingleton: + _instance = None + _is_configured = False + + SUPPORTED_PROVIDERS = { + "local": LocalKVLoggingProvider, + "postgres": PostgresKVLoggingProvider, + "redis": RedisKVLoggingProvider, + } + + @classmethod + def get_instance(cls): + return cls.SUPPORTED_PROVIDERS[cls._config.provider](cls._config) + + @classmethod + def configure( + cls, logging_config: Optional[LoggingConfig] = LoggingConfig() + ): + if not cls._is_configured: + cls._config = logging_config + cls._is_configured = True + else: + raise Exception("KVLoggingSingleton is already configured.") + + @classmethod + async def log( + cls, + log_id: uuid.UUID, + key: str, + value: str, + is_info_log=False, + ): + try: + async with cls.get_instance() as provider: + await provider.log(log_id, key, value, is_info_log=is_info_log) + + except Exception as e: + logger.error(f"Error logging 
data {(log_id, key, value)}: {e}") + + @classmethod + async def get_run_info( + cls, limit: int = 10, log_type_filter: Optional[str] = None + ) -> list[RunInfo]: + async with cls.get_instance() as provider: + return await provider.get_run_info( + limit, log_type_filter=log_type_filter + ) + + @classmethod + async def get_logs( + cls, run_ids: list[uuid.UUID], limit_per_run: int = 10 + ) -> list: + async with cls.get_instance() as provider: + return await provider.get_logs(run_ids, limit_per_run) diff --git a/R2R/r2r/base/logging/log_processor.py b/R2R/r2r/base/logging/log_processor.py new file mode 100755 index 00000000..e85d8de2 --- /dev/null +++ b/R2R/r2r/base/logging/log_processor.py @@ -0,0 +1,196 @@ +import contextlib +import json +import logging +import statistics +from collections import defaultdict +from typing import Any, Callable, Dict, List, Optional, Sequence + +from pydantic import BaseModel + +logger = logging.getLogger(__name__) + + +class FilterCriteria(BaseModel): + filters: Optional[dict[str, str]] = None + + +class LogProcessor: + timestamp_format = "%Y-%m-%d %H:%M:%S" + + def __init__(self, filters: Dict[str, Callable[[Dict[str, Any]], bool]]): + self.filters = filters + self.populations = {name: [] for name in filters} + + def process_log(self, log: Dict[str, Any]): + for name, filter_func in self.filters.items(): + if filter_func(log): + self.populations[name].append(log) + + +class StatisticsCalculator: + @staticmethod + def calculate_statistics( + population: List[Dict[str, Any]], + stat_functions: Dict[str, Callable[[List[Dict[str, Any]]], Any]], + ) -> Dict[str, Any]: + return { + name: func(population) for name, func in stat_functions.items() + } + + +class DistributionGenerator: + @staticmethod + def generate_distributions( + population: List[Dict[str, Any]], + dist_functions: Dict[str, Callable[[List[Dict[str, Any]]], Any]], + ) -> Dict[str, Any]: + return { + name: func(population) for name, func in dist_functions.items() + } + + 
class VisualizationPreparer:
    """Applies a named set of visualization functions to analytics data."""

    @staticmethod
    def prepare_visualization_data(
        data: Dict[str, Any],
        vis_functions: Dict[str, Callable[[Dict[str, Any]], Any]],
    ) -> Dict[str, Any]:
        # One output entry per visualization function, keyed by its name.
        return {name: func(data) for name, func in vis_functions.items()}


class LogAnalyticsConfig:
    """Bundles the filter/statistic/distribution/visualization callables
    consumed by LogAnalytics."""

    def __init__(self, filters, stat_functions, dist_functions, vis_functions):
        self.filters = filters
        self.stat_functions = stat_functions
        self.dist_functions = dist_functions
        self.vis_functions = vis_functions


class AnalysisTypes(BaseModel):
    # Maps an analysis name to the sequence of arguments describing it.
    analysis_types: Optional[dict[str, Sequence[str]]] = None

    @staticmethod
    def generate_bar_chart_data(logs, key):
        """Count occurrences of each value under `key` and shape the
        counts as chart.js-style labels/datasets."""
        chart_data = {"labels": [], "datasets": []}
        value_counts = defaultdict(int)

        for log in logs:
            # Logs may be nested (a run with "entries") or flat key/value.
            if "entries" in log:
                for entry in log["entries"]:
                    if entry["key"] == key:
                        value_counts[entry["value"]] += 1
            elif "key" in log and log["key"] == key:
                value_counts[log["value"]] += 1

        for value, count in value_counts.items():
            chart_data["labels"].append(value)
            # One single-point dataset per distinct value.
            chart_data["datasets"].append({"label": key, "data": [count]})

        return chart_data

    @staticmethod
    def calculate_basic_statistics(logs, key):
        """Compute mean/median/mode/stddev/variance over numeric values
        extracted from `logs`."""
        values = []
        for log in logs:
            # NOTE(review): this branch keys on the literal string
            # "search_results" rather than the `key` argument — presumably
            # intentional special-casing; confirm against callers.
            if log["key"] == "search_results":
                results = json.loads(log["value"])
                scores = [
                    float(json.loads(result)["score"]) for result in results
                ]
                values.extend(scores)
            else:
                value = log.get("value")
                if value is not None:
                    # Silently skip values that are not parseable floats.
                    with contextlib.suppress(ValueError):
                        values.append(float(value))

        if not values:
            return {
                "Mean": None,
                "Median": None,
                "Mode": None,
                "Standard Deviation": None,
                "Variance": None,
            }

        if len(values) == 1:
            # Degenerate sample: every statistic collapses to the value.
            single_value = round(values[0], 3)
            return {
                "Mean": single_value,
                "Median": single_value,
                "Mode": single_value,
                "Standard Deviation": 0,
                "Variance": 0,
            }

        mean = round(sum(values) / len(values), 3)
        median = round(statistics.median(values), 3)
        # Mode is only meaningful when at least one value repeats.
        mode = (
            round(statistics.mode(values), 3)
            if 
len(set(values)) != len(values) + else None + ) + std_dev = round(statistics.stdev(values) if len(values) > 1 else 0, 3) + variance = round( + statistics.variance(values) if len(values) > 1 else 0, 3 + ) + + return { + "Mean": mean, + "Median": median, + "Mode": mode, + "Standard Deviation": std_dev, + "Variance": variance, + } + + @staticmethod + def calculate_percentile(logs, key, percentile): + values = [] + for log in logs: + if log["key"] == key: + value = log.get("value") + if value is not None: + with contextlib.suppress(ValueError): + values.append(float(value)) + + if not values: + return {"percentile": percentile, "value": None} + + values.sort() + index = int((percentile / 100) * (len(values) - 1)) + return {"percentile": percentile, "value": round(values[index], 3)} + + +class LogAnalytics: + def __init__(self, logs: List[Dict[str, Any]], config: LogAnalyticsConfig): + self.logs = logs + self.log_processor = LogProcessor(config.filters) + self.statistics_calculator = StatisticsCalculator() + self.distribution_generator = DistributionGenerator() + self.visualization_preparer = VisualizationPreparer() + self.config = config + + def count_logs(self) -> Dict[str, Any]: + """Count the logs for each filter.""" + return { + name: len(population) + for name, population in self.log_processor.populations.items() + } + + def process_logs(self) -> Dict[str, Any]: + for log in self.logs: + self.log_processor.process_log(log) + + analytics = {} + for name, population in self.log_processor.populations.items(): + stats = self.statistics_calculator.calculate_statistics( + population, self.config.stat_functions + ) + dists = self.distribution_generator.generate_distributions( + population, self.config.dist_functions + ) + analytics[name] = {"statistics": stats, "distributions": dists} + + return self.visualization_preparer.prepare_visualization_data( + analytics, self.config.vis_functions + ) diff --git a/R2R/r2r/base/logging/run_manager.py 
# --- b/R2R/r2r/base/logging/run_manager.py (new file, mode 100755) ---
import contextvars
import uuid
from contextlib import asynccontextmanager
from typing import Any

from .kv_logger import KVLoggingSingleton

run_id_var = contextvars.ContextVar("run_id", default=None)


class RunManager:
    """Tracks the current run id via a contextvar plus per-run metadata."""

    def __init__(self, logger: KVLoggingSingleton):
        self.logger = logger
        self.run_info = {}

    def generate_run_id(self) -> uuid.UUID:
        """Mint a fresh random run identifier."""
        return uuid.uuid4()

    async def set_run_info(self, pipeline_type: str):
        """Ensure a run id is bound to the current context.

        Returns the run id together with the contextvar token needed to
        restore the previous binding later.
        """
        current = run_id_var.get()
        if current is not None:
            # A run is already active: rebind the same id and reuse it.
            return current, run_id_var.set(current)
        current = self.generate_run_id()
        token = run_id_var.set(current)
        self.run_info[current] = {"pipeline_type": pipeline_type}
        return current, token

    async def get_run_info(self):
        """Metadata for the context's current run, or None if unknown."""
        return self.run_info.get(run_id_var.get(), None)

    async def log_run_info(
        self, key: str, value: Any, is_info_log: bool = False
    ):
        """Forward a key/value pair to the logger under the active run id."""
        active_id = run_id_var.get()
        if active_id:
            await self.logger.log(
                log_id=active_id,
                key=key,
                value=value,
                is_info_log=is_info_log,
            )

    async def clear_run_info(self, token: contextvars.Token):
        """Drop the active run's metadata and restore the previous binding."""
        active_id = run_id_var.get()
        run_id_var.reset(token)
        if active_id and active_id in self.run_info:
            del self.run_info[active_id]


@asynccontextmanager
async def manage_run(run_manager: RunManager, pipeline_type: str):
    """Bind a run id for the duration of the block, restoring it after."""
    run_id, token = await run_manager.set_run_info(pipeline_type)
    try:
        yield run_id
    finally:
        # Note: Do not clear the run info to ensure the run ID remains the same
        run_id_var.reset(token)


# --- b/R2R/r2r/base/parsers/__init__.py (new file, mode 100755) ---
from .base_parser import AsyncParser

__all__ = [
    "AsyncParser",
]

# --- b/R2R/r2r/base/parsers/base_parser.py
new file mode 100755 index 00000000..f1bb49d7 --- /dev/null +++ b/R2R/r2r/base/parsers/base_parser.py @@ -0,0 +1,14 @@ +"""Abstract base class for parsers.""" + +from abc import ABC, abstractmethod +from typing import AsyncGenerator, Generic, TypeVar + +from ..abstractions.document import DataType + +T = TypeVar("T") + + +class AsyncParser(ABC, Generic[T]): + @abstractmethod + async def ingest(self, data: T) -> AsyncGenerator[DataType, None]: + pass diff --git a/R2R/r2r/base/pipeline/__init__.py b/R2R/r2r/base/pipeline/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/base/pipeline/__init__.py diff --git a/R2R/r2r/base/pipeline/base_pipeline.py b/R2R/r2r/base/pipeline/base_pipeline.py new file mode 100755 index 00000000..3c1eff9a --- /dev/null +++ b/R2R/r2r/base/pipeline/base_pipeline.py @@ -0,0 +1,233 @@ +"""Base pipeline class for running a sequence of pipes.""" + +import asyncio +import logging +from enum import Enum +from typing import Any, AsyncGenerator, Optional + +from ..logging.kv_logger import KVLoggingSingleton +from ..logging.run_manager import RunManager, manage_run +from ..pipes.base_pipe import AsyncPipe, AsyncState + +logger = logging.getLogger(__name__) + + +class PipelineTypes(Enum): + EVAL = "eval" + INGESTION = "ingestion" + SEARCH = "search" + RAG = "rag" + OTHER = "other" + + +class AsyncPipeline: + """Pipeline class for running a sequence of pipes.""" + + pipeline_type: str = "other" + + def __init__( + self, + pipe_logger: Optional[KVLoggingSingleton] = None, + run_manager: Optional[RunManager] = None, + ): + self.pipes: list[AsyncPipe] = [] + self.upstream_outputs: list[list[dict[str, str]]] = [] + self.pipe_logger = pipe_logger or KVLoggingSingleton() + self.run_manager = run_manager or RunManager(self.pipe_logger) + self.futures = {} + self.level = 0 + + def add_pipe( + self, + pipe: AsyncPipe, + add_upstream_outputs: Optional[list[dict[str, str]]] = None, + *args, + **kwargs, + ) -> None: + """Add a 
pipe to the pipeline."""
        self.pipes.append(pipe)
        if not add_upstream_outputs:
            add_upstream_outputs = []
        self.upstream_outputs.append(add_upstream_outputs)

    async def run(
        self,
        input: Any,
        state: Optional[AsyncState] = None,
        stream: bool = False,
        run_manager: Optional[RunManager] = None,
        log_run_info: bool = True,
        *args: Any,
        **kwargs: Any,
    ):
        """Run the pipeline.

        Chains every registered pipe: each pipe's async-generator output
        becomes the next pipe's input. When `stream` is False the final
        generator is fully drained and returned as a list; otherwise the
        raw generator is handed back for the caller to iterate.
        """
        run_manager = run_manager or self.run_manager

        try:
            # Validate that the subclass declared a recognized pipeline type.
            PipelineTypes(self.pipeline_type)
        except ValueError:
            raise ValueError(
                f"Invalid pipeline type: {self.pipeline_type}, must be one of {PipelineTypes.__members__.keys()}"
            )

        self.state = state or AsyncState()
        current_input = input
        async with manage_run(run_manager, self.pipeline_type):
            if log_run_info:
                await run_manager.log_run_info(
                    key="pipeline_type",
                    value=self.pipeline_type,
                    is_info_log=True,
                )
            try:
                for pipe_num in range(len(self.pipes)):
                    config_name = self.pipes[pipe_num].config.name
                    # One future per pipe lets downstream pipes await and
                    # replay this pipe's output stream (see _run_pipe).
                    self.futures[config_name] = asyncio.Future()

                    current_input = self._run_pipe(
                        pipe_num,
                        current_input,
                        run_manager,
                        *args,
                        **kwargs,
                    )
                    self.futures[config_name].set_result(current_input)
                if not stream:
                    final_result = await self._consume_all(current_input)
                    return final_result
                else:
                    return current_input
            except Exception as error:
                logger.error(f"Pipeline failed with error: {error}")
                raise error

    async def _consume_all(self, gen: AsyncGenerator) -> list[Any]:
        """Drain `gen`, recursively flattening any nested async generators."""
        result = []
        async for item in gen:
            if hasattr(
                item, "__aiter__"
            ):  # Check if the item is an async generator
                sub_result = await self._consume_all(item)
                result.extend(sub_result)
            else:
                result.append(item)
        return result

    async def _run_pipe(
        self,
        pipe_num: int,
        input: Any,
        run_manager: RunManager,
        *args: Any,
        **kwargs: Any,
    ):
        """Wire declared upstream outputs into pipe `pipe_num`, then yield
        that pipe's results as they are produced."""
        # Collect inputs, waiting for the necessary futures
        pipe = self.pipes[pipe_num]
        add_upstream_outputs = self.sort_upstream_outputs(
            self.upstream_outputs[pipe_num]
        )
        input_dict = {"message": input}

        # Group upstream outputs by prev_pipe_name
        grouped_upstream_outputs = {}
        for upstream_input in add_upstream_outputs:
            upstream_pipe_name = upstream_input["prev_pipe_name"]
            if upstream_pipe_name not in grouped_upstream_outputs:
                grouped_upstream_outputs[upstream_pipe_name] = []
            grouped_upstream_outputs[upstream_pipe_name].append(upstream_input)

        for (
            upstream_pipe_name,
            upstream_inputs,
        ) in grouped_upstream_outputs.items():

            async def resolve_future_output(future):
                result = future.result()
                # consume the async generator
                return [item async for item in result]

            async def replay_items_as_async_gen(items):
                # Re-expose a materialized list as an async generator so the
                # receiving pipe sees the same streaming interface.
                for item in items:
                    yield item

            # Materialize the upstream generator exactly once so it can be
            # replayed as this pipe's message stream below.
            temp_results = await resolve_future_output(
                self.futures[upstream_pipe_name]
            )
            # Only the immediately-preceding pipe's output becomes the
            # "message" stream; other upstream pipes feed named fields.
            if upstream_pipe_name == self.pipes[pipe_num - 1].config.name:
                input_dict["message"] = replay_items_as_async_gen(temp_results)

            for upstream_input in upstream_inputs:
                # Named fields are read from shared state, which the
                # upstream pipe is expected to have populated under "output".
                outputs = await self.state.get(upstream_pipe_name, "output")
                prev_output_field = upstream_input.get(
                    "prev_output_field", None
                )
                if not prev_output_field:
                    raise ValueError(
                        "`prev_output_field` must be specified in the upstream_input"
                    )
                input_dict[upstream_input["input_field"]] = outputs[
                    prev_output_field
                ]

        # Handle the pipe generator
        async for ele in await pipe.run(
            pipe.Input(**input_dict),
            self.state,
            run_manager,
            *args,
            **kwargs,
        ):
            yield ele

    def sort_upstream_outputs(
        self, add_upstream_outputs: list[dict[str, str]]
    ) -> list[dict[str, str]]:
        """Order upstream specs by their producing pipe's position,
        latest-producing pipe first."""
        pipe_name_to_index = {
            pipe.config.name: index for index, pipe in enumerate(self.pipes)
        }

        def get_pipe_index(upstream_output):
            return pipe_name_to_index[upstream_output["prev_pipe_name"]]

        sorted_outputs = sorted(
            add_upstream_outputs, key=get_pipe_index, reverse=True
        )
        return sorted_outputs


class EvalPipeline(AsyncPipeline):
    """A pipeline for 
evaluation.""" + + pipeline_type: str = "eval" + + async def run( + self, + input: Any, + state: Optional[AsyncState] = None, + stream: bool = False, + run_manager: Optional[RunManager] = None, + *args: Any, + **kwargs: Any, + ): + return await super().run( + input, state, stream, run_manager, *args, **kwargs + ) + + def add_pipe( + self, + pipe: AsyncPipe, + add_upstream_outputs: Optional[list[dict[str, str]]] = None, + *args, + **kwargs, + ) -> None: + logger.debug(f"Adding pipe {pipe.config.name} to the EvalPipeline") + return super().add_pipe(pipe, add_upstream_outputs, *args, **kwargs) + + +async def dequeue_requests(queue: asyncio.Queue) -> AsyncGenerator: + """Create an async generator to dequeue requests.""" + while True: + request = await queue.get() + if request is None: + break + yield request diff --git a/R2R/r2r/base/pipes/__init__.py b/R2R/r2r/base/pipes/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/base/pipes/__init__.py diff --git a/R2R/r2r/base/pipes/base_pipe.py b/R2R/r2r/base/pipes/base_pipe.py new file mode 100755 index 00000000..63e3d04e --- /dev/null +++ b/R2R/r2r/base/pipes/base_pipe.py @@ -0,0 +1,163 @@ +import asyncio +import logging +import uuid +from abc import abstractmethod +from enum import Enum +from typing import Any, AsyncGenerator, Optional + +from pydantic import BaseModel + +from r2r.base.logging.kv_logger import KVLoggingSingleton +from r2r.base.logging.run_manager import RunManager, manage_run + +logger = logging.getLogger(__name__) + + +class PipeType(Enum): + INGESTOR = "ingestor" + EVAL = "eval" + GENERATOR = "generator" + SEARCH = "search" + TRANSFORM = "transform" + OTHER = "other" + + +class AsyncState: + """A state object for storing data between pipes.""" + + def __init__(self): + self.data = {} + self.lock = asyncio.Lock() + + async def update(self, outer_key: str, values: dict): + """Update the state with new values.""" + async with self.lock: + if not isinstance(values, dict): 
+ raise ValueError("Values must be contained in a dictionary.") + if outer_key not in self.data: + self.data[outer_key] = {} + for inner_key, inner_value in values.items(): + self.data[outer_key][inner_key] = inner_value + + async def get(self, outer_key: str, inner_key: str, default=None): + """Get a value from the state.""" + async with self.lock: + if outer_key not in self.data: + raise ValueError( + f"Key {outer_key} does not exist in the state." + ) + if inner_key not in self.data[outer_key]: + return default or {} + return self.data[outer_key][inner_key] + + async def delete(self, outer_key: str, inner_key: Optional[str] = None): + """Delete a value from the state.""" + async with self.lock: + if outer_key in self.data and not inner_key: + del self.data[outer_key] + else: + if inner_key not in self.data[outer_key]: + raise ValueError( + f"Key {inner_key} does not exist in the state." + ) + del self.data[outer_key][inner_key] + + +class AsyncPipe: + """An asynchronous pipe for processing data with logging capabilities.""" + + class PipeConfig(BaseModel): + """Configuration for a pipe.""" + + name: str = "default_pipe" + max_log_queue_size: int = 100 + + class Config: + extra = "forbid" + arbitrary_types_allowed = True + + class Input(BaseModel): + """Input for a pipe.""" + + message: AsyncGenerator[Any, None] + + class Config: + extra = "forbid" + arbitrary_types_allowed = True + + def __init__( + self, + type: PipeType = PipeType.OTHER, + config: Optional[PipeConfig] = None, + pipe_logger: Optional[KVLoggingSingleton] = None, + run_manager: Optional[RunManager] = None, + ): + self._config = config or self.PipeConfig() + self._type = type + self.pipe_logger = pipe_logger or KVLoggingSingleton() + self.log_queue = asyncio.Queue() + self.log_worker_task = None + self._run_manager = run_manager or RunManager(self.pipe_logger) + + logger.debug( + f"Initialized pipe {self.config.name} of type {self.type}" + ) + + @property + def config(self) -> PipeConfig: + return 
self._config + + @property + def type(self) -> PipeType: + return self._type + + async def log_worker(self): + while True: + log_data = await self.log_queue.get() + run_id, key, value = log_data + await self.pipe_logger.log(run_id, key, value) + self.log_queue.task_done() + + async def enqueue_log(self, run_id: uuid.UUID, key: str, value: str): + if self.log_queue.qsize() < self.config.max_log_queue_size: + await self.log_queue.put((run_id, key, value)) + + async def run( + self, + input: Input, + state: AsyncState, + run_manager: Optional[RunManager] = None, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[Any, None]: + """Run the pipe with logging capabilities.""" + + run_manager = run_manager or self._run_manager + + async def wrapped_run() -> AsyncGenerator[Any, None]: + async with manage_run(run_manager, self.config.name) as run_id: + self.log_worker_task = asyncio.create_task( + self.log_worker(), name=f"log-worker-{self.config.name}" + ) + try: + async for result in self._run_logic( + input, state, run_id=run_id, *args, **kwargs + ): + yield result + finally: + await self.log_queue.join() + self.log_worker_task.cancel() + self.log_queue = asyncio.Queue() + + return wrapped_run() + + @abstractmethod + async def _run_logic( + self, + input: Input, + state: AsyncState, + run_id: uuid.UUID, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[Any, None]: + pass diff --git a/R2R/r2r/base/providers/__init__.py b/R2R/r2r/base/providers/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/base/providers/__init__.py diff --git a/R2R/r2r/base/providers/base_provider.py b/R2R/r2r/base/providers/base_provider.py new file mode 100755 index 00000000..8ee8d56a --- /dev/null +++ b/R2R/r2r/base/providers/base_provider.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod, abstractproperty +from typing import Any, Optional, Type + +from pydantic import BaseModel + + +class ProviderConfig(BaseModel, ABC): + """A base provider 
configuration class""" + + extra_fields: dict[str, Any] = {} + provider: Optional[str] = None + + class Config: + arbitrary_types_allowed = True + ignore_extra = True + + @abstractmethod + def validate(self) -> None: + pass + + @classmethod + def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig": + base_args = cls.__fields__.keys() + filtered_kwargs = { + k: v if v != "None" else None + for k, v in kwargs.items() + if k in base_args + } + instance = cls(**filtered_kwargs) + for k, v in kwargs.items(): + if k not in base_args: + instance.extra_fields[k] = v + return instance + + @abstractproperty + @property + def supported_providers(self) -> list[str]: + """Define a list of supported providers.""" + pass + + +class Provider(ABC): + """A base provider class to provide a common interface for all providers.""" + + def __init__(self, config: Optional[ProviderConfig] = None): + if config: + config.validate() + self.config = config diff --git a/R2R/r2r/base/providers/embedding_provider.py b/R2R/r2r/base/providers/embedding_provider.py new file mode 100755 index 00000000..8f3af56f --- /dev/null +++ b/R2R/r2r/base/providers/embedding_provider.py @@ -0,0 +1,83 @@ +import logging +from abc import abstractmethod +from enum import Enum +from typing import Optional + +from ..abstractions.search import VectorSearchResult +from .base_provider import Provider, ProviderConfig + +logger = logging.getLogger(__name__) + + +class EmbeddingConfig(ProviderConfig): + """A base embedding configuration class""" + + provider: Optional[str] = None + base_model: Optional[str] = None + base_dimension: Optional[int] = None + rerank_model: Optional[str] = None + rerank_dimension: Optional[int] = None + rerank_transformer_type: Optional[str] = None + batch_size: int = 1 + + def validate(self) -> None: + if self.provider not in self.supported_providers: + raise ValueError(f"Provider '{self.provider}' is not supported.") + + @property + def supported_providers(self) -> 
list[str]: + return [None, "openai", "ollama", "sentence-transformers"] + + +class EmbeddingProvider(Provider): + """An abstract class to provide a common interface for embedding providers.""" + + class PipeStage(Enum): + BASE = 1 + RERANK = 2 + + def __init__(self, config: EmbeddingConfig): + if not isinstance(config, EmbeddingConfig): + raise ValueError( + "EmbeddingProvider must be initialized with a `EmbeddingConfig`." + ) + logger.info(f"Initializing EmbeddingProvider with config {config}.") + + super().__init__(config) + + @abstractmethod + def get_embedding(self, text: str, stage: PipeStage = PipeStage.BASE): + pass + + async def async_get_embedding( + self, text: str, stage: PipeStage = PipeStage.BASE + ): + return self.get_embedding(text, stage) + + @abstractmethod + def get_embeddings( + self, texts: list[str], stage: PipeStage = PipeStage.BASE + ): + pass + + async def async_get_embeddings( + self, texts: list[str], stage: PipeStage = PipeStage.BASE + ): + return self.get_embeddings(texts, stage) + + @abstractmethod + def rerank( + self, + query: str, + results: list[VectorSearchResult], + stage: PipeStage = PipeStage.RERANK, + limit: int = 10, + ): + pass + + @abstractmethod + def tokenize_string( + self, text: str, model: str, stage: PipeStage + ) -> list[int]: + """Tokenizes the input string.""" + pass diff --git a/R2R/r2r/base/providers/eval_provider.py b/R2R/r2r/base/providers/eval_provider.py new file mode 100755 index 00000000..76053f87 --- /dev/null +++ b/R2R/r2r/base/providers/eval_provider.py @@ -0,0 +1,46 @@ +from typing import Optional, Union + +from ..abstractions.llm import GenerationConfig +from .base_provider import Provider, ProviderConfig +from .llm_provider import LLMConfig + + +class EvalConfig(ProviderConfig): + """A base eval config class""" + + llm: Optional[LLMConfig] = None + + def validate(self) -> None: + if self.provider not in self.supported_providers: + raise ValueError(f"Provider {self.provider} not supported.") + if 
self.provider and not self.llm: + raise ValueError( + "EvalConfig must have a `llm` attribute when specifying a provider." + ) + + @property + def supported_providers(self) -> list[str]: + return [None, "local"] + + +class EvalProvider(Provider): + """An abstract class to provide a common interface for evaluation providers.""" + + def __init__(self, config: EvalConfig): + if not isinstance(config, EvalConfig): + raise ValueError( + "EvalProvider must be initialized with a `EvalConfig`." + ) + + super().__init__(config) + + def evaluate( + self, + query: str, + context: str, + completion: str, + eval_generation_config: Optional[GenerationConfig] = None, + ) -> dict[str, dict[str, Union[str, float]]]: + return self._evaluate( + query, context, completion, eval_generation_config + ) diff --git a/R2R/r2r/base/providers/kg_provider.py b/R2R/r2r/base/providers/kg_provider.py new file mode 100755 index 00000000..4ae96b11 --- /dev/null +++ b/R2R/r2r/base/providers/kg_provider.py @@ -0,0 +1,182 @@ +"""Base classes for knowledge graph providers.""" + +import json +import logging +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Optional, Tuple + +from .prompt_provider import PromptProvider + +if TYPE_CHECKING: + from r2r.main import R2RClient + +from ...base.utils.base_utils import EntityType, Relation +from ..abstractions.llama_abstractions import EntityNode, LabelledNode +from ..abstractions.llama_abstractions import Relation as LlamaRelation +from ..abstractions.llama_abstractions import VectorStoreQuery +from ..abstractions.llm import GenerationConfig +from .base_provider import ProviderConfig + +logger = logging.getLogger(__name__) + + +class KGConfig(ProviderConfig): + """A base KG config class""" + + provider: Optional[str] = None + batch_size: int = 1 + kg_extraction_prompt: Optional[str] = "few_shot_ner_kg_extraction" + kg_agent_prompt: Optional[str] = "kg_agent" + kg_extraction_config: Optional[GenerationConfig] = None + + def 
validate(self) -> None: + if self.provider not in self.supported_providers: + raise ValueError(f"Provider '{self.provider}' is not supported.") + + @property + def supported_providers(self) -> list[str]: + return [None, "neo4j"] + + +class KGProvider(ABC): + """An abstract class to provide a common interface for Knowledge Graphs.""" + + def __init__(self, config: KGConfig) -> None: + if not isinstance(config, KGConfig): + raise ValueError( + "KGProvider must be initialized with a `KGConfig`." + ) + logger.info(f"Initializing KG provider with config: {config}") + self.config = config + self.validate_config() + + def validate_config(self) -> None: + self.config.validate() + + @property + @abstractmethod + def client(self) -> Any: + """Get client.""" + pass + + @abstractmethod + def get(self, subj: str) -> list[list[str]]: + """Abstract method to get triplets.""" + pass + + @abstractmethod + def get_rel_map( + self, + subjs: Optional[list[str]] = None, + depth: int = 2, + limit: int = 30, + ) -> dict[str, list[list[str]]]: + """Abstract method to get depth-aware rel map.""" + pass + + @abstractmethod + def upsert_nodes(self, nodes: list[EntityNode]) -> None: + """Abstract method to add triplet.""" + pass + + @abstractmethod + def upsert_relations(self, relations: list[LlamaRelation]) -> None: + """Abstract method to add triplet.""" + pass + + @abstractmethod + def delete(self, subj: str, rel: str, obj: str) -> None: + """Abstract method to delete triplet.""" + pass + + @abstractmethod + def get_schema(self, refresh: bool = False) -> str: + """Abstract method to get the schema of the graph store.""" + pass + + @abstractmethod + def structured_query( + self, query: str, param_map: Optional[dict[str, Any]] = {} + ) -> Any: + """Abstract method to query the graph store with statement and parameters.""" + pass + + @abstractmethod + def vector_query( + self, query: VectorStoreQuery, **kwargs: Any + ) -> Tuple[list[LabelledNode], list[float]]: + """Abstract method to query 
the graph store with a vector store query.""" + + # TODO - Type this method. + @abstractmethod + def update_extraction_prompt( + self, + prompt_provider: Any, + entity_types: list[Any], + relations: list[Relation], + ): + """Abstract method to update the KG extraction prompt.""" + pass + + # TODO - Type this method. + @abstractmethod + def update_kg_agent_prompt( + self, + prompt_provider: Any, + entity_types: list[Any], + relations: list[Relation], + ): + """Abstract method to update the KG agent prompt.""" + pass + + +def escape_braces(s: str) -> str: + """ + Escape braces in a string. + This is a placeholder function - implement the actual logic as needed. + """ + # Implement your escape_braces logic here + return s.replace("{", "{{").replace("}", "}}") + + +# TODO - Make this more configurable / intelligent +def update_kg_prompt( + client: "R2RClient", + r2r_prompts: PromptProvider, + prompt_base: str, + entity_types: list[EntityType], + relations: list[Relation], +) -> None: + # Get the default extraction template + template_name: str = f"{prompt_base}_with_spec" + + new_template: str = r2r_prompts.get_prompt( + template_name, + { + "entity_types": json.dumps( + { + "entity_types": [ + str(entity.name) for entity in entity_types + ] + }, + indent=4, + ), + "relations": json.dumps( + {"predicates": [str(relation.name) for relation in relations]}, + indent=4, + ), + "input": """\n{input}""", + }, + ) + + # Escape all braces in the template, except for the {input} placeholder, for formatting + escaped_template: str = escape_braces(new_template).replace( + """{{input}}""", """{input}""" + ) + + # Update the client's prompt + client.update_prompt( + prompt_base, + template=escaped_template, + input_types={"input": "str"}, + ) diff --git a/R2R/r2r/base/providers/llm_provider.py b/R2R/r2r/base/providers/llm_provider.py new file mode 100755 index 00000000..9b6499a4 --- /dev/null +++ b/R2R/r2r/base/providers/llm_provider.py @@ -0,0 +1,66 @@ +"""Base classes for 
language model providers.""" + +import logging +from abc import abstractmethod +from typing import Optional + +from r2r.base.abstractions.llm import GenerationConfig + +from ..abstractions.llm import LLMChatCompletion, LLMChatCompletionChunk +from .base_provider import Provider, ProviderConfig + +logger = logging.getLogger(__name__) + + +class LLMConfig(ProviderConfig): + """A base LLM config class""" + + provider: Optional[str] = None + generation_config: Optional[GenerationConfig] = None + + def validate(self) -> None: + if not self.provider: + raise ValueError("Provider must be set.") + + if self.provider and self.provider not in self.supported_providers: + raise ValueError(f"Provider '{self.provider}' is not supported.") + + @property + def supported_providers(self) -> list[str]: + return ["litellm", "openai"] + + +class LLMProvider(Provider): + """An abstract class to provide a common interface for LLMs.""" + + def __init__( + self, + config: LLMConfig, + ) -> None: + if not isinstance(config, LLMConfig): + raise ValueError( + "LLMProvider must be initialized with a `LLMConfig`." 
+ ) + logger.info(f"Initializing LLM provider with config: {config}") + + super().__init__(config) + + @abstractmethod + def get_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletion: + """Abstract method to get a chat completion from the provider.""" + pass + + @abstractmethod + def get_completion_stream( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletionChunk: + """Abstract method to get a completion stream from the provider.""" + pass diff --git a/R2R/r2r/base/providers/prompt_provider.py b/R2R/r2r/base/providers/prompt_provider.py new file mode 100755 index 00000000..78af9e11 --- /dev/null +++ b/R2R/r2r/base/providers/prompt_provider.py @@ -0,0 +1,65 @@ +import logging +from abc import abstractmethod +from typing import Any, Optional + +from .base_provider import Provider, ProviderConfig + +logger = logging.getLogger(__name__) + + +class PromptConfig(ProviderConfig): + def validate(self) -> None: + pass + + @property + def supported_providers(self) -> list[str]: + # Return a list of supported prompt providers + return ["default_prompt_provider"] + + +class PromptProvider(Provider): + def __init__(self, config: Optional[PromptConfig] = None): + if config is None: + config = PromptConfig() + elif not isinstance(config, PromptConfig): + raise ValueError( + "PromptProvider must be initialized with a `PromptConfig`." 
+ ) + logger.info(f"Initializing PromptProvider with config {config}.") + super().__init__(config) + + @abstractmethod + def add_prompt( + self, name: str, template: str, input_types: dict[str, str] + ) -> None: + pass + + @abstractmethod + def get_prompt( + self, prompt_name: str, inputs: Optional[dict[str, Any]] = None + ) -> str: + pass + + @abstractmethod + def get_all_prompts(self) -> dict[str, str]: + pass + + @abstractmethod + def update_prompt( + self, + name: str, + template: Optional[str] = None, + input_types: Optional[dict[str, str]] = None, + ) -> None: + pass + + def _get_message_payload( + self, system_prompt: str, task_prompt: str + ) -> dict: + return [ + { + "role": "system", + "content": system_prompt, + }, + {"role": "user", "content": task_prompt}, + ] diff --git a/R2R/r2r/base/providers/vector_db_provider.py b/R2R/r2r/base/providers/vector_db_provider.py new file mode 100755 index 00000000..a6d5aaa8 --- /dev/null +++ b/R2R/r2r/base/providers/vector_db_provider.py @@ -0,0 +1,142 @@ +import logging +from abc import ABC, abstractmethod +from typing import Optional, Union + +from ..abstractions.document import DocumentInfo +from ..abstractions.search import VectorSearchResult +from ..abstractions.vector import VectorEntry +from .base_provider import Provider, ProviderConfig + +logger = logging.getLogger(__name__) + + +class VectorDBConfig(ProviderConfig): + provider: str + + def __post_init__(self): + self.validate() + # Capture additional fields + for key, value in self.extra_fields.items(): + setattr(self, key, value) + + def validate(self) -> None: + if self.provider not in self.supported_providers: + raise ValueError(f"Provider '{self.provider}' is not supported.") + + @property + def supported_providers(self) -> list[str]: + return ["local", "pgvector"] + + +class VectorDBProvider(Provider, ABC): + def __init__(self, config: VectorDBConfig): + if not isinstance(config, VectorDBConfig): + raise ValueError( + "VectorDBProvider must be 
initialized with a `VectorDBConfig`." + ) + logger.info(f"Initializing VectorDBProvider with config {config}.") + super().__init__(config) + + @abstractmethod + def initialize_collection(self, dimension: int) -> None: + pass + + @abstractmethod + def copy(self, entry: VectorEntry, commit: bool = True) -> None: + pass + + @abstractmethod + def upsert(self, entry: VectorEntry, commit: bool = True) -> None: + pass + + @abstractmethod + def search( + self, + query_vector: list[float], + filters: dict[str, Union[bool, int, str]] = {}, + limit: int = 10, + *args, + **kwargs, + ) -> list[VectorSearchResult]: + pass + + @abstractmethod + def hybrid_search( + self, + query_text: str, + query_vector: list[float], + limit: int = 10, + filters: Optional[dict[str, Union[bool, int, str]]] = None, + # Hybrid search parameters + full_text_weight: float = 1.0, + semantic_weight: float = 1.0, + rrf_k: int = 20, # typical value is ~2x the number of results you want + *args, + **kwargs, + ) -> list[VectorSearchResult]: + pass + + @abstractmethod + def create_index(self, index_type, column_name, index_options): + pass + + def upsert_entries( + self, entries: list[VectorEntry], commit: bool = True + ) -> None: + for entry in entries: + self.upsert(entry, commit=commit) + + def copy_entries( + self, entries: list[VectorEntry], commit: bool = True + ) -> None: + for entry in entries: + self.copy(entry, commit=commit) + + @abstractmethod + def delete_by_metadata( + self, + metadata_fields: list[str], + metadata_values: list[Union[bool, int, str]], + ) -> list[str]: + if len(metadata_fields) != len(metadata_values): + raise ValueError( + "The number of metadata fields and values must be equal." 
+ ) + pass + + @abstractmethod + def get_metadatas( + self, + metadata_fields: list[str], + filter_field: Optional[str] = None, + filter_value: Optional[str] = None, + ) -> list[str]: + pass + + @abstractmethod + def upsert_documents_overview( + self, document_infs: list[DocumentInfo] + ) -> None: + pass + + @abstractmethod + def get_documents_overview( + self, + filter_document_ids: Optional[list[str]] = None, + filter_user_ids: Optional[list[str]] = None, + ) -> list[DocumentInfo]: + pass + + @abstractmethod + def get_document_chunks(self, document_id: str) -> list[dict]: + pass + + @abstractmethod + def delete_from_documents_overview( + self, document_id: str, version: Optional[str] = None + ) -> dict: + pass + + @abstractmethod + def get_users_overview(self, user_ids: Optional[list[str]] = None) -> dict: + pass diff --git a/R2R/r2r/base/utils/__init__.py b/R2R/r2r/base/utils/__init__.py new file mode 100755 index 00000000..104d50eb --- /dev/null +++ b/R2R/r2r/base/utils/__init__.py @@ -0,0 +1,26 @@ +from .base_utils import ( + EntityType, + Relation, + format_entity_types, + format_relations, + generate_id_from_label, + generate_run_id, + increment_version, + run_pipeline, + to_async_generator, +) +from .splitter.text import RecursiveCharacterTextSplitter, TextSplitter + +__all__ = [ + "RecursiveCharacterTextSplitter", + "TextSplitter", + "run_pipeline", + "to_async_generator", + "generate_run_id", + "generate_id_from_label", + "increment_version", + "EntityType", + "Relation", + "format_entity_types", + "format_relations", +] diff --git a/R2R/r2r/base/utils/base_utils.py b/R2R/r2r/base/utils/base_utils.py new file mode 100755 index 00000000..12652833 --- /dev/null +++ b/R2R/r2r/base/utils/base_utils.py @@ -0,0 +1,63 @@ +import asyncio +import uuid +from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable + +if TYPE_CHECKING: + from ..pipeline.base_pipeline import AsyncPipeline + + +def generate_run_id() -> uuid.UUID: + return uuid.uuid4() + + +def 
generate_id_from_label(label: str) -> uuid.UUID: + return uuid.uuid5(uuid.NAMESPACE_DNS, label) + + +async def to_async_generator( + iterable: Iterable[Any], +) -> AsyncGenerator[Any, None]: + for item in iterable: + yield item + + +def run_pipeline(pipeline: "AsyncPipeline", input: Any, *args, **kwargs): + if not isinstance(input, AsyncGenerator) and not isinstance(input, list): + input = to_async_generator([input]) + elif not isinstance(input, AsyncGenerator): + input = to_async_generator(input) + + async def _run_pipeline(input, *args, **kwargs): + return await pipeline.run(input, *args, **kwargs) + + return asyncio.run(_run_pipeline(input, *args, **kwargs)) + + +def increment_version(version: str) -> str: + prefix = version[:-1] + suffix = int(version[-1]) + return f"{prefix}{suffix + 1}" + + +class EntityType: + def __init__(self, name: str): + self.name = name + + +class Relation: + def __init__(self, name: str): + self.name = name + + +def format_entity_types(entity_types: list[EntityType]) -> str: + lines = [] + for entity in entity_types: + lines.append(entity.name) + return "\n".join(lines) + + +def format_relations(predicates: list[Relation]) -> str: + lines = [] + for predicate in predicates: + lines.append(predicate.name) + return "\n".join(lines) diff --git a/R2R/r2r/base/utils/splitter/__init__.py b/R2R/r2r/base/utils/splitter/__init__.py new file mode 100755 index 00000000..07a9f554 --- /dev/null +++ b/R2R/r2r/base/utils/splitter/__init__.py @@ -0,0 +1,3 @@ +from .text import RecursiveCharacterTextSplitter + +__all__ = ["RecursiveCharacterTextSplitter"] diff --git a/R2R/r2r/base/utils/splitter/text.py b/R2R/r2r/base/utils/splitter/text.py new file mode 100755 index 00000000..5458310c --- /dev/null +++ b/R2R/r2r/base/utils/splitter/text.py @@ -0,0 +1,1979 @@ +# Source - LangChain +# URL: https://github.com/langchain-ai/langchain/blob/6a5b084704afa22ca02f78d0464f35aed75d1ff2/libs/langchain/langchain/text_splitter.py#L851 +"""**Text Splitters** are 
classes for splitting text. + + +**Class hierarchy:** + +.. code-block:: + + BaseDocumentTransformer --> TextSplitter --> <name>TextSplitter # Example: CharacterTextSplitter + RecursiveCharacterTextSplitter --> <name>TextSplitter + +Note: **MarkdownHeaderTextSplitter** and **HTMLHeaderTextSplitter do not derive from TextSplitter. + + +**Main helpers:** + +.. code-block:: + + Document, Tokenizer, Language, LineType, HeaderType + +""" # noqa: E501 + +from __future__ import annotations + +import copy +import json +import logging +import pathlib +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import Enum +from io import BytesIO, StringIO +from typing import ( + AbstractSet, + Any, + Callable, + Collection, + Dict, + Iterable, + List, + Literal, + Optional, + Sequence, + Tuple, + Type, + TypedDict, + TypeVar, + Union, + cast, +) + +import requests +from pydantic import BaseModel, Field, PrivateAttr +from typing_extensions import NotRequired + +logger = logging.getLogger(__name__) + +TS = TypeVar("TS", bound="TextSplitter") + + +class BaseSerialized(TypedDict): + """Base class for serialized objects.""" + + lc: int + id: List[str] + name: NotRequired[str] + graph: NotRequired[Dict[str, Any]] + + +class SerializedConstructor(BaseSerialized): + """Serialized constructor.""" + + type: Literal["constructor"] + kwargs: Dict[str, Any] + + +class SerializedSecret(BaseSerialized): + """Serialized secret.""" + + type: Literal["secret"] + + +class SerializedNotImplemented(BaseSerialized): + """Serialized not implemented.""" + + type: Literal["not_implemented"] + repr: Optional[str] + + +def try_neq_default(value: Any, key: str, model: BaseModel) -> bool: + """Try to determine if a value is different from the default. + + Args: + value: The value. + key: The key. + model: The model. + + Returns: + Whether the value is different from the default. 
+ """ + try: + return model.__fields__[key].get_default() != value + except Exception: + return True + + +class Serializable(BaseModel, ABC): + """Serializable base class.""" + + @classmethod + def is_lc_serializable(cls) -> bool: + """Is this class serializable?""" + return False + + @classmethod + def get_lc_namespace(cls) -> List[str]: + """Get the namespace of the langchain object. + + For example, if the class is `langchain.llms.openai.OpenAI`, then the + namespace is ["langchain", "llms", "openai"] + """ + return cls.__module__.split(".") + + @property + def lc_secrets(self) -> Dict[str, str]: + """A map of constructor argument names to secret ids. + + For example, + {"openai_api_key": "OPENAI_API_KEY"} + """ + return dict() + + @property + def lc_attributes(self) -> Dict: + """List of attribute names that should be included in the serialized kwargs. + + These attributes must be accepted by the constructor. + """ + return {} + + @classmethod + def lc_id(cls) -> List[str]: + """A unique identifier for this class for serialization purposes. + + The unique identifier is a list of strings that describes the path + to the object. 
+ """ + return [*cls.get_lc_namespace(), cls.__name__] + + class Config: + extra = "ignore" + + def __repr_args__(self) -> Any: + return [ + (k, v) + for k, v in super().__repr_args__() + if (k not in self.__fields__ or try_neq_default(v, k, self)) + ] + + _lc_kwargs = PrivateAttr(default_factory=dict) + + def __init__(self, **kwargs: Any) -> None: + super().__init__(**kwargs) + self._lc_kwargs = kwargs + + def to_json( + self, + ) -> Union[SerializedConstructor, SerializedNotImplemented]: + if not self.is_lc_serializable(): + return self.to_json_not_implemented() + + secrets = dict() + # Get latest values for kwargs if there is an attribute with same name + lc_kwargs = { + k: getattr(self, k, v) + for k, v in self._lc_kwargs.items() + if not (self.__exclude_fields__ or {}).get(k, False) # type: ignore + } + + # Merge the lc_secrets and lc_attributes from every class in the MRO + for cls in [None, *self.__class__.mro()]: + # Once we get to Serializable, we're done + if cls is Serializable: + break + + if cls: + deprecated_attributes = [ + "lc_namespace", + "lc_serializable", + ] + + for attr in deprecated_attributes: + if hasattr(cls, attr): + raise ValueError( + f"Class {self.__class__} has a deprecated " + f"attribute {attr}. Please use the corresponding " + f"classmethod instead." + ) + + # Get a reference to self bound to each class in the MRO + this = cast( + Serializable, self if cls is None else super(cls, self) + ) + + secrets.update(this.lc_secrets) + # Now also add the aliases for the secrets + # This ensures known secret aliases are hidden. + # Note: this does NOT hide any other extra kwargs + # that are not present in the fields. 
+ for key in list(secrets): + value = secrets[key] + if key in this.__fields__: + secrets[this.__fields__[key].alias] = value + lc_kwargs.update(this.lc_attributes) + + # include all secrets, even if not specified in kwargs + # as these secrets may be passed as an environment variable instead + for key in secrets.keys(): + secret_value = getattr(self, key, None) or lc_kwargs.get(key) + if secret_value is not None: + lc_kwargs.update({key: secret_value}) + + return { + "lc": 1, + "type": "constructor", + "id": self.lc_id(), + "kwargs": ( + lc_kwargs + if not secrets + else _replace_secrets(lc_kwargs, secrets) + ), + } + + def to_json_not_implemented(self) -> SerializedNotImplemented: + return to_json_not_implemented(self) + + +def _replace_secrets( + root: Dict[Any, Any], secrets_map: Dict[str, str] +) -> Dict[Any, Any]: + result = root.copy() + for path, secret_id in secrets_map.items(): + [*parts, last] = path.split(".") + current = result + for part in parts: + if part not in current: + break + current[part] = current[part].copy() + current = current[part] + if last in current: + current[last] = { + "lc": 1, + "type": "secret", + "id": [secret_id], + } + return result + + +def to_json_not_implemented(obj: object) -> SerializedNotImplemented: + """Serialize a "not implemented" object. 
+ + Args: + obj: object to serialize + + Returns: + SerializedNotImplemented + """ + _id: List[str] = [] + try: + if hasattr(obj, "__name__"): + _id = [*obj.__module__.split("."), obj.__name__] + elif hasattr(obj, "__class__"): + _id = [ + *obj.__class__.__module__.split("."), + obj.__class__.__name__, + ] + except Exception: + pass + + result: SerializedNotImplemented = { + "lc": 1, + "type": "not_implemented", + "id": _id, + "repr": None, + } + try: + result["repr"] = repr(obj) + except Exception: + pass + return result + + +class Document(Serializable): + """Class for storing a piece of text and associated metadata.""" + + page_content: str + """String text.""" + metadata: dict = Field(default_factory=dict) + """Arbitrary metadata about the page content (e.g., source, relationships to other + documents, etc.). + """ + type: Literal["Document"] = "Document" + + def __init__(self, page_content: str, **kwargs: Any) -> None: + """Pass page_content in as positional or named arg.""" + super().__init__(page_content=page_content, **kwargs) + + @classmethod + def is_lc_serializable(cls) -> bool: + """Return whether this class is serializable.""" + return True + + @classmethod + def get_lc_namespace(cls) -> List[str]: + """Get the namespace of the langchain object.""" + return ["langchain", "schema", "document"] + + +class BaseDocumentTransformer(ABC): + """Abstract base class for document transformation systems. + + A document transformation system takes a sequence of Documents and returns a + sequence of transformed Documents. + + Example: + .. 
code-block:: python + + class EmbeddingsRedundantFilter(BaseDocumentTransformer, BaseModel): + embeddings: Embeddings + similarity_fn: Callable = cosine_similarity + similarity_threshold: float = 0.95 + + class Config: + arbitrary_types_allowed = True + + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + stateful_documents = get_stateful_documents(documents) + embedded_documents = _get_embeddings_from_stateful_docs( + self.embeddings, stateful_documents + ) + included_idxs = _filter_similar_embeddings( + embedded_documents, self.similarity_fn, self.similarity_threshold + ) + return [stateful_documents[i] for i in sorted(included_idxs)] + + async def atransform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + raise NotImplementedError + + """ # noqa: E501 + + @abstractmethod + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + """Transform a list of documents. + + Args: + documents: A sequence of Documents to be transformed. + + Returns: + A list of transformed Documents. + """ + + async def atransform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + """Asynchronously transform a list of documents. + + Args: + documents: A sequence of Documents to be transformed. + + Returns: + A list of transformed Documents. + """ + raise NotImplementedError("This method is not implemented.") + # return await langchain_core.runnables.config.run_in_executor( + # None, self.transform_documents, documents, **kwargs + # ) + + +def _make_spacy_pipe_for_splitting( + pipe: str, *, max_length: int = 1_000_000 +) -> Any: # avoid importing spacy + try: + import spacy + except ImportError: + raise ImportError( + "Spacy is not installed, please install it with `pip install spacy`." 
        )
    if pipe == "sentencizer":
        # Cheap path: a blank English pipeline with only the rule-based
        # sentence-boundary component.
        from spacy.lang.en import English

        sentencizer = English()
        sentencizer.add_pipe("sentencizer")
    else:
        # Full model requested: load it without components this splitter
        # never uses.
        sentencizer = spacy.load(pipe, exclude=["ner", "tagger"])
    sentencizer.max_length = max_length
    return sentencizer


def _split_text_with_regex(
    text: str, separator: str, keep_separator: bool
) -> List[str]:
    """Split ``text`` on the regex ``separator``.

    With ``keep_separator`` each matched separator is glued onto the
    segment that follows it, so no characters are dropped. Empty
    segments are always filtered from the result.
    """
    # Now that we have the separator, split the text
    if separator:
        if keep_separator:
            # The parentheses in the pattern keep the delimiters in the result.
            # re.split then interleaves: [seg0, sep1, seg1, sep2, seg2, ...]
            _splits = re.split(f"({separator})", text)
            # Pair each separator with the segment after it.
            splits = [
                _splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)
            ]
            # NOTE(review): with a single capture group re.split yields an
            # odd-length list, so this branch looks unreachable — confirm.
            if len(_splits) % 2 == 0:
                splits += _splits[-1:]
            splits = [_splits[0]] + splits
        else:
            splits = re.split(separator, text)
    else:
        # No separator: one piece per character.
        splits = list(text)
    return [s for s in splits if s != ""]


class TextSplitter(BaseDocumentTransformer, ABC):
    """Interface for splitting text into chunks."""

    def __init__(
        self,
        chunk_size: int = 4000,
        chunk_overlap: int = 200,
        length_function: Callable[[str], int] = len,
        keep_separator: bool = False,
        add_start_index: bool = False,
        strip_whitespace: bool = True,
    ) -> None:
        """Create a new TextSplitter.

        Args:
            chunk_size: Maximum size of chunks to return
            chunk_overlap: Overlap in characters between chunks
            length_function: Function that measures the length of given chunks
            keep_separator: Whether to keep the separator in the chunks
            add_start_index: If `True`, includes chunk's start index in metadata
            strip_whitespace: If `True`, strips whitespace from the start and end of
                every document
        """
        if chunk_overlap > chunk_size:
            raise ValueError(
                f"Got a larger chunk overlap ({chunk_overlap}) than chunk size "
                f"({chunk_size}), should be smaller."
            )
        self._chunk_size = chunk_size
        self._chunk_overlap = chunk_overlap
        self._length_function = length_function
        self._keep_separator = keep_separator
        self._add_start_index = add_start_index
        self._strip_whitespace = strip_whitespace

    @abstractmethod
    def split_text(self, text: str) -> List[str]:
        """Split text into multiple components."""

    def create_documents(
        self, texts: List[str], metadatas: Optional[List[dict]] = None
    ) -> List[Document]:
        """Create documents from a list of texts."""
        _metadatas = metadatas or [{}] * len(texts)
        documents = []
        for i, text in enumerate(texts):
            index = 0
            previous_chunk_len = 0
            for chunk in self.split_text(text):
                # Deep copy so chunks never share mutable metadata.
                metadata = copy.deepcopy(_metadatas[i])
                if self._add_start_index:
                    # Search from just before the previous chunk's end so
                    # repeated substrings resolve to the right occurrence.
                    offset = index + previous_chunk_len - self._chunk_overlap
                    index = text.find(chunk, max(0, offset))
                    metadata["start_index"] = index
                    previous_chunk_len = len(chunk)
                new_doc = Document(page_content=chunk, metadata=metadata)
                documents.append(new_doc)
        return documents

    def split_documents(self, documents: Iterable[Document]) -> List[Document]:
        """Split documents."""
        texts, metadatas = [], []
        for doc in documents:
            texts.append(doc.page_content)
            metadatas.append(doc.metadata)
        return self.create_documents(texts, metadatas=metadatas)

    def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
        # Returns None (rather than "") when the joined chunk is empty so
        # callers can drop it.
        text = separator.join(docs)
        if self._strip_whitespace:
            text = text.strip()
        if text == "":
            return None
        else:
            return text

    def _merge_splits(
        self, splits: Iterable[str], separator: str
    ) -> List[str]:
        # We now want to combine these smaller pieces into medium size
        # chunks to send to the LLM.
        separator_len = self._length_function(separator)

        docs = []
        # Sliding window of pending splits and its current total length.
        current_doc: List[str] = []
        total = 0
        for d in splits:
            _len = self._length_function(d)
            if (
                total + _len + (separator_len if len(current_doc) > 0 else 0)
                > self._chunk_size
            ):
                if total > self._chunk_size:
                    logger.warning(
                        f"Created a chunk of size {total}, "
                        f"which is longer than the specified {self._chunk_size}"
                    )
                if len(current_doc) > 0:
                    doc = self._join_docs(current_doc, separator)
                    if doc is not None:
                        docs.append(doc)
                    # Keep on popping if:
                    # - we have a larger chunk than in the chunk overlap
                    # - or if we still have any chunks and the length is long
                    while total > self._chunk_overlap or (
                        total
                        + _len
                        + (separator_len if len(current_doc) > 0 else 0)
                        > self._chunk_size
                        and total > 0
                    ):
                        total -= self._length_function(current_doc[0]) + (
                            separator_len if len(current_doc) > 1 else 0
                        )
                        current_doc = current_doc[1:]
            current_doc.append(d)
            total += _len + (separator_len if len(current_doc) > 1 else 0)
        # Flush whatever remains in the window.
        doc = self._join_docs(current_doc, separator)
        if doc is not None:
            docs.append(doc)
        return docs

    @classmethod
    def from_huggingface_tokenizer(
        cls, tokenizer: Any, **kwargs: Any
    ) -> TextSplitter:
        """Text splitter that uses HuggingFace tokenizer to count length."""
        try:
            from transformers import PreTrainedTokenizerBase

            if not isinstance(tokenizer, PreTrainedTokenizerBase):
                raise ValueError(
                    "Tokenizer received was not an instance of PreTrainedTokenizerBase"
                )

            def _huggingface_tokenizer_length(text: str) -> int:
                # Length = token count under the supplied tokenizer.
                return len(tokenizer.encode(text))

        except ImportError:
            raise ValueError(
                "Could not import transformers python package. "
                "Please install it with `pip install transformers`."
            )
        return cls(length_function=_huggingface_tokenizer_length, **kwargs)

    @classmethod
    def from_tiktoken_encoder(
        cls: Type[TS],
        encoding_name: str = "gpt2",
        model: Optional[str] = None,
        # NOTE(review): mutable default set() is shared across calls;
        # treated as read-only here.
        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
        **kwargs: Any,
    ) -> TS:
        """Text splitter that uses tiktoken encoder to count length."""
        try:
            import tiktoken
        except ImportError:
            raise ImportError(
                "Could not import tiktoken python package. "
                "This is needed in order to calculate max_tokens_for_prompt. "
                "Please install it with `pip install tiktoken`."
            )

        # A model name takes precedence over a raw encoding name.
        if model is not None:
            enc = tiktoken.encoding_for_model(model)
        else:
            enc = tiktoken.get_encoding(encoding_name)

        def _tiktoken_encoder(text: str) -> int:
            return len(
                enc.encode(
                    text,
                    allowed_special=allowed_special,
                    disallowed_special=disallowed_special,
                )
            )

        if issubclass(cls, TokenTextSplitter):
            # TokenTextSplitter also needs the encoder config itself.
            extra_kwargs = {
                "encoding_name": encoding_name,
                "model": model,
                "allowed_special": allowed_special,
                "disallowed_special": disallowed_special,
            }
            kwargs = {**kwargs, **extra_kwargs}

        return cls(length_function=_tiktoken_encoder, **kwargs)

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> Sequence[Document]:
        """Transform sequence of documents by splitting them."""
        return self.split_documents(list(documents))


class CharacterTextSplitter(TextSplitter):
    """Splitting text that looks at characters."""

    def __init__(
        self,
        separator: str = "\n\n",
        is_separator_regex: bool = False,
        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
        super().__init__(**kwargs)
        self._separator = separator
        self._is_separator_regex = is_separator_regex

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks."""
        # First we naively split the large input into a bunch of smaller ones.
        separator = (
            self._separator
            if self._is_separator_regex
            else re.escape(self._separator)
        )
        splits = _split_text_with_regex(text, separator, self._keep_separator)
        # When separators are kept they are already attached to the splits,
        # so merging re-joins with the empty string to avoid duplicating them.
        _separator = "" if self._keep_separator else self._separator
        return self._merge_splits(splits, _separator)


class LineType(TypedDict):
    """Line type as typed dict."""

    metadata: Dict[str, str]
    content: str


class HeaderType(TypedDict):
    """Header type as typed dict."""

    level: int
    name: str
    data: str


class MarkdownHeaderTextSplitter:
    """Splitting markdown files based on specified headers."""

    def __init__(
        self,
        headers_to_split_on: List[Tuple[str, str]],
        return_each_line: bool = False,
        strip_headers: bool = True,
    ):
        """Create a new MarkdownHeaderTextSplitter.

        Args:
            headers_to_split_on: Headers we want to track
            return_each_line: Return each line w/ associated headers
            strip_headers: Strip split headers from the content of the chunk
        """
        # Output line-by-line or aggregated into chunks w/ common headers
        self.return_each_line = return_each_line
        # Given the headers we want to split on,
        # (e.g., "#, ##, etc") order by length
        # Longest first, so "##" is tried before "#" and never shadowed.
        self.headers_to_split_on = sorted(
            headers_to_split_on, key=lambda split: len(split[0]), reverse=True
        )
        # Strip headers split headers from the content of the chunk
        self.strip_headers = strip_headers

    def aggregate_lines_to_chunks(
        self, lines: List[LineType]
    ) -> List[Document]:
        """Combine lines with common metadata into chunks
        Args:
            lines: Line of text / associated header metadata
        """
        aggregated_chunks: List[LineType] = []

        for line in lines:
            if (
                aggregated_chunks
                and aggregated_chunks[-1]["metadata"] == line["metadata"]
            ):
                # If the last line in the aggregated list
                # has the same metadata as the current line,
                # append the current content to the last lines's content
                aggregated_chunks[-1]["content"] += "  \n" + line["content"]
            elif (
                aggregated_chunks
                and aggregated_chunks[-1]["metadata"] != line["metadata"]
                # may be issues if other metadata is present
                and len(aggregated_chunks[-1]["metadata"])
                < len(line["metadata"])
                # NOTE(review): indexes [-1][0] — raises IndexError if the
                # previous chunk's last line is empty; confirm inputs.
                and aggregated_chunks[-1]["content"].split("\n")[-1][0] == "#"
                and not self.strip_headers
            ):
                # If the last line in the aggregated list
                # has different metadata as the current line,
                # and has shallower header level than the current line,
                # and the last line is a header,
                # and we are not stripping headers,
                # append the current content to the last line's content
                aggregated_chunks[-1]["content"] += "  \n" + line["content"]
                # and update the last line's metadata
                aggregated_chunks[-1]["metadata"] = line["metadata"]
            else:
                # Otherwise, append the current line to the aggregated list
                aggregated_chunks.append(line)

        return [
            Document(page_content=chunk["content"], metadata=chunk["metadata"])
            for chunk in aggregated_chunks
        ]

    def split_text(self, text: str) -> List[Document]:
        """Split markdown file
        Args:
            text: Markdown file"""

        # Split the input text by newline character ("\n").
        lines = text.split("\n")
        # Final output
        lines_with_metadata: List[LineType] = []
        # Content and metadata of the chunk currently being processed
        current_content: List[str] = []
        current_metadata: Dict[str, str] = {}
        # Keep track of the nested header structure
        # header_stack: List[Dict[str, Union[int, str]]] = []
        header_stack: List[HeaderType] = []
        initial_metadata: Dict[str, str] = {}

        # Fenced code blocks (``` or ~~~) must pass through verbatim,
        # so header markers inside them are never treated as headers.
        in_code_block = False
        opening_fence = ""

        for line in lines:
            stripped_line = line.strip()

            if not in_code_block:
                # Exclude inline code spans
                if (
                    stripped_line.startswith("```")
                    and stripped_line.count("```") == 1
                ):
                    in_code_block = True
                    opening_fence = "```"
                elif stripped_line.startswith("~~~"):
                    in_code_block = True
                    opening_fence = "~~~"
            else:
                # Close the fence only with the same marker that opened it.
                if stripped_line.startswith(opening_fence):
                    in_code_block = False
                    opening_fence = ""

            if in_code_block:
                current_content.append(stripped_line)
                continue

            # Check each line against each of the header types (e.g., #, ##)
            for sep, name in self.headers_to_split_on:
                # Check if line starts with a header that we intend to split on
                if stripped_line.startswith(sep) and (
                    # Header with no text OR header is followed by space
                    # Both are valid conditions that sep is being used a header
                    len(stripped_line) == len(sep)
                    or stripped_line[len(sep)] == " "
                ):
                    # Ensure we are tracking the header as metadata
                    if name is not None:
                        # Get the current header level
                        current_header_level = sep.count("#")

                        # Pop out headers of lower or same level from the stack
                        while (
                            header_stack
                            and header_stack[-1]["level"]
                            >= current_header_level
                        ):
                            # We have encountered a new header
                            # at the same or higher level
                            popped_header = header_stack.pop()
                            # Clear the metadata for the
                            # popped header in initial_metadata
                            if popped_header["name"] in initial_metadata:
                                initial_metadata.pop(popped_header["name"])

                        # Push the current header to the stack
                        header: HeaderType = {
                            "level": current_header_level,
                            "name": name,
                            "data": stripped_line[len(sep) :].strip(),
                        }
                        header_stack.append(header)
                        # Update initial_metadata with the current header
                        initial_metadata[name] = header["data"]

                    # Add the previous line to the lines_with_metadata
                    # only if current_content is not empty
                    if current_content:
                        lines_with_metadata.append(
                            {
                                "content": "\n".join(current_content),
                                "metadata": current_metadata.copy(),
                            }
                        )
                        current_content.clear()

                    if not self.strip_headers:
                        current_content.append(stripped_line)

                    break
            else:
                # for/else: no header pattern matched this line.
                if stripped_line:
                    current_content.append(stripped_line)
                elif current_content:
                    # Blank line ends the current chunk.
                    lines_with_metadata.append(
                        {
                            "content": "\n".join(current_content),
                            "metadata": current_metadata.copy(),
                        }
                    )
                    current_content.clear()

            # Metadata for the *next* line reflects the header stack as of now.
            current_metadata = initial_metadata.copy()

        # Flush any trailing content.
        if current_content:
            lines_with_metadata.append(
                {
                    "content": "\n".join(current_content),
                    "metadata": current_metadata,
                }
            )

        # lines_with_metadata has each line with associated header metadata
        # aggregate these into chunks based on common metadata
        if not self.return_each_line:
            return self.aggregate_lines_to_chunks(lines_with_metadata)
        else:
            return [
                Document(
                    page_content=chunk["content"], metadata=chunk["metadata"]
                )
                for chunk in lines_with_metadata
            ]


class ElementType(TypedDict):
    """Element type as typed dict."""

    url: str
    xpath: str
    content: str
    metadata: Dict[str, str]


class HTMLHeaderTextSplitter:
    """
    Splitting HTML files based on specified headers.
    Requires lxml package.
    """

    def __init__(
        self,
        headers_to_split_on: List[Tuple[str, str]],
        return_each_element: bool = False,
    ):
        """Create a new HTMLHeaderTextSplitter.

        Args:
            headers_to_split_on: list of tuples of headers we want to track mapped to
                (arbitrary) keys for metadata. Allowed header values: h1, h2, h3, h4,
                h5, h6 e.g. [("h1", "Header 1"), ("h2", "Header 2)].
            return_each_element: Return each element w/ associated headers.
        """
        # Output element-by-element or aggregated into chunks w/ common headers
        self.return_each_element = return_each_element
        self.headers_to_split_on = sorted(headers_to_split_on)

    def aggregate_elements_to_chunks(
        self, elements: List[ElementType]
    ) -> List[Document]:
        """Combine elements with common metadata into chunks

        Args:
            elements: HTML element content with associated identifying info and metadata
        """
        aggregated_chunks: List[ElementType] = []

        for element in elements:
            if (
                aggregated_chunks
                and aggregated_chunks[-1]["metadata"] == element["metadata"]
            ):
                # If the last element in the aggregated list
                # has the same metadata as the current element,
                # append the current content to the last element's content
                aggregated_chunks[-1]["content"] += " \n" + element["content"]
            else:
                # Otherwise, append the current element to the aggregated list
                aggregated_chunks.append(element)

        return [
            Document(page_content=chunk["content"], metadata=chunk["metadata"])
            for chunk in aggregated_chunks
        ]

    def split_text_from_url(self, url: str) -> List[Document]:
        """Split HTML from web URL

        Args:
            url: web URL
        """
        # NOTE(review): no timeout= is passed, so a hung server blocks
        # indefinitely — consider requests.get(url, timeout=...).
        r = requests.get(url)
        return self.split_text_from_file(BytesIO(r.content))

    def split_text(self, text: str) -> List[Document]:
        """Split HTML text string

        Args:
            text: HTML text
        """
        return self.split_text_from_file(StringIO(text))

    def split_text_from_file(self, file: Any) -> List[Document]:
        """Split HTML file

        Args:
            file: HTML file
        """
        try:
            from lxml import etree
        except ImportError as e:
            raise ImportError(
                "Unable to import lxml, please install with `pip install lxml`."
            ) from e
        # use lxml library to parse html document and return xml ElementTree
        # Explicitly encoding in utf-8 allows non-English
        # html files to be processed without garbled characters
        parser = etree.HTMLParser(encoding="utf-8")
        tree = etree.parse(file, parser)

        # document transformation for "structure-aware" chunking is handled with xsl.
        # see comments in html_chunks_with_headers.xslt for more detailed information.
        xslt_path = (
            pathlib.Path(__file__).parent
            / "document_transformers/xsl/html_chunks_with_headers.xslt"
        )
        xslt_tree = etree.parse(xslt_path)
        transform = etree.XSLT(xslt_tree)
        result = transform(tree)
        result_dom = etree.fromstring(str(result))

        # create filter and mapping for header metadata
        header_filter = [header[0] for header in self.headers_to_split_on]
        header_mapping = dict(self.headers_to_split_on)

        # map xhtml namespace prefix
        ns_map = {"h": "http://www.w3.org/1999/xhtml"}

        # build list of elements from DOM
        elements = []
        for element in result_dom.findall("*//*", ns_map):
            if element.findall("*[@class='headers']") or element.findall(
                "*[@class='chunk']"
            ):
                elements.append(
                    ElementType(
                        # NOTE(review): `file` is a file-like object here
                        # (BytesIO/StringIO from the callers above), not a
                        # URL string — confirm this is intended for `url`.
                        url=file,
                        xpath="".join(
                            [
                                node.text
                                for node in element.findall(
                                    "*[@class='xpath']", ns_map
                                )
                            ]
                        ),
                        content="".join(
                            [
                                node.text
                                for node in element.findall(
                                    "*[@class='chunk']", ns_map
                                )
                            ]
                        ),
                        metadata={
                            # Add text of specified headers to metadata using header
                            # mapping.
                            header_mapping[node.tag]: node.text
                            for node in filter(
                                lambda x: x.tag in header_filter,
                                element.findall(
                                    "*[@class='headers']/*", ns_map
                                ),
                            )
                        },
                    )
                )

        if not self.return_each_element:
            return self.aggregate_elements_to_chunks(elements)
        else:
            return [
                Document(
                    page_content=chunk["content"], metadata=chunk["metadata"]
                )
                for chunk in elements
            ]


# should be in newer Python versions (3.10+)
# @dataclass(frozen=True, kw_only=True, slots=True)
@dataclass(frozen=True)
class Tokenizer:
    """Tokenizer data class."""

    chunk_overlap: int
    """Overlap in tokens between chunks"""
    tokens_per_chunk: int
    """Maximum number of tokens per chunk"""
    decode: Callable[[List[int]], str]
    """ Function to decode a list of token ids to a string"""
    encode: Callable[[str], List[int]]
    """ Function to encode a string to a list of token ids"""


def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]:
    """Split incoming text and return chunks using tokenizer."""
    splits: List[str] = []
    input_ids = tokenizer.encode(text)
    start_idx = 0
    cur_idx = min(start_idx + tokenizer.tokens_per_chunk, len(input_ids))
    chunk_ids = input_ids[start_idx:cur_idx]
    while start_idx < len(input_ids):
        splits.append(tokenizer.decode(chunk_ids))
        if cur_idx == len(input_ids):
            break
        # Advance by chunk size minus overlap so consecutive chunks share
        # `chunk_overlap` tokens.
        start_idx += tokenizer.tokens_per_chunk - tokenizer.chunk_overlap
        cur_idx = min(start_idx + tokenizer.tokens_per_chunk, len(input_ids))
        chunk_ids = input_ids[start_idx:cur_idx]
    return splits


class TokenTextSplitter(TextSplitter):
    """Splitting text to tokens using model tokenizer."""

    def __init__(
        self,
        encoding_name: str = "gpt2",
        model: Optional[str] = None,
        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
        super().__init__(**kwargs)
        try:
            import tiktoken
        except ImportError:
            raise ImportError(
                "Could not import tiktoken python package. "
                "This is needed in order to for TokenTextSplitter. "
                "Please install it with `pip install tiktoken`."
            )

        # Prefer a model-specific encoding when a model name is given.
        if model is not None:
            enc = tiktoken.encoding_for_model(model)
        else:
            enc = tiktoken.get_encoding(encoding_name)
        self._tokenizer = enc
        self._allowed_special = allowed_special
        self._disallowed_special = disallowed_special

    def split_text(self, text: str) -> List[str]:
        """Split text into chunks bounded by the tiktoken tokenizer."""

        def _encode(_text: str) -> List[int]:
            return self._tokenizer.encode(
                _text,
                allowed_special=self._allowed_special,
                disallowed_special=self._disallowed_special,
            )

        tokenizer = Tokenizer(
            chunk_overlap=self._chunk_overlap,
            tokens_per_chunk=self._chunk_size,
            decode=self._tokenizer.decode,
            encode=_encode,
        )

        return split_text_on_tokens(text=text, tokenizer=tokenizer)


class SentenceTransformersTokenTextSplitter(TextSplitter):
    """Splitting text to tokens using sentence model tokenizer."""

    def __init__(
        self,
        chunk_overlap: int = 50,
        model: str = "sentence-transformers/all-mpnet-base-v2",
        tokens_per_chunk: Optional[int] = None,
        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
        super().__init__(**kwargs, chunk_overlap=chunk_overlap)

        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            raise ImportError(
                "Could not import sentence_transformer python package. "
                "This is needed in order to for SentenceTransformersTokenTextSplitter. "
                "Please install it with `pip install sentence-transformers`."
            )

        self.model = model
        # NOTE(review): trust_remote_code=True executes code from the model
        # repository at load time — confirm model names are trusted.
        self._model = SentenceTransformer(self.model, trust_remote_code=True)
        self.tokenizer = self._model.tokenizer
        self._initialize_chunk_configuration(tokens_per_chunk=tokens_per_chunk)

    def _initialize_chunk_configuration(
        self, *, tokens_per_chunk: Optional[int]
    ) -> None:
        # Chunk size is capped at the model's maximum sequence length.
        self.maximum_tokens_per_chunk = cast(int, self._model.max_seq_length)

        if tokens_per_chunk is None:
            self.tokens_per_chunk = self.maximum_tokens_per_chunk
        else:
            self.tokens_per_chunk = tokens_per_chunk

        if self.tokens_per_chunk > self.maximum_tokens_per_chunk:
            raise ValueError(
                f"The token limit of the models '{self.model}'"
                f" is: {self.maximum_tokens_per_chunk}."
                f" Argument tokens_per_chunk={self.tokens_per_chunk}"
                f" > maximum token limit."
            )

    def split_text(self, text: str) -> List[str]:
        """Split text into chunks bounded by the sentence-transformer tokenizer."""

        def encode_strip_start_and_stop_token_ids(text: str) -> List[int]:
            # Drop the special start/stop token ids added by the tokenizer.
            return self._encode(text)[1:-1]

        tokenizer = Tokenizer(
            chunk_overlap=self._chunk_overlap,
            tokens_per_chunk=self.tokens_per_chunk,
            decode=self.tokenizer.decode,
            encode=encode_strip_start_and_stop_token_ids,
        )

        return split_text_on_tokens(text=text, tokenizer=tokenizer)

    def count_tokens(self, *, text: str) -> int:
        """Return the token count of `text` (including special tokens)."""
        return len(self._encode(text))

    # Effectively a "no truncation" sentinel passed as max_length below.
    _max_length_equal_32_bit_integer: int = 2**32

    def _encode(self, text: str) -> List[int]:
        token_ids_with_start_and_end_token_ids = self.tokenizer.encode(
            text,
            max_length=self._max_length_equal_32_bit_integer,
            truncation="do_not_truncate",
        )
        return token_ids_with_start_and_end_token_ids


class Language(str, Enum):
    """Enum of the programming languages."""

    CPP = "cpp"
    GO = "go"
    JAVA = "java"
    KOTLIN = "kotlin"
    JS = "js"
    TS = "ts"
    PHP = "php"
    PROTO = "proto"
    PYTHON = "python"
    RST = "rst"
    RUBY = "ruby"
    RUST = "rust"
    SCALA = "scala"
    SWIFT = "swift"
    MARKDOWN = "markdown"
    LATEX = "latex"
    HTML = "html"
    SOL = "sol"
    CSHARP = "csharp"
    COBOL = "cobol"
    C = "c"
    LUA = "lua"
    PERL = "perl"


class RecursiveCharacterTextSplitter(TextSplitter):
    """Splitting text by recursively look at characters.

    Recursively tries to split by different characters to find one
    that works.
    """

    def __init__(
        self,
        separators: Optional[List[str]] = None,
        keep_separator: bool = True,
        is_separator_regex: bool = False,
        **kwargs: Any,
    ) -> None:
        """Create a new TextSplitter."""
        super().__init__(keep_separator=keep_separator, **kwargs)
        self._separators = separators or ["\n\n", "\n", " ", ""]
        self._is_separator_regex = is_separator_regex

    def _split_text(self, text: str, separators: List[str]) -> List[str]:
        """Split incoming text and return chunks."""
        final_chunks = []
        # Get appropriate separator to use: the first one that actually
        # occurs in the text; the remaining ones are kept for recursion.
        separator = separators[-1]
        new_separators = []
        for i, _s in enumerate(separators):
            _separator = _s if self._is_separator_regex else re.escape(_s)
            if _s == "":
                separator = _s
                break
            if re.search(_separator, text):
                separator = _s
                new_separators = separators[i + 1 :]
                break

        _separator = (
            separator if self._is_separator_regex else re.escape(separator)
        )
        splits = _split_text_with_regex(text, _separator, self._keep_separator)

        # Now go merging things, recursively splitting longer texts.
        _good_splits = []
        _separator = "" if self._keep_separator else separator
        for s in splits:
            if self._length_function(s) < self._chunk_size:
                _good_splits.append(s)
            else:
                # Flush accumulated small pieces before handling the big one.
                if _good_splits:
                    merged_text = self._merge_splits(_good_splits, _separator)
                    final_chunks.extend(merged_text)
                    _good_splits = []
                if not new_separators:
                    # No finer separator left: emit the oversize piece as-is.
                    final_chunks.append(s)
                else:
                    other_info = self._split_text(s, new_separators)
                    final_chunks.extend(other_info)
        if _good_splits:
            merged_text = self._merge_splits(_good_splits, _separator)
            final_chunks.extend(merged_text)
        return final_chunks

    def split_text(self, text: str) -> List[str]:
        """Split incoming text into chunks using the configured separators."""
        return self._split_text(text, self._separators)

    @classmethod
    def from_language(
        cls, language: Language, **kwargs: Any
    ) -> RecursiveCharacterTextSplitter:
        """Build a splitter preconfigured with language-specific regex separators."""
        separators = cls.get_separators_for_language(language)
        return cls(separators=separators, is_separator_regex=True, **kwargs)

    @staticmethod
    def get_separators_for_language(language: Language) -> List[str]:
        """Return the (regex) separator list for `language`.

        Raises:
            ValueError: if `language` has no separator table.
        """
        if language == Language.CPP:
            return [
                # Split along class definitions
                "\nclass ",
                # Split along function definitions
                "\nvoid ",
                "\nint ",
                "\nfloat ",
                "\ndouble ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nswitch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.GO:
            return [
                # Split along function definitions
                "\nfunc ",
                "\nvar ",
                "\nconst ",
                "\ntype ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nswitch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.JAVA:
            return [
                # Split along class definitions
                "\nclass ",
                # Split along method definitions
                "\npublic ",
                "\nprotected ",
                "\nprivate ",
                "\nstatic ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nswitch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.KOTLIN:
            return [
                # Split along class definitions
                "\nclass ",
                # Split along method definitions
                "\npublic ",
                "\nprotected ",
                "\nprivate ",
                "\ninternal ",
                "\ncompanion ",
                "\nfun ",
                "\nval ",
                "\nvar ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nwhen ",
                "\ncase ",
                "\nelse ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.JS:
            return [
                # Split along function definitions
                "\nfunction ",
                "\nconst ",
                "\nlet ",
                "\nvar ",
                "\nclass ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nswitch ",
                "\ncase ",
                "\ndefault ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.TS:
            return [
                "\nenum ",
                "\ninterface ",
                "\nnamespace ",
                "\ntype ",
                # Split along class definitions
                "\nclass ",
                # Split along function definitions
                "\nfunction ",
                "\nconst ",
                "\nlet ",
                "\nvar ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nswitch ",
                "\ncase ",
                "\ndefault ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.PHP:
            return [
                # Split along function definitions
                "\nfunction ",
                # Split along class definitions
                "\nclass ",
                # Split along control flow statements
                "\nif ",
                "\nforeach ",
                "\nwhile ",
                "\ndo ",
                "\nswitch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.PROTO:
            return [
                # Split along message definitions
                "\nmessage ",
                # Split along service definitions
                "\nservice ",
                # Split along enum definitions
                "\nenum ",
                # Split along option definitions
                "\noption ",
                # Split along import statements
                "\nimport ",
                # Split along syntax declarations
                "\nsyntax ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.PYTHON:
            return [
                # First, try to split along class definitions
                "\nclass ",
                "\ndef ",
                "\n\tdef ",
                # Now split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.RST:
            return [
                # Split along section titles
                "\n=+\n",
                "\n-+\n",
                "\n\\*+\n",
                # Split along directive markers
                "\n\n.. *\n\n",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.RUBY:
            return [
                # Split along method definitions
                "\ndef ",
                "\nclass ",
                # Split along control flow statements
                "\nif ",
                "\nunless ",
                "\nwhile ",
                "\nfor ",
                "\ndo ",
                "\nbegin ",
                "\nrescue ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.RUST:
            return [
                # Split along function definitions
                "\nfn ",
                "\nconst ",
                "\nlet ",
                # Split along control flow statements
                "\nif ",
                "\nwhile ",
                "\nfor ",
                "\nloop ",
                "\nmatch ",
                # NOTE(review): "\nconst " appears twice in this list
                # (also above) — redundant but harmless.
                "\nconst ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.SCALA:
            return [
                # Split along class definitions
                "\nclass ",
                "\nobject ",
                # Split along method definitions
                "\ndef ",
                "\nval ",
                "\nvar ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\nmatch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.SWIFT:
            return [
                # Split along function definitions
                "\nfunc ",
                # Split along class definitions
                "\nclass ",
                "\nstruct ",
                "\nenum ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\ndo ",
                "\nswitch ",
                "\ncase ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.MARKDOWN:
            return [
                # First, try to split along Markdown headings (starting with level 2)
                "\n#{1,6} ",
                # Note the alternative syntax for headings (below) is not handled here
                # Heading level 2
                # ---------------
                # End of code block
                "```\n",
                # Horizontal lines
                "\n\\*\\*\\*+\n",
                "\n---+\n",
                "\n___+\n",
                # Note that this splitter doesn't handle horizontal lines defined
                # by *three or more* of ***, ---, or ___, but this is not handled
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.LATEX:
            return [
                # First, try to split along Latex sections
                "\n\\\\chapter{",
                "\n\\\\section{",
                "\n\\\\subsection{",
                "\n\\\\subsubsection{",
                # Now split by environments
                "\n\\\\begin{enumerate}",
                "\n\\\\begin{itemize}",
                "\n\\\\begin{description}",
                "\n\\\\begin{list}",
                "\n\\\\begin{quote}",
                "\n\\\\begin{quotation}",
                "\n\\\\begin{verse}",
                "\n\\\\begin{verbatim}",
                # Now split by math environments
                # NOTE(review): unlike the entries above, this literal uses
                # "\\\b" — Python reads "\b" as a backspace character, so the
                # regex will never match "\begin{align}". Likely intended
                # "\n\\\\begin{align}"; confirm and fix.
                "\n\\\begin{align}",
                "$$",
                "$",
                # Now split by the normal type of lines
                " ",
                "",
            ]
        elif language == Language.HTML:
            return [
                # First, try to split along HTML tags
                "<body",
                "<div",
                "<p",
                "<br",
                "<li",
                "<h1",
                "<h2",
                "<h3",
                "<h4",
                "<h5",
                "<h6",
                "<span",
                "<table",
                "<tr",
                "<td",
                "<th",
                "<ul",
                "<ol",
                "<header",
                "<footer",
                "<nav",
                # Head
                "<head",
                "<style",
                "<script",
                "<meta",
                "<title",
                "",
            ]
        elif language == Language.CSHARP:
            return [
                "\ninterface ",
                "\nenum ",
                "\nimplements ",
                "\ndelegate ",
                "\nevent ",
                # Split along class definitions
                "\nclass ",
                "\nabstract ",
                # Split along method definitions
                "\npublic ",
                "\nprotected ",
                "\nprivate ",
                "\nstatic ",
                "\nreturn ",
                # Split along control flow statements
                "\nif ",
                "\ncontinue ",
                "\nfor ",
                "\nforeach ",
                "\nwhile ",
                "\nswitch ",
                "\nbreak ",
                "\ncase ",
                "\nelse ",
                # Split by exceptions
                "\ntry ",
                "\nthrow ",
                "\nfinally ",
                "\ncatch ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.SOL:
            return [
                # Split along compiler information definitions
                "\npragma ",
                "\nusing ",
                # Split along contract definitions
                "\ncontract ",
                "\ninterface ",
                "\nlibrary ",
                # Split along method definitions
                "\nconstructor ",
                "\ntype ",
                "\nfunction ",
                "\nevent ",
                "\nmodifier ",
                "\nerror ",
                "\nstruct ",
                "\nenum ",
                # Split along control flow statements
                "\nif ",
                "\nfor ",
                "\nwhile ",
                "\ndo while ",
                "\nassembly ",
                # Split by the normal type of lines
                "\n\n",
                "\n",
                " ",
                "",
            ]
        elif language == Language.COBOL:
            return [
                # Split along divisions
                "\nIDENTIFICATION DIVISION.",
                "\nENVIRONMENT DIVISION.",
                "\nDATA DIVISION.",
                "\nPROCEDURE DIVISION.",
                # Split along sections within DATA DIVISION
                "\nWORKING-STORAGE SECTION.",
                "\nLINKAGE SECTION.",
                "\nFILE SECTION.",
                # Split along sections within PROCEDURE DIVISION
                "\nINPUT-OUTPUT SECTION.",
                # Split along paragraphs and common statements
                "\nOPEN ",
                "\nCLOSE ",
                "\nREAD ",
                "\nWRITE ",
                "\nIF ",
                "\nELSE ",
                "\nMOVE ",
                "\nPERFORM ",
                "\nUNTIL ",
                "\nVARYING ",
                "\nACCEPT ",
                "\nDISPLAY ",
                "\nSTOP RUN.",
                # Split by the normal type of lines
                "\n",
                " ",
                "",
            ]

        else:
            raise ValueError(
                f"Language {language} is not supported! "
                f"Please choose from {list(Language)}"
            )


class NLTKTextSplitter(TextSplitter):
    """Splitting text using NLTK package."""

    def __init__(
        self, separator: str = "\n\n", language: str = "english", **kwargs: Any
    ) -> None:
        """Initialize the NLTK splitter."""
        super().__init__(**kwargs)
        try:
            from nltk.tokenize import sent_tokenize

            self._tokenizer = sent_tokenize
        except ImportError:
            raise ImportError(
                "NLTK is not installed, please install it with `pip install nltk`."
            )
        self._separator = separator
        self._language = language

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks."""
        # First we naively split the large input into a bunch of smaller ones.
        splits = self._tokenizer(text, language=self._language)
        return self._merge_splits(splits, self._separator)


class SpacyTextSplitter(TextSplitter):
    """Splitting text using Spacy package.


    Per default, Spacy's `en_core_web_sm` model is used and
    its default max_length is 1000000 (it is the length of maximum character
    this model takes which can be increased for large files). For a faster, but
    potentially less accurate splitting, you can use `pipe='sentencizer'`.
    """

    def __init__(
        self,
        separator: str = "\n\n",
        pipe: str = "en_core_web_sm",
        max_length: int = 1_000_000,
        **kwargs: Any,
    ) -> None:
        """Initialize the spacy text splitter."""
        super().__init__(**kwargs)
        self._tokenizer = _make_spacy_pipe_for_splitting(
            pipe, max_length=max_length
        )
        self._separator = separator

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks."""
        # Sentence boundaries come from the spacy pipeline's `.sents`.
        splits = (s.text for s in self._tokenizer(text).sents)
        return self._merge_splits(splits, self._separator)


class KonlpyTextSplitter(TextSplitter):
    """Splitting text using Konlpy package.

    It is good for splitting Korean text.
    """

    def __init__(
        self,
        separator: str = "\n\n",
        **kwargs: Any,
    ) -> None:
        """Initialize the Konlpy text splitter."""
        super().__init__(**kwargs)
        self._separator = separator
        try:
            from konlpy.tag import Kkma
        except ImportError:
            raise ImportError(
                """
                Konlpy is not installed, please install it with
                `pip install konlpy`
                """
            )
        self.kkma = Kkma()

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks."""
        splits = self.kkma.sentences(text)
        return self._merge_splits(splits, self._separator)


# For backwards compatibility
class PythonCodeTextSplitter(RecursiveCharacterTextSplitter):
    """Attempts to split the text along Python syntax."""

    def __init__(self, **kwargs: Any) -> None:
        """Initialize a PythonCodeTextSplitter."""
        separators = self.get_separators_for_language(Language.PYTHON)
        super().__init__(separators=separators, **kwargs)


class MarkdownTextSplitter(RecursiveCharacterTextSplitter):
    """Attempts to split the text along Markdown-formatted headings."""

    def __init__(self, **kwargs: Any) -> None:
        """Initialize a MarkdownTextSplitter."""
        separators = self.get_separators_for_language(Language.MARKDOWN)
        super().__init__(separators=separators, **kwargs)


class LatexTextSplitter(RecursiveCharacterTextSplitter):
    """Attempts to split the text along Latex-formatted layout elements."""

    def __init__(self, **kwargs: Any) -> None:
        """Initialize a LatexTextSplitter."""
        separators = self.get_separators_for_language(Language.LATEX)
        super().__init__(separators=separators, **kwargs)


class RecursiveJsonSplitter:
    """Recursively split a JSON object into size-bounded sub-objects."""

    def __init__(
        self, max_chunk_size: int = 2000, min_chunk_size: Optional[int] = None
    ):
        super().__init__()
        self.max_chunk_size = max_chunk_size
        # Default minimum is 200 below the maximum, floored at 50.
        self.min_chunk_size = (
            min_chunk_size
            if min_chunk_size is not None
            else max(max_chunk_size - 200, 50)
        )

    @staticmethod
    def _json_size(data: Dict) -> int:
        """Calculate the size of the serialized JSON object."""
of the serialized JSON object.""" + return len(json.dumps(data)) + + @staticmethod + def _set_nested_dict(d: Dict, path: List[str], value: Any) -> None: + """Set a value in a nested dictionary based on the given path.""" + for key in path[:-1]: + d = d.setdefault(key, {}) + d[path[-1]] = value + + def _list_to_dict_preprocessing(self, data: Any) -> Any: + if isinstance(data, dict): + # Process each key-value pair in the dictionary + return { + k: self._list_to_dict_preprocessing(v) for k, v in data.items() + } + elif isinstance(data, list): + # Convert the list to a dictionary with index-based keys + return { + str(i): self._list_to_dict_preprocessing(item) + for i, item in enumerate(data) + } + else: + # Base case: the item is neither a dict nor a list, so return it unchanged + return data + + def _json_split( + self, + data: Dict[str, Any], + current_path: List[str] = [], + chunks: List[Dict] = [{}], + ) -> List[Dict]: + """ + Split json into maximum size dictionaries while preserving structure. 
+ """ + if isinstance(data, dict): + for key, value in data.items(): + new_path = current_path + [key] + chunk_size = self._json_size(chunks[-1]) + size = self._json_size({key: value}) + remaining = self.max_chunk_size - chunk_size + + if size < remaining: + # Add item to current chunk + self._set_nested_dict(chunks[-1], new_path, value) + else: + if chunk_size >= self.min_chunk_size: + # Chunk is big enough, start a new chunk + chunks.append({}) + + # Iterate + self._json_split(value, new_path, chunks) + else: + # handle single item + self._set_nested_dict(chunks[-1], current_path, data) + return chunks + + def split_json( + self, + json_data: Dict[str, Any], + convert_lists: bool = False, + ) -> List[Dict]: + """Splits JSON into a list of JSON chunks""" + + if convert_lists: + chunks = self._json_split( + self._list_to_dict_preprocessing(json_data) + ) + else: + chunks = self._json_split(json_data) + + # Remove the last chunk if it's empty + if not chunks[-1]: + chunks.pop() + return chunks + + def split_text( + self, json_data: Dict[str, Any], convert_lists: bool = False + ) -> List[str]: + """Splits JSON into a list of JSON formatted strings""" + + chunks = self.split_json( + json_data=json_data, convert_lists=convert_lists + ) + + # Convert to string + return [json.dumps(chunk) for chunk in chunks] + + def create_documents( + self, + texts: List[Dict], + convert_lists: bool = False, + metadatas: Optional[List[dict]] = None, + ) -> List[Document]: + """Create documents from a list of json objects (Dict).""" + _metadatas = metadatas or [{}] * len(texts) + documents = [] + for i, text in enumerate(texts): + for chunk in self.split_text( + json_data=text, convert_lists=convert_lists + ): + metadata = copy.deepcopy(_metadatas[i]) + new_doc = Document(page_content=chunk, metadata=metadata) + documents.append(new_doc) + return documents diff --git a/R2R/r2r/cli/__init__.py b/R2R/r2r/cli/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ 
import json
import os
import subprocess
import time
import uuid

import click
from dotenv import load_dotenv

from r2r.main.execution import R2RExecutionWrapper


class JsonParamType(click.ParamType):
    """Click parameter type that parses a JSON string into a Python object."""

    name = "json"

    def convert(self, value, param, ctx):
        """Parse `value` as JSON, failing with a click error on bad input."""
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            self.fail(f"'{value}' is not a valid JSON string", param, ctx)


# Shared instance used as `type=JSON` in command options.
JSON = JsonParamType()


@click.group()
@click.option(
    "--config-path", default=None, help="Path to the configuration file"
)
@click.option(
    "--config-name", default=None, help="Name of the configuration to use"
)
@click.option("--client-mode", default=True, help="Run in client mode")
@click.option(
    "--base-url",
    default="http://localhost:8000",
    help="Base URL for client mode",
)
@click.pass_context
def cli(ctx, config_path, config_name, client_mode, base_url):
    """R2R CLI for all core operations."""
    if config_path and config_name:
        raise click.UsageError(
            "Cannot specify both config_path and config_name"
        )

    # Convert relative config path to absolute path
    if config_path:
        config_path = os.path.abspath(config_path)

    # `serve` builds its own wrapper later (it may launch Docker instead),
    # so it only receives the raw settings; all other subcommands get a
    # ready-to-use execution wrapper.
    if ctx.invoked_subcommand != "serve":
        ctx.obj = R2RExecutionWrapper(
            config_path,
            config_name,
            client_mode if ctx.invoked_subcommand != "serve" else False,
            base_url,
        )
    else:
        ctx.obj = {
            "config_path": config_path,
            "config_name": config_name,
            "base_url": base_url,
        }


@cli.command()
@click.option("--host", default="0.0.0.0", help="Host to run the server on")
@click.option("--port", default=8000, help="Port to run the server on")
@click.option("--docker", is_flag=True, help="Run using Docker")
@click.option(
    "--docker-ext-neo4j",
    is_flag=True,
    help="Run using Docker with external Neo4j",
)
@click.option("--project-name", default="r2r", help="Project name for Docker")
@click.pass_obj
def serve(obj, host, port, docker, docker_ext_neo4j, project_name):
    """Start the R2R server."""
    # Load environment variables from .env file if it exists
    load_dotenv()

    if docker:
        if x := obj.get("config_path", None):
            os.environ["CONFIG_PATH"] = x
        else:
            os.environ["CONFIG_NAME"] = (
                obj.get("config_name", None) or "default"
            )

        os.environ["OLLAMA_API_BASE"] = "http://host.docker.internal:11434"
        # Check if compose files exist in the package directory
        package_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "..", ".."
        )
        compose_yaml = os.path.join(package_dir, "compose.yaml")
        compose_neo4j_yaml = os.path.join(package_dir, "compose.neo4j.yaml")

        if not os.path.exists(compose_yaml) or not os.path.exists(
            compose_neo4j_yaml
        ):
            click.echo(
                "Error: Docker Compose files not found in the package directory."
            )
            return

        # Build the docker-compose command with the specified host and port
        docker_command = f"docker-compose -f {compose_yaml}"
        if docker_ext_neo4j:
            docker_command += f" -f {compose_neo4j_yaml}"
        if host != "0.0.0.0" or port != 8000:
            docker_command += (
                f" --build-arg HOST={host} --build-arg PORT={port}"
            )

        docker_command += f" --project-name {project_name}"

        docker_command += " up -d"
        # NOTE(review): os.system with interpolated paths — paths containing
        # spaces or shell metacharacters break the command; consider
        # subprocess.run with a list argument.
        os.system(docker_command)
    else:
        wrapper = R2RExecutionWrapper(**obj, client_mode=False)
        wrapper.serve(host, port)


@cli.command()
@click.option(
    "--volumes",
    is_flag=True,
    help="Remove named volumes declared in the `volumes` section of the Compose file",
)
@click.option(
    "--remove-orphans",
    is_flag=True,
    help="Remove containers for services not defined in the Compose file",
)
@click.option("--project-name", default="r2r", help="Project name for Docker")
@click.pass_context
def docker_down(ctx, volumes, remove_orphans, project_name):
    """Bring down the Docker Compose setup and attempt to remove the network if necessary."""
@cli.command()
@click.option(
    "--volumes",
    is_flag=True,
    help="Remove named volumes declared in the `volumes` section of the Compose file",
)
@click.option(
    "--remove-orphans",
    is_flag=True,
    help="Remove containers for services not defined in the Compose file",
)
@click.option("--project-name", default="r2r", help="Project name for Docker")
@click.pass_context
def docker_down(ctx, volumes, remove_orphans, project_name):
    """Bring down the Docker Compose setup and attempt to remove the network if necessary."""
    package_dir = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", ".."
    )
    compose_yaml = os.path.join(package_dir, "compose.yaml")
    compose_neo4j_yaml = os.path.join(package_dir, "compose.neo4j.yaml")

    if not os.path.exists(compose_yaml) or not os.path.exists(
        compose_neo4j_yaml
    ):
        click.echo(
            "Error: Docker Compose files not found in the package directory."
        )
        return

    docker_command = (
        f"docker-compose -f {compose_yaml} -f {compose_neo4j_yaml}"
    )
    docker_command += f" --project-name {project_name}"
    docker_command += " down"

    # Fix: `--volumes` and `--remove-orphans` are options of the `down`
    # subcommand, so they must follow it; the original appended them before
    # `down`, which docker-compose rejects.
    if volumes:
        docker_command += " --volumes"
    if remove_orphans:
        docker_command += " --remove-orphans"

    click.echo("Bringing down Docker Compose setup...")
    result = os.system(docker_command)

    if result != 0:
        click.echo(
            "An error occurred while bringing down the Docker Compose setup. Attempting to remove the network..."
        )

        # Get the list of networks
        networks = (
            subprocess.check_output(
                ["docker", "network", "ls", "--format", "{{.Name}}"]
            )
            .decode()
            .split()
        )

        # Find the r2r network
        r2r_network = next(
            (
                network
                for network in networks
                if network.startswith("r2r_") and "network" in network
            ),
            None,
        )

        if r2r_network:
            # Fix: actually retry; the original looped `range(1)` (a single
            # attempt) while the surrounding messages promised multiple
            # retries with a 5-second pause.
            for _ in range(3):
                remove_command = f"docker network rm {r2r_network}"
                remove_result = os.system(remove_command)

                if remove_result == 0:
                    click.echo(f"Successfully removed network: {r2r_network}")
                    return
                click.echo(
                    f"Failed to remove network: {r2r_network}. Retrying in 5 seconds..."
                )
                time.sleep(5)

            click.echo(
                "Failed to remove the network after multiple attempts. Please try the following steps:"
            )
            click.echo(
                "1. Run 'docker ps' to check for any running containers using this network."
            )
            click.echo(
                "2. Stop any running containers with 'docker stop <container_id>'."
            )
            click.echo(
                f"3. Try removing the network manually with 'docker network rm {r2r_network}'."
            )
            click.echo(
                "4. If the above steps don't work, you may need to restart the Docker daemon."
            )
        else:
            click.echo("Could not find the r2r network to remove.")
    else:
        click.echo("Docker Compose setup has been successfully brought down.")
@cli.command()
@click.argument("file-paths", nargs=-1)
@click.option(
    "--document-ids", multiple=True, help="Document IDs for ingestion"
)
@click.option("--metadatas", multiple=True, help="Metadatas for ingestion")
@click.option(
    "--versions",
    multiple=True,
    help="Starting version for ingested files (e.g. `v1`)",
)
@click.pass_obj
def ingest_files(obj, file_paths, document_ids, metadatas, versions):
    """Ingest files into R2R."""

    t0 = time.time()

    # Default to None if empty tuples are provided
    document_ids = None if not document_ids else list(document_ids)
    metadatas = None if not metadatas else list(metadatas)
    versions = None if not versions else list(versions)

    response = obj.ingest_files(
        list(file_paths), document_ids, metadatas, versions
    )
    t1 = time.time()
    click.echo(f"Time taken to ingest files: {t1 - t0:.2f} seconds")
    click.echo(response)


@cli.command()
@click.argument("file-paths", nargs=-1)
@click.option(
    "--document-ids", multiple=True, help="Document IDs for ingestion"
)
@click.option("--metadatas", multiple=True, help="Metadatas for ingestion")
@click.pass_obj
def update_files(obj, file_paths, document_ids, metadatas):
    # Fix: docstring and timing message were copy-pasted from ingest_files;
    # this command updates existing documents.
    """Update previously ingested files in R2R."""
    t0 = time.time()

    # Default to None if empty tuples are provided
    metadatas = None if not metadatas else list(metadatas)

    # Unlike ingest_files, document_ids are required here to identify which
    # documents are being replaced.
    response = obj.update_files(
        list(file_paths), list(document_ids), metadatas
    )
    t1 = time.time()
    click.echo(f"Time taken to update files: {t1 - t0:.2f} seconds")
    click.echo(response)
@cli.command()
@click.option(
    "--query", prompt="Enter your search query", help="The search query"
)
@click.option(
    "--use-vector-search", is_flag=True, default=True, help="Use vector search"
)
@click.option(
    "--search-filters", type=JsonParamType(), help="Search filters as JSON"
)
@click.option(
    "--search-limit", default=10, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
    "--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-agent-model", default=None, help="Model for KG agent")
@click.pass_obj
def search(
    obj,
    query,
    use_vector_search,
    search_filters,
    search_limit,
    do_hybrid_search,
    use_kg_search,
    kg_agent_model,
):
    """Perform a search query.

    Prints vector search results and, when knowledge-graph search is enabled
    and returned anything, the KG results as well.
    """
    # NOTE(review): `--use-vector-search` is declared `is_flag=True` with
    # `default=True`, so it cannot be switched off from the CLI — confirm
    # whether a `--no-use-vector-search` toggle was intended.
    kg_agent_generation_config = {}
    if kg_agent_model:
        kg_agent_generation_config["model"] = kg_agent_model

    t0 = time.time()

    results = obj.search(
        query,
        use_vector_search,
        search_filters,
        search_limit,
        do_hybrid_search,
        use_kg_search,
        kg_agent_generation_config,
    )

    # Client mode wraps the payload in a {"results": ...} envelope; unwrap it.
    if isinstance(results, dict) and "results" in results:
        results = results["results"]

    if "vector_search_results" in results:
        click.echo("Vector search results:")
        for result in results["vector_search_results"]:
            click.echo(result)
    if "kg_search_results" in results and results["kg_search_results"]:
        # Fix: click.echo takes a single message argument — the second
        # positional parameter is the output *file* — so the original
        # two-argument call raised at runtime.
        click.echo(f"KG search results: {results['kg_search_results']}")

    t1 = time.time()
    click.echo(f"Time taken to search: {t1 - t0:.2f} seconds")
@cli.command()
@click.option("--query", prompt="Enter your query", help="The query for RAG")
@click.option(
    "--use-vector-search", is_flag=True, default=True, help="Use vector search"
)
@click.option(
    "--search-filters", type=JsonParamType(), help="Search filters as JSON"
)
@click.option(
    "--search-limit", default=10, help="Number of search results to return"
)
@click.option("--do-hybrid-search", is_flag=True, help="Perform hybrid search")
@click.option(
    "--use-kg-search", is_flag=True, help="Use knowledge graph search"
)
@click.option("--kg-agent-model", default=None, help="Model for KG agent")
@click.option("--stream", is_flag=True, help="Stream the RAG response")
@click.option("--rag-model", default=None, help="Model for RAG")
@click.pass_obj
def rag(
    obj,
    query,
    use_vector_search,
    search_filters,
    search_limit,
    do_hybrid_search,
    use_kg_search,
    kg_agent_model,
    stream,
    rag_model,
):
    """Perform a RAG query.

    Streams chunks to stdout when --stream is set; otherwise prints the
    search results and completion. The non-streaming response shape differs
    by execution mode: a dict in client mode, an object with attributes when
    running in-process.
    """
    # Only pass model overrides that were explicitly given.
    kg_agent_generation_config = {}
    if kg_agent_model:
        kg_agent_generation_config = {"model": kg_agent_model}
    rag_generation_config = {"stream": stream}
    if rag_model:
        rag_generation_config["model"] = rag_model
    t0 = time.time()

    response = obj.rag(
        query,
        use_vector_search,
        search_filters,
        search_limit,
        do_hybrid_search,
        use_kg_search,
        kg_agent_generation_config,
        stream,
        rag_generation_config,
    )
    if stream:
        # Chunks arrive incrementally; suppress per-chunk newlines, then
        # terminate the line once the stream ends.
        for chunk in response:
            click.echo(chunk, nl=False)
        click.echo()
    else:
        if obj.client_mode:
            # Client mode returns a plain dict.
            click.echo(f"Search Results:\n{response['search_results']}")
            click.echo(f"Completion:\n{response['completion']}")
        else:
            # In-process mode returns an object with attributes.
            click.echo(f"Search Results:\n{response.search_results}")
            click.echo(f"Completion:\n{response.completion}")

    t1 = time.time()
    click.echo(f"Time taken for RAG: {t1 - t0:.2f} seconds")


@cli.command()
@click.option("--keys", multiple=True, help="Keys for deletion")
@click.option("--values", multiple=True, help="Values for deletion")
@click.pass_obj
def delete(obj, keys, values):
    """Delete documents based on keys and values.

    Keys and values are matched pairwise, so both options must be given the
    same number of times.
    """
    if len(keys) != len(values):
        raise click.UsageError("Number of keys must match number of values")

    t0 = time.time()
    response = obj.delete(list(keys), list(values))
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken for deletion: {t1 - t0:.2f} seconds")


@cli.command()
@click.option("--log-type-filter", help="Filter for log types")
@click.pass_obj
def logs(obj, log_type_filter):
    """Retrieve logs with optional type filter."""
    t0 = time.time()
    response = obj.logs(log_type_filter)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to retrieve logs: {t1 - t0:.2f} seconds")
@cli.command()
@click.option("--document-ids", multiple=True, help="Document IDs to overview")
@click.option("--user-ids", multiple=True, help="User IDs to overview")
@click.pass_obj
def documents_overview(obj, document_ids, user_ids):
    """Get an overview of documents."""
    # Empty tuples collapse to None so the backend applies no filter.
    doc_filter = list(document_ids) or None
    user_filter = list(user_ids) or None

    start = time.time()
    overview = obj.documents_overview(doc_filter, user_filter)
    elapsed = time.time() - start

    for document in overview:
        click.echo(document)
    click.echo(f"Time taken to get document overview: {elapsed:.2f} seconds")


@cli.command()
@click.argument("document_id")
@click.pass_obj
def document_chunks(obj, document_id):
    """Get chunks of a specific document."""
    start = time.time()
    chunks = obj.document_chunks(document_id)
    elapsed = time.time() - start

    for chunk in chunks:
        click.echo(chunk)
    click.echo(f"Time taken to get document chunks: {elapsed:.2f} seconds")


@cli.command()
@click.pass_obj
def app_settings(obj):
    """Retrieve application settings."""
    start = time.time()
    settings = obj.app_settings()
    elapsed = time.time() - start

    click.echo(settings)
    click.echo(f"Time taken to get app settings: {elapsed:.2f} seconds")


@cli.command()
@click.option("--user-ids", multiple=True, help="User IDs to overview")
@click.pass_obj
def users_overview(obj, user_ids):
    """Get an overview of users."""
    # Parse the id strings into UUIDs; no filter at all when none were given.
    parsed_ids = None
    if user_ids:
        parsed_ids = [uuid.UUID(user_id) for user_id in user_ids]

    start = time.time()
    users = obj.users_overview(parsed_ids)
    elapsed = time.time() - start

    for user in users:
        click.echo(user)
    click.echo(f"Time taken to get users overview: {elapsed:.2f} seconds")
@cli.command()
@click.option(
    "--filters", type=JsonParamType(), help="Filters for analytics as JSON"
)
@click.option(
    "--analysis-types", type=JsonParamType(), help="Analysis types as JSON"
)
@click.pass_obj
def analytics(obj, filters, analysis_types):
    """Retrieve analytics data."""
    t0 = time.time()
    response = obj.analytics(filters, analysis_types)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to get analytics: {t1 - t0:.2f} seconds")


@cli.command()
@click.option(
    "--limit", default=100, help="Limit the number of relationships returned"
)
@click.pass_obj
def inspect_knowledge_graph(obj, limit):
    """Print relationships from the knowledge graph."""
    t0 = time.time()
    response = obj.inspect_knowledge_graph(limit)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to print relationships: {t1 - t0:.2f} seconds")


@cli.command()
@click.option(
    "--no-media",
    default=True,
    help="Exclude media files from ingestion",
)
@click.option("--option", default=0, help="Which file to ingest?")
@click.pass_obj
def ingest_sample_file(obj, no_media, option):
    """Ingest a single sample file into R2R."""
    # Fix: this command had no docstring, so `--help` showed no description.
    t0 = time.time()
    response = obj.ingest_sample_file(no_media=no_media, option=option)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to ingest sample: {t1 - t0:.2f} seconds")


@cli.command()
@click.option(
    "--no-media",
    default=True,
    help="Exclude media files from ingestion",
)
@click.pass_obj
def ingest_sample_files(obj, no_media):
    """Ingest all sample files into R2R."""
    t0 = time.time()
    response = obj.ingest_sample_files(no_media=no_media)
    t1 = time.time()

    click.echo(response)
    click.echo(f"Time taken to ingest sample files: {t1 - t0:.2f} seconds")


@cli.command()
@click.pass_obj
def health(obj):
    """Check the health of the server."""
    t0 = time.time()
    response = obj.health()
    t1 = time.time()

    click.echo(response)
    # Fix: message was copy-pasted from the ingest commands ("Time taken to
    # ingest sample"); this command checks server health.
    click.echo(f"Time taken to check health: {t1 - t0:.2f} seconds")
@cli.command()
def version():
    """Print the version of R2R."""
    # Local import keeps startup cost off every other command; qualified
    # access avoids shadowing this command's own name.
    from importlib import metadata

    click.echo(metadata.version("r2r"))


def main():
    """Console-script entry point: dispatch to the Click command group."""
    cli()


if __name__ == "__main__":
    main()
b/R2R/r2r/examples/configs/local_ollama.json new file mode 100755 index 00000000..d6fd68a5 --- /dev/null +++ b/R2R/r2r/examples/configs/local_ollama.json @@ -0,0 +1,41 @@ +{ + "completions": { + "provider": "litellm", + "generation_config": { + "model": "ollama/llama3", + "temperature": 0.1, + "top_p": 1.0, + "top_k": 100, + "max_tokens_to_sample": 1024, + "stream": false, + "functions": null, + "skip_special_tokens": false, + "stop_token": null, + "num_beams": 1, + "do_sample": true, + "generate_with_chat": false, + "add_generation_kwargs": {}, + "api_base": null + } + }, + "embedding": { + "provider": "ollama", + "base_model": "mxbai-embed-large", + "base_dimension": 1024, + "batch_size": 32 + }, + "ingestion":{ + "excluded_parsers": [ + "gif", + "jpeg", + "jpg", + "png", + "svg", + "mp3", + "mp4" + ] + }, + "vector_database": { + "provider": "pgvector" + } +} diff --git a/R2R/r2r/examples/configs/local_ollama_rerank.json b/R2R/r2r/examples/configs/local_ollama_rerank.json new file mode 100755 index 00000000..3a9abbe2 --- /dev/null +++ b/R2R/r2r/examples/configs/local_ollama_rerank.json @@ -0,0 +1,46 @@ +{ + "completions": { + "provider": "litellm", + "generation_config": { + "model": "ollama/llama3", + "temperature": 0.1, + "top_p": 1.0, + "top_k": 100, + "max_tokens_to_sample": 1024, + "stream": false, + "functions": null, + "skip_special_tokens": false, + "stop_token": null, + "num_beams": 1, + "do_sample": true, + "generate_with_chat": false, + "add_generation_kwargs": {}, + "api_base": null + } + }, + "embedding": { + "provider": "sentence-transformers", + "base_model": "all-MiniLM-L6-v2", + "base_dimension": 384, + "rerank_model": "jinaai/jina-reranker-v1-turbo-en", + "rerank_dimension": 384, + "rerank_transformer_type": "CrossEncoder", + "batch_size": 32, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 512, + "chunk_overlap": 20 + } + }, + "ingestion":{ + "excluded_parsers": [ + "gif", + "jpeg", + "jpg", + "png", + "svg", + "mp3", + 
"mp4" + ] + } +} diff --git a/R2R/r2r/examples/configs/neo4j_kg.json b/R2R/r2r/examples/configs/neo4j_kg.json new file mode 100755 index 00000000..67fd0682 --- /dev/null +++ b/R2R/r2r/examples/configs/neo4j_kg.json @@ -0,0 +1,27 @@ +{ + "kg": { + "provider": "neo4j", + "batch_size": 1, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 1024, + "chunk_overlap": 0 + }, + "kg_extraction_config": { + "model": "gpt-4o", + "temperature": 0.1, + "top_p": 1.0, + "top_k": 100, + "max_tokens_to_sample": 1024, + "stream": false, + "functions": null, + "skip_special_tokens": false, + "stop_token": null, + "num_beams": 1, + "do_sample": true, + "generate_with_chat": false, + "add_generation_kwargs": {}, + "api_base": null + } + } +} diff --git a/R2R/r2r/examples/configs/postgres_logging.json b/R2R/r2r/examples/configs/postgres_logging.json new file mode 100755 index 00000000..ec659bf4 --- /dev/null +++ b/R2R/r2r/examples/configs/postgres_logging.json @@ -0,0 +1,7 @@ +{ + "logging": { + "provider": "postgres", + "log_table": "logs", + "log_info_table": "log_info" + } +} diff --git a/R2R/r2r/examples/hello_r2r.py b/R2R/r2r/examples/hello_r2r.py new file mode 100755 index 00000000..97a49956 --- /dev/null +++ b/R2R/r2r/examples/hello_r2r.py @@ -0,0 +1,25 @@ +from r2r import R2R, Document, GenerationConfig + +app = R2R() # You may pass a custom configuration to `R2R` with config=... 
def escape_braces(text):
    """Double every curly brace so the text is safe inside str.format templates."""
    return text.translate(str.maketrans({"{": "{{", "}": "}}"}))
# Function to fetch and clean HTML content
def fetch_and_clean_yc_co_data(url):
    """Fetch a YC company page and reduce it to readable text.

    Returns a string with a "### Bulk:" section (paragraph/heading/list text)
    followed by a "### Metadata:" section (span/anchor text), or the literal
    string "Main content not found" when no <main>/<body> exists.
    """
    # Fetch the HTML content from the URL
    response = requests.get(url)
    response.raise_for_status()  # Raise an error for bad status codes
    html_content = response.text

    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")

    # Remove all <script>, <style>, <meta>, <link>, <header>, <nav>, and <footer> elements
    for element in soup(
        ["script", "style", "meta", "link", "header", "nav", "footer"]
    ):
        element.decompose()

    # Remove comments. Fix: use the modern bs4 API (`find_all` with the
    # `string` argument) instead of the deprecated `findAll(text=...)`.
    for comment in soup.find_all(
        string=lambda text: isinstance(text, Comment)
    ):
        comment.extract()

    # Select the main content (you can adjust the selector based on the structure of your target pages)
    main_content = soup.select_one("main") or soup.body

    if main_content:
        spans = main_content.find_all(["span", "a"])

        proc_spans = []
        for span in spans:
            proc_spans.append(span.get_text(separator=" ", strip=True))
        span_text = "\n".join(proc_spans)

        # Extract the text content from the main content
        paragraphs = main_content.find_all(
            ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li"]
        )
        cleaned_text = (
            "### Bulk:\n\n"
            + "\n\n".join(
                paragraph.get_text(separator=" ", strip=True)
                for paragraph in paragraphs
            )
            + "\n\n### Metadata:\n\n"
            + span_text
        )

        return cleaned_text
    else:
        return "Main content not found"


def execute_query(provider, query, params=None):
    """Run a Cypher query via the provider's Neo4j client and return the rows.

    Fix: the original declared a mutable default (`params={}`), a classic
    shared-state footgun; use None and substitute a fresh dict per call.
    """
    print(f"Executing query: {query}")
    params = params or {}
    with provider.client.session(database=provider._database) as session:
        result = session.run(query, params)
        return [record.data() for record in result]
entity_types = [ + EntityType("COMPANY"), + EntityType("SCHOOL"), + EntityType("LOCATION"), + EntityType("PERSON"), + EntityType("DATE"), + EntityType("OTHER"), + EntityType("QUANTITY"), + EntityType("EVENT"), + EntityType("INDUSTRY"), + EntityType("MEDIA"), + ] + + # Specify the relations for the KG construction + relations = [ + # Founder Relations + Relation("EDUCATED_AT"), + Relation("WORKED_AT"), + Relation("FOUNDED"), + # Company relations + Relation("RAISED"), + Relation("REVENUE"), + Relation("TEAM_SIZE"), + Relation("LOCATION"), + Relation("ACQUIRED_BY"), + Relation("ANNOUNCED"), + Relation("INDUSTRY"), + # Product relations + Relation("PRODUCT"), + Relation("FEATURES"), + Relation("TECHNOLOGY"), + # Additional relations + Relation("HAS"), + Relation("AS_OF"), + Relation("PARTICIPATED"), + Relation("ASSOCIATED"), + ] + + client = R2RClient(base_url=base_url) + r2r_prompts = R2RPromptProvider() + + prompt_base = ( + "zero_shot_ner_kg_extraction" + if local_mode + else "few_shot_ner_kg_extraction" + ) + + update_kg_prompt(client, r2r_prompts, prompt_base, entity_types, relations) + + url_map = get_all_yc_co_directory_urls() + + i = 0 + # Ingest and clean the data for each company + for company, url in url_map.items(): + company_data = fetch_and_clean_yc_co_data(url) + if i >= max_entries: + break + i += 1 + + try: + # Ingest as a text document + file_name = f"{company}.txt" + with open(file_name, "w") as f: + f.write(company_data) + + client.ingest_files( + [file_name], + metadatas=[{"title": company}], + ) + os.remove(file_name) + except: + continue + + print(client.inspect_knowledge_graph(1_000)["results"]) + + if not local_mode: + + update_kg_prompt( + client, r2r_prompts, "kg_agent", entity_types, relations + ) + + result = client.search( + query="Find up to 10 founders that worked at Google", + use_kg_search=True, + )["results"] + + print("result:\n", result) + print("Search Result:\n", result["kg_search_results"]) + + result = client.rag( + query="Find 
up to 10 founders that worked at Google", + use_kg_search=True, + ) + print("RAG Result:\n", result) + + +if __name__ == "__main__": + fire.Fire(main) diff --git a/R2R/r2r/examples/scripts/basic_kg_cookbook.py b/R2R/r2r/examples/scripts/basic_kg_cookbook.py new file mode 100755 index 00000000..52db9cff --- /dev/null +++ b/R2R/r2r/examples/scripts/basic_kg_cookbook.py @@ -0,0 +1,67 @@ +from r2r import R2RClient + +if __name__ == "__main__": + client = R2RClient(base_url="http://localhost:8000") + + with open("john.txt", "w") as f: + f.write("John is a person that works at Google.") + with open("paul.txt", "w") as f: + f.write("Paul is a person that works at Microsoft that knows John.") + + client.ingest_files( + ["john.txt", "paul.txt"], + metadatas=[ + { + "title": "KG Document 1", + "user_id": "063edaf8-3e63-4cb9-a4d6-a855f36376c3", + }, + { + "title": "KG Document 2", + "user_id": "063edaf8-3e63-4cb9-a4d6-a855f36376c3", + }, + ], + ) + + # Get the KG provider + # neo4j_kg = app.providers.kg + + # # The expected entities + # entity_names = ["John", "Paul", "Google", "Microsoft"] + + # print("\nEntities:") + # for entity in entity_names: + # print( + # f"Locating {entity}:\n", neo4j_kg.get(properties={"name": entity}) + # ) + + # relationships = neo4j_kg.get_triplets(entity_names=entity_names) + + # print("\nRelationships:") + # for triplet in relationships: + # source, relation, target = triplet + # print(f"{source} -[{relation.label}]-> {target} ") + + # # Search the vector database + # search_results = app.search(query="Who is john") + # print("\nSearch Results:\n", search_results) + + # # Semantic search over the knowledge graph + # from r2r.base import VectorStoreQuery + + # node_result = neo4j_kg.vector_query( + # VectorStoreQuery( + # query_embedding=app.providers.embedding.get_embedding("A person"), + # ) + # ) + # print("\nNode Result:", node_result) + + # # Structured query + # structured_query = """ + # MATCH (p1:person)-[:KNOWS]->(p2:person) + # RETURN 
def main(task_prompt_name="hyde", query="Who was aristotle?"):
    """Build an R2R app with the multi-search pipe factory and run one RAG query."""
    # Load the configuration file
    config = R2RConfig.from_json()

    # `task_prompt_name` propagates through the builder into the pipe factory.
    builder = R2RBuilder(config).with_pipe_factory(
        R2RPipeFactoryWithMultiSearch
    )
    app = builder.build(task_prompt_name=task_prompt_name)

    # Run the RAG pipeline through the R2R application
    result = app.rag(
        query,
        query_transform_generation_config=GenerationConfig(model="gpt-4o"),
        rag_generation_config=GenerationConfig(model="gpt-3.5-turbo"),
    )

    print(f"Search Results:\n\n{result.search_results}")
    print(f"RAG Results:\n\n{result.completion}")


if __name__ == "__main__":
    fire.Fire(main)
synthetic query generation template + synthetic_query_generation_template = { + "name": "synthetic_query_generation_template", + "template": """ + ### Instruction: + Given the following query, write a double newline separated list of up to {num_outputs} advanced queries meant to help answer the original query. + DO NOT generate any single query which is likely to require information from multiple distinct documents. + EACH single query will be used to carry out a cosine similarity semantic search over distinct indexed documents. + FOR EXAMPLE, if asked `how do the key themes of Great Gatsby compare with 1984`, the two queries would be + `What are the key themes of Great Gatsby?` and `What are the key themes of 1984?`. + Here is the original user query to be transformed into answers: + + ### Query: + {message} + + ### Response: + """, + "input_types": {"num_outputs": "int", "message": "str"}, + } + + # Build the R2R application with the custom pipeline + app = ( + R2RBuilder() + .with_pipe_factory(R2RPipeFactoryWithMultiSearch) + .build( + # override inputs consumed in building the MultiSearchPipe + multi_inner_search_pipe_override=web_search_pipe, + query_generation_template_override=synthetic_query_generation_template, + ) + ) + + # Run the RAG pipeline through the R2R application + result = app.rag( + query, + rag_generation_config=GenerationConfig(model="gpt-4o"), + ) + + print(f"Search Results:\n\n{result.search_results}") + print(f"RAG Results:\n\n{result.completion}") + + +if __name__ == "__main__": + fire.Fire(run_rag_pipeline) diff --git a/R2R/r2r/examples/scripts/run_web_rag.py b/R2R/r2r/examples/scripts/run_web_rag.py new file mode 100755 index 00000000..7535ae27 --- /dev/null +++ b/R2R/r2r/examples/scripts/run_web_rag.py @@ -0,0 +1,26 @@ +import fire + +from r2r import R2RBuilder, SerperClient, WebSearchPipe +from r2r.base.abstractions.llm import GenerationConfig + + +def run_rag_pipeline(query="Who was Aristotle?"): + # Create search pipe override and 
pipes + web_search_pipe = WebSearchPipe( + serper_client=SerperClient() # TODO - Develop a `WebSearchProvider` for configurability + ) + + app = R2RBuilder().with_vector_search_pipe(web_search_pipe).build() + + # Run the RAG pipeline through the R2R application + result = app.rag( + query, + rag_generation_config=GenerationConfig(model="gpt-4o"), + ) + + print(f"Search Results:\n\n{result.search_results}") + print(f"RAG Results:\n\n{result.completion}") + + +if __name__ == "__main__": + fire.Fire(run_rag_pipeline) diff --git a/R2R/r2r/examples/scripts/sample_data_ingestor.py b/R2R/r2r/examples/scripts/sample_data_ingestor.py new file mode 100755 index 00000000..67eecd16 --- /dev/null +++ b/R2R/r2r/examples/scripts/sample_data_ingestor.py @@ -0,0 +1,81 @@ +import os +import uuid +from typing import TYPE_CHECKING + +import fire + +if TYPE_CHECKING: + from r2r.main.execution import R2RExecutionWrapper + + +class SampleDataIngestor: + USER_IDS = [ + "063edaf8-3e63-4cb9-a4d6-a855f36376c3", + "45c3f5a8-bcbe-43b1-9b20-51c07fd79f14", + "c6c23d85-6217-4caa-b391-91ec0021a000", + None, + ] + + def __init__( + self, + executor: "R2RExecutionWrapper", + ): + self.executor = executor + + @staticmethod + def get_sample_files(no_media: bool = True) -> list[str]: + examples_dir = os.path.join( + os.path.dirname(os.path.abspath(__file__)), ".." 
+ ) + + files = [ + os.path.join(examples_dir, "data", "aristotle.txt"), + os.path.join(examples_dir, "data", "got.txt"), + os.path.join(examples_dir, "data", "screen_shot.png"), + os.path.join(examples_dir, "data", "pg_essay_1.html"), + os.path.join(examples_dir, "data", "pg_essay_2.html"), + os.path.join(examples_dir, "data", "pg_essay_3.html"), + os.path.join(examples_dir, "data", "pg_essay_4.html"), + os.path.join(examples_dir, "data", "pg_essay_5.html"), + os.path.join(examples_dir, "data", "lyft_2021.pdf"), + os.path.join(examples_dir, "data", "uber_2021.pdf"), + os.path.join(examples_dir, "data", "sample.mp3"), + os.path.join(examples_dir, "data", "sample2.mp3"), + ] + if no_media: + excluded_types = ["jpeg", "jpg", "png", "svg", "mp3", "mp4"] + files = [ + file_path + for file_path in files + if file_path.split(".")[-1].lower() not in excluded_types + ] + return files + + def ingest_sample_files(self, no_media: bool = True): + sample_files = self.get_sample_files(no_media) + user_ids = [ + uuid.UUID(user_id) if user_id else None + for user_id in self.USER_IDS + ] + + response = self.executor.ingest_files( + sample_files, + [ + {"user_id": user_ids[it % len(user_ids)]} + for it in range(len(sample_files)) + ], + ) + return response + + def ingest_sample_file(self, no_media: bool = True, option: int = 0): + sample_files = self.get_sample_files() + user_id = uuid.UUID(self.USER_IDS[option % len(self.USER_IDS)]) + + response = self.executor.ingest_files( + [sample_files[option]], [{"user_id": user_id}] + ) + return response + + +if __name__ == "__main__": + fire.Fire(SampleDataIngestor) diff --git a/R2R/r2r/examples/scripts/test_e2e.py b/R2R/r2r/examples/scripts/test_e2e.py new file mode 100755 index 00000000..11829f94 --- /dev/null +++ b/R2R/r2r/examples/scripts/test_e2e.py @@ -0,0 +1,321 @@ +import json +import math +from datetime import datetime, timezone +from uuid import UUID + +from r2r.main.execution import R2RExecutionWrapper + +expected_logs = [ + { + 
"run_id": ..., + "run_type": "ingestion", + "entries": [ + { + "key": "document_parse_result", + "value": "Document 'aristotle.txt' processed successfully.", + } + ], + }, + { + "run_id": ..., + "run_type": "search", + "entries": [ + {"key": "search_latency", "value": "0.45"}, + { + "key": "search_results", + "value": '["{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",\\"score\\":0.773841586847122,\\"metadata\\":{\\"text\\":\\"Aristotle[A] (Greek: \\u1f08\\u03c1\\u03b9\\u03c3\\u03c4\\u03bf\\u03c4\\u03ad\\u03bb\\u03b7\\u03c2 Aristot\\u00e9l\\u0113s, pronounced [aristot\\u00e9l\\u025b\\u02d0s]; 384\\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":0,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6f58828-2e6d-5eb1-94f3-efbc0b7c1699\\",\\"score\\":0.669298529624939,\\"metadata\\":{\\"text\\":\\"Aristotle was revered among medieval Muslim scholars as \\\\\\"The First Teacher\\\\\\", and among medieval Christians like Thomas Aquinas as simply \\\\\\"The Philosopher\\\\\\", while the poet Dante called him \\\\\\"the master of those who know\\\\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. 
In addition, his ethics, although always influential, gained renewed interest with\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":5,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c818bc72-2ac8-581b-b51a-0ca826f5f2b8\\",\\"score\\":0.652687707703574,\\"metadata\\":{\\"text\\":\\"Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle\'s parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle\'s childhood has survived, he probably spent\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":8,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d8ea40eb-cd48-5bd7-b2df-05f6268aed10\\",\\"score\\":0.636079056730387,\\"metadata\\":{\\"text\\":\\"Aristotle has been called the father of logic, biology, political science, zoology, embryology, natural law, scientific method, rhetoric, psychology, realism, criticism, individualism, teleology, and meteorology.[151]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":177,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", 
"{\\"id\\":\\"b32f0e19-029f-5b3f-856c-e7e4141f52f5\\",\\"score\\":0.624098479747772,\\"metadata\\":{\\"text\\":\\"Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing \\\\\\"every branch of intellectual enterprise\\\\\\" including Western ethical and political theory, theology, rhetoric, and literary analysis is equally long. As a\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":175,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6c5d5f6-7fc4-5bb8-847d-44cfa16f5178\\",\\"score\\":0.619364976882935,\\"metadata\\":{\\"text\\":\\"Little is known about Aristotle\'s life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato\'s Academy in Athens and remained there until the age of 37 (c.\\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. 
He established a library in the Lyceum,\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":1,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"90b891ee-0a67-54ba-838a-e02e1647adab\\",\\"score\\":0.6177915291003779,\\"metadata\\":{\\"text\\":\\"Like his teacher Plato, Aristotle\'s philosophy aims at the universal. Aristotle\'s ontology places the universal (katholou) in particulars (kath\' hekaston), things in the world, whereas for Plato the universal is a separately existing form which actual things imitate. For Aristotle, \\\\\\"form\\\\\\" is still what phenomena are based on, but is \\\\\\"instantiated\\\\\\" in a particular substance.[34]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":37,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5f6213d1-a46b-5ed4-a15c-e95bab271621\\",\\"score\\":0.606411576271057,\\"metadata\\":{\\"text\\":\\"Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. 
Alkindus greatly admired Aristotle\'s philosophy,[168] and Averroes spoke of\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":194,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ce43de17-635c-5a01-aae2-e160a6d56f4b\\",\\"score\\":0.601802307421038,\\"metadata\\":{\\"text\\":\\"passed to Plato\'s nephew Speusippus, although it is possible that he feared the anti-Macedonian sentiments in Athens at that time and left before Plato died.[10] Aristotle then accompanied Xenocrates to the court of his friend Hermias of Atarneus in Asia Minor. After the death of Hermias, Aristotle travelled with his pupil Theophrastus to the island of Lesbos, where together they researched the botany and zoology of the island and its sheltered lagoon. While in Lesbos, Aristotle married Pythias, either\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":12,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8550e2b7-43f8-5a59-9c13-c9678670a2da\\",\\"score\\":0.595871686935425,\\"metadata\\":{\\"text\\":\\"The immediate influence of Aristotle\'s work was felt as the Lyceum grew into the Peripatetic school. Aristotle\'s students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle\'s influence over Alexander the Great is seen in the latter\'s bringing with him on his expedition a host of zoologists, botanists, and researchers. 
He had also learned a great deal about Persian customs and traditions from his\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":181,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}"]', + }, + {"key": "search_query", "value": "who is aristotle?"}, + ], + }, + # {'run_id': ..., 'run_type': 'search', 'entries': [{'key': 'search_query', 'value': 'who is aristotle?'}, {'key': 'search_latency', 'value': '0.51'}, {'key': 'search_results', 'value': '["{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",\\"score\\":0.773841586847122,\\"metadata\\":{\\"text\\":\\"Aristotle[A] (Greek: \\u1f08\\u03c1\\u03b9\\u03c3\\u03c4\\u03bf\\u03c4\\u03ad\\u03bb\\u03b7\\u03c2 Aristot\\u00e9l\\u0113s, pronounced [aristot\\u00e9l\\u025b\\u02d0s]; 384\\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":0,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6f58828-2e6d-5eb1-94f3-efbc0b7c1699\\",\\"score\\":0.669298529624939,\\"metadata\\":{\\"text\\":\\"Aristotle was revered among medieval Muslim scholars as \\\\\\"The First Teacher\\\\\\", and among medieval Christians like Thomas Aquinas as simply \\\\\\"The Philosopher\\\\\\", while the poet Dante called him \\\\\\"the master of those who know\\\\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. In addition, his ethics, although always influential, gained renewed interest with\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":5,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c818bc72-2ac8-581b-b51a-0ca826f5f2b8\\",\\"score\\":0.652687707703574,\\"metadata\\":{\\"text\\":\\"Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle\'s parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle\'s childhood has survived, he probably spent\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":8,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d8ea40eb-cd48-5bd7-b2df-05f6268aed10\\",\\"score\\":0.636050164699554,\\"metadata\\":{\\"text\\":\\"Aristotle has been called the father of logic, biology, political science, zoology, embryology, natural law, scientific method, rhetoric, psychology, realism, criticism, individualism, teleology, and meteorology.[151]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":177,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"b32f0e19-029f-5b3f-856c-e7e4141f52f5\\",\\"score\\":0.624127291194959,\\"metadata\\":{\\"text\\":\\"Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing \\\\\\"every branch of intellectual enterprise\\\\\\" including Western ethical and political theory, theology, rhetoric, and literary analysis is equally long. 
As a\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":175,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6c5d5f6-7fc4-5bb8-847d-44cfa16f5178\\",\\"score\\":0.619364976882935,\\"metadata\\":{\\"text\\":\\"Little is known about Aristotle\'s life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato\'s Academy in Athens and remained there until the age of 37 (c.\\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum,\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":1,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"90b891ee-0a67-54ba-838a-e02e1647adab\\",\\"score\\":0.6177915291003779,\\"metadata\\":{\\"text\\":\\"Like his teacher Plato, Aristotle\'s philosophy aims at the universal. Aristotle\'s ontology places the universal (katholou) in particulars (kath\' hekaston), things in the world, whereas for Plato the universal is a separately existing form which actual things imitate. 
For Aristotle, \\\\\\"form\\\\\\" is still what phenomena are based on, but is \\\\\\"instantiated\\\\\\" in a particular substance.[34]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":37,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5f6213d1-a46b-5ed4-a15c-e95bab271621\\",\\"score\\":0.606407422018273,\\"metadata\\":{\\"text\\":\\"Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired Aristotle\'s philosophy,[168] and Averroes spoke of\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":194,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ce43de17-635c-5a01-aae2-e160a6d56f4b\\",\\"score\\":0.601802307421038,\\"metadata\\":{\\"text\\":\\"passed to Plato\'s nephew Speusippus, although it is possible that he feared the anti-Macedonian sentiments in Athens at that time and left before Plato died.[10] Aristotle then accompanied Xenocrates to the court of his friend Hermias of Atarneus in Asia Minor. After the death of Hermias, Aristotle travelled with his pupil Theophrastus to the island of Lesbos, where together they researched the botany and zoology of the island and its sheltered lagoon. 
While in Lesbos, Aristotle married Pythias, either\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":12,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8550e2b7-43f8-5a59-9c13-c9678670a2da\\",\\"score\\":0.5959202888059449,\\"metadata\\":{\\"text\\":\\"The immediate influence of Aristotle\'s work was felt as the Lyceum grew into the Peripatetic school. Aristotle\'s students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle\'s influence over Alexander the Great is seen in the latter\'s bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal about Persian customs and traditions from his\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":181,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"1175585b-fc58-5e44-bfcb-cb1996289936\\",\\"score\\":0.594988668263635,\\"metadata\\":{\\"text\\":\\"after friends and relatives, and to deal with the latter as with beasts or plants\\\\\\".[13] By 335 BC, Aristotle had returned to Athens, establishing his own school there known as the Lyceum. Aristotle conducted courses at the school for the next twelve years. While in Athens, his wife Pythias died and Aristotle became involved with Herpyllis of Stagira. They had a son whom Aristotle named after his father, Nicomachus. 
If the Suda \\u2013 an uncritical compilation from the Middle Ages \\u2013 is accurate, he may also have\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":16,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8f8f5140-2d4b-5877-9cfb-d2df590831c2\\",\\"score\\":0.5928938565520601,\\"metadata\\":{\\"text\\":\\"In Protrepticus, the character \'Aristotle\' states:[123]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":147,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ee40bbc8-16f7-5808-8f14-c8fd16391cfc\\",\\"score\\":0.591026663780212,\\"metadata\\":{\\"text\\":\\"Transmission\\\\nFurther information: List of writers influenced by Aristotle\\\\nMore than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. 
According to the philosopher Bryan Magee, \\\\\\"it is doubtful whether any human being has ever known as much as he did\\\\\\".[145]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":174,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"703219b2-3284-533f-8df7-fee42f52c3d2\\",\\"score\\":0.5850032146276001,\\"metadata\\":{\\"text\\":\\"At the age of seventeen or eighteen, Aristotle moved to Athens to continue his education at Plato\'s Academy.[8] He probably experienced the Eleusinian Mysteries as he wrote when describing the sights one viewed at the Eleusinian Mysteries, \\\\\\"to experience is to learn\\\\\\" [\\u03c0\\u03b1\\u03b8\\u03b5\\u03af\\u03bd \\u03bc\\u03b1\\u03b8\\u03b5\\u0129\\u03bd].[9] Aristotle remained in Athens for nearly twenty years before leaving in 348/47 BC. The traditional story about his departure records that he was disappointed with the Academy\'s direction after control passed to Plato\'s\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":11,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"044a1a8a-7661-52b9-af63-83c243216d34\\",\\"score\\":0.5834955821337959,\\"metadata\\":{\\"text\\":\\"\\u2014\\u200aAristotle. 
Politics, Book 4, 1294b.10\\u201318\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":152,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"4173b9bc-4b39-5dc1-b9af-a2fc0e282389\\",\\"score\\":0.5787635539488301,\\"metadata\\":{\\"text\\":\\"This period in Athens, between 335 and 323 BC, is when Aristotle is believed to have composed many of his works.[12] He wrote many dialogues, of which only fragments have survived. Those works that have survived are in treatise form and were not, for the most part, intended for widespread publication; they are generally thought to be lecture aids for his students. His most important treatises include Physics, Metaphysics, Nicomachean Ethics, Politics, On the Soul and Poetics. Aristotle studied and made\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":19,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"038127f5-6603-5258-8658-7c60eaf3dae3\\",\\"score\\":0.575957238674164,\\"metadata\\":{\\"text\\":\\"Averroes spoke of Aristotle as the \\\\\\"exemplar\\\\\\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \\\\\\"First Teacher\\\\\\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic 
philosophy.[170]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":195,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5fb90839-d04c-50b0-8f7f-ffc1a938c019\\",\\"score\\":0.574642419815063,\\"metadata\\":{\\"text\\":\\"Aristotle was appointed as the head of the royal Academy of Macedon. During Aristotle\'s time in the Macedonian court, he gave lessons not only to Alexander but also to two other future kings: Ptolemy and Cassander.[13] Aristotle encouraged Alexander toward eastern conquest, and Aristotle\'s own attitude towards Persia was unabashedly ethnocentric. In one famous example, he counsels Alexander to be \\\\\\"a leader to the Greeks and a despot to the barbarians, to look after the former as after friends and\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":15,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"76a117a7-706b-5d7f-a856-e5fc4bb0d8a2\\",\\"score\\":0.5701740640298101,\\"metadata\\":{\\"text\\":\\"Life\\\\nIn general, the details of Aristotle\'s life are not well-established. 
The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":7,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d64af737-640e-5ea0-9259-4c83638a714d\\",\\"score\\":0.565754532814026,\\"metadata\\":{\\"text\\":\\"Aristotle was the first person to study biology systematically,[61] and biology forms a large part of his writings. He spent two years observing and describing the zoology of Lesbos and the surrounding seas, including in particular the Pyrrha lagoon in the centre of Lesbos.[62][63] His data in History of Animals, Generation of Animals, Movement of Animals, and Parts of Animals are assembled from his own observations,[64] statements given by people with specialized knowledge, such as beekeepers and\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":85,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d00de43e-11c4-59b9-ba09-c16ad525a3d3\\",\\"score\\":0.56225860118866,\\"metadata\\":{\\"text\\":\\"Aristotle wrote his works on papyrus scrolls, the common writing medium of that era.[O] His writings are divisible into two groups: the \\\\\\"exoteric\\\\\\", intended for the public, and the \\\\\\"esoteric\\\\\\", for use within the Lyceum school.[208][P][209] Aristotle\'s \\\\\\"lost\\\\\\" works stray considerably in characterization from the surviving Aristotelian corpus. 
Whereas the lost works appear to have been originally written with a view to subsequent publication, the surviving works mostly resemble lecture notes not intended for\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":222,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"dec8fec3-2b27-554b-b953-27db6a221078\\",\\"score\\":0.561229228973389,\\"metadata\\":{\\"text\\":\\"Aristotle\'s views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition of the Catholic Church.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":4,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"eb18cdd3-981e-54bb-835d-5988776c5bb9\\",\\"score\\":0.557450473308563,\\"metadata\\":{\\"text\\":\\"With the loss of the study of ancient Greek in the early medieval Latin West, Aristotle was practically unknown there from c.\\u2009CE 600 to c.\\u20091100 except through the Latin translation of the Organon made by Boethius. 
In the twelfth and thirteenth centuries, interest in Aristotle revived and Latin Christians had translations made, both from Arabic translations, such as those by Gerard of Cremona,[171] and from the original Greek, such as those by James of Venice and William of Moerbeke.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":197,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"40014d76-eba6-5a2a-9c2b-672ac6ddd212\\",\\"score\\":0.557388577305591,\\"metadata\\":{\\"text\\":\\"The Dutch historian of science Eduard Jan Dijksterhuis writes that Aristotle and his predecessors showed the difficulty of science by \\\\\\"proceed[ing] so readily to frame a theory of such a general character\\\\\\" on limited evidence from their senses.[192] In 1985, the biologist Peter Medawar could still state in \\\\\\"pure seventeenth century\\\\\\"[193] tones that Aristotle had assembled \\\\\\"a strange and generally speaking rather tiresome farrago of hearsay, imperfect observation, wishful thinking and credulity amounting to\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":212,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"65c3d9cd-0ab5-5ef7-8296-a1baeff3592d\\",\\"score\\":0.5566293664728881,\\"metadata\\":{\\"text\\":\\"Aristotle made substantial contributions to economic thought, especially to thought in the Middle Ages.[128] In Politics, Aristotle addresses the city, property, and trade. 
His response to criticisms of private property, in Lionel Robbins\'s view, anticipated later proponents of private property among philosophers and economists, as it related to the overall utility of social arrangements.[128] Aristotle believed that although communal arrangements may seem beneficial to society, and that although private\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":156,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a2cc5fc4-99a4-53c5-8c74-2c68399f2a7e\\",\\"score\\":0.549582594682865,\\"metadata\\":{\\"text\\":\\"Practical philosophy\\\\nAristotle\'s practical philosophy covers areas such as ethics, politics, economics, and rhetoric.[40]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":134,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"27a6a904-7b94-59ca-81ea-8b99b2816f81\\",\\"score\\":0.547278100576631,\\"metadata\\":{\\"text\\":\\"After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke\'s translations and calling Aristotle \\\\\\"The Philosopher\\\\\\",[172] the demand for Aristotle\'s writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages. 
Scholars such as Boethius, Peter Abelard, and John\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":198,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d4fcaaff-1d1e-5ed4-8d60-6a71a5721ea4\\",\\"score\\":0.544766214801896,\\"metadata\\":{\\"text\\":\\"Aristotle\'s view, incapable of participating in political life.[124] On this ground, proponents of feminist metaphysics have accused Aristotle of misogyny[125] and sexism.[126] However, Aristotle gave equal weight to women\'s happiness as he did to men\'s, and commented in his Rhetoric that the things that lead to happiness need to be in women as well as men.[N]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":154,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"fe38e23d-4798-5cd3-8d42-e858f7e97537\\",\\"score\\":0.54476398229599,\\"metadata\\":{\\"text\\":\\"Theoretical philosophy\\\\nLogic\\\\nMain article: Term logic\\\\nFurther information: Non-Aristotelian logic\\\\nWith the Prior Analytics, Aristotle is credited with the earliest study of formal logic,[23] and his conception of it was the dominant form of Western logic until 19th-century advances in mathematical logic.[24] Kant stated in the Critique of Pure Reason that with Aristotle, logic reached its completion.[25]\\\\n\\\\nOrganon\\\\nMain article: 
Organon\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":23,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"b29f4f0e-c033-5613-a8fa-62ea11f951ed\\",\\"score\\":0.5409726500511169,\\"metadata\\":{\\"text\\":\\"Zoologists have frequently mocked Aristotle for errors and unverified secondhand reports. However, modern observation has confirmed several of his more surprising claims.[195][196][197] Aristotle\'s work remains largely unknown to modern scientists, though zoologists sometimes mention him as the father of biology[150] or in particular of marine biology.[198] Practising zoologists are unlikely to adhere to Aristotle\'s chain of being, but its influence is still perceptible in the use of the terms \\\\\\"lower\\\\\\" and\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":214,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"cb6775dd-85a1-5e62-bd23-b00842a34231\\",\\"score\\":0.53917521238327,\\"metadata\\":{\\"text\\":\\"Ethics\\\\nMain article: Aristotelian ethics\\\\nAristotle considered ethics to be a practical rather than theoretical study, i.e., one aimed at becoming good and doing good rather than knowing for its own sake. 
He wrote several treatises on ethics, most notably including the Nicomachean Ethics.[117]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":136,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"31ddcd60-5b19-5b9c-9f47-6b144001308f\\",\\"score\\":0.5359366855583609,\\"metadata\\":{\\"text\\":\\"Aristotle has been depicted by major artists including Lucas Cranach the Elder,[218] Justus van Gent, Raphael, Paolo Veronese, Jusepe de Ribera,[219] Rembrandt,[220] and Francesco Hayez over the centuries. Among the best-known depictions is Raphael\'s fresco The School of Athens, in the Vatican\'s Apostolic Palace, where the figures of Plato and Aristotle are central to the image, at the architectural vanishing point, reflecting their importance.[221] Rembrandt\'s Aristotle with a Bust of Homer, too, is a\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":231,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c3c15401-51cf-5c53-8182-feac4f7fc6cf\\",\\"score\\":0.535427896056898,\\"metadata\\":{\\"text\\":\\"Though Aristotle wrote many elegant treatises and dialogues for publication, only around a third of his original output has survived, none of it intended for publication. Aristotle provided a complex synthesis of the various philosophies existing prior to him. 
His teachings and methods of inquiry have had a significant impact across the world, and remain a subject of contemporary philosophical discussion.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":3,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"b5da0460-6b55-550a-ad14-3eae61df567f\\",\\"score\\":0.53429651260376,\\"metadata\\":{\\"text\\":\\"Aristotle\'s views on women influenced later Western philosophers, who quoted him as an authority until the end of the Middle Ages, but these views have been controversial in modern times. Aristotle\'s analysis of procreation describes an active, ensouling masculine element bringing life to an inert, passive female element. The biological differences are a result of the fact that the female body is well-suited for reproduction, which changes her body temperature, which in turn makes her, in Aristotle\'s view,\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":153,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"bbb9c9e5-c084-5068-ac8a-019c8441eebb\\",\\"score\\":0.532585024833679,\\"metadata\\":{\\"text\\":\\"The scholar Taneli Kukkonen notes that \\\\\\"in the best 20th-century scholarship Aristotle comes alive as a thinker wrestling with the full weight of the Greek philosophical tradition.\\\\\\"[148] What follows is an overview of the transmission and influence of his texts and ideas into the modern era.\\\\n\\\\nHis successor, Theophrastus\\\\nMain articles: Theophrastus and Historia Plantarum 
(Theophrastus)\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":178,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8b9081fd-5603-5704-9ba6-8957525f0c05\\",\\"score\\":0.532379746437073,\\"metadata\\":{\\"text\\":\\"Politics\\\\nMain article: Politics (Aristotle)\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":141,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"f56f324f-e2fe-5552-b4fd-a2324d080e97\\",\\"score\\":0.5316953063011169,\\"metadata\\":{\\"text\\":\\"equally long. As a result, Kukkonen argues, any analysis of reality today \\\\\\"will almost certainly carry Aristotelian overtones ... evidence of an exceptionally forceful mind.\\\\\\"[148] Jonathan Barnes wrote that \\\\\\"an account of Aristotle\'s intellectual afterlife would be little less than a history of European thought\\\\\\".[149]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":176,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"1e64fff6-6f61-5490-8743-4947487cced2\\",\\"score\\":0.5270282030105591,\\"metadata\\":{\\"text\\":\\"Present science\\\\nThe philosopher Bertrand Russell claims that \\\\\\"almost every serious intellectual advance has had to begin with an attack on some Aristotelian doctrine\\\\\\". 
Russell calls Aristotle\'s ethics \\\\\\"repulsive\\\\\\", and labelled his logic \\\\\\"as definitely antiquated as Ptolemaic astronomy\\\\\\". Russell states that these errors make it difficult to do historical justice to Aristotle, until one remembers what an advance he made upon all of his predecessors.[191]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":211,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"68446969-0dbf-5cbb-852a-07612d886c46\\",\\"score\\":0.525567233562469,\\"metadata\\":{\\"text\\":\\"Plato (left) and Aristotle in Raphael\'s 1509 fresco, The School of Athens. Aristotle holds his Nicomachean Ethics and gestures to the earth, representing his view in immanent realism, whilst Plato gestures to the heavens, indicating his Theory of Forms, and holds his Timaeus.[26][27]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":24,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"794a7a47-8157-5a75-b3d4-7bd51dd9f04e\\",\\"score\\":0.523991107940674,\\"metadata\\":{\\"text\\":\\"The works of Aristotle that have survived from antiquity through medieval manuscript transmission are collected in the Corpus Aristotelicum. 
These texts, as opposed to Aristotle\'s lost works, are technical philosophical treatises from within Aristotle\'s school.[205] Reference to them is made according to the organization of Immanuel Bekker\'s Royal Prussian Academy edition (Aristotelis Opera edidit Academia Regia Borussica, Berlin, 1831\\u20131870), which in turn is based on ancient classifications of these\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":219,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"30561c1d-cdf7-5281-beb5-25d98903fb8f\\",\\"score\\":0.523762373700001,\\"metadata\\":{\\"text\\":\\"Charles Darwin regarded Aristotle as the most important contributor to the subject of biology. In an 1882 letter he wrote that \\\\\\"Linnaeus and Cuvier have been my two gods, though in very different ways, but they were mere schoolboys to old Aristotle\\\\\\".[187][188] Also, in later editions of the book \\\\\\"On the Origin of Species\', Darwin traced evolutionary ideas as far back as Aristotle;[189] the text he cites is a summary by Aristotle of the ideas of the earlier Greek philosopher Empedocles.[190]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":210,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a0dafc12-79f1-506c-8916-41733f864a76\\",\\"score\\":0.522506475448608,\\"metadata\\":{\\"text\\":\\"traditions from his teacher. 
Although his respect for Aristotle was diminished as his travels made it clear that much of Aristotle\'s geography was clearly wrong, when the old philosopher released his works to the public, Alexander complained \\\\\\"Thou hast not done well to publish thy acroamatic doctrines; for in what shall I surpass other men if those doctrines wherein I have been trained are to be all men\'s common property?\\\\\\"[155]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":182,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d2e3d3b2-a20d-510a-a96f-9705b02411be\\",\\"score\\":0.5219854116439819,\\"metadata\\":{\\"text\\":\\"Moses Maimonides (considered to be the foremost intellectual figure of medieval Judaism)[179] adopted Aristotelianism from the Islamic scholars and based his Guide for the Perplexed on it and that became the basis of Jewish scholastic philosophy. 
Maimonides also considered Aristotle to be the greatest philosopher that ever lived, and styled him as the \\\\\\"chief of the philosophers\\\\\\".[180][181][182] Also, in his letter to Samuel ibn Tibbon, Maimonides observes that there is no need for Samuel to study the\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":203,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c2c7aa9c-8a52-5ccf-b5e6-ab9e918cac18\\",\\"score\\":0.5219346284866331,\\"metadata\\":{\\"text\\":\\"Metaphysics\\\\nMain article: Metaphysics (Aristotle)\\\\nThe word \\\\\\"metaphysics\\\\\\" appears to have been coined by the first century AD editor who assembled various small selections of Aristotle\'s works to the treatise we know by the name Metaphysics.[34] Aristotle called it \\\\\\"first philosophy\\\\\\", and distinguished it from mathematics and natural science (physics) as the contemplative (theoretik\\u0113) philosophy which is \\\\\\"theological\\\\\\" and studies the divine. He wrote in his Metaphysics (1026a16):\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":30,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"fad6a42d-845f-5f75-9416-560f6bdfc622\\",\\"score\\":0.5203073620796199,\\"metadata\\":{\\"text\\":\\"Near the end of his life, Alexander and Aristotle became estranged over Alexander\'s relationship with Persia and Persians. 
A widespread tradition in antiquity suspected Aristotle of playing a role in Alexander\'s death, but the only evidence of this is an unlikely claim made some six years after the death.[16] Following Alexander\'s death, anti-Macedonian sentiment in Athens was rekindled. In 322 BC, Demophilus and Eurymedon the Hierophant reportedly denounced Aristotle for impiety,[17] prompting him to flee\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":21,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ccfa969d-8634-5dd8-8b8f-e664667f95a0\\",\\"score\\":0.520039439201355,\\"metadata\\":{\\"text\\":\\"In addition to his works on ethics, which address the individual, Aristotle addressed the city in his work titled Politics. Aristotle considered the city to be a natural community. Moreover, he considered the city to be prior in importance to the family, which in turn is prior to the individual, \\\\\\"for the whole must of necessity be prior to the part\\\\\\".[120] He famously stated that \\\\\\"man is by nature a political animal\\\\\\" and argued that humanity\'s defining factor among others in the animal kingdom is its\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":142,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"f5db5c10-ace8-5a28-9b15-d1ef88ac2754\\",\\"score\\":0.519339799880981,\\"metadata\\":{\\"text\\":\\"sense, Aristotle\'s biology is 
scientific.[78]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":96,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5c80f870-f275-5109-a752-5f9e0caf859c\\",\\"score\\":0.517741410866893,\\"metadata\\":{\\"text\\":\\"Aristotle\'s \\\\\\"natural philosophy\\\\\\" spans a wide range of natural phenomena including those now covered by physics, biology and other natural sciences.[40] In Aristotle\'s terminology, \\\\\\"natural philosophy\\\\\\" is a branch of philosophy examining the phenomena of the natural world, and includes fields that would be regarded today as physics, biology and other natural sciences. Aristotle\'s work encompassed virtually all facets of intellectual inquiry. Aristotle makes philosophy in the broad sense coextensive with\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":51,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"87154733-18e0-56c0-9d00-a6e3d3601277\\",\\"score\\":0.513434050865886,\\"metadata\\":{\\"text\\":\\"Aristotle\'s writings on motion remained influential until the Early Modern period. John Philoponus (in Late antiquity) and Galileo (in Early modern period) are said to have shown by experiment that Aristotle\'s claim that a heavier object falls faster than a lighter object is incorrect.[40] A contrary opinion is given by Carlo Rovelli, who argues that Aristotle\'s physics of motion is correct within its domain of validity, that of objects in the Earth\'s gravitational field immersed in a fluid such as air. 
In\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":64,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d7fed20f-2710-529e-8518-33e31335ee4e\\",\\"score\\":0.513187944889069,\\"metadata\\":{\\"text\\":\\"Aristotle taught that virtue has to do with the proper function (ergon) of a thing. An eye is only a good eye in so much as it can see, because the proper function of an eye is sight. Aristotle reasoned that humans must have a function specific to humans, and that this function must be an activity of the psuch\\u0113 (soul) in accordance with reason (logos). Aristotle identified such an optimum activity (the virtuous mean, between the accompanying vices of excess or deficiency[15]) of the soul as the aim of all\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":137,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"604d5540-35cf-5575-90ac-e77cddf30b79\\",\\"score\\":0.50912079269414,\\"metadata\\":{\\"text\\":\\"Aristotle also made many observations about the hydrologic cycle. 
For example, he made some of the earliest observations about desalination: he observed early \\u2013 and correctly \\u2013 that when seawater is heated, freshwater evaporates and that the oceans are then replenished by the cycle of rainfall and river runoff (\\\\\\"I have proved by experiment that salt water evaporated forms fresh and the vapor does not when it condenses condense into sea water again.\\\\\\")[60]\\\\n\\\\nBiology\\\\nMain article: Aristotle\'s biology\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":83,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a1d385d3-062f-5b72-9ee6-8b6ba0d68140\\",\\"score\\":0.506430864334106,\\"metadata\\":{\\"text\\":\\"According to Strabo and Plutarch, after Aristotle\'s death, his library and writings went to Theophrastus (Aristotle\'s successor as head of the Lycaeum and the Peripatetic school).[215] After the death of Theophrastus, the peripatetic library went to Neleus of Scepsis.[216]:\\u200a5\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":225,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"3f156748-3db2-5d2a-bce2-4267f9b46784\\",\\"score\\":0.502232283220752,\\"metadata\\":{\\"text\\":\\"Most of Aristotle\'s work is probably not in its original form, because it was most likely edited by students and later lecturers. 
The logical works of Aristotle were compiled into a set of six books called the Organon around 40 BC by Andronicus of Rhodes or others among his followers.[28] The books are:\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":25,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"75295549-fd8c-562c-8a19-26ba3276c9e8\\",\\"score\\":0.500771760848554,\\"metadata\\":{\\"text\\":\\"Islamic portrayal of Aristotle (right) in the Kit\\u0101b na\\u02bft al-\\u1e25ayaw\\u0101n, c.\\u20091220.[166]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":193,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"bf8a1c08-f7fb-5636-83bf-87b9af9561e1\\",\\"score\\":0.500191748119388,\\"metadata\\":{\\"text\\":\\"Aristotle did not do experiments in the modern sense.[74] He used the ancient Greek term pepeiramenoi to mean observations, or at most investigative procedures like dissection.[75] In Generation of Animals, he finds a fertilized hen\'s egg of a suitable stage and opens it to see the embryo\'s heart beating inside.[76][77]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":94,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ace70c54-12d0-5dc2-9b27-b566e985e0f7\\",\\"score\\":0.49837121377162896,\\"metadata\\":{\\"text\\":\\"Portrait bust of Aristotle; an Imperial Roman (1st or 2nd century AD) copy of a lost bronze sculpture made by 
Lysippos.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":18,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"900f6eb4-d13b-5129-9ed6-3f78b31b83b1\\",\\"score\\":0.49812573194503795,\\"metadata\\":{\\"text\\":\\"The first medical teacher at Alexandria, Herophilus of Chalcedon, corrected Aristotle, placing intelligence in the brain, and connected the nervous system to motion and sensation. Herophilus also distinguished between veins and arteries, noting that the latter pulse while the former do not.[157] Though a few ancient atomists such as Lucretius challenged the teleological viewpoint of Aristotelian ideas about life, teleology (and after the rise of Christianity, natural theology) would remain central to\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":184,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"4c6bf2b3-9140-5fe5-adba-27579b339647\\",\\"score\\":0.49481466443451305,\\"metadata\\":{\\"text\\":\\"One of Aristotle\'s types of syllogism[D]\\\\nIn words\\\\tIn\\\\nterms[E]\\\\tIn equations[F]\\\\n All men are mortal.\\\\n\\\\n All Greeks are men.\\\\n\\\\n\\u2234 All Greeks are mortal.\\\\tM a P\\\\n\\\\nS a M\\\\n\\\\nS a P\\\\nWhat is today called Aristotelian logic with its types of syllogism (methods of logical argument),[32] Aristotle himself would have labelled \\\\\\"analytics\\\\\\". 
The term \\\\\\"logic\\\\\\" he reserved to mean dialectics.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":29,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"d1966c66-9f90-5d28-81c2-80e662e3b977\\",\\"score\\":0.49248007029532903,\\"metadata\\":{\\"text\\":\\"terms \\\\\\"lower\\\\\\" and \\\\\\"upper\\\\\\" to designate taxa such as groups of plants.[199] The evolutionary biologist Armand Marie Leroi has reconstructed Aristotle\'s biology,[200] while Niko Tinbergen\'s four questions, based on Aristotle\'s four causes, are used to analyse animal behaviour; they examine function, phylogeny, mechanism, and ontogeny.[201][202] The concept of homology began with Aristotle;[203] the evolutionary developmental biologist Lewis I. Held commented that he would be interested in the concept of deep\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":215,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"2eeb68da-6525-56d6-be4d-d42ab93c4172\\",\\"score\\":0.49181364403489203,\\"metadata\\":{\\"text\\":\\"Aristotle taught that to achieve a virtuous and potentially happy character requires a first stage of having the fortune to be habituated not deliberately, but by teachers, and experience, leading to a later stage in which one consciously chooses to do the best things. 
When the best people come to live life this way their practical wisdom (phronesis) and their intellect (nous) can develop with each other towards the highest possible human virtue, the wisdom of an accomplished theoretical or speculative\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":139,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"93bcfe9d-e093-5543-b47b-3e0b9b075801\\",\\"score\\":0.491194203233546,\\"metadata\\":{\\"text\\":\\"Aristotle was one of the first people to record any geological observations. He stated that geological change was too slow to be observed in one person\'s lifetime.[56][57] The geologist Charles Lyell noted that Aristotle described such change, including \\\\\\"lakes that had dried up\\\\\\" and \\\\\\"deserts that had become watered by rivers\\\\\\", giving as examples the growth of the Nile delta since the time of Homer, and \\\\\\"the upheaving of one of the Aeolian islands, previous to a volcanic eruption.\\\\\\"\'[58]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":81,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"399db731-efe2-5198-84b7-5b9cdf6d3a08\\",\\"score\\":0.48828518320829895,\\"metadata\\":{\\"text\\":\\"Poetics\\\\nMain article: Poetics (Aristotle)\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":166,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", 
"{\\"id\\":\\"8f369acc-7504-5cec-ad88-89cb1d3889c0\\",\\"score\\":0.487353653469465,\\"metadata\\":{\\"text\\":\\"Epistemology\\\\nAristotle\'s immanent realism means his epistemology is based on the study of things that exist or happen in the world, and rises to knowledge of the universal, whereas for Plato epistemology begins with knowledge of universal Forms (or ideas) and descends to knowledge of particular imitations of these.[31] Aristotle uses induction from examples alongside deduction, whereas Plato relies on deduction from a priori principles.[31]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":49,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"084c501e-c5e7-53c8-9316-4734ba5f5045\\",\\"score\\":0.48603272438049305,\\"metadata\\":{\\"text\\":\\"Through meticulous commentaries and critical engagements, figures like Al-Farabi and Ibn Sina (Avicenna) breathed new life into Aristotle\'s ideas. They harmonized his logic with Islamic theology, employed his scientific methodologies to explore the natural world, and even reinterpreted his ethics within the framework of Islamic morality. This revival was not mere imitation. 
Islamic thinkers embraced Aristotle\'s rigorous methods while simultaneously challenging his conclusions where they diverged from their\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":187,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a4c8191b-7a5c-576b-883d-4ffdc9840553\\",\\"score\\":0.484963147936056,\\"metadata\\":{\\"text\\":\\"Greek Christian scribes played a crucial role in the preservation of Aristotle by copying all the extant Greek language manuscripts of the corpus. The first Greek Christians to comment extensively on Aristotle were Philoponus, Elias, and David in the sixth century, and Stephen of Alexandria in the early seventh century.[162] John Philoponus stands out for having attempted a fundamental critique of Aristotle\'s views on the eternity of the world, movement, and other elements of Aristotelian thought.[163]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":190,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"65c94370-53be-543f-bf07-c9c8ac8a5b3c\\",\\"score\\":0.48229515552520796,\\"metadata\\":{\\"text\\":\\"is Aristotle\'s division of sensation and thought, which generally differed from the concepts of previous philosophers, with the exception of Alcmaeon.[95]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":111,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", 
"{\\"id\\":\\"b95901dc-798d-5206-994d-9948ebf10aba\\",\\"score\\":0.48190322518348705,\\"metadata\\":{\\"text\\":\\"coextensive with reasoning, which he also would describe as \\\\\\"science\\\\\\". However, his use of the term science carries a different meaning than that covered by the term \\\\\\"scientific method\\\\\\". For Aristotle, \\\\\\"all science (dianoia) is either practical, poetical or theoretical\\\\\\" (Metaphysics 1025b25). His practical science includes ethics and politics; his poetical science means the study of fine arts including poetry; his theoretical science covers physics, mathematics and metaphysics.[40]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":52,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"7d161587-aad7-5a0a-9e16-01b8542cb93c\\",\\"score\\":0.47876675237120203,\\"metadata\\":{\\"text\\":\\"\\\\\\"Aristotle tutoring Alexander\\\\\\" by Jean Leon Gerome Ferris.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":14,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c890996c-f87d-51f2-8d47-316d460893f3\\",\\"score\\":0.477234495630966,\\"metadata\\":{\\"text\\":\\"Samuel to study the writings of philosophers who preceded Aristotle because the works of the latter are \\\\\\"sufficient by themselves and [superior] to all that were written before them. 
His intellect, Aristotle\'s is the extreme limit of human intellect, apart from him upon whom the divine emanation has flowed forth to such an extent that they reach the level of prophecy, there being no level higher\\\\\\".[183]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":204,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"c2892882-5619-5283-a785-c1701168b0c9\\",\\"score\\":0.476796509218425,\\"metadata\\":{\\"text\\":\\"As Plato\'s disciple Aristotle was rather critical concerning democracy and, following the outline of certain ideas from Plato\'s Statesman, he developed a coherent theory of integrating various forms of power into a so-called mixed state:\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":150,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"91f7b4b9-700c-581c-bac0-028515f34bae\\",\\"score\\":0.47289261221885703,\\"metadata\\":{\\"text\\":\\"Aristotle suggested that the reason for anything coming about can be attributed to four different types of simultaneously active factors. 
His term aitia is traditionally translated as \\\\\\"cause\\\\\\", but it does not always refer to temporal sequence; it might be better translated as \\\\\\"explanation\\\\\\", but the traditional rendering will be employed here.[48][49]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":68,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"195a8d75-0627-5531-bef5-5354cbfd8bdf\\",\\"score\\":0.472823365712862,\\"metadata\\":{\\"text\\":\\"Byzantine scholars\\\\nSee also: Commentaries on Aristotle and Byzantine Aristotelianism\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":189,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e639d635-0587-59dc-b038-21633a6e025d\\",\\"score\\":0.47108021378517195,\\"metadata\\":{\\"text\\":\\"The order of the books (or the teachings from which they are composed) is not certain, but this list was derived from analysis of Aristotle\'s writings. It goes from the basics, the analysis of simple terms in the Categories, the analysis of propositions and their elementary relations in On Interpretation, to the study of more complex forms, namely, syllogisms (in the Analytics)[29][30] and dialectics (in the Topics and Sophistical Refutations). 
The first three treatises form the core of the logical theory\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":27,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"7f0b2712-d35d-5ceb-bacb-04b3916a9cc2\\",\\"score\\":0.46897770371966696,\\"metadata\\":{\\"text\\":\\"Aristotle believed the chain of thought, which ends in recollection of certain impressions, was connected systematically in relationships such as similarity, contrast, and contiguity, described in his laws of association. Aristotle believed that past experiences are hidden within the mind. A force operates to awaken the hidden material to bring up the actual experience. According to Aristotle, association is the power innate in a mental state, which operates upon the unexpressed remains of former\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":123,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ed4d0d2c-73dc-5baa-8351-065482dbdf9b\\",\\"score\\":0.46736327008227996,\\"metadata\\":{\\"text\\":\\"According to scholar Roger Theodore Lafferty, Dante built up the philosophy of the Comedy with the works of Aristotle as a foundation, just as the scholastics used Aristotle as the basis for their thinking. 
Dante knew Aristotle directly from Latin translations of his works and indirectly through quotations in the works of Albert Magnus.[175] Dante even acknowledges Aristotle\'s influence explicitly in the poem, specifically when Virgil justifies the Inferno\'s structure by citing the Nicomachean Ethics.[176]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":200,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e7513658-792c-5fb4-9458-162178a5b72b\\",\\"score\\":0.465195149183273,\\"metadata\\":{\\"text\\":\\"but while Aristotle was aware that new mutations or hybridizations could occur, he saw these as rare accidents. For Aristotle, accidents, like heat waves in winter, must be considered distinct from natural causes. He was thus critical of Empedocles\'s materialist theory of a \\\\\\"survival of the fittest\\\\\\" origin of living things and their organs, and ridiculed the idea that accidents could lead to orderly results.[72] To put his views into modern terms, he nowhere says that different species can have a common\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":90,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"f475f7c3-1f6a-5867-8e0c-af0054425446\\",\\"score\\":0.46305066347122203,\\"metadata\\":{\\"text\\":\\"When Sulla seized Athens in 86 BC, he seized the library and transferred it to Rome. 
There, Andronicus of Rhodes organized the texts into the first complete edition of Aristotle\'s works (and works attributed to him).[217] The Aristotelian texts we have today are based on these.[216]:\\u200a6\\u20138\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":229,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a586637b-49bb-59c8-b33a-5db313349a89\\",\\"score\\":0.462634655080161,\\"metadata\\":{\\"text\\":\\"Aristotle distinguished about 500 species of animals,[82][83] arranging these in the History of Animals in a graded scale of perfection, a nonreligious version of the scala naturae, with man at the top. His system had eleven grades of animal, from highest potential to lowest, expressed in their form at birth: the highest gave live birth to hot and wet creatures, the lowest laid cold, dry mineral-like eggs. 
Animals came above plants, and these in turn were above minerals.[84][85] He grouped what the modern\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":101,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e3fecc19-7a17-59c2-ba78-f5f2b7c98fb9\\",\\"score\\":0.46187688069435096,\\"metadata\\":{\\"text\\":\\"School of Aristotle in Mieza, Macedonia, Greece.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":10,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"baddd21e-a650-5789-b8ef-cfe8468da3dc\\",\\"score\\":0.45838105430490195,\\"metadata\\":{\\"text\\":\\"Plato\'s forms exist as universals, like the ideal form of an apple. 
For Aristotle, both matter and form belong to the individual thing (hylomorphism).\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":36,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e15c86e4-94c7-5b98-aafa-64f117bd3e75\\",\\"score\\":0.45783278597398,\\"metadata\\":{\\"text\\":\\"about it.[98] Aristotle\'s other criticism is that Plato\'s view of reincarnation entails that it is possible for a soul and its body to be mis-matched; in principle, Aristotle alleges, any soul can go with any body, according to Plato\'s theory.[99] Aristotle\'s claim that the soul is the form of a living being eliminates that possibility and thus rules out reincarnation.[100]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":113,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"feb50519-0826-5af7-9ef2-13f642f968ca\\",\\"score\\":0.45751739054711305,\\"metadata\\":{\\"text\\":\\"In On the Soul, Aristotle famously criticizes Plato\'s theory of the soul and develops his own in response. 
The first criticism is against Plato\'s view of the soul in the Timaeus that the soul takes up space and is able to come into physical contact with bodies.[96] 20th-century scholarship overwhelmingly opposed Aristotle\'s interpretation of Plato and maintained that he had misunderstood him.[97] Today\'s scholars have tended to re-assess Aristotle\'s interpretation and been more positive about it.[98]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":112,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"fa12e1f8-09bb-5b05-8606-f2c141b484be\\",\\"score\\":0.45727583765983604,\\"metadata\\":{\\"text\\":\\"kingdom is its rationality.[121] Aristotle conceived of politics as being like an organism rather than like a machine, and as a collection of parts none of which can exist without the others. 
Aristotle\'s conception of the city is organic, and he is considered one of the first to conceive of the city in this manner.[122]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":143,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"70eb9c23-af31-5dda-a721-5b6c8577d64e\\",\\"score\\":0.45682342221806704,\\"metadata\\":{\\"text\\":\\"Abelard, and John Buridan worked on Aristotelian logic.[174]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":199,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"bf6b1961-f96b-50c5-b8a9-c60a7b80bc0b\\",\\"score\\":0.452890230801411,\\"metadata\\":{\\"text\\":\\"studied and made significant contributions to \\\\\\"logic, metaphysics, mathematics, physics, biology, botany, ethics, politics, agriculture, medicine, dance, and theatre.\\\\\\"[15]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":20,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"694a4266-0e4c-5691-948e-05dacf99a30a\\",\\"score\\":0.45269417762756303,\\"metadata\\":{\\"text\\":\\"Revival\\\\nIn the slumbering centuries following the decline of the Roman Empire, Aristotle\'s vast philosophical and scientific corpus lay largely dormant in the West. 
But in the burgeoning intellectual heartland of the Abbasid Caliphate, his works underwent a remarkable revival.[159] Translated into Arabic alongside other Greek classics, Aristotle\'s logic, ethics, and natural philosophy ignited the minds of early Islamic scholars.[160]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":186,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a614876f-67b2-58e7-aaf1-12917410baff\\",\\"score\\":0.44979420603548304,\\"metadata\\":{\\"text\\":\\"Pythias, either Hermias\'s adoptive daughter or niece. They had a daughter, whom they also named Pythias. In 343 BC, Aristotle was invited by Philip II of Macedon to become the tutor to his son Alexander.[11][12]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":13,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"cd03f71f-20c8-501b-9468-cfce807ab7a7\\",\\"score\\":0.44468317095242205,\\"metadata\\":{\\"text\\":\\"Economics\\\\nMain article: Politics (Aristotle)\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":155,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8897fd3b-6eae-576b-9322-6dc057fbeb69\\",\\"score\\":0.444220006465912,\\"metadata\\":{\\"text\\":\\"not intended for publication.[210][208] Cicero\'s description of Aristotle\'s literary style as \\\\\\"a river of gold\\\\\\" must have applied to the published works, not the surviving 
notes.[Q] A major question in the history of Aristotle\'s works is how the exoteric writings were all lost, and how the ones now possessed came to be found.[212] The consensus is that Andronicus of Rhodes collected the esoteric works of Aristotle\'s school which existed in the form of smaller, separate works, distinguished them from those\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":223,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"f8e157ac-289f-57f7-9912-678686e2e358\\",\\"score\\":0.44353434443473805,\\"metadata\\":{\\"text\\":\\"In his On Generation and Corruption, Aristotle related each of the four elements proposed earlier by Empedocles, earth, water, air, and fire, to two of the four sensible qualities, hot, cold, wet, and dry. In the Empedoclean scheme, all matter was made of the four elements, in differing proportions. Aristotle\'s scheme added the heavenly aether, the divine substance of the heavenly spheres, stars and planets.[41]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":55,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"3728064f-4f14-5a96-a9ac-318385e7a9c2\\",\\"score\\":0.441104352474213,\\"metadata\\":{\\"text\\":\\"One component of Aristotle\'s theory of dreams disagrees with previously held beliefs. He claimed that dreams are not foretelling and not sent by a divine being. 
Aristotle reasoned naturalistically that instances in which dreams do resemble future events are simply coincidences.[116] Aristotle claimed that a dream is first established by the fact that the person is asleep when they experience it. If a person had an image appear for a moment after waking up or if they see something in the dark it is not\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":132,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"be401ab8-f58a-59da-a40a-deceae5b2fc2\\",\\"score\\":0.43738391622209105,\\"metadata\\":{\\"text\\":\\"Aristotle\'s Rhetoric proposes that a speaker can use three basic kinds of appeals to persuade his audience: ethos (an appeal to the speaker\'s character), pathos (an appeal to the audience\'s emotion), and logos (an appeal to logical reasoning).[130] He also categorizes rhetoric into three genres: epideictic (ceremonial speeches dealing with praise or blame), forensic (judicial speeches over guilt or innocence), and deliberative (speeches calling on an audience to make a decision on an issue).[131] Aristotle\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":164,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e663ee7c-62bc-52cf-b4a1-197ae5e91d4b\\",\\"score\\":0.43465039134025596,\\"metadata\\":{\\"text\\":\\"Instead, he practiced a different style of science: systematically gathering data, discovering patterns common to whole groups of animals, and inferring possible causal explanations from these.[78][79] This style is common in modern biology when large amounts of data become available 
in a new field, such as genomics. It does not result in the same certainty as experimental science, but it sets out testable hypotheses and constructs a narrative explanation of what is observed. In this sense, Aristotle\'s\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":95,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"dd14665e-c7e3-56af-9d31-dac1e78b5b9f\\",\\"score\\":0.43291583257412003,\\"metadata\\":{\\"text\\":\\"Aristotle examines the concepts of substance (ousia) and essence (to ti \\u00ean einai, \\\\\\"the what it was to be\\\\\\") in his Metaphysics (Book VII), and he concludes that a particular substance is a combination of both matter and form, a philosophical theory called hylomorphism. In Book VIII, he distinguishes the matter of the substance as the substratum, or the stuff of which it is composed. For example, the matter of a house is the bricks, stones, timbers, etc., or whatever constitutes the potential house, while\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":33,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"58ba0cc5-4324-5dd5-a4d4-5338cdf69b0e\\",\\"score\\":0.432026573177988,\\"metadata\\":{\\"text\\":\\"Hellenistic science\\\\nFurther information: Ancient Greek medicine\\\\nAfter Theophrastus, the Lyceum failed to produce any original work. 
Though interest in Aristotle\'s ideas survived, they were generally taken unquestioningly.[156] It is not until the age of Alexandria under the Ptolemies that advances in biology can be again found.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":183,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"a5ddeb77-7da5-561f-af07-ce2a71d1a8dd\\",\\"score\\":0.430962983908476,\\"metadata\\":{\\"text\\":\\"Medieval Europe\\\\nFurther information: Aristotelianism and Syllogism \\u00a7 Medieval\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":196,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"3932c7f5-e252-5209-a9a3-48c309e2bbf4\\",\\"score\\":0.43086590950882,\\"metadata\\":{\\"text\\":\\"\\ud835\\udc63\\\\n=\\\\n\\ud835\\udc50\\\\n\\ud835\\udc4a\\\\n\\ud835\\udf0c{\\\\\\\\displaystyle v=c{\\\\\\\\frac {W}{\\\\\\\\rho }}}\\\\nAristotle implies that in a vacuum the speed of fall would become infinite, and concludes from this apparent absurdity that a vacuum is not possible.[45][43] Opinions have varied on whether Aristotle intended to state quantitative laws. 
Henri Carteron held the \\\\\\"extreme view\\\\\\"[43] that Aristotle\'s concept of force was basically qualitative,[46] but other authors reject this.[43]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":62,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"b4134b8e-7569-5df5-93f3-9803da687c7a\\",\\"score\\":0.427144382766225,\\"metadata\\":{\\"text\\":\\"thought.[163] Philoponus questioned Aristotle\'s teaching of physics, noting its flaws and introducing the theory of impetus to explain his observations.[164]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":191,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ef9af951-f06e-5c1a-8c57-8964ee147dfa\\",\\"score\\":0.426049922344094,\\"metadata\\":{\\"text\\":\\"For Aristotle, the soul is the form of a living being. Because all beings are composites of form and matter, the form of living beings is that which endows them with what is specific to living beings, e.g. 
the ability to initiate movement (or in the case of plants, growth and transformations, which Aristotle considers types of movement).[11] In contrast to earlier philosophers, but in accordance with the Egyptians, he placed the rational soul in the heart, rather than the brain.[94] Notable is Aristotle\'s\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":110,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"71044b07-177e-5e0a-8267-941c535226f0\\",\\"score\\":0.423012614250183,\\"metadata\\":{\\"text\\":\\"exceptions, such as that sharks had a placenta like the tetrapods. To a modern biologist, the explanation, not available to Aristotle, is convergent evolution.[86] Philosophers of science have generally concluded that Aristotle was not interested in taxonomy,[87][88] but zoologists who studied this question in the early 21st century think otherwise.[89][90][91] He believed that purposive final causes guided all natural processes; this teleological view justified his observed data as an expression of formal\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":103,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}"]'}]}, + {}, + { + "run_id": ..., + "run_type": "search", + "entries": [ + {"key": "search_latency", "value": "0.47"}, + { + "key": "search_results", + "value": '["{\\"id\\":\\"c818bc72-2ac8-581b-b51a-0ca826f5f2b8\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King 
Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle\'s parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle\'s childhood has survived, he probably spent\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":8,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5f6213d1-a46b-5ed4-a15c-e95bab271621\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired Aristotle\'s philosophy,[168] and Averroes spoke of\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":194,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6c5d5f6-7fc4-5bb8-847d-44cfa16f5178\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Little is known about Aristotle\'s life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato\'s Academy in Athens and remained there until the age of 37 (c.\\u2009347 BC). 
Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum,\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":1,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"1175585b-fc58-5e44-bfcb-cb1996289936\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"after friends and relatives, and to deal with the latter as with beasts or plants\\\\\\".[13] By 335 BC, Aristotle had returned to Athens, establishing his own school there known as the Lyceum. Aristotle conducted courses at the school for the next twelve years. While in Athens, his wife Pythias died and Aristotle became involved with Herpyllis of Stagira. They had a son whom Aristotle named after his father, Nicomachus. If the Suda \\u2013 an uncritical compilation from the Middle Ages \\u2013 is accurate, he may also have\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":16,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"5fb90839-d04c-50b0-8f7f-ffc1a938c019\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Aristotle was appointed as the head of the royal Academy of Macedon. During Aristotle\'s time in the Macedonian court, he gave lessons not only to Alexander but also to two other future kings: Ptolemy and Cassander.[13] Aristotle encouraged Alexander toward eastern conquest, and Aristotle\'s own attitude towards Persia was unabashedly ethnocentric. 
In one famous example, he counsels Alexander to be \\\\\\"a leader to the Greeks and a despot to the barbarians, to look after the former as after friends and\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":15,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"8550e2b7-43f8-5a59-9c13-c9678670a2da\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"The immediate influence of Aristotle\'s work was felt as the Lyceum grew into the Peripatetic school. Aristotle\'s students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle\'s influence over Alexander the Great is seen in the latter\'s bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal about Persian customs and traditions from his\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":181,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Aristotle[A] (Greek: \\u1f08\\u03c1\\u03b9\\u03c3\\u03c4\\u03bf\\u03c4\\u03ad\\u03bb\\u03b7\\u03c2 Aristot\\u00e9l\\u0113s, pronounced [aristot\\u00e9l\\u025b\\u02d0s]; 384\\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":0,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"ed4d0d2c-73dc-5baa-8351-065482dbdf9b\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"According to scholar Roger Theodore Lafferty, Dante built up the philosophy of the Comedy with the works of Aristotle as a foundation, just as the scholastics used Aristotle as the basis for their thinking. Dante knew Aristotle directly from Latin translations of his works and indirectly through quotations in the works of Albert Magnus.[175] Dante even acknowledges Aristotle\'s influence explicitly in the poem, specifically when Virgil justifies the Inferno\'s structure by citing the Nicomachean Ethics.[176]\\",\\"title\\":\\"aristotle.txt\\",\\"user_id\\":\\"063edaf8-3e63-4cb9-a4d6-a855f36376c3\\",\\"version\\":\\"v0\\",\\"chunk_order\\":200,\\"document_id\\":\\"c9bdbac7-0ea3-5c9e-b590-018bd09b127b\\",\\"extraction_id\\":\\"472d6921-b4cd-5514-bf62-90b05c9102cb\\",\\"associatedQuery\\":\\"who is aristotle?\\"}}", "{\\"id\\":\\"e6f58828-2e6d-5eb1-94f3-efbc0b7c1699\\",\\"score\\":1.0,\\"metadata\\":{\\"text\\":\\"Aristotle was revered among medieval Muslim scholars as \\\\\\"The First Teacher\\\\\\", and among medieval Christians like Thomas Aquinas as simply \\\\\\"The Philosopher\\\\\\", while the poet Dante called him \\\\\\"the master of those who know\\\\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. 
def approx_equal(a, b, tolerance=1e-3):
    """Return True when *a* and *b* agree within a relative tolerance."""
    close_enough = math.isclose(a, b, rel_tol=tolerance)
    return close_enough
json.loads(expected_item) + + if actual_dict["id"] != expected_dict["id"]: + raise AssertionError( + f"IDs do not match: {actual_dict['id']} != {expected_dict['id']}" + ) + + if not approx_equal( + actual_dict["score"], expected_dict["score"], tolerance=1e-2 + ): + raise AssertionError( + f"Scores do not match: {actual_dict['score']} != {expected_dict['score']}" + ) + + if actual_dict["metadata"] != expected_dict["metadata"]: + raise AssertionError( + f"Metadata does not match: {actual_dict['metadata']} != {expected_dict['metadata']}" + ) + + return True + + +def test_ingestion_success(wrapper): + """Test the initial successful ingestion process.""" + result = wrapper.ingest_sample_file() + expected_payload = { + "processed_documents": [ + "Document 'aristotle.txt' processed successfully." + ], + "failed_documents": [], + "skipped_documents": [], + } + for key in expected_payload: + assert key in result + assert len(result[key]) == len(expected_payload[key]) + for i, value in enumerate(result[key]): + assert value == expected_payload[key][i] + print("Initial ingestion test passed successfully.") + + +def test_full_ingestion_success(wrapper): + """Test the initial successful ingestion process.""" + result = wrapper.ingest_sample_files() + + expected_processed = { + "Document 'lyft_2021.pdf' processed successfully.", + "Document 'uber_2021.pdf' processed successfully.", + "Document 'pg_essay_3.html' processed successfully.", + "Document 'pg_essay_2.html' processed successfully.", + "Document 'pg_essay_4.html' processed successfully.", + "Document 'got.txt' processed successfully.", + "Document 'pg_essay_5.html' processed successfully.", + "Document 'pg_essay_1.html' processed successfully.", + } + expected_failed = set() + expected_skipped = { + "Document 'aristotle.txt' skipped since it already exists." 
+ } + + assert set(result["processed_documents"]) == expected_processed + assert set(result["failed_documents"]) == expected_failed + assert set(result["skipped_documents"]) == expected_skipped + + assert len(result["processed_documents"]) == len(expected_processed) + assert len(result["failed_documents"]) == len(expected_failed) + assert len(result["skipped_documents"]) == len(expected_skipped) + + print("Initial ingestion test passed successfully.") + + +def test_ingestion_failure(wrapper): + """Test the subsequent ingestion process that should fail.""" + try: + wrapper.ingest_sample_file() + raise AssertionError("Expected an exception, but none was raised.") + except Exception as e: + assert ( + str(e) + == "Document with ID c9bdbac7-0ea3-5c9e-b590-018bd09b127b was already successfully processed." + ) + print("Subsequent ingestion test passed: Expected error was raised.") + + +def test_logs(wrapper, expected_length): + """Test the logging functionality.""" + logs = wrapper.logs() + assert len(logs) == expected_length + log = logs[0] + expected_log = expected_logs[expected_length - 1] + + assert log["run_id"] is not None + assert log["run_type"] == expected_log["run_type"] + assert len(log["entries"]) == len(expected_log["entries"]) + + for i in range(len(log["entries"])): + entry = log["entries"][i] + print("entry: ", entry) + expected_entry = None + for expected_entry in expected_log["entries"]: + if expected_entry["key"] == entry["key"]: + break + if expected_entry is None: + raise AssertionError(f"Unexpected entry: {entry}") + print("expected_entry: ", expected_entry) + + if "latency" in entry["key"]: + continue + elif "search_results" == entry["key"]: + assert compare_search_results( + entry["value"], expected_entry["value"] + ) + else: + assert entry["key"] == entry["key"] + assert entry["value"] == expected_entry["value"] + print("Logs test passed.") + + +def test_vector_search( + wrapper, query, expected_scores, do_hybrid_search=False, search_limit=10 +): 
+ """Test search functionality with given parameters.""" + search_results = wrapper.search( + query, do_hybrid_search=do_hybrid_search, search_limit=search_limit + ) + assert "vector_search_results" in search_results + scores = [ + result["score"] for result in search_results["vector_search_results"] + ] + if expected_scores: + assert len(scores) == len(expected_scores) + assert all(approx_equal(a, b) for a, b in zip(scores, expected_scores)) + print( + f"Search test passed for query: '{query}', hybrid: {do_hybrid_search}, limit: {search_limit}" + ) + return search_results["vector_search_results"] + + +def test_documents_overview(wrapper): + """Test the documents_overview functionality.""" + documents_overview = wrapper.documents_overview() + assert len(documents_overview) == 1 + doc_info = documents_overview[0] + + assert isinstance(doc_info.document_id, UUID) + assert doc_info.document_id == UUID("c9bdbac7-0ea3-5c9e-b590-018bd09b127b") + assert doc_info.version == "v0" + assert doc_info.size_in_bytes == 73353 + assert doc_info.metadata == { + "title": "aristotle.txt", + "user_id": "063edaf8-3e63-4cb9-a4d6-a855f36376c3", + } + assert doc_info.status.value == "success" + assert doc_info.user_id == UUID("063edaf8-3e63-4cb9-a4d6-a855f36376c3") + assert doc_info.title == "aristotle.txt" + assert isinstance(doc_info.created_at, datetime) + assert isinstance(doc_info.updated_at, datetime) + assert doc_info.created_at.tzinfo == timezone.utc + assert doc_info.updated_at.tzinfo == timezone.utc + + +def test_users_overview(wrapper): + """Test the users_overview functionality.""" + users_overview = wrapper.users_overview() + assert len(users_overview) == 1 + user_stats = users_overview[0] + + assert isinstance(user_stats.user_id, UUID) + assert user_stats.user_id == UUID("063edaf8-3e63-4cb9-a4d6-a855f36376c3") + assert user_stats.num_files == 1 + assert user_stats.total_size_in_bytes == 73353 + assert len(user_stats.document_ids) == 1 + assert user_stats.document_ids[0] == 
UUID( + "c9bdbac7-0ea3-5c9e-b590-018bd09b127b" + ) + + print("Users overview test passed successfully.") + + +def test_document_chunks(wrapper): + """Test the document_chunks functionality.""" + document_chunks = wrapper.document_chunks( + "c9bdbac7-0ea3-5c9e-b590-018bd09b127b" + ) + assert len(document_chunks) == 233 + first_chunk = document_chunks[0] + assert ( + first_chunk["text"] + == "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science." + ) + assert first_chunk["chunk_order"] == 0 + print("Document chunks test passed successfully.") + + +def main(): + wrapper = R2RExecutionWrapper(client_mode=False) + + # Test ingestion + test_ingestion_success(wrapper) + + # Test logs + test_logs(wrapper, expected_length=1) + + # Test the document overview table + _ = test_documents_overview(wrapper) + + # Test the users overview table + test_users_overview(wrapper) + + # Test the document chunks method + test_document_chunks(wrapper) + + # Test subsequent ingestion (expecting failure) + test_ingestion_failure(wrapper) + + # Test regular search + regular_expected_scores = [ + 0.7737913131713869, + 0.669298529624939, + 0.652687707703574, + 0.636050164699554, + 0.624127291194959, + 0.619364976882935, + 0.6177915291003779, + 0.606354117393494, + 0.601802307421038, + 0.595915484915322, + ] + _ = test_vector_search( + wrapper, "who is aristotle?", regular_expected_scores + ) + test_logs(wrapper, expected_length=2) + + # Test search with larger limit + large_filter_results = test_vector_search( + wrapper, "who is aristotle?", None, search_limit=100 + ) + 
assert len(large_filter_results) == 100 + assert approx_equal( + large_filter_results[0]["score"], regular_expected_scores[0] + ) + # test_logs(wrapper, expected_length=3) + + # Test hybrid search + hybrid_expected_zero_result = "Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent" + hybrid_expected_scores = [1] * 10 + hybrid_results = test_vector_search( + wrapper, + "who is aristotle?", + hybrid_expected_scores, + do_hybrid_search=True, + ) + assert hybrid_results[0]["metadata"]["text"] == hybrid_expected_zero_result + # test_logs(wrapper, expected_length=4) + + test_full_ingestion_success(wrapper) + + +if __name__ == "__main__": + main() diff --git a/R2R/r2r/integrations/__init__.py b/R2R/r2r/integrations/__init__.py new file mode 100755 index 00000000..0830f40c --- /dev/null +++ b/R2R/r2r/integrations/__init__.py @@ -0,0 +1,3 @@ +from .serper import SerperClient + +__all__ = ["SerperClient"] diff --git a/R2R/r2r/integrations/serper.py b/R2R/r2r/integrations/serper.py new file mode 100755 index 00000000..14333d1a --- /dev/null +++ b/R2R/r2r/integrations/serper.py @@ -0,0 +1,103 @@ +import http.client +import json +import os + + +# TODO - Move process json to dedicated data processing module +def process_json(json_object, indent=0): + """ + Recursively traverses the JSON object (dicts and lists) to create an unstructured text blob. 
+ """ + text_blob = "" + if isinstance(json_object, dict): + for key, value in json_object.items(): + padding = " " * indent + if isinstance(value, (dict, list)): + text_blob += ( + f"{padding}{key}:\n{process_json(value, indent + 1)}" + ) + else: + text_blob += f"{padding}{key}: {value}\n" + elif isinstance(json_object, list): + for index, item in enumerate(json_object): + padding = " " * indent + if isinstance(item, (dict, list)): + text_blob += f"{padding}Item {index + 1}:\n{process_json(item, indent + 1)}" + else: + text_blob += f"{padding}Item {index + 1}: {item}\n" + return text_blob + + +# TODO - Introduce abstract "Integration" ABC. +class SerperClient: + def __init__(self, api_base: str = "google.serper.dev") -> None: + api_key = os.getenv("SERPER_API_KEY") + if not api_key: + raise ValueError( + "Please set the `SERPER_API_KEY` environment variable to use `SerperClient`." + ) + + self.api_base = api_base + self.headers = { + "X-API-KEY": api_key, + "Content-Type": "application/json", + } + + @staticmethod + def _extract_results(result_data: dict) -> list: + formatted_results = [] + + for key, value in result_data.items(): + # Skip searchParameters as it's not a result entry + if key == "searchParameters": + continue + + # Handle 'answerBox' as a single item + if key == "answerBox": + value["type"] = key # Add the type key to the dictionary + formatted_results.append(value) + # Handle lists of results + elif isinstance(value, list): + for item in value: + item["type"] = key # Add the type key to the dictionary + formatted_results.append(item) + # Handle 'peopleAlsoAsk' and potentially other single item formats + elif isinstance(value, dict): + value["type"] = key # Add the type key to the dictionary + formatted_results.append(value) + + return formatted_results + + # TODO - Add explicit typing for the return value + def get_raw(self, query: str, limit: int = 10) -> list: + connection = http.client.HTTPSConnection(self.api_base) + payload = json.dumps({"q": 
query, "num_outputs": limit}) + connection.request("POST", "/search", payload, self.headers) + response = connection.getresponse() + data = response.read() + json_data = json.loads(data.decode("utf-8")) + return SerperClient._extract_results(json_data) + + @staticmethod + def construct_context(results: list) -> str: + # Organize results by type + organized_results = {} + for result in results: + result_type = result.metadata.pop( + "type", "Unknown" + ) # Pop the type and use as key + if result_type not in organized_results: + organized_results[result_type] = [result.metadata] + else: + organized_results[result_type].append(result.metadata) + + context = "" + # Iterate over each result type + for result_type, items in organized_results.items(): + context += f"# {result_type} Results:\n" + for index, item in enumerate(items, start=1): + # Process each item under the current type + context += f"Item {index}:\n" + context += process_json(item) + "\n" + + return context diff --git a/R2R/r2r/main/__init__.py b/R2R/r2r/main/__init__.py new file mode 100755 index 00000000..55a828d6 --- /dev/null +++ b/R2R/r2r/main/__init__.py @@ -0,0 +1,54 @@ +from .abstractions import R2RPipelines, R2RProviders +from .api.client import R2RClient +from .api.requests import ( + R2RAnalyticsRequest, + R2RDeleteRequest, + R2RDocumentChunksRequest, + R2RDocumentsOverviewRequest, + R2REvalRequest, + R2RIngestFilesRequest, + R2RRAGRequest, + R2RSearchRequest, + R2RUpdateFilesRequest, + R2RUpdatePromptRequest, + R2RUsersOverviewRequest, +) +from .app import R2RApp +from .assembly.builder import R2RBuilder +from .assembly.config import R2RConfig +from .assembly.factory import ( + R2RPipeFactory, + R2RPipelineFactory, + R2RProviderFactory, +) +from .assembly.factory_extensions import R2RPipeFactoryWithMultiSearch +from .engine import R2REngine +from .execution import R2RExecutionWrapper +from .r2r import R2R + +__all__ = [ + "R2R", + "R2RPipelines", + "R2RProviders", + "R2RUpdatePromptRequest", + 
"R2RIngestFilesRequest", + "R2RUpdateFilesRequest", + "R2RSearchRequest", + "R2RRAGRequest", + "R2REvalRequest", + "R2RDeleteRequest", + "R2RAnalyticsRequest", + "R2RUsersOverviewRequest", + "R2RDocumentsOverviewRequest", + "R2RDocumentChunksRequest", + "R2REngine", + "R2RExecutionWrapper", + "R2RConfig", + "R2RClient", + "R2RPipeFactory", + "R2RPipelineFactory", + "R2RProviderFactory", + "R2RPipeFactoryWithMultiSearch", + "R2RBuilder", + "R2RApp", +] diff --git a/R2R/r2r/main/abstractions.py b/R2R/r2r/main/abstractions.py new file mode 100755 index 00000000..3622b22d --- /dev/null +++ b/R2R/r2r/main/abstractions.py @@ -0,0 +1,58 @@ +from typing import Optional + +from pydantic import BaseModel + +from r2r.base import ( + AsyncPipe, + EmbeddingProvider, + EvalProvider, + KGProvider, + LLMProvider, + PromptProvider, + VectorDBProvider, +) +from r2r.pipelines import ( + EvalPipeline, + IngestionPipeline, + RAGPipeline, + SearchPipeline, +) + + +class R2RProviders(BaseModel): + vector_db: Optional[VectorDBProvider] + embedding: Optional[EmbeddingProvider] + llm: Optional[LLMProvider] + prompt: Optional[PromptProvider] + eval: Optional[EvalProvider] + kg: Optional[KGProvider] + + class Config: + arbitrary_types_allowed = True + + +class R2RPipes(BaseModel): + parsing_pipe: Optional[AsyncPipe] + embedding_pipe: Optional[AsyncPipe] + vector_storage_pipe: Optional[AsyncPipe] + vector_search_pipe: Optional[AsyncPipe] + rag_pipe: Optional[AsyncPipe] + streaming_rag_pipe: Optional[AsyncPipe] + eval_pipe: Optional[AsyncPipe] + kg_pipe: Optional[AsyncPipe] + kg_storage_pipe: Optional[AsyncPipe] + kg_agent_search_pipe: Optional[AsyncPipe] + + class Config: + arbitrary_types_allowed = True + + +class R2RPipelines(BaseModel): + eval_pipeline: EvalPipeline + ingestion_pipeline: IngestionPipeline + search_pipeline: SearchPipeline + rag_pipeline: RAGPipeline + streaming_rag_pipeline: RAGPipeline + + class Config: + arbitrary_types_allowed = True diff --git 
class R2RHTTPError(Exception):
    """HTTP-level failure raised for non-2xx responses from the R2R API."""

    def __init__(self, status_code, error_type, message):
        self.status_code = status_code
        self.error_type = error_type
        self.message = message
        super().__init__(f"[{status_code}] {error_type}: {message}")


def handle_request_error(response):
    """Raise :class:`R2RHTTPError` when *response* carries an error status.

    The server reports failures as ``{"detail": ...}`` JSON bodies; the raw
    response text is used as a fallback when the body is not valid JSON.
    """
    if response.status_code < 400:
        return

    try:
        error_content = response.json()
        if isinstance(error_content, dict) and "detail" in error_content:
            detail = error_content["detail"]
            if isinstance(detail, dict):
                # Structured server error: message + error_type fields.
                message = detail.get("message", str(response.text))
                error_type = detail.get("error_type", "UnknownError")
            else:
                message, error_type = str(detail), "HTTPException"
        else:
            message, error_type = str(error_content), "UnknownError"
    except json.JSONDecodeError:
        message, error_type = response.text, "UnknownError"

    raise R2RHTTPError(
        status_code=response.status_code,
        error_type=error_type,
        message=message,
    )
monitor: + return func(*args, **kwargs) + + result = None + exception = None + + def run_func(): + nonlocal result, exception + try: + result = func(*args, **kwargs) + except Exception as e: + exception = e + + thread = threading.Thread(target=run_func) + thread.start() + + dots = [".", "..", "..."] + i = 0 + while thread.is_alive(): + print(f"\rRequesting{dots[i % 3]}", end="", flush=True) + i += 1 + time.sleep(0.5) + + thread.join() + + print("\r", end="", flush=True) + + if exception: + raise exception + return result + + return wrapper + + +class R2RClient: + def __init__(self, base_url: str, prefix: str = "/v1"): + self.base_url = base_url + self.prefix = prefix + + def _make_request(self, method, endpoint, **kwargs): + url = f"{self.base_url}{self.prefix}/{endpoint}" + response = requests.request(method, url, **kwargs) + handle_request_error(response) + return response.json() + + def health(self) -> dict: + return self._make_request("GET", "health") + + def update_prompt( + self, + name: str = "default_system", + template: Optional[str] = None, + input_types: Optional[dict] = None, + ) -> dict: + request = R2RUpdatePromptRequest( + name=name, template=template, input_types=input_types + ) + return self._make_request( + "POST", "update_prompt", json=json.loads(request.json()) + ) + + @monitor_request + def ingest_files( + self, + file_paths: list[str], + metadatas: Optional[list[dict]] = None, + document_ids: Optional[list[Union[uuid.UUID, str]]] = None, + versions: Optional[list[str]] = None, + ) -> dict: + all_file_paths = [] + + for path in file_paths: + if os.path.isdir(path): + for root, _, files in os.walk(path): + all_file_paths.extend( + os.path.join(root, file) for file in files + ) + else: + all_file_paths.append(path) + + files_to_upload = [ + ( + "files", + ( + os.path.basename(file), + open(file, "rb"), + "application/octet-stream", + ), + ) + for file in all_file_paths + ] + request = R2RIngestFilesRequest( + metadatas=metadatas, + document_ids=( 
+ [str(ele) for ele in document_ids] if document_ids else None + ), + versions=versions, + ) + try: + return self._make_request( + "POST", + "ingest_files", + data={ + k: json.dumps(v) + for k, v in json.loads(request.json()).items() + }, + files=files_to_upload, + ) + finally: + for _, file_tuple in files_to_upload: + file_tuple[1].close() + + @monitor_request + def update_files( + self, + file_paths: list[str], + document_ids: list[str], + metadatas: Optional[list[dict]] = None, + ) -> dict: + request = R2RUpdateFilesRequest( + metadatas=metadatas, + document_ids=document_ids, + ) + with ExitStack() as stack: + return self._make_request( + "POST", + "update_files", + data={ + k: json.dumps(v) + for k, v in json.loads(request.json()).items() + }, + files=[ + ( + "files", + ( + path.split("/")[-1], + stack.enter_context(open(path, "rb")), + "application/octet-stream", + ), + ) + for path in file_paths + ], + ) + + def search( + self, + query: str, + use_vector_search: bool = True, + search_filters: Optional[dict[str, Any]] = {}, + search_limit: int = 10, + do_hybrid_search: bool = False, + use_kg_search: bool = False, + kg_agent_generation_config: Optional[dict] = None, + ) -> dict: + request = R2RSearchRequest( + query=query, + vector_search_settings={ + "use_vector_search": use_vector_search, + "search_filters": search_filters or {}, + "search_limit": search_limit, + "do_hybrid_search": do_hybrid_search, + }, + kg_search_settings={ + "use_kg_search": use_kg_search, + "agent_generation_config": kg_agent_generation_config, + }, + ) + return self._make_request( + "POST", "search", json=json.loads(request.json()) + ) + + def rag( + self, + query: str, + use_vector_search: bool = True, + search_filters: Optional[dict[str, Any]] = {}, + search_limit: int = 10, + do_hybrid_search: bool = False, + use_kg_search: bool = False, + kg_agent_generation_config: Optional[dict] = None, + rag_generation_config: Optional[dict] = None, + ) -> dict: + request = R2RRAGRequest( + 
query=query, + vector_search_settings={ + "use_vector_search": use_vector_search, + "search_filters": search_filters or {}, + "search_limit": search_limit, + "do_hybrid_search": do_hybrid_search, + }, + kg_search_settings={ + "use_kg_search": use_kg_search, + "agent_generation_config": kg_agent_generation_config, + }, + rag_generation_config=rag_generation_config, + ) + + if rag_generation_config and rag_generation_config.get( + "stream", False + ): + return self._stream_rag_sync(request) + else: + return self._make_request( + "POST", "rag", json=json.loads(request.json()) + ) + + async def _stream_rag( + self, rag_request: R2RRAGRequest + ) -> AsyncGenerator[str, None]: + url = f"{self.base_url}{self.prefix}/rag" + async with httpx.AsyncClient() as client: + async with client.stream( + "POST", url, json=json.loads(rag_request.json()) + ) as response: + handle_request_error(response) + async for chunk in response.aiter_text(): + yield chunk + + def _stream_rag_sync( + self, rag_request: R2RRAGRequest + ) -> Generator[str, None, None]: + async def run_async_generator(): + async for chunk in self._stream_rag(rag_request): + yield chunk + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + async_gen = run_async_generator() + + try: + while True: + chunk = loop.run_until_complete(async_gen.__anext__()) + yield chunk + except StopAsyncIteration: + pass + finally: + loop.close() + + def delete( + self, keys: list[str], values: list[Union[bool, int, str]] + ) -> dict: + request = R2RDeleteRequest(keys=keys, values=values) + return self._make_request( + "DELETE", "delete", json=json.loads(request.json()) + ) + + def logs(self, log_type_filter: Optional[str] = None) -> dict: + request = R2RLogsRequest(log_type_filter=log_type_filter) + return self._make_request( + "GET", "logs", json=json.loads(request.json()) + ) + + def app_settings(self) -> dict: + return self._make_request("GET", "app_settings") + + def analytics(self, filter_criteria: dict, 
analysis_types: dict) -> dict: + request = R2RAnalyticsRequest( + filter_criteria=filter_criteria, analysis_types=analysis_types + ) + return self._make_request( + "GET", "analytics", json=json.loads(request.json()) + ) + + def users_overview( + self, user_ids: Optional[list[uuid.UUID]] = None + ) -> dict: + request = R2RUsersOverviewRequest(user_ids=user_ids) + return self._make_request( + "GET", "users_overview", json=json.loads(request.json()) + ) + + def documents_overview( + self, + document_ids: Optional[list[str]] = None, + user_ids: Optional[list[str]] = None, + ) -> dict: + request = R2RDocumentsOverviewRequest( + document_ids=( + [uuid.UUID(did) for did in document_ids] + if document_ids + else None + ), + user_ids=( + [uuid.UUID(uid) for uid in user_ids] if user_ids else None + ), + ) + return self._make_request( + "GET", "documents_overview", json=json.loads(request.json()) + ) + + def document_chunks(self, document_id: str) -> dict: + request = R2RDocumentChunksRequest(document_id=document_id) + return self._make_request( + "GET", "document_chunks", json=json.loads(request.json()) + ) + + def inspect_knowledge_graph(self, limit: int = 100) -> str: + request = R2RPrintRelationshipsRequest(limit=limit) + return self._make_request( + "POST", "inspect_knowledge_graph", json=json.loads(request.json()) + ) + + +if __name__ == "__main__": + client = R2RClient(base_url="http://localhost:8000") + fire.Fire(client) diff --git a/R2R/r2r/main/api/requests.py b/R2R/r2r/main/api/requests.py new file mode 100755 index 00000000..5c63ab82 --- /dev/null +++ b/R2R/r2r/main/api/requests.py @@ -0,0 +1,79 @@ +import uuid +from typing import Optional, Union + +from pydantic import BaseModel + +from r2r.base import AnalysisTypes, FilterCriteria + + +class R2RUpdatePromptRequest(BaseModel): + name: str + template: Optional[str] = None + input_types: Optional[dict[str, str]] = {} + + +class R2RIngestFilesRequest(BaseModel): + document_ids: Optional[list[uuid.UUID]] = None + 
class R2RUpdateFilesRequest(BaseModel):
    """Payload for /update_files: replacement metadata for existing docs."""

    metadatas: Optional[list[dict]] = None
    document_ids: Optional[list[uuid.UUID]] = None


class R2RSearchRequest(BaseModel):
    """Payload for /search."""

    query: str
    vector_search_settings: Optional[dict] = None
    kg_search_settings: Optional[dict] = None


class R2RRAGRequest(BaseModel):
    """Payload for /rag: search settings plus generation config."""

    query: str
    vector_search_settings: Optional[dict] = None
    kg_search_settings: Optional[dict] = None
    rag_generation_config: Optional[dict] = None


class R2REvalRequest(BaseModel):
    """Payload for /evaluate: a query with its context and completion."""

    query: str
    context: str
    completion: str


class R2RDeleteRequest(BaseModel):
    """Payload for /delete: parallel lists of metadata keys and values."""

    keys: list[str]
    values: list[Union[bool, int, str]]


class R2RAnalyticsRequest(BaseModel):
    """Payload for /analytics."""

    filter_criteria: FilterCriteria
    analysis_types: AnalysisTypes


class R2RUsersOverviewRequest(BaseModel):
    """Payload for /users_overview; None selects all users."""

    user_ids: Optional[list[uuid.UUID]] = None


class R2RDocumentsOverviewRequest(BaseModel):
    """Payload for /documents_overview; None selects all."""

    document_ids: Optional[list[uuid.UUID]] = None
    user_ids: Optional[list[uuid.UUID]] = None


class R2RDocumentChunksRequest(BaseModel):
    """Payload for /document_chunks."""

    document_id: uuid.UUID


class R2RLogsRequest(BaseModel):
    """Payload for /logs.

    BUG FIX: ``log_type_filter`` previously defaulted to the one-element
    tuple ``(None,)`` (a stray trailing comma), which is neither a ``str``
    nor ``None``; the default is now plain ``None``.
    """

    log_type_filter: Optional[str] = None
    max_runs_requested: int = 100


class R2RPrintRelationshipsRequest(BaseModel):
    """Payload for /inspect_knowledge_graph."""

    limit: int = 100


class R2RExtractionRequest(BaseModel):
    """Entity types and relations to target during extraction."""

    entity_types: list[str]
    relations: list[str]
class BaseRouter:
    """Shared plumbing for the API routers: run tracking, a uniform result
    envelope, and error-to-HTTP translation."""

    def __init__(self, engine):
        self.engine = engine
        self.router = APIRouter()

    def base_endpoint(self, func):
        """Decorate *func* so each call executes inside a managed run.

        Successful results are wrapped as ``{"results": ...}`` (streaming
        responses pass through untouched); R2RExceptions map onto their own
        status codes, and any other exception is logged against the run and
        surfaced as HTTP 500.
        """

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with manage_run(
                self.engine.run_manager, func.__name__
            ) as run_id:
                try:
                    results = await func(*args, **kwargs)
                except R2RException as re:
                    raise HTTPException(
                        status_code=re.status_code,
                        detail={
                            "message": re.message,
                            "error_type": type(re).__name__,
                        },
                    )
                except Exception as e:
                    # Derive the pipeline name from the endpoint name so the
                    # failure can be attributed in the run log; fall back to
                    # "unknown" when no such pipeline exists.
                    pipeline_name = f"{func.__name__.split('_')[0]}_pipeline"
                    pipeline = getattr(
                        self.engine.pipelines, pipeline_name, None
                    )
                    pipeline_type = getattr(
                        pipeline, "pipeline_type", "unknown"
                    )

                    await self.engine.logging_connection.log(
                        log_id=run_id,
                        key="pipeline_type",
                        value=pipeline_type,
                        is_info_log=True,
                    )
                    await self.engine.logging_connection.log(
                        log_id=run_id,
                        key="error",
                        value=str(e),
                        is_info_log=False,
                    )
                    logger.error(f"{func.__name__}() - \n\n{str(e)})")
                    raise HTTPException(
                        status_code=500,
                        detail={
                            "message": f"An error occurred during {func.__name__}",
                            "error": str(e),
                            "error_type": type(e).__name__,
                        },
                    ) from e

                if isinstance(results, StreamingResponse):
                    return results
                return {"results": results}

        return wrapper

    @classmethod
    def build_router(cls, engine):
        """Construct a router instance for *engine* and return its router."""
        return cls(engine).router
class IngestionRouter(BaseRouter):
    """HTTP routes for ingesting new files and updating existing ones."""

    def __init__(self, engine: R2REngine):
        super().__init__(engine)
        self.setup_routes()

    def setup_routes(self):
        router = self.router

        @router.post("/ingest_files")
        @self.base_endpoint
        async def ingest_files_app(
            files: list[UploadFile] = File(...),
            request: R2RIngestFilesRequest = Depends(
                IngestionService.parse_ingest_files_form_data
            ),
        ):
            """Ingest the uploaded files into the engine."""
            return await self.engine.aingest_files(
                files=files,
                metadatas=request.metadatas,
                document_ids=request.document_ids,
                versions=request.versions,
            )

        @router.post("/update_files")
        @self.base_endpoint
        async def update_files_app(
            files: list[UploadFile] = File(...),
            request: R2RUpdateFilesRequest = Depends(
                IngestionService.parse_update_files_form_data
            ),
        ):
            """Replace previously ingested documents with new file contents."""
            return await self.engine.aupdate_files(
                files=files,
                metadatas=request.metadatas,
                document_ids=request.document_ids,
            )


class ManagementRouter(BaseRouter):
    """Administrative routes: prompts, logs, analytics, and overviews."""

    def __init__(self, engine: R2REngine):
        super().__init__(engine)
        self.setup_routes()

    def setup_routes(self):
        router = self.router

        @router.get("/health")
        async def health_check():
            # Deliberately not wrapped in base_endpoint: plain liveness probe.
            return {"response": "ok"}

        @router.post("/update_prompt")
        @self.base_endpoint
        async def update_prompt_app(request: R2RUpdatePromptRequest):
            """Update a stored prompt template."""
            return await self.engine.aupdate_prompt(
                request.name, request.template, request.input_types
            )

        @router.post("/logs")
        @router.get("/logs")
        @self.base_endpoint
        async def get_logs_app(request: R2RLogsRequest):
            """Fetch run logs, optionally filtered by log type."""
            return await self.engine.alogs(
                log_type_filter=request.log_type_filter,
                max_runs_requested=request.max_runs_requested,
            )

        @router.post("/analytics")
        @router.get("/analytics")
        @self.base_endpoint
        async def get_analytics_app(request: R2RAnalyticsRequest):
            """Run the requested analytics over filtered data."""
            return await self.engine.aanalytics(
                filter_criteria=request.filter_criteria,
                analysis_types=request.analysis_types,
            )

        @router.post("/users_overview")
        @router.get("/users_overview")
        @self.base_endpoint
        async def get_users_overview_app(request: R2RUsersOverviewRequest):
            """Summarize activity for the given (or all) users."""
            return await self.engine.ausers_overview(user_ids=request.user_ids)

        @router.delete("/delete")
        @self.base_endpoint
        async def delete_app(request: R2RDeleteRequest):
            """Delete entries matching the provided key/value filters."""
            return await self.engine.adelete(
                keys=request.keys, values=request.values
            )

        @router.post("/documents_overview")
        @router.get("/documents_overview")
        @self.base_endpoint
        async def get_documents_overview_app(
            request: R2RDocumentsOverviewRequest,
        ):
            """Summarize stored documents, optionally filtered."""
            return await self.engine.adocuments_overview(
                document_ids=request.document_ids, user_ids=request.user_ids
            )

        @router.post("/document_chunks")
        @router.get("/document_chunks")
        @self.base_endpoint
        async def get_document_chunks_app(request: R2RDocumentChunksRequest):
            """Return the chunks belonging to one document."""
            return await self.engine.adocument_chunks(request.document_id)

        @router.post("/inspect_knowledge_graph")
        @router.get("/inspect_knowledge_graph")
        @self.base_endpoint
        async def inspect_knowledge_graph(
            request: R2RPrintRelationshipsRequest,
        ):
            """Print up to `limit` knowledge-graph relationships."""
            return await self.engine.inspect_knowledge_graph(
                limit=request.limit
            )

        @router.get("/app_settings")
        @self.base_endpoint
        async def get_app_settings_app():
            """Return the engine's application settings."""
            return await self.engine.aapp_settings()

        @router.get("/openapi_spec")
        @self.base_endpoint
        def get_openapi_spec_app():
            """Return the generated OpenAPI specification."""
            return self.engine.openapi_spec()


def create_management_router(engine: R2REngine):
    """Convenience wrapper mirroring ``BaseRouter.build_router``."""
    return ManagementRouter(engine).router
class RetrievalRouter(BaseRouter):
    """Routes for vector / knowledge-graph search, RAG, and evaluation."""

    def __init__(self, engine: R2REngine):
        super().__init__(engine)
        self.setup_routes()

    @staticmethod
    def _inflate_agent_generation_config(kg_search_settings):
        """Replace a raw ``agent_generation_config`` dict with a
        ``GenerationConfig`` instance, in place, when one is present.

        Safe to call with ``None`` (the request field's default).
        """
        # BUG FIX: the previous inline checks did
        # `"agent_generation_config" in request.kg_search_settings` without a
        # None guard, raising TypeError whenever kg_search_settings was
        # omitted from the request (its default is None).
        if (
            kg_search_settings
            and "agent_generation_config" in kg_search_settings
        ):
            kg_search_settings["agent_generation_config"] = GenerationConfig(
                **(kg_search_settings["agent_generation_config"] or {})
            )

    def setup_routes(self):
        @self.router.post("/search")
        @self.base_endpoint
        async def search_app(request: R2RSearchRequest):
            """Run a combined vector / KG search."""
            self._inflate_agent_generation_config(request.kg_search_settings)
            return await self.engine.asearch(
                query=request.query,
                vector_search_settings=VectorSearchSettings(
                    **(request.vector_search_settings or {})
                ),
                kg_search_settings=KGSearchSettings(
                    **(request.kg_search_settings or {})
                ),
            )

        @self.router.post("/rag")
        @self.base_endpoint
        async def rag_app(request: R2RRAGRequest):
            """Run retrieval-augmented generation, streaming when requested."""
            self._inflate_agent_generation_config(request.kg_search_settings)
            response = await self.engine.arag(
                query=request.query,
                vector_search_settings=VectorSearchSettings(
                    **(request.vector_search_settings or {})
                ),
                kg_search_settings=KGSearchSettings(
                    **(request.kg_search_settings or {})
                ),
                rag_generation_config=GenerationConfig(
                    **(request.rag_generation_config or {})
                ),
            )
            if request.rag_generation_config and request.rag_generation_config.get(
                "stream", False
            ):

                async def stream_generator():
                    async for chunk in response:
                        yield chunk

                return StreamingResponse(
                    stream_generator(), media_type="application/json"
                )
            return response

        @self.router.post("/evaluate")
        @self.base_endpoint
        async def evaluate_app(request: R2REvalRequest):
            """Evaluate a completion against its query and context."""
            return await self.engine.aevaluate(
                query=request.query,
                context=request.context,
                completion=request.completion,
            )


def create_retrieval_router(engine: R2REngine):
    """Convenience wrapper mirroring ``BaseRouter.build_router``."""
    return RetrievalRouter(engine).router
logger = logging.getLogger(__name__)
current_file_path = os.path.dirname(__file__)
configs_path = os.path.join(current_file_path, "..", "..", "..")


class PipelineType(Enum):
    """Pipelines selectable at startup; only QNA is wired up here."""

    QNA = "qna"
    WEB = "web"
    HYDE = "hyde"


def r2r_app(
    config_name: Optional[str] = "default",
    config_path: Optional[str] = None,
    client_mode: bool = False,
    base_url: Optional[str] = None,
    pipeline_type: PipelineType = PipelineType.QNA,
) -> FastAPI:
    """Build and return the FastAPI application.

    Exactly one of ``config_name`` / ``config_path`` selects the
    configuration; the selected config is validated here before the
    execution wrapper is constructed.

    Raises:
        ValueError: for unsupported pipeline types, conflicting config
            arguments, unknown config names, or a missing OPENAI_API_KEY
            when the OpenAI embedding provider is configured.
    """
    if pipeline_type != PipelineType.QNA:
        raise ValueError("Only QNA pipeline is supported in quickstart.")
    # NOTE(review): config_name defaults to "default", so a caller passing
    # only config_path must explicitly pass config_name=None (the bootstrap
    # below does) — TODO confirm this is the intended contract.
    if config_path and config_name:
        # BUG FIX: the message previously said "config", which is not a
        # parameter of this function; it is "config_path".
        raise ValueError("Cannot specify both config_path and config_name")

    if config_path:
        config = R2RConfig.from_json(config_path)
    else:
        config_name = os.getenv("CONFIG_NAME") or config_name
        if config_name not in R2RBuilder.CONFIG_OPTIONS:
            raise ValueError(f"Invalid config name: {config_name}")
        config = R2RConfig.from_json(R2RBuilder.CONFIG_OPTIONS[config_name])

    # Fail fast on a missing key rather than at first embedding call.
    if (
        config.embedding.provider == "openai"
        and "OPENAI_API_KEY" not in os.environ
    ):
        raise ValueError(
            "Must set OPENAI_API_KEY in order to initialize OpenAIEmbeddingProvider."
        )

    wrapper = R2RExecutionWrapper(
        config_name=config_name,
        config_path=config_path,
        client_mode=client_mode,
        base_url=base_url,
    )

    return wrapper.get_app()


logging.basicConfig(level=logging.INFO)

# Module-level bootstrap: resolve settings from the environment and expose
# `app` for an ASGI server (e.g. `uvicorn r2r.main.app_entry:app`).
config_name = os.getenv("CONFIG_NAME", None)
config_path = os.getenv("CONFIG_PATH", None)
if not config_path and not config_name:
    config_name = "default"
client_mode = os.getenv("CLIENT_MODE", "false").lower() == "true"
base_url = os.getenv("BASE_URL")
host = os.getenv("HOST", "0.0.0.0")
port = int(os.getenv("PORT", "8000"))
pipeline_type = os.getenv("PIPELINE_TYPE", "qna")

logger.info(f"Environment CONFIG_NAME: {config_name}")
logger.info(f"Environment CONFIG_PATH: {config_path}")
logger.info(f"Environment CLIENT_MODE: {client_mode}")
logger.info(f"Environment BASE_URL: {base_url}")
logger.info(f"Environment PIPELINE_TYPE: {pipeline_type}")

app = r2r_app(
    config_name=config_name,
    config_path=config_path,
    client_mode=client_mode,
    base_url=base_url,
    pipeline_type=PipelineType(pipeline_type),
)
os.path.join( + current_file_path, "..", "..", "examples", "configs" + ) + CONFIG_OPTIONS = { + "default": None, + "local_ollama": os.path.join(config_root, "local_ollama.json"), + "local_ollama_rerank": os.path.join( + config_root, "local_ollama_rerank.json" + ), + "neo4j_kg": os.path.join(config_root, "neo4j_kg.json"), + "local_neo4j_kg": os.path.join(config_root, "local_neo4j_kg.json"), + "postgres_logging": os.path.join(config_root, "postgres_logging.json"), + } + + @staticmethod + def _get_config(config_name): + if config_name is None: + return R2RConfig.from_json() + if config_name in R2RBuilder.CONFIG_OPTIONS: + return R2RConfig.from_json(R2RBuilder.CONFIG_OPTIONS[config_name]) + raise ValueError(f"Invalid config name: {config_name}") + + def __init__( + self, + config: Optional[R2RConfig] = None, + from_config: Optional[str] = None, + ): + if config and from_config: + raise ValueError("Cannot specify both config and config_name") + self.config = config or R2RBuilder._get_config(from_config) + self.r2r_app_override: Optional[Type[R2REngine]] = None + self.provider_factory_override: Optional[Type[R2RProviderFactory]] = ( + None + ) + self.pipe_factory_override: Optional[R2RPipeFactory] = None + self.pipeline_factory_override: Optional[R2RPipelineFactory] = None + self.vector_db_provider_override: Optional[VectorDBProvider] = None + self.embedding_provider_override: Optional[EmbeddingProvider] = None + self.eval_provider_override: Optional[EvalProvider] = None + self.llm_provider_override: Optional[LLMProvider] = None + self.prompt_provider_override: Optional[PromptProvider] = None + self.parsing_pipe_override: Optional[AsyncPipe] = None + self.embedding_pipe_override: Optional[AsyncPipe] = None + self.vector_storage_pipe_override: Optional[AsyncPipe] = None + self.vector_search_pipe_override: Optional[AsyncPipe] = None + self.rag_pipe_override: Optional[AsyncPipe] = None + self.streaming_rag_pipe_override: Optional[AsyncPipe] = None + self.eval_pipe_override: 
Optional[AsyncPipe] = None + self.ingestion_pipeline: Optional[IngestionPipeline] = None + self.search_pipeline: Optional[SearchPipeline] = None + self.rag_pipeline: Optional[RAGPipeline] = None + self.streaming_rag_pipeline: Optional[RAGPipeline] = None + self.eval_pipeline: Optional[EvalPipeline] = None + + def with_app(self, app: Type[R2REngine]): + self.r2r_app_override = app + return self + + def with_provider_factory(self, factory: Type[R2RProviderFactory]): + self.provider_factory_override = factory + return self + + def with_pipe_factory(self, factory: R2RPipeFactory): + self.pipe_factory_override = factory + return self + + def with_pipeline_factory(self, factory: R2RPipelineFactory): + self.pipeline_factory_override = factory + return self + + def with_vector_db_provider(self, provider: VectorDBProvider): + self.vector_db_provider_override = provider + return self + + def with_embedding_provider(self, provider: EmbeddingProvider): + self.embedding_provider_override = provider + return self + + def with_eval_provider(self, provider: EvalProvider): + self.eval_provider_override = provider + return self + + def with_llm_provider(self, provider: LLMProvider): + self.llm_provider_override = provider + return self + + def with_prompt_provider(self, provider: PromptProvider): + self.prompt_provider_override = provider + return self + + def with_parsing_pipe(self, pipe: AsyncPipe): + self.parsing_pipe_override = pipe + return self + + def with_embedding_pipe(self, pipe: AsyncPipe): + self.embedding_pipe_override = pipe + return self + + def with_vector_storage_pipe(self, pipe: AsyncPipe): + self.vector_storage_pipe_override = pipe + return self + + def with_vector_search_pipe(self, pipe: AsyncPipe): + self.vector_search_pipe_override = pipe + return self + + def with_rag_pipe(self, pipe: AsyncPipe): + self.rag_pipe_override = pipe + return self + + def with_streaming_rag_pipe(self, pipe: AsyncPipe): + self.streaming_rag_pipe_override = pipe + return self + + def 
with_eval_pipe(self, pipe: AsyncPipe): + self.eval_pipe_override = pipe + return self + + def with_ingestion_pipeline(self, pipeline: IngestionPipeline): + self.ingestion_pipeline = pipeline + return self + + def with_vector_search_pipeline(self, pipeline: SearchPipeline): + self.search_pipeline = pipeline + return self + + def with_rag_pipeline(self, pipeline: RAGPipeline): + self.rag_pipeline = pipeline + return self + + def with_streaming_rag_pipeline(self, pipeline: RAGPipeline): + self.streaming_rag_pipeline = pipeline + return self + + def with_eval_pipeline(self, pipeline: EvalPipeline): + self.eval_pipeline = pipeline + return self + + def build(self, *args, **kwargs) -> R2R: + provider_factory = self.provider_factory_override or R2RProviderFactory + pipe_factory = self.pipe_factory_override or R2RPipeFactory + pipeline_factory = self.pipeline_factory_override or R2RPipelineFactory + + providers = provider_factory(self.config).create_providers( + vector_db_provider_override=self.vector_db_provider_override, + embedding_provider_override=self.embedding_provider_override, + eval_provider_override=self.eval_provider_override, + llm_provider_override=self.llm_provider_override, + prompt_provider_override=self.prompt_provider_override, + *args, + **kwargs, + ) + + pipes = pipe_factory(self.config, providers).create_pipes( + parsing_pipe_override=self.parsing_pipe_override, + embedding_pipe_override=self.embedding_pipe_override, + vector_storage_pipe_override=self.vector_storage_pipe_override, + vector_search_pipe_override=self.vector_search_pipe_override, + rag_pipe_override=self.rag_pipe_override, + streaming_rag_pipe_override=self.streaming_rag_pipe_override, + eval_pipe_override=self.eval_pipe_override, + *args, + **kwargs, + ) + + pipelines = pipeline_factory(self.config, pipes).create_pipelines( + ingestion_pipeline=self.ingestion_pipeline, + search_pipeline=self.search_pipeline, + rag_pipeline=self.rag_pipeline, + 
streaming_rag_pipeline=self.streaming_rag_pipeline, + eval_pipeline=self.eval_pipeline, + *args, + **kwargs, + ) + + engine = (self.r2r_app_override or R2REngine)( + self.config, providers, pipelines + ) + r2r_app = R2RApp(engine) + return R2R(engine=engine, app=r2r_app) diff --git a/R2R/r2r/main/assembly/config.py b/R2R/r2r/main/assembly/config.py new file mode 100755 index 00000000..d52c4561 --- /dev/null +++ b/R2R/r2r/main/assembly/config.py @@ -0,0 +1,167 @@ +import json +import logging +import os +from enum import Enum +from typing import Any + +from ...base.abstractions.document import DocumentType +from ...base.abstractions.llm import GenerationConfig +from ...base.logging.kv_logger import LoggingConfig +from ...base.providers.embedding_provider import EmbeddingConfig +from ...base.providers.eval_provider import EvalConfig +from ...base.providers.kg_provider import KGConfig +from ...base.providers.llm_provider import LLMConfig +from ...base.providers.prompt_provider import PromptConfig +from ...base.providers.vector_db_provider import ProviderConfig, VectorDBConfig + +logger = logging.getLogger(__name__) + + +class R2RConfig: + REQUIRED_KEYS: dict[str, list] = { + "app": ["max_file_size_in_mb"], + "embedding": [ + "provider", + "base_model", + "base_dimension", + "batch_size", + "text_splitter", + ], + "eval": ["llm"], + "kg": [ + "provider", + "batch_size", + "kg_extraction_config", + "text_splitter", + ], + "ingestion": ["excluded_parsers"], + "completions": ["provider"], + "logging": ["provider", "log_table"], + "prompt": ["provider"], + "vector_database": ["provider"], + } + app: dict[str, Any] + embedding: EmbeddingConfig + completions: LLMConfig + logging: LoggingConfig + prompt: PromptConfig + vector_database: VectorDBConfig + + def __init__(self, config_data: dict[str, Any]): + # Load the default configuration + default_config = self.load_default_config() + + # Override the default configuration with the passed configuration + for key in config_data: 
+ if key in default_config: + default_config[key].update(config_data[key]) + else: + default_config[key] = config_data[key] + + # Validate and set the configuration + for section, keys in R2RConfig.REQUIRED_KEYS.items(): + # Check the keys when provider is set + # TODO - Clean up robust null checks + if "provider" in default_config[section] and ( + default_config[section]["provider"] is not None + and default_config[section]["provider"] != "None" + and default_config[section]["provider"] != "null" + ): + self._validate_config_section(default_config, section, keys) + setattr(self, section, default_config[section]) + + self.app = self.app # for type hinting + self.ingestion = self.ingestion # for type hinting + self.ingestion["excluded_parsers"] = [ + DocumentType(k) for k in self.ingestion["excluded_parsers"] + ] + # override GenerationConfig defaults + GenerationConfig.set_default( + **self.completions.get("generation_config", {}) + ) + self.embedding = EmbeddingConfig.create(**self.embedding) + self.kg = KGConfig.create(**self.kg) + eval_llm = self.eval.pop("llm", None) + self.eval = EvalConfig.create( + **self.eval, llm=LLMConfig.create(**eval_llm) if eval_llm else None + ) + self.completions = LLMConfig.create(**self.completions) + self.logging = LoggingConfig.create(**self.logging) + self.prompt = PromptConfig.create(**self.prompt) + self.vector_database = VectorDBConfig.create(**self.vector_database) + + def _validate_config_section( + self, config_data: dict[str, Any], section: str, keys: list + ): + if section not in config_data: + raise ValueError(f"Missing '{section}' section in config") + if not all(key in config_data[section] for key in keys): + raise ValueError(f"Missing required keys in '{section}' config") + + @classmethod + def from_json(cls, config_path: str = None) -> "R2RConfig": + if config_path is None: + # Get the root directory of the project + file_dir = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join( + file_dir, 
"..", "..", "..", "config.json" + ) + + # Load configuration from JSON file + with open(config_path) as f: + config_data = json.load(f) + + return cls(config_data) + + def to_json(self): + config_data = { + section: self._serialize_config(getattr(self, section)) + for section in R2RConfig.REQUIRED_KEYS.keys() + } + return json.dumps(config_data) + + def save_to_redis(self, redis_client: Any, key: str): + redis_client.set(f"R2RConfig:{key}", self.to_json()) + + @classmethod + def load_from_redis(cls, redis_client: Any, key: str) -> "R2RConfig": + config_data = redis_client.get(f"R2RConfig:{key}") + if config_data is None: + raise ValueError( + f"Configuration not found in Redis with key '{key}'" + ) + config_data = json.loads(config_data) + # config_data["ingestion"]["selected_parsers"] = { + # DocumentType(k): v + # for k, v in config_data["ingestion"]["selected_parsers"].items() + # } + return cls(config_data) + + @classmethod + def load_default_config(cls) -> dict: + # Get the root directory of the project + file_dir = os.path.dirname(os.path.abspath(__file__)) + default_config_path = os.path.join( + file_dir, "..", "..", "..", "config.json" + ) + # Load default configuration from JSON file + with open(default_config_path) as f: + return json.load(f) + + @staticmethod + def _serialize_config(config_section: Any) -> dict: + # TODO - Make this approach cleaner + if isinstance(config_section, ProviderConfig): + config_section = config_section.dict() + filtered_result = {} + for k, v in config_section.items(): + if isinstance(k, Enum): + k = k.value + if isinstance(v, dict): + formatted_v = { + k2.value if isinstance(k2, Enum) else k2: v2 + for k2, v2 in v.items() + } + v = formatted_v + filtered_result[k] = v + return filtered_result diff --git a/R2R/r2r/main/assembly/factory.py b/R2R/r2r/main/assembly/factory.py new file mode 100755 index 00000000..4e147337 --- /dev/null +++ b/R2R/r2r/main/assembly/factory.py @@ -0,0 +1,484 @@ +import logging +import os +from 
class R2RProviderFactory:
    """Builds concrete provider instances from an R2RConfig."""

    def __init__(self, config: R2RConfig):
        self.config = config

    def create_vector_db_provider(
        self, vector_db_config: VectorDBConfig, *args, **kwargs
    ) -> VectorDBProvider:
        """Create and initialize the configured vector database provider."""
        vector_db_provider: Optional[VectorDBProvider] = None
        if vector_db_config.provider == "pgvector":
            from r2r.providers.vector_dbs import PGVectorDB

            vector_db_provider = PGVectorDB(vector_db_config)
        else:
            raise ValueError(
                f"Vector database provider {vector_db_config.provider} not supported"
            )
        if not vector_db_provider:
            raise ValueError("Vector database provider not found")

        if not self.config.embedding.base_dimension:
            raise ValueError("Search dimension not found in embedding config")

        vector_db_provider.initialize_collection(
            self.config.embedding.base_dimension
        )
        return vector_db_provider

    def create_embedding_provider(
        self, embedding: EmbeddingConfig, *args, **kwargs
    ) -> EmbeddingProvider:
        """Create the configured embedding provider, or None if unset."""
        # BUG FIX: the None check must come first. Previously it lived in an
        # `elif embedding is None:` branch AFTER conditions that dereference
        # embedding.provider, so a None config raised AttributeError before
        # the branch could ever be reached.
        if embedding is None:
            return None

        embedding_provider: Optional[EmbeddingProvider] = None

        if embedding.provider == "openai":
            if not os.getenv("OPENAI_API_KEY"):
                raise ValueError(
                    "Must set OPENAI_API_KEY in order to initialize OpenAIEmbeddingProvider."
                )
            from r2r.providers.embeddings import OpenAIEmbeddingProvider

            embedding_provider = OpenAIEmbeddingProvider(embedding)
        elif embedding.provider == "ollama":
            from r2r.providers.embeddings import OllamaEmbeddingProvider

            embedding_provider = OllamaEmbeddingProvider(embedding)
        elif embedding.provider == "sentence-transformers":
            from r2r.providers.embeddings import (
                SentenceTransformerEmbeddingProvider,
            )

            embedding_provider = SentenceTransformerEmbeddingProvider(
                embedding
            )
        else:
            raise ValueError(
                f"Embedding provider {embedding.provider} not supported"
            )

        return embedding_provider

    def create_eval_provider(
        self, eval_config, prompt_provider, *args, **kwargs
    ) -> Optional[EvalProvider]:
        """Create the configured eval provider, or None if unset."""
        if eval_config.provider == "local":
            from r2r.providers.eval import LLMEvalProvider

            llm_provider = self.create_llm_provider(eval_config.llm)
            eval_provider = LLMEvalProvider(
                eval_config,
                llm_provider=llm_provider,
                prompt_provider=prompt_provider,
            )
        elif eval_config.provider is None:
            eval_provider = None
        else:
            raise ValueError(
                f"Eval provider {eval_config.provider} not supported."
            )

        return eval_provider

    def create_llm_provider(
        self, llm_config: LLMConfig, *args, **kwargs
    ) -> LLMProvider:
        """Create the configured language model provider."""
        llm_provider: Optional[LLMProvider] = None
        if llm_config.provider == "openai":
            from r2r.providers.llms import OpenAILLM

            llm_provider = OpenAILLM(llm_config)
        elif llm_config.provider == "litellm":
            from r2r.providers.llms import LiteLLM

            llm_provider = LiteLLM(llm_config)
        else:
            raise ValueError(
                f"Language model provider {llm_config.provider} not supported"
            )
        if not llm_provider:
            raise ValueError("Language model provider not found")
        return llm_provider

    def create_prompt_provider(
        self, prompt_config, *args, **kwargs
    ) -> PromptProvider:
        """Create the configured prompt provider."""
        prompt_provider = None
        if prompt_config.provider == "local":
            from r2r.prompts import R2RPromptProvider

            prompt_provider = R2RPromptProvider()
        else:
            raise ValueError(
                f"Prompt provider {prompt_config.provider} not supported"
            )
        return prompt_provider

    def create_kg_provider(self, kg_config, *args, **kwargs):
        """Create the configured knowledge-graph provider, or None if unset."""
        if kg_config.provider == "neo4j":
            from r2r.providers.kg import Neo4jKGProvider

            return Neo4jKGProvider(kg_config)
        elif kg_config.provider is None:
            return None
        else:
            raise ValueError(
                f"KG provider {kg_config.provider} not supported."
            )

    def create_providers(
        self,
        vector_db_provider_override: Optional[VectorDBProvider] = None,
        embedding_provider_override: Optional[EmbeddingProvider] = None,
        eval_provider_override: Optional[EvalProvider] = None,
        llm_provider_override: Optional[LLMProvider] = None,
        prompt_provider_override: Optional[PromptProvider] = None,
        kg_provider_override: Optional[KGProvider] = None,
        *args,
        **kwargs,
    ) -> R2RProviders:
        """Create the full provider set, honoring any per-provider overrides."""
        prompt_provider = (
            prompt_provider_override
            or self.create_prompt_provider(self.config.prompt, *args, **kwargs)
        )
        return R2RProviders(
            vector_db=vector_db_provider_override
            or self.create_vector_db_provider(
                self.config.vector_database, *args, **kwargs
            ),
            embedding=embedding_provider_override
            or self.create_embedding_provider(
                self.config.embedding, *args, **kwargs
            ),
            eval=eval_provider_override
            or self.create_eval_provider(
                self.config.eval,
                prompt_provider=prompt_provider,
                *args,
                **kwargs,
            ),
            llm=llm_provider_override
            or self.create_llm_provider(
                self.config.completions, *args, **kwargs
            ),
            prompt=prompt_provider_override
            or self.create_prompt_provider(
                self.config.prompt, *args, **kwargs
            ),
            kg=kg_provider_override
            or self.create_kg_provider(self.config.kg, *args, **kwargs),
        )
Optional[AsyncPipe] = None, + *args, + **kwargs, + ) -> R2RPipes: + return R2RPipes( + parsing_pipe=parsing_pipe_override + or self.create_parsing_pipe( + self.config.ingestion.get("excluded_parsers"), *args, **kwargs + ), + embedding_pipe=embedding_pipe_override + or self.create_embedding_pipe(*args, **kwargs), + kg_pipe=kg_pipe_override or self.create_kg_pipe(*args, **kwargs), + kg_storage_pipe=kg_storage_pipe_override + or self.create_kg_storage_pipe(*args, **kwargs), + kg_agent_search_pipe=kg_agent_pipe_override + or self.create_kg_agent_pipe(*args, **kwargs), + vector_storage_pipe=vector_storage_pipe_override + or self.create_vector_storage_pipe(*args, **kwargs), + vector_search_pipe=vector_search_pipe_override + or self.create_vector_search_pipe(*args, **kwargs), + rag_pipe=rag_pipe_override + or self.create_rag_pipe(*args, **kwargs), + streaming_rag_pipe=streaming_rag_pipe_override + or self.create_rag_pipe(stream=True, *args, **kwargs), + eval_pipe=eval_pipe_override + or self.create_eval_pipe(*args, **kwargs), + ) + + def create_parsing_pipe( + self, excluded_parsers: Optional[list] = None, *args, **kwargs + ) -> Any: + from r2r.pipes import ParsingPipe + + return ParsingPipe(excluded_parsers=excluded_parsers or []) + + def create_embedding_pipe(self, *args, **kwargs) -> Any: + if self.config.embedding.provider is None: + return None + + from r2r.base import RecursiveCharacterTextSplitter + from r2r.pipes import EmbeddingPipe + + text_splitter_config = self.config.embedding.extra_fields.get( + "text_splitter" + ) + if not text_splitter_config: + raise ValueError( + "Text splitter config not found in embedding config" + ) + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=text_splitter_config["chunk_size"], + chunk_overlap=text_splitter_config["chunk_overlap"], + length_function=len, + is_separator_regex=False, + ) + return EmbeddingPipe( + embedding_provider=self.providers.embedding, + vector_db_provider=self.providers.vector_db, + 
text_splitter=text_splitter, + embedding_batch_size=self.config.embedding.batch_size, + ) + + def create_vector_storage_pipe(self, *args, **kwargs) -> Any: + if self.config.embedding.provider is None: + return None + + from r2r.pipes import VectorStoragePipe + + return VectorStoragePipe(vector_db_provider=self.providers.vector_db) + + def create_vector_search_pipe(self, *args, **kwargs) -> Any: + if self.config.embedding.provider is None: + return None + + from r2r.pipes import VectorSearchPipe + + return VectorSearchPipe( + vector_db_provider=self.providers.vector_db, + embedding_provider=self.providers.embedding, + ) + + def create_kg_pipe(self, *args, **kwargs) -> Any: + if self.config.kg.provider is None: + return None + + from r2r.base import RecursiveCharacterTextSplitter + from r2r.pipes import KGExtractionPipe + + text_splitter_config = self.config.kg.extra_fields.get("text_splitter") + if not text_splitter_config: + raise ValueError("Text splitter config not found in kg config.") + + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=text_splitter_config["chunk_size"], + chunk_overlap=text_splitter_config["chunk_overlap"], + length_function=len, + is_separator_regex=False, + ) + return KGExtractionPipe( + kg_provider=self.providers.kg, + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + vector_db_provider=self.providers.vector_db, + text_splitter=text_splitter, + kg_batch_size=self.config.kg.batch_size, + ) + + def create_kg_storage_pipe(self, *args, **kwargs) -> Any: + if self.config.kg.provider is None: + return None + + from r2r.pipes import KGStoragePipe + + return KGStoragePipe( + kg_provider=self.providers.kg, + embedding_provider=self.providers.embedding, + ) + + def create_kg_agent_pipe(self, *args, **kwargs) -> Any: + if self.config.kg.provider is None: + return None + + from r2r.pipes import KGAgentSearchPipe + + return KGAgentSearchPipe( + kg_provider=self.providers.kg, + llm_provider=self.providers.llm, + 
prompt_provider=self.providers.prompt, + ) + + def create_rag_pipe(self, stream: bool = False, *args, **kwargs) -> Any: + if stream: + from r2r.pipes import StreamingSearchRAGPipe + + return StreamingSearchRAGPipe( + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + ) + else: + from r2r.pipes import SearchRAGPipe + + return SearchRAGPipe( + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + ) + + def create_eval_pipe(self, *args, **kwargs) -> Any: + from r2r.pipes import EvalPipe + + return EvalPipe(eval_provider=self.providers.eval) + + +class R2RPipelineFactory: + def __init__(self, config: R2RConfig, pipes: R2RPipes): + self.config = config + self.pipes = pipes + + def create_ingestion_pipeline(self, *args, **kwargs) -> IngestionPipeline: + """factory method to create an ingestion pipeline.""" + ingestion_pipeline = IngestionPipeline() + + ingestion_pipeline.add_pipe( + pipe=self.pipes.parsing_pipe, parsing_pipe=True + ) + # Add embedding pipes if provider is set + if self.config.embedding.provider is not None: + ingestion_pipeline.add_pipe( + self.pipes.embedding_pipe, embedding_pipe=True + ) + ingestion_pipeline.add_pipe( + self.pipes.vector_storage_pipe, embedding_pipe=True + ) + # Add KG pipes if provider is set + if self.config.kg.provider is not None: + ingestion_pipeline.add_pipe(self.pipes.kg_pipe, kg_pipe=True) + ingestion_pipeline.add_pipe( + self.pipes.kg_storage_pipe, kg_pipe=True + ) + + return ingestion_pipeline + + def create_search_pipeline(self, *args, **kwargs) -> SearchPipeline: + """factory method to create an ingestion pipeline.""" + search_pipeline = SearchPipeline() + + # Add vector search pipes if embedding provider and vector provider is set + if ( + self.config.embedding.provider is not None + and self.config.vector_database.provider is not None + ): + search_pipeline.add_pipe( + self.pipes.vector_search_pipe, vector_search_pipe=True + ) + + # Add KG pipes if provider is set + if 
self.config.kg.provider is not None: + search_pipeline.add_pipe( + self.pipes.kg_agent_search_pipe, kg_pipe=True + ) + + return search_pipeline + + def create_rag_pipeline( + self, + search_pipeline: SearchPipeline, + stream: bool = False, + *args, + **kwargs, + ) -> RAGPipeline: + rag_pipe = ( + self.pipes.streaming_rag_pipe if stream else self.pipes.rag_pipe + ) + + rag_pipeline = RAGPipeline() + rag_pipeline.set_search_pipeline(search_pipeline) + rag_pipeline.add_pipe(rag_pipe) + return rag_pipeline + + def create_eval_pipeline(self, *args, **kwargs) -> EvalPipeline: + eval_pipeline = EvalPipeline() + eval_pipeline.add_pipe(self.pipes.eval_pipe) + return eval_pipeline + + def create_pipelines( + self, + ingestion_pipeline: Optional[IngestionPipeline] = None, + search_pipeline: Optional[SearchPipeline] = None, + rag_pipeline: Optional[RAGPipeline] = None, + streaming_rag_pipeline: Optional[RAGPipeline] = None, + eval_pipeline: Optional[EvalPipeline] = None, + *args, + **kwargs, + ) -> R2RPipelines: + try: + self.configure_logging() + except Exception as e: + logger.warn(f"Error configuring logging: {e}") + search_pipeline = search_pipeline or self.create_search_pipeline( + *args, **kwargs + ) + return R2RPipelines( + ingestion_pipeline=ingestion_pipeline + or self.create_ingestion_pipeline(*args, **kwargs), + search_pipeline=search_pipeline, + rag_pipeline=rag_pipeline + or self.create_rag_pipeline( + search_pipeline=search_pipeline, + stream=False, + *args, + **kwargs, + ), + streaming_rag_pipeline=streaming_rag_pipeline + or self.create_rag_pipeline( + search_pipeline=search_pipeline, + stream=True, + *args, + **kwargs, + ), + eval_pipeline=eval_pipeline + or self.create_eval_pipeline(*args, **kwargs), + ) + + def configure_logging(self): + KVLoggingSingleton.configure(self.config.logging) diff --git a/R2R/r2r/main/assembly/factory_extensions.py b/R2R/r2r/main/assembly/factory_extensions.py new file mode 100755 index 00000000..56e82ef7 --- /dev/null +++ 
from r2r.main import R2RPipeFactory
from r2r.pipes.retrieval.multi_search import MultiSearchPipe
from r2r.pipes.retrieval.query_transform_pipe import QueryTransformPipe


class R2RPipeFactoryWithMultiSearch(R2RPipeFactory):
    """Pipe factory whose vector-search pipe first fans the user query out into
    multiple sub-queries (via an LLM prompt) and searches with each of them."""

    # TODO - Can we have stricter typing like so? `: {"template": str, "input_types": dict[str, str]} = {`
    QUERY_GENERATION_TEMPLATE: dict = {
        "template": "### Instruction:\n\nGiven the following query that follows to write a double newline separated list of up to {num_outputs} queries meant to help answer the original query. \nDO NOT generate any single query which is likely to require information from multiple distinct documents, \nEACH single query will be used to carry out a cosine similarity semantic search over distinct indexed documents, such as varied medical documents. \nFOR EXAMPLE if asked `how do the key themes of Great Gatsby compare with 1984`, the two queries would be \n`What are the key themes of Great Gatsby?` and `What are the key themes of 1984?`.\nHere is the original user query to be transformed into answers:\n\n### Query:\n{message}\n\n### Response:\n",
        "input_types": {"num_outputs": "int", "message": "str"},
    }

    def create_vector_search_pipe(self, *args, **kwargs):
        """
        A factory method to create a search pipe.

        Overrides include
            task_prompt_name: str
            multi_query_transform_pipe_override: QueryTransformPipe
            multi_inner_search_pipe_override: SearchPipe
            query_generation_template_override: {'template': str, 'input_types': dict[str, str]}
        """
        multi_search_config = MultiSearchPipe.PipeConfig()
        if kwargs.get("task_prompt_name") and kwargs.get(
            "query_generation_template_override"
        ):
            raise ValueError(
                "Cannot provide both `task_prompt_name` and `query_generation_template_override`"
            )
        task_prompt_name = (
            kwargs.get("task_prompt_name")
            or f"{multi_search_config.name}_task_prompt"
        )
        template = kwargs.get("query_generation_template_override")
        if template:
            # Register the override prompt used to transform the user query.
            # BUG FIX: the previous code unconditionally read `template["name"]`,
            # which raised KeyError for overrides following the documented
            # {'template', 'input_types'} shape, and carried a dead
            # `or self.QUERY_GENERATION_TEMPLATE` fallback inside this truthy
            # branch. Fall back to the derived task prompt name instead.
            self.providers.prompt.add_prompt(**template)
            task_prompt_name = template.get("name", task_prompt_name)

        # Initialize the new query transform pipe
        query_transform_pipe = kwargs.get(
            "multi_query_transform_pipe_override", None
        ) or QueryTransformPipe(
            llm_provider=self.providers.llm,
            prompt_provider=self.providers.prompt,
            config=QueryTransformPipe.QueryTransformConfig(
                name=multi_search_config.name,
                task_prompt=task_prompt_name,
            ),
        )
        # Create search pipe override and pipes
        inner_search_pipe = kwargs.get(
            "multi_inner_search_pipe_override", None
        ) or super().create_vector_search_pipe(*args, **kwargs)

        # TODO - modify `create_..._pipe` to allow naming the pipe
        inner_search_pipe.config.name = multi_search_config.name

        return MultiSearchPipe(
            query_transform_pipe=query_transform_pipe,
            inner_search_pipe=inner_search_pipe,
            config=multi_search_config,
        )
RunManager
from r2r.base.abstractions.base import AsyncSyncMeta, syncable

from .abstractions import R2RPipelines, R2RProviders
from .assembly.config import R2RConfig
from .services.ingestion_service import IngestionService
from .services.management_service import ManagementService
from .services.retrieval_service import RetrievalService


class R2REngine(metaclass=AsyncSyncMeta):
    """Core engine: wires providers and pipelines into the ingestion, retrieval
    and management services, and exposes their operations as `a`-prefixed async
    methods that the `AsyncSyncMeta` metaclass also makes callable synchronously
    (via `@syncable`)."""

    def __init__(
        self,
        config: R2RConfig,
        providers: R2RProviders,
        pipelines: R2RPipelines,
        run_manager: Optional[RunManager] = None,
    ):
        logging_connection = KVLoggingSingleton()
        run_manager = run_manager or RunManager(logging_connection)

        self.config = config
        self.providers = providers
        self.pipelines = pipelines
        # Fixed: reuse the connection created above instead of constructing a
        # second KVLoggingSingleton. (The class is presumably a singleton, so
        # behavior is unchanged, but this makes the shared instance explicit.)
        self.logging_connection = logging_connection
        self.run_manager = run_manager

        # Every service receives the same config/providers/pipelines bundle.
        self.ingestion_service = IngestionService(
            config, providers, pipelines, run_manager, logging_connection
        )
        self.retrieval_service = RetrievalService(
            config, providers, pipelines, run_manager, logging_connection
        )
        self.management_service = ManagementService(
            config, providers, pipelines, run_manager, logging_connection
        )

    # Ingestion routes
    @syncable
    async def aingest_documents(self, *args, **kwargs):
        return await self.ingestion_service.ingest_documents(*args, **kwargs)

    @syncable
    async def aupdate_documents(self, *args, **kwargs):
        return await self.ingestion_service.update_documents(*args, **kwargs)

    @syncable
    async def aingest_files(self, *args, **kwargs):
        return await self.ingestion_service.ingest_files(*args, **kwargs)

    @syncable
    async def aupdate_files(self, *args, **kwargs):
        return await self.ingestion_service.update_files(*args, **kwargs)

    # Retrieval routes
    @syncable
    async def asearch(self, *args, **kwargs):
        return await self.retrieval_service.search(*args, **kwargs)

    @syncable
    async def arag(self, *args, **kwargs):
        return await self.retrieval_service.rag(*args, **kwargs)

    @syncable
    async def aevaluate(self, *args, **kwargs):
        return await self.retrieval_service.evaluate(*args, **kwargs)

    # Management routes
    @syncable
    async def aupdate_prompt(self, *args, **kwargs):
        return await self.management_service.update_prompt(*args, **kwargs)

    @syncable
    async def alogs(self, *args, **kwargs):
        return await self.management_service.alogs(*args, **kwargs)

    @syncable
    async def aanalytics(self, *args, **kwargs):
        return await self.management_service.aanalytics(*args, **kwargs)

    @syncable
    async def aapp_settings(self, *args, **kwargs):
        return await self.management_service.aapp_settings(*args, **kwargs)

    @syncable
    async def ausers_overview(self, *args, **kwargs):
        return await self.management_service.ausers_overview(*args, **kwargs)

    @syncable
    async def adelete(self, *args, **kwargs):
        return await self.management_service.delete(*args, **kwargs)

    @syncable
    async def adocuments_overview(self, *args, **kwargs):
        return await self.management_service.adocuments_overview(
            *args, **kwargs
        )

    # NOTE(review): this route lacks the `a` prefix used by every sibling;
    # the name is kept for backward compatibility with existing callers.
    @syncable
    async def inspect_knowledge_graph(self, *args, **kwargs):
        return await self.management_service.inspect_knowledge_graph(
            *args, **kwargs
        )

    @syncable
    async def adocument_chunks(self, *args, **kwargs):
        return await self.management_service.document_chunks(*args, **kwargs)
class R2RExecutionWrapper:
    """A demo class for the R2R library.

    Wraps either a remote `R2RClient` (``client_mode=True``) or an in-process
    `R2R` application, exposing the same operations against both.
    """

    def __init__(
        self,
        config_path: Optional[str] = None,
        config_name: Optional[str] = "default",
        client_mode: bool = True,
        base_url="http://localhost:8000",
    ):
        # NOTE(review): `config_name` defaults to "default", so passing only
        # `config_path` still trips this guard unless `config_name=None` is
        # given explicitly — confirm whether that is intended.
        if config_path and config_name:
            raise Exception("Cannot specify both config_path and config_name")

        # Handle fire CLI, which passes booleans through as strings.
        if isinstance(client_mode, str):
            client_mode = client_mode.lower() == "true"
        self.client_mode = client_mode
        self.base_url = base_url

        if self.client_mode:
            self.client = R2RClient(base_url)
            self.app = None
        else:
            config = (
                R2RConfig.from_json(config_path)
                if config_path
                else R2RConfig.from_json(
                    R2RBuilder.CONFIG_OPTIONS[config_name or "default"]
                )
            )

            self.client = None
            self.app = R2R(config=config)

    def serve(self, host: str = "0.0.0.0", port: int = 8000):
        """Run the embedded app's HTTP server (embedded mode only)."""
        if not self.client_mode:
            self.app.serve(host, port)
        else:
            raise ValueError(
                "Serve method is only available when `client_mode=False`."
            )

    # BUG FIX: this helper was defined without `self` yet called as
    # `self._parse_metadata_string(...)`, which raised TypeError at runtime;
    # it is now a proper @staticmethod.
    @staticmethod
    def _parse_metadata_string(metadata_string: str) -> list[dict]:
        """
        Convert a string representation of metadata into a list of dictionaries.

        The input string can be in one of two formats:
        1. JSON array of objects: '[{"key": "value"}, {"key2": "value2"}]'
        2. Python-like list of dictionaries: "[{'key': 'value'}, {'key2': 'value2'}]"

        Args:
            metadata_string (str): The string representation of metadata.

        Returns:
            list[dict]: A list of dictionaries representing the metadata.

        Raises:
            ValueError: If the string cannot be parsed into a list of dictionaries.
        """
        if not metadata_string:
            return []

        try:
            # First, try to parse as JSON
            return json.loads(metadata_string)
        except json.JSONDecodeError as e:
            try:
                # If JSON parsing fails, try to evaluate as a Python literal
                result = ast.literal_eval(metadata_string)
                if not isinstance(result, list) or not all(
                    isinstance(item, dict) for item in result
                ):
                    raise ValueError(
                        "The string does not represent a list of dictionaries"
                    ) from e
                return result
            except (ValueError, SyntaxError) as exc:
                raise ValueError(
                    "Unable to parse the metadata string. "
                    "Please ensure it's a valid JSON array or Python list of dictionaries."
                ) from exc

    def ingest_files(
        self,
        file_paths: list[str],
        metadatas: Optional[list[dict]] = None,
        document_ids: Optional[list[Union[uuid.UUID, str]]] = None,
        versions: Optional[list[str]] = None,
    ):
        """Ingest files (or whole directories, walked recursively)."""
        # Fire passes list arguments as comma-separated strings.
        if isinstance(file_paths, str):
            file_paths = list(file_paths.split(","))
        if isinstance(metadatas, str):
            metadatas = self._parse_metadata_string(metadatas)
        if isinstance(document_ids, str):
            document_ids = list(document_ids.split(","))
        if isinstance(versions, str):
            versions = list(versions.split(","))

        all_file_paths = []
        for path in file_paths:
            if os.path.isdir(path):
                for root, _, files in os.walk(path):
                    all_file_paths.extend(
                        os.path.join(root, file) for file in files
                    )
            else:
                all_file_paths.append(path)

        if not document_ids:
            # Derive deterministic ids from file basenames when none are given.
            document_ids = [
                generate_id_from_label(os.path.basename(file_path))
                for file_path in all_file_paths
            ]

        files = [
            UploadFile(
                filename=os.path.basename(file_path),
                file=open(file_path, "rb"),
            )
            for file_path in all_file_paths
        ]

        # Populate `size` so the server-side size limit can be enforced.
        for file in files:
            file.file.seek(0, 2)
            file.size = file.file.tell()
            file.file.seek(0)

        try:
            if self.client_mode:
                return self.client.ingest_files(
                    file_paths=all_file_paths,
                    document_ids=document_ids,
                    metadatas=metadatas,
                    versions=versions,
                    monitor=True,
                )["results"]
            else:
                return self.app.ingest_files(
                    files=files,
                    document_ids=document_ids,
                    metadatas=metadatas,
                    versions=versions,
                )
        finally:
            for file in files:
                file.file.close()

    def update_files(
        self,
        file_paths: list[str],
        document_ids: list[str],
        metadatas: Optional[list[dict]] = None,
    ):
        """Replace existing documents with new file contents."""
        if isinstance(file_paths, str):
            file_paths = list(file_paths.split(","))
        if isinstance(metadatas, str):
            metadatas = self._parse_metadata_string(metadatas)
        if isinstance(document_ids, str):
            document_ids = list(document_ids.split(","))

        if self.client_mode:
            return self.client.update_files(
                file_paths=file_paths,
                document_ids=document_ids,
                metadatas=metadatas,
                monitor=True,
            )["results"]
        else:
            files = [
                UploadFile(
                    filename=file_path,
                    file=open(file_path, "rb"),
                )
                for file_path in file_paths
            ]
            return self.app.update_files(
                files=files, document_ids=document_ids, metadatas=metadatas
            )

    def search(
        self,
        query: str,
        use_vector_search: bool = True,
        search_filters: Optional[dict] = None,
        search_limit: int = 10,
        do_hybrid_search: bool = False,
        use_kg_search: bool = False,
        kg_agent_generation_config: Optional[dict] = None,
    ):
        """Run a vector and/or knowledge-graph search for `query`."""
        if self.client_mode:
            return self.client.search(
                query,
                use_vector_search,
                search_filters,
                search_limit,
                do_hybrid_search,
                use_kg_search,
                kg_agent_generation_config,
            )["results"]
        else:
            return self.app.search(
                query,
                VectorSearchSettings(
                    use_vector_search=use_vector_search,
                    search_filters=search_filters or {},
                    search_limit=search_limit,
                    do_hybrid_search=do_hybrid_search,
                ),
                KGSearchSettings(
                    use_kg_search=use_kg_search,
                    agent_generation_config=GenerationConfig(
                        **(kg_agent_generation_config or {})
                    ),
                ),
            )

    def rag(
        self,
        query: str,
        use_vector_search: bool = True,
        search_filters: Optional[dict] = None,
        search_limit: int = 10,
        do_hybrid_search: bool = False,
        use_kg_search: bool = False,
        kg_agent_generation_config: Optional[dict] = None,
        stream: bool = False,
        rag_generation_config: Optional[dict] = None,
    ):
        """Run retrieval-augmented generation; returns a generator when `stream`."""
        if self.client_mode:
            response = self.client.rag(
                query=query,
                use_vector_search=use_vector_search,
                search_filters=search_filters or {},
                search_limit=search_limit,
                do_hybrid_search=do_hybrid_search,
                use_kg_search=use_kg_search,
                kg_agent_generation_config=kg_agent_generation_config,
                rag_generation_config=rag_generation_config,
            )
            # Non-streaming responses are wrapped in a {"results": ...} envelope.
            return response["results"] if not stream else response
        else:
            response = self.app.rag(
                query,
                vector_search_settings=VectorSearchSettings(
                    use_vector_search=use_vector_search,
                    search_filters=search_filters or {},
                    search_limit=search_limit,
                    do_hybrid_search=do_hybrid_search,
                ),
                kg_search_settings=KGSearchSettings(
                    use_kg_search=use_kg_search,
                    agent_generation_config=GenerationConfig(
                        **(kg_agent_generation_config or {})
                    ),
                ),
                rag_generation_config=GenerationConfig(
                    **(rag_generation_config or {})
                ),
            )
            if not stream:
                return response
            else:
                # Bridge the async streaming response into a sync generator so
                # CLI callers can iterate it directly.

                async def async_generator():
                    async for chunk in response:
                        yield chunk

                def sync_generator():
                    try:
                        # NOTE(review): get_event_loop() is deprecated outside a
                        # running loop in newer Pythons — confirm target version.
                        loop = asyncio.get_event_loop()
                        async_gen = async_generator()
                        while True:
                            try:
                                yield loop.run_until_complete(
                                    async_gen.__anext__()
                                )
                            except StopAsyncIteration:
                                break
                    except Exception:
                        # Best-effort: any failure simply ends the stream.
                        pass

                return sync_generator()

    def documents_overview(
        self,
        document_ids: Optional[list[str]] = None,
        user_ids: Optional[list[str]] = None,
    ):
        """List known documents, optionally filtered by document or user ids."""
        if self.client_mode:
            return self.client.documents_overview(document_ids, user_ids)[
                "results"
            ]
        else:
            return self.app.documents_overview(document_ids, user_ids)

    def delete(
        self,
        keys: list[str],
        values: list[str],
    ):
        """Delete entries whose metadata matches the given key/value pairs."""
        if self.client_mode:
            return self.client.delete(keys, values)["results"]
        else:
            return self.app.delete(keys, values)

    def logs(self, log_type_filter: Optional[str] = None):
        """Fetch run logs, optionally filtered by log type."""
        if self.client_mode:
            return self.client.logs(log_type_filter)["results"]
        else:
            return self.app.logs(log_type_filter)

    def document_chunks(self, document_id: str):
        """Fetch the stored chunks for one document."""
        doc_uuid = uuid.UUID(document_id)
        if self.client_mode:
            return self.client.document_chunks(doc_uuid)["results"]
        else:
            return self.app.document_chunks(doc_uuid)

    def app_settings(self):
        """Fetch the application settings."""
        if self.client_mode:
            return self.client.app_settings()
        else:
            return self.app.app_settings()

    def users_overview(self, user_ids: Optional[list[uuid.UUID]] = None):
        """List known users, optionally filtered by id."""
        if self.client_mode:
            return self.client.users_overview(user_ids)["results"]
        else:
            return self.app.users_overview(user_ids)

    def analytics(
        self,
        filters: Optional[str] = None,
        analysis_types: Optional[str] = None,
    ):
        """Run analytics with the given filter criteria and analysis types."""
        filter_criteria = FilterCriteria(filters=filters)
        analysis_types = AnalysisTypes(analysis_types=analysis_types)

        if self.client_mode:
            return self.client.analytics(
                filter_criteria=filter_criteria.model_dump(),
                analysis_types=analysis_types.model_dump(),
            )["results"]
        else:
            return self.app.analytics(
                filter_criteria=filter_criteria, analysis_types=analysis_types
            )

    def ingest_sample_file(self, no_media: bool = True, option: int = 0):
        """Ingest the first sample file into R2R."""
        from r2r.examples.scripts.sample_data_ingestor import (
            SampleDataIngestor,
        )

        sample_ingestor = SampleDataIngestor(self)
        return sample_ingestor.ingest_sample_file(
            no_media=no_media, option=option
        )

    def ingest_sample_files(self, no_media: bool = True):
        """Ingest all sample files into R2R."""
        from r2r.examples.scripts.sample_data_ingestor import (
            SampleDataIngestor,
        )

        sample_ingestor = SampleDataIngestor(self)
        return sample_ingestor.ingest_sample_files(no_media=no_media)

    def inspect_knowledge_graph(self, limit: int = 100) -> str:
        """Return a textual overview of the knowledge graph."""
        if self.client_mode:
            return self.client.inspect_knowledge_graph(limit)["results"]
        else:
            # BUG FIX: this referenced `self.engine`, which is never set on
            # this wrapper; the embedded application lives in `self.app`.
            return self.app.inspect_knowledge_graph(limit)
health(self) -> str: + if self.client_mode: + return self.client.health() + else: + pass + + def get_app(self): + if not self.client_mode: + return self.app.app.app + else: + raise Exception( + "`get_app` method is only available when running with `client_mode=False`." + ) + + +if __name__ == "__main__": + import fire + + fire.Fire(R2RExecutionWrapper) diff --git a/R2R/r2r/main/r2r.py b/R2R/r2r/main/r2r.py new file mode 100755 index 00000000..2d8601b2 --- /dev/null +++ b/R2R/r2r/main/r2r.py @@ -0,0 +1,51 @@ +from typing import Optional + +from .app import R2RApp +from .assembly.config import R2RConfig +from .engine import R2REngine + + +class R2R: + engine: R2REngine + app: R2RApp + + def __init__( + self, + engine: Optional[R2REngine] = None, + app: Optional[R2RApp] = None, + config: Optional[R2RConfig] = None, + from_config: Optional[str] = None, + *args, + **kwargs + ): + if engine and app: + self.engine = engine + self.app = app + elif (config or from_config) or ( + config is None and from_config is None + ): + from .assembly.builder import R2RBuilder + + # Handle the case where 'from_config' is None and 'config' is None + if not config and not from_config: + from_config = "default" + builder = R2RBuilder( + config=config, + from_config=from_config, + ) + built = builder.build() + self.engine = built.engine + self.app = built.app + else: + raise ValueError( + "Must provide either 'engine' and 'app', or 'config'/'from_config' to build the R2R object." 
+ ) + + def __getattr__(self, name): + # Check if the attribute name is 'app' and return it directly + if name == "app": + return self.app + elif name == "serve": + return self.app.serve + # Otherwise, delegate to the engine + return getattr(self.engine, name) diff --git a/R2R/r2r/main/services/__init__.py b/R2R/r2r/main/services/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/main/services/__init__.py diff --git a/R2R/r2r/main/services/base.py b/R2R/r2r/main/services/base.py new file mode 100755 index 00000000..02c0675d --- /dev/null +++ b/R2R/r2r/main/services/base.py @@ -0,0 +1,22 @@ +from abc import ABC + +from r2r.base import KVLoggingSingleton, RunManager + +from ..abstractions import R2RPipelines, R2RProviders +from ..assembly.config import R2RConfig + + +class Service(ABC): + def __init__( + self, + config: R2RConfig, + providers: R2RProviders, + pipelines: R2RPipelines, + run_manager: RunManager, + logging_connection: KVLoggingSingleton, + ): + self.config = config + self.providers = providers + self.pipelines = pipelines + self.run_manager = run_manager + self.logging_connection = logging_connection diff --git a/R2R/r2r/main/services/ingestion_service.py b/R2R/r2r/main/services/ingestion_service.py new file mode 100755 index 00000000..5677807a --- /dev/null +++ b/R2R/r2r/main/services/ingestion_service.py @@ -0,0 +1,505 @@ +import json +import logging +import uuid +from collections import defaultdict +from datetime import datetime +from typing import Any, Optional, Union + +from fastapi import Form, UploadFile + +from r2r.base import ( + Document, + DocumentInfo, + DocumentType, + KVLoggingSingleton, + R2RDocumentProcessingError, + R2RException, + RunManager, + generate_id_from_label, + increment_version, + to_async_generator, +) +from r2r.telemetry.telemetry_decorator import telemetry_event + +from ..abstractions import R2RPipelines, R2RProviders +from ..api.requests import R2RIngestFilesRequest, 
from ..assembly.config import R2RConfig
from .base import Service

logger = logging.getLogger(__name__)
MB_CONVERSION_FACTOR = 1024 * 1024


class IngestionService(Service):
    """Service handling document/file ingestion and updates through the
    ingestion pipeline, tracking per-document status in the vector DB."""

    def __init__(
        self,
        config: R2RConfig,
        providers: R2RProviders,
        pipelines: R2RPipelines,
        run_manager: RunManager,
        logging_connection: KVLoggingSingleton,
    ):
        super().__init__(
            config, providers, pipelines, run_manager, logging_connection
        )

    def _file_to_document(
        self, file: UploadFile, document_id: uuid.UUID, metadata: dict
    ) -> Document:
        """Wrap an uploaded file as a Document, validating its extension.

        Raises:
            R2RException: 415 when the extension is not a known DocumentType.
        """
        file_extension = file.filename.split(".")[-1].lower()
        if file_extension.upper() not in DocumentType.__members__:
            raise R2RException(
                status_code=415,
                message=f"'{file_extension}' is not a valid DocumentType.",
            )

        # Default the title to the file's basename when not supplied.
        document_title = (
            metadata.get("title", None) or file.filename.split("/")[-1]
        )
        metadata["title"] = document_title

        return Document(
            id=document_id,
            type=DocumentType[file_extension.upper()],
            data=file.file.read(),
            metadata=metadata,
        )

    @telemetry_event("IngestDocuments")
    async def ingest_documents(
        self,
        documents: list[Document],
        versions: Optional[list[str]] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """Run the ingestion pipeline over `documents`, skipping ones already
        processed at the same version and rejecting in-batch duplicates.

        Raises:
            R2RException: 400 for an empty batch, 418 for in-batch duplicates,
                409 when everything was already ingested.
        """
        if len(documents) == 0:
            raise R2RException(
                status_code=400, message="No documents provided for ingestion."
            )

        document_infos = []
        skipped_documents = []
        processed_documents = {}
        duplicate_documents = defaultdict(list)

        existing_document_info = {
            doc_info.document_id: doc_info
            for doc_info in self.providers.vector_db.get_documents_overview()
        }

        for iteration, document in enumerate(documents):
            version = versions[iteration] if versions else "v0"

            # Check for duplicates within the current batch
            if document.id in processed_documents:
                duplicate_documents[document.id].append(
                    document.metadata.get("title", str(document.id))
                )
                continue

            # Skip documents already successfully ingested at this version.
            if (
                document.id in existing_document_info
                and existing_document_info[document.id].version == version
                and existing_document_info[document.id].status == "success"
            ):
                logger.error(
                    f"Document with ID {document.id} was already successfully processed."
                )
                if len(documents) == 1:
                    raise R2RException(
                        status_code=409,
                        message=f"Document with ID {document.id} was already successfully processed.",
                    )
                skipped_documents.append(
                    (
                        document.id,
                        document.metadata.get("title", None)
                        or str(document.id),
                    )
                )
                continue

            now = datetime.now()
            document_infos.append(
                DocumentInfo(
                    document_id=document.id,
                    version=version,
                    size_in_bytes=len(document.data),
                    metadata=document.metadata.copy(),
                    title=document.metadata.get("title", str(document.id)),
                    user_id=document.metadata.get("user_id", None),
                    created_at=now,
                    updated_at=now,
                    status="processing",  # Set initial status to `processing`
                )
            )

            processed_documents[document.id] = document.metadata.get(
                "title", str(document.id)
            )

        if duplicate_documents:
            duplicate_details = [
                f"{doc_id}: {', '.join(titles)}"
                for doc_id, titles in duplicate_documents.items()
            ]
            warning_message = f"Duplicate documents detected: {'; '.join(duplicate_details)}. These duplicates were skipped."
            raise R2RException(status_code=418, message=warning_message)

        if skipped_documents and len(skipped_documents) == len(documents):
            logger.error("All provided documents already exist.")
            raise R2RException(
                status_code=409,
                message="All provided documents already exist. Use the `update_documents` endpoint instead to update these documents.",
            )

        # Insert pending document infos
        self.providers.vector_db.upsert_documents_overview(document_infos)
        skipped_ids = [skipped[0] for skipped in skipped_documents]
        ingestion_results = await self.pipelines.ingestion_pipeline.run(
            input=to_async_generator(
                [doc for doc in documents if doc.id not in skipped_ids]
            ),
            versions=[info.version for info in document_infos],
            run_manager=self.run_manager,
            *args,
            **kwargs,
        )

        return await self._process_ingestion_results(
            ingestion_results,
            document_infos,
            skipped_documents,
            processed_documents,
        )

    @telemetry_event("IngestFiles")
    async def ingest_files(
        self,
        files: list[UploadFile],
        metadatas: Optional[list[dict]] = None,
        document_ids: Optional[list[uuid.UUID]] = None,
        versions: Optional[list[str]] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """Convert uploaded files to Documents and ingest them.

        Raises:
            R2RException: 400 on empty input or missing filename, 413 when a
                file exceeds the configured size limit.
        """
        if not files:
            raise R2RException(
                status_code=400, message="No files provided for ingestion."
            )

        try:
            documents = []
            for iteration, file in enumerate(files):
                logger.info(f"Processing file: {file.filename}")
                if (
                    file.size
                    > self.config.app.get("max_file_size_in_mb", 32)
                    * MB_CONVERSION_FACTOR
                ):
                    raise R2RException(
                        status_code=413,
                        message=f"File size exceeds maximum allowed size: {file.filename}",
                    )
                if not file.filename:
                    raise R2RException(
                        status_code=400, message="File name not provided."
                    )

                document_metadata = metadatas[iteration] if metadatas else {}
                document_id = (
                    document_ids[iteration]
                    if document_ids
                    else generate_id_from_label(file.filename.split("/")[-1])
                )

                document = self._file_to_document(
                    file, document_id, document_metadata
                )
                documents.append(document)

            return await self.ingest_documents(
                documents, versions, *args, **kwargs
            )

        finally:
            # Always release file handles, even when validation fails early.
            for file in files:
                file.file.close()

    @telemetry_event("UpdateFiles")
    async def update_files(
        self,
        files: list[UploadFile],
        document_ids: list[uuid.UUID],
        metadatas: Optional[list[dict]] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """Re-ingest files at an incremented version, then delete old versions.

        Raises:
            R2RException: 400 on length mismatch, 404 when a document is missing.
        """
        if not files:
            raise R2RException(
                status_code=400, message="No files provided for update."
            )

        try:
            if len(document_ids) != len(files):
                raise R2RException(
                    status_code=400,
                    message="Number of ids does not match number of files.",
                )

            documents_overview = await self._documents_overview(
                document_ids=document_ids
            )
            if len(documents_overview) != len(files):
                raise R2RException(
                    status_code=404,
                    message="One or more documents was not found.",
                )

            documents = []
            new_versions = []

            for it, (file, doc_id, doc_info) in enumerate(
                zip(files, document_ids, documents_overview)
            ):
                if not doc_info:
                    raise R2RException(
                        status_code=404,
                        message=f"Document with id {doc_id} not found.",
                    )

                new_version = increment_version(doc_info.version)
                new_versions.append(new_version)

                # Fall back to the previous metadata when none is supplied.
                updated_metadata = (
                    metadatas[it] if metadatas else doc_info.metadata
                )
                updated_metadata["title"] = (
                    updated_metadata.get("title", None)
                    or file.filename.split("/")[-1]
                )

                document = self._file_to_document(
                    file, doc_id, updated_metadata
                )
                documents.append(document)

            ingestion_results = await self.ingest_documents(
                documents, versions=new_versions, *args, **kwargs
            )

            # Remove the superseded versions only after ingestion succeeds.
            for doc_id, old_version in zip(
                document_ids,
                [doc_info.version for doc_info in documents_overview],
            ):
                await self._delete(
                    ["document_id", "version"], [str(doc_id), old_version]
                )
                self.providers.vector_db.delete_from_documents_overview(
                    doc_id, old_version
                )

            return ingestion_results

        finally:
            for file in files:
                file.file.close()

    async def _process_ingestion_results(
        self,
        ingestion_results: dict,
        document_infos: list[DocumentInfo],
        skipped_documents: list[tuple[str, str]],
        processed_documents: dict,
    ):
        """Classify pipeline output per document, persist final statuses, and
        build the human-readable result summary."""
        skipped_ids = [ele[0] for ele in skipped_documents]
        failed_ids = []
        successful_ids = []

        # Map document_id -> pipeline outcome (error object or success marker).
        pipeline_output = {}
        if ingestion_results["embedding_pipeline_output"]:
            pipeline_output = dict(
                ingestion_results["embedding_pipeline_output"]
            )
        for doc_id, error in pipeline_output.items():
            if isinstance(error, R2RDocumentProcessingError):
                logger.error(
                    f"Error processing document with ID {error.document_id}: {error.message}"
                )
                failed_ids.append(error.document_id)
            elif isinstance(error, Exception):
                logger.error(f"Error processing document: {error}")
                failed_ids.append(doc_id)
            else:
                successful_ids.append(doc_id)

        documents_to_upsert = []
        for document_info in document_infos:
            if document_info.document_id not in skipped_ids:
                if document_info.document_id in failed_ids:
                    document_info.status = "failure"
                elif document_info.document_id in successful_ids:
                    document_info.status = "success"
                documents_to_upsert.append(document_info)

        if documents_to_upsert:
            self.providers.vector_db.upsert_documents_overview(
                documents_to_upsert
            )

        results = {
            "processed_documents": [
                f"Document '{processed_documents[document_id]}' processed successfully."
                for document_id in successful_ids
            ],
            "failed_documents": [
                f"Document '{processed_documents[document_id]}': {pipeline_output[document_id]}"
                for document_id in failed_ids
            ],
            # BUG FIX: the previous f-string had no placeholder and ignored the
            # unpacked title, reporting every skipped document as '(unknown)'.
            "skipped_documents": [
                f"Document '{title}' skipped since it already exists."
                for _, title in skipped_documents
            ],
        }

        # TODO - Clean up logging for document parse results
        run_ids = list(self.run_manager.run_info.keys())
        if run_ids:
            run_id = run_ids[0]
            for key in results:
                if key in ["processed_documents", "failed_documents"]:
                    for value in results[key]:
                        await self.logging_connection.log(
                            log_id=run_id,
                            key="document_parse_result",
                            value=value,
                        )
        return results

    @staticmethod
    def parse_ingest_files_form_data(
        metadatas: Optional[str] = Form(None),
        document_ids: Optional[str] = Form(None),
        versions: Optional[str] = Form(None),
    ) -> R2RIngestFilesRequest:
        """Parse multipart form fields into an R2RIngestFilesRequest.

        Raises:
            R2RException: 400 on malformed JSON or invalid field shapes.
        """
        try:
            parsed_metadatas = (
                json.loads(metadatas)
                if metadatas and metadatas != "null"
                else None
            )
            if parsed_metadatas is not None and not isinstance(
                parsed_metadatas, list
            ):
                raise ValueError("metadatas must be a list of dictionaries")

            parsed_document_ids = (
                json.loads(document_ids)
                if document_ids and document_ids != "null"
                else None
            )
            if parsed_document_ids is not None:
                parsed_document_ids = [
                    uuid.UUID(doc_id) for doc_id in parsed_document_ids
                ]

            parsed_versions = (
                json.loads(versions)
                if versions and versions != "null"
                else None
            )

            request_data = {
                "metadatas": parsed_metadatas,
                "document_ids": parsed_document_ids,
                "versions": parsed_versions,
            }
            return R2RIngestFilesRequest(**request_data)
        except json.JSONDecodeError as e:
            raise R2RException(
                status_code=400, message=f"Invalid JSON in form data: {e}"
            )
        except ValueError as e:
            raise R2RException(status_code=400, message=str(e))
        except Exception as e:
            raise R2RException(
                status_code=400, message=f"Error processing form data: {e}"
            )

    @staticmethod
    def parse_update_files_form_data(
        metadatas: Optional[str] = Form(None),
        document_ids: str = Form(...),
    ) -> R2RUpdateFilesRequest:
        """Parse multipart form fields into an R2RUpdateFilesRequest.

        Raises:
            R2RException: 400 on malformed JSON or invalid field shapes.
        """
        try:
            parsed_metadatas = (
                json.loads(metadatas)
                if metadatas and metadatas != "null"
                else None
            )
            if parsed_metadatas is not None and not isinstance(
                parsed_metadatas, list
            ):
                raise ValueError("metadatas must be a list of dictionaries")

            if not document_ids or document_ids == "null":
                raise ValueError("document_ids is required and cannot be null")

            parsed_document_ids = json.loads(document_ids)
            if not isinstance(parsed_document_ids, list):
                raise ValueError("document_ids must be a list")
            parsed_document_ids = [
                uuid.UUID(doc_id) for doc_id in parsed_document_ids
            ]

            request_data = {
                "metadatas": parsed_metadatas,
                "document_ids": parsed_document_ids,
            }
            return R2RUpdateFilesRequest(**request_data)
        except json.JSONDecodeError as e:
            raise R2RException(
                status_code=400, message=f"Invalid JSON in form data: {e}"
            )
        except ValueError as e:
            raise R2RException(status_code=400, message=str(e))
        except Exception as e:
            raise R2RException(
                status_code=400, message=f"Error processing form data: {e}"
            )

    # TODO - Move to mgmt service for document info, delete, post orchestration buildout
    async def _documents_overview(
        self,
        document_ids: Optional[list[uuid.UUID]] = None,
        user_ids: Optional[list[uuid.UUID]] = None,
        *args: Any,
        **kwargs: Any,
    ):
        """Fetch document overview records, optionally filtered by id lists."""
        return self.providers.vector_db.get_documents_overview(
            filter_document_ids=(
                [str(ele) for ele in document_ids] if document_ids else None
            ),
            filter_user_ids=(
                [str(ele) for ele in user_ids] if user_ids else None
            ),
        )

    async def _delete(
        self, keys: list[str], values: list[Union[bool, int, str]]
    ):
        """Delete vector entries whose metadata matches the key/value pairs.

        Raises:
            R2RException: 404 when nothing matched.
        """
        logger.info(
            f"Deleting documents which match on these keys and values: ({keys}, {values})"
        )

        ids = self.providers.vector_db.delete_by_metadata(keys, values)
        if not ids:
            raise R2RException(
                status_code=404, message="No entries found for deletion."
            )
        return "Entries deleted successfully."
diff --git a/R2R/r2r/main/services/management_service.py b/R2R/r2r/main/services/management_service.py new file mode 100755 index 00000000..00f1f56e --- /dev/null +++ b/R2R/r2r/main/services/management_service.py @@ -0,0 +1,385 @@ +import logging +import uuid +from collections import defaultdict +from typing import Any, Dict, List, Optional, Tuple, Union + +from r2r.base import ( + AnalysisTypes, + FilterCriteria, + KVLoggingSingleton, + LogProcessor, + R2RException, + RunManager, +) +from r2r.telemetry.telemetry_decorator import telemetry_event + +from ..abstractions import R2RPipelines, R2RProviders +from ..assembly.config import R2RConfig +from .base import Service + +logger = logging.getLogger(__name__) + + +class ManagementService(Service): + def __init__( + self, + config: R2RConfig, + providers: R2RProviders, + pipelines: R2RPipelines, + run_manager: RunManager, + logging_connection: KVLoggingSingleton, + ): + super().__init__( + config, providers, pipelines, run_manager, logging_connection + ) + + @telemetry_event("UpdatePrompt") + async def update_prompt( + self, + name: str, + template: Optional[str] = None, + input_types: Optional[dict[str, str]] = {}, + *args, + **kwargs, + ): + self.providers.prompt.update_prompt(name, template, input_types) + return f"Prompt '{name}' added successfully." + + @telemetry_event("Logs") + async def alogs( + self, + log_type_filter: Optional[str] = None, + max_runs_requested: int = 100, + *args: Any, + **kwargs: Any, + ): + if self.logging_connection is None: + raise R2RException( + status_code=404, message="Logging provider not found." 
+ ) + if ( + self.config.app.get("max_logs_per_request", 100) + > max_runs_requested + ): + raise R2RException( + status_code=400, + message="Max runs requested exceeds the limit.", + ) + + run_info = await self.logging_connection.get_run_info( + limit=max_runs_requested, + log_type_filter=log_type_filter, + ) + run_ids = [run.run_id for run in run_info] + if len(run_ids) == 0: + return [] + logs = await self.logging_connection.get_logs(run_ids) + # Aggregate logs by run_id and include run_type + aggregated_logs = [] + + for run in run_info: + run_logs = [log for log in logs if log["log_id"] == run.run_id] + entries = [ + {"key": log["key"], "value": log["value"]} for log in run_logs + ][ + ::-1 + ] # Reverse order so that earliest logged values appear first. + aggregated_logs.append( + { + "run_id": run.run_id, + "run_type": run.log_type, + "entries": entries, + } + ) + + return aggregated_logs + + @telemetry_event("Analytics") + async def aanalytics( + self, + filter_criteria: FilterCriteria, + analysis_types: AnalysisTypes, + *args, + **kwargs, + ): + run_info = await self.logging_connection.get_run_info(limit=100) + run_ids = [info.run_id for info in run_info] + + if not run_ids: + return { + "analytics_data": "No logs found.", + "filtered_logs": {}, + } + logs = await self.logging_connection.get_logs(run_ids=run_ids) + + filters = {} + if filter_criteria.filters: + for key, value in filter_criteria.filters.items(): + filters[key] = lambda log, value=value: ( + any( + entry.get("key") == value + for entry in log.get("entries", []) + ) + if "entries" in log + else log.get("key") == value + ) + + log_processor = LogProcessor(filters) + for log in logs: + if "entries" in log and isinstance(log["entries"], list): + log_processor.process_log(log) + elif "key" in log: + log_processor.process_log(log) + else: + logger.warning( + f"Skipping log due to missing or malformed 'entries': {log}" + ) + + filtered_logs = dict(log_processor.populations.items()) + results = 
{"filtered_logs": filtered_logs} + + if analysis_types and analysis_types.analysis_types: + for ( + filter_key, + analysis_config, + ) in analysis_types.analysis_types.items(): + if filter_key in filtered_logs: + analysis_type = analysis_config[0] + if analysis_type == "bar_chart": + extract_key = analysis_config[1] + results[filter_key] = ( + AnalysisTypes.generate_bar_chart_data( + filtered_logs[filter_key], extract_key + ) + ) + elif analysis_type == "basic_statistics": + extract_key = analysis_config[1] + results[filter_key] = ( + AnalysisTypes.calculate_basic_statistics( + filtered_logs[filter_key], extract_key + ) + ) + elif analysis_type == "percentile": + extract_key = analysis_config[1] + percentile = int(analysis_config[2]) + results[filter_key] = ( + AnalysisTypes.calculate_percentile( + filtered_logs[filter_key], + extract_key, + percentile, + ) + ) + else: + logger.warning( + f"Unknown analysis type for filter key '{filter_key}': {analysis_type}" + ) + + return results + + @telemetry_event("AppSettings") + async def aapp_settings(self, *args: Any, **kwargs: Any): + prompts = self.providers.prompt.get_all_prompts() + return { + "config": self.config.to_json(), + "prompts": { + name: prompt.dict() for name, prompt in prompts.items() + }, + } + + @telemetry_event("UsersOverview") + async def ausers_overview( + self, + user_ids: Optional[list[uuid.UUID]] = None, + *args, + **kwargs, + ): + return self.providers.vector_db.get_users_overview( + [str(ele) for ele in user_ids] if user_ids else None + ) + + @telemetry_event("Delete") + async def delete( + self, + keys: list[str], + values: list[Union[bool, int, str]], + *args, + **kwargs, + ): + metadata = ", ".join( + f"{key}={value}" for key, value in zip(keys, values) + ) + values = [str(value) for value in values] + logger.info(f"Deleting entries with metadata: {metadata}") + ids = self.providers.vector_db.delete_by_metadata(keys, values) + if not ids: + raise R2RException( + status_code=404, message="No 
entries found for deletion." + ) + for id in ids: + self.providers.vector_db.delete_from_documents_overview(id) + return f"Documents {ids} deleted successfully." + + @telemetry_event("DocumentsOverview") + async def adocuments_overview( + self, + document_ids: Optional[list[uuid.UUID]] = None, + user_ids: Optional[list[uuid.UUID]] = None, + *args: Any, + **kwargs: Any, + ): + return self.providers.vector_db.get_documents_overview( + filter_document_ids=( + [str(ele) for ele in document_ids] if document_ids else None + ), + filter_user_ids=( + [str(ele) for ele in user_ids] if user_ids else None + ), + ) + + @telemetry_event("DocumentChunks") + async def document_chunks( + self, + document_id: uuid.UUID, + *args, + **kwargs, + ): + return self.providers.vector_db.get_document_chunks(str(document_id)) + + @telemetry_event("UsersOverview") + async def users_overview( + self, + user_ids: Optional[list[uuid.UUID]], + *args, + **kwargs, + ): + return self.providers.vector_db.get_users_overview( + [str(ele) for ele in user_ids] + ) + + @telemetry_event("InspectKnowledgeGraph") + async def inspect_knowledge_graph( + self, limit=10000, *args: Any, **kwargs: Any + ): + if self.providers.kg is None: + raise R2RException( + status_code=404, message="Knowledge Graph provider not found." 
+ ) + + rel_query = f""" + MATCH (n1)-[r]->(n2) + RETURN n1.id AS subject, type(r) AS relation, n2.id AS object + LIMIT {limit} + """ + + try: + with self.providers.kg.client.session( + database=self.providers.kg._database + ) as session: + results = session.run(rel_query) + relationships = [ + (record["subject"], record["relation"], record["object"]) + for record in results + ] + + # Create graph representation and group relationships + graph, grouped_relationships = self.process_relationships( + relationships + ) + + # Generate output + output = self.generate_output(grouped_relationships, graph) + + return "\n".join(output) + + except Exception as e: + logger.error(f"Error printing relationships: {str(e)}") + raise R2RException( + status_code=500, + message=f"An error occurred while fetching relationships: {str(e)}", + ) + + def process_relationships( + self, relationships: List[Tuple[str, str, str]] + ) -> Tuple[Dict[str, List[str]], Dict[str, Dict[str, List[str]]]]: + graph = defaultdict(list) + grouped = defaultdict(lambda: defaultdict(list)) + for subject, relation, obj in relationships: + graph[subject].append(obj) + grouped[subject][relation].append(obj) + if obj not in graph: + graph[obj] = [] + return dict(graph), dict(grouped) + + def generate_output( + self, + grouped_relationships: Dict[str, Dict[str, List[str]]], + graph: Dict[str, List[str]], + ) -> List[str]: + output = [] + + # Print grouped relationships + for subject, relations in grouped_relationships.items(): + output.append(f"\n== {subject} ==") + for relation, objects in relations.items(): + output.append(f" {relation}:") + for obj in objects: + output.append(f" - {obj}") + + # Print basic graph statistics + output.append("\n== Graph Statistics ==") + output.append(f"Number of nodes: {len(graph)}") + output.append( + f"Number of edges: {sum(len(neighbors) for neighbors in graph.values())}" + ) + output.append( + f"Number of connected components: {self.count_connected_components(graph)}" + ) + 
+ # Find central nodes + central_nodes = self.get_central_nodes(graph) + output.append("\n== Most Central Nodes ==") + for node, centrality in central_nodes: + output.append(f" {node}: {centrality:.4f}") + + return output + + def count_connected_components(self, graph: Dict[str, List[str]]) -> int: + visited = set() + components = 0 + + def dfs(node): + visited.add(node) + for neighbor in graph[node]: + if neighbor not in visited: + dfs(neighbor) + + for node in graph: + if node not in visited: + dfs(node) + components += 1 + + return components + + def get_central_nodes( + self, graph: Dict[str, List[str]] + ) -> List[Tuple[str, float]]: + degree = {node: len(neighbors) for node, neighbors in graph.items()} + total_nodes = len(graph) + centrality = { + node: deg / (total_nodes - 1) for node, deg in degree.items() + } + return sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5] + + @telemetry_event("AppSettings") + async def app_settings( + self, + *args, + **kwargs, + ): + prompts = self.providers.prompt.get_all_prompts() + return { + "config": self.config.to_json(), + "prompts": { + name: prompt.dict() for name, prompt in prompts.items() + }, + } diff --git a/R2R/r2r/main/services/retrieval_service.py b/R2R/r2r/main/services/retrieval_service.py new file mode 100755 index 00000000..c4f6aff5 --- /dev/null +++ b/R2R/r2r/main/services/retrieval_service.py @@ -0,0 +1,207 @@ +import logging +import time +import uuid +from typing import Optional + +from r2r.base import ( + GenerationConfig, + KGSearchSettings, + KVLoggingSingleton, + R2RException, + RunManager, + VectorSearchSettings, + manage_run, + to_async_generator, +) +from r2r.pipes import EvalPipe +from r2r.telemetry.telemetry_decorator import telemetry_event + +from ..abstractions import R2RPipelines, R2RProviders +from ..assembly.config import R2RConfig +from .base import Service + +logger = logging.getLogger(__name__) + + +class RetrievalService(Service): + def __init__( + self, + config: 
R2RConfig, + providers: R2RProviders, + pipelines: R2RPipelines, + run_manager: RunManager, + logging_connection: KVLoggingSingleton, + ): + super().__init__( + config, providers, pipelines, run_manager, logging_connection + ) + + @telemetry_event("Search") + async def search( + self, + query: str, + vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + kg_search_settings: KGSearchSettings = KGSearchSettings(), + *args, + **kwargs, + ): + async with manage_run(self.run_manager, "search_app") as run_id: + t0 = time.time() + + if ( + kg_search_settings.use_kg_search + and self.config.kg.provider is None + ): + raise R2RException( + status_code=400, + message="Knowledge Graph search is not enabled in the configuration.", + ) + + if ( + vector_search_settings.use_vector_search + and self.config.vector_database.provider is None + ): + raise R2RException( + status_code=400, + message="Vector search is not enabled in the configuration.", + ) + + # TODO - Remove these transforms once we have a better way to handle this + for filter, value in vector_search_settings.search_filters.items(): + if isinstance(value, uuid.UUID): + vector_search_settings.search_filters[filter] = str(value) + + results = await self.pipelines.search_pipeline.run( + input=to_async_generator([query]), + vector_search_settings=vector_search_settings, + kg_search_settings=kg_search_settings, + run_manager=self.run_manager, + *args, + **kwargs, + ) + + t1 = time.time() + latency = f"{t1 - t0:.2f}" + + await self.logging_connection.log( + log_id=run_id, + key="search_latency", + value=latency, + is_info_log=False, + ) + + return results.dict() + + @telemetry_event("RAG") + async def rag( + self, + query: str, + rag_generation_config: GenerationConfig, + vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + kg_search_settings: KGSearchSettings = KGSearchSettings(), + *args, + **kwargs, + ): + async with manage_run(self.run_manager, "rag_app") as run_id: + try: + t0 = 
time.time() + + # TODO - Remove these transforms once we have a better way to handle this + for ( + filter, + value, + ) in vector_search_settings.search_filters.items(): + if isinstance(value, uuid.UUID): + vector_search_settings.search_filters[filter] = str( + value + ) + + if rag_generation_config.stream: + t1 = time.time() + latency = f"{t1 - t0:.2f}" + + await self.logging_connection.log( + log_id=run_id, + key="rag_generation_latency", + value=latency, + is_info_log=False, + ) + + async def stream_response(): + async with manage_run(self.run_manager, "arag"): + async for ( + chunk + ) in await self.pipelines.streaming_rag_pipeline.run( + input=to_async_generator([query]), + run_manager=self.run_manager, + vector_search_settings=vector_search_settings, + kg_search_settings=kg_search_settings, + rag_generation_config=rag_generation_config, + ): + yield chunk + + return stream_response() + + results = await self.pipelines.rag_pipeline.run( + input=to_async_generator([query]), + run_manager=self.run_manager, + vector_search_settings=vector_search_settings, + kg_search_settings=kg_search_settings, + rag_generation_config=rag_generation_config, + *args, + **kwargs, + ) + + t1 = time.time() + latency = f"{t1 - t0:.2f}" + + await self.logging_connection.log( + log_id=run_id, + key="rag_generation_latency", + value=latency, + is_info_log=False, + ) + + if len(results) == 0: + raise R2RException( + status_code=404, message="No results found" + ) + if len(results) > 1: + logger.warning( + f"Multiple results found for query: {query}" + ) + # unpack the first result + return results[0] + + except Exception as e: + logger.error(f"Pipeline error: {str(e)}") + if "NoneType" in str(e): + raise R2RException( + status_code=502, + message="Ollama server not reachable or returned an invalid response", + ) + raise R2RException( + status_code=500, message="Internal Server Error" + ) + + @telemetry_event("Evaluate") + async def evaluate( + self, + query: str, + context: str, + 
completion: str, + eval_generation_config: Optional[GenerationConfig], + *args, + **kwargs, + ): + eval_payload = EvalPipe.EvalPayload( + query=query, + context=context, + completion=completion, + ) + result = await self.eval_pipeline.run( + input=to_async_generator([eval_payload]), + run_manager=self.run_manager, + eval_generation_config=eval_generation_config, + ) + return result diff --git a/R2R/r2r/parsers/__init__.py b/R2R/r2r/parsers/__init__.py new file mode 100755 index 00000000..bd833a95 --- /dev/null +++ b/R2R/r2r/parsers/__init__.py @@ -0,0 +1,27 @@ +from .media.audio_parser import AudioParser +from .media.docx_parser import DOCXParser +from .media.img_parser import ImageParser +from .media.movie_parser import MovieParser +from .media.pdf_parser import PDFParser +from .media.ppt_parser import PPTParser +from .structured.csv_parser import CSVParser +from .structured.json_parser import JSONParser +from .structured.xlsx_parser import XLSXParser +from .text.html_parser import HTMLParser +from .text.md_parser import MDParser +from .text.text_parser import TextParser + +__all__ = [ + "AudioParser", + "DOCXParser", + "ImageParser", + "MovieParser", + "PDFParser", + "PPTParser", + "MDParser", + "HTMLParser", + "TextParser", + "CSVParser", + "JSONParser", + "XLSXParser", +] diff --git a/R2R/r2r/parsers/media/__init__.py b/R2R/r2r/parsers/media/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/R2R/r2r/parsers/media/__init__.py diff --git a/R2R/r2r/parsers/media/audio_parser.py b/R2R/r2r/parsers/media/audio_parser.py new file mode 100755 index 00000000..8a7735e4 --- /dev/null +++ b/R2R/r2r/parsers/media/audio_parser.py @@ -0,0 +1,32 @@ +import os +from typing import AsyncGenerator + +from r2r.base.parsers.base_parser import AsyncParser +from r2r.parsers.media.openai_helpers import process_audio_with_openai + + +class AudioParser(AsyncParser[bytes]): + """A parser for audio data.""" + + def __init__( + self, api_base: str = 
"https://api.openai.com/v1/audio/transcriptions" + ): + self.api_base = api_base + self.openai_api_key = os.environ.get("OPENAI_API_KEY") + if not self.openai_api_key: + raise ValueError( + "Error, environment variable `OPENAI_API_KEY` is required to run `AudioParser`." + ) + + async def ingest(self, data: bytes) -> AsyncGenerator[str, None]: + """Ingest audio data and yield a transcription.""" + temp_audio_path = "temp_audio.wav" + with open(temp_audio_path, "wb") as f: + f.write(data) + try: + transcription_text = process_audio_with_openai( + open(temp_audio_path, "rb"), self.openai_api_key + ) + yield transcription_text + finally: + os.remove(temp_audio_path) diff --git a/R2R/r2r/parsers/media/docx_parser.py b/R2R/r2r/parsers/media/docx_parser.py new file mode 100755 index 00000000..9edced81 --- /dev/null +++ b/R2R/r2r/parsers/media/docx_parser.py @@ -0,0 +1,28 @@ +from io import BytesIO +from typing import AsyncGenerator + +from r2r.base.abstractions.document import DataType +from r2r.base.parsers.base_parser import AsyncParser + + +class DOCXParser(AsyncParser[DataType]): + """A parser for DOCX data.""" + + def __init__(self): + try: + from docx import Document + + self.Document = Document + except ImportError: + raise ValueError( + "Error, `python-docx` is required to run `DOCXParser`. Please install it using `pip install python-docx`." 
+ ) + + async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + """Ingest DOCX data and yield text from each paragraph.""" + if isinstance(data, str): + raise ValueError("DOCX data must be in bytes format.") + + doc = self.Document(BytesIO(data)) + for paragraph in doc.paragraphs: + yield paragraph.text diff --git a/R2R/r2r/parsers/media/img_parser.py b/R2R/r2r/parsers/media/img_parser.py new file mode 100755 index 00000000..7c40656a --- /dev/null +++ b/R2R/r2r/parsers/media/img_parser.py @@ -0,0 +1,40 @@ +import os +from typing import AsyncGenerator + +from r2r.base.abstractions.document import DataType +from r2r.base.parsers.base_parser import AsyncParser +from r2r.parsers.media.openai_helpers import process_frame_with_openai + + +class ImageParser(AsyncParser[DataType]): + """A parser for image data.""" + + def __init__( + self, + model: str = "gpt-4o", + max_tokens: int = 2_048, + api_base: str = "https://api.openai.com/v1/chat/completions", + ): + self.model = model + self.max_tokens = max_tokens + self.openai_api_key = os.environ.get("OPENAI_API_KEY") + if not self.openai_api_key: + raise ValueError( + "Error, environment variable `OPENAI_API_KEY` is required to run `ImageParser`." 
+ ) + self.api_base = api_base + + async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + """Ingest image data and yield a description.""" + if isinstance(data, bytes): + import base64 + + data = base64.b64encode(data).decode("utf-8") + + yield process_frame_with_openai( + data, + self.openai_api_key, + self.model, + self.max_tokens, + self.api_base, + ) diff --git a/R2R/r2r/parsers/media/movie_parser.py b/R2R/r2r/parsers/media/movie_parser.py new file mode 100755 index 00000000..c00b80d9 --- /dev/null +++ b/R2R/r2r/parsers/media/movie_parser.py @@ -0,0 +1,108 @@ +import base64 +import os +from typing import AsyncGenerator + +from r2r.base.parsers.base_parser import AsyncParser +from r2r.parsers.media.openai_helpers import ( + process_audio_with_openai, + process_frame_with_openai, +) + + +class MovieParser(AsyncParser): + """A parser for movie data.""" + + def __init__( + self, + model: str = "gpt-4o", + max_tokens: int = 2048, + seconds_per_frame: int = 2, + max_frames: int = 10, + ): + try: + import cv2 + + self.cv2 = cv2 + except ImportError: + raise ValueError( + "Error, `opencv-python` is required to run `MovieParser`. Please install it using `pip install opencv-python`." + ) + try: + import moviepy.editor as mp + + self.mp = mp + except ImportError: + raise ValueError( + "Error, `moviepy` is required to run `MovieParser`. Please install it using `pip install moviepy`." + ) + + self.model = model + self.max_tokens = max_tokens + self.seconds_per_frame = seconds_per_frame + self.max_frames = max_frames + self.openai_api_key = os.environ.get("OPENAI_API_KEY") + if not self.openai_api_key: + raise ValueError( + "Error, environment variable `OPENAI_API_KEY` is required to run `MovieParser`." 
+ ) + + async def ingest(self, data: bytes) -> AsyncGenerator[str, None]: + """Ingest movie data and yield a description.""" + temp_video_path = "temp_movie.mp4" + with open(temp_video_path, "wb") as f: + f.write(data) + try: + raw_frames, audio_file = self.process_video(temp_video_path) + for frame in raw_frames: + frame_text = process_frame_with_openai( + frame, self.openai_api_key + ) + yield frame_text + + if audio_file: + transcription_text = process_audio_with_openai( + audio_file, self.openai_api_key + ) + yield transcription_text + finally: + os.remove(temp_video_path) + + def process_video(self, video_path): + base64Frames = [] + base_video_path, _ = os.path.splitext(video_path) + + video = self.cv2.VideoCapture(video_path) + total_frames = int(video.get(self.cv2.CAP_PROP_FRAME_COUNT)) + fps = video.get(self.cv2.CAP_PROP_FPS) + frames_to_skip = int(fps * self.seconds_per_frame) + curr_frame = 0 + + # Calculate frames to skip based on max_frames if it is set + if self.max_frames and self.max_frames < total_frames / frames_to_skip: + frames_to_skip = max(total_frames // self.max_frames, 1) + + frame_count = 0 + while curr_frame < total_frames - 1 and ( + not self.max_frames or frame_count < self.max_frames + ): + video.set(self.cv2.CAP_PROP_POS_FRAMES, curr_frame) + success, frame = video.read() + if not success: + break + _, buffer = self.cv2.imencode(".jpg", frame) + base64Frames.append(base64.b64encode(buffer).decode("utf-8")) + curr_frame += frames_to_skip + frame_count += 1 + video.release() + + audio_path = f"{base_video_path}.wav" + audio_file = None + with self.mp.VideoFileClip(video_path) as clip: + if clip.audio is not None: + clip.audio.write_audiofile( + audio_path, codec="pcm_s16le", fps=16000 + ) + audio_file = open(audio_path, "rb") + os.remove(audio_path) + + return base64Frames, audio_file diff --git a/R2R/r2r/parsers/media/openai_helpers.py b/R2R/r2r/parsers/media/openai_helpers.py new file mode 100755 index 00000000..707dadda --- /dev/null 
+++ b/R2R/r2r/parsers/media/openai_helpers.py @@ -0,0 +1,58 @@ +"""Implementations of parsers for different data types.""" + +import requests + + +def process_frame_with_openai( + data: bytes, + api_key: str, + model: str = "gpt-4o", + max_tokens: int = 2_048, + api_base: str = "https://api.openai.com/v1/chat/completions", +) -> str: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + } + + payload = { + "model": model, + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "First, provide a title for the image, then explain everything that you see. Be very thorough in your analysis as a user will need to understand the image without seeing it. If it is possible to transcribe the image to text directly, then do so. The more detail you provide, the better the user will understand the image.", + }, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{data}"}, + }, + ], + } + ], + "max_tokens": max_tokens, + } + + response = requests.post(api_base, headers=headers, json=payload) + response_json = response.json() + return response_json["choices"][0]["message"]["content"] + + +def process_audio_with_openai( + audio_file, + api_key: str, + audio_api_base: str = "https://api.openai.com/v1/audio/transcriptions", +) -> str: + headers = {"Authorization": f"Bearer {api_key}"} + + transcription_response = requests.post( + audio_api_base, + headers=headers, + files={"file": audio_file}, + data={"model": "whisper-1"}, + ) + transcription = transcription_response.json() + + return transcription["text"] diff --git a/R2R/r2r/parsers/media/pdf_parser.py b/R2R/r2r/parsers/media/pdf_parser.py new file mode 100755 index 00000000..b60a9b33 --- /dev/null +++ b/R2R/r2r/parsers/media/pdf_parser.py @@ -0,0 +1,34 @@ +import string +from io import BytesIO +from typing import AsyncGenerator + +from r2r.base.abstractions.document import DataType +from r2r.base.parsers.base_parser import AsyncParser + 
+ +class PDFParser(AsyncParser[DataType]): + """A parser for PDF data.""" + + def __init__(self): + try: + from pypdf import PdfReader + + self.PdfReader = PdfReader + except ImportError: + raise ValueError( + "Error, `pypdf` is required to run `PyPDFParser`. Please install it using `pip install pypdf`." + ) + + async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + """Ingest PDF data and yield text from each page.""" + if isinstance(data, str): + raise ValueError("PDF data must be in bytes format.") + + pdf = self.PdfReader(BytesIO(data)) + for page in pdf.pages: + page_text = page.extract_text() + if page_text is not None: + page_text = "".join( + filter(lambda x: x in string.printable, page_text) + ) + yield page_text diff --git a/R2R/r2r/parsers/media/ppt_parser.py b/R2R/r2r/parsers/media/ppt_parser.py new file mode 100755 index 00000000..8f192840 --- /dev/null +++ b/R2R/r2r/parsers/media/ppt_parser.py @@ -0,0 +1,30 @@ +from io import BytesIO +from typing import AsyncGenerator + +from r2r.base.abstractions.document import DataType +from r2r.base.parsers.base_parser import AsyncParser + + +class PPTParser(AsyncParser[DataType]): + """A parser for PPT data.""" + + def __init__(self): + try: + from pptx import Presentation + + self.Presentation = Presentation + except ImportError: + raise ValueError( + "Error, `python-pptx` is required to run `PPTParser`. Please install it using `pip install python-pptx`." 
class JSONParser(AsyncParser[DataType]):
    """A parser for JSON data."""

    async def ingest(self, data: DataType) -> AsyncGenerator[str, None]:
        """Ingest JSON data and yield a formatted text representation.

        Args:
            data: A JSON document as ``str`` or UTF-8 ``bytes``.

        Yields:
            A single indented, human-readable text rendering of the
            parsed document with null-valued keys removed.
        """
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        yield self._parse_json(json.loads(data))

    def _parse_json(self, data: dict) -> str:
        """Render parsed JSON as indented ``key: value`` text."""

        def remove_objects_with_null(obj):
            # Fixed: the original only recursed into dicts, so null-valued
            # keys nested inside lists (e.g. [{"a": null}]) were never
            # removed. Lists are now traversed as well.
            if isinstance(obj, list):
                return [remove_objects_with_null(item) for item in obj]
            if not isinstance(obj, dict):
                return obj
            result = obj.copy()
            for key, value in obj.items():
                if isinstance(value, (dict, list)):
                    result[key] = remove_objects_with_null(value)
                elif value is None:
                    del result[key]
            return result

        def format_json_as_text(obj, indent=0):
            lines = []
            indent_str = " " * indent

            if isinstance(obj, dict):
                for key, value in obj.items():
                    if isinstance(value, (dict, list)):
                        nested = format_json_as_text(value, indent + 2)
                        lines.append(f"{indent_str}{key}:\n{nested}")
                    else:
                        lines.append(f"{indent_str}{key}: {value}")
            elif isinstance(obj, list):
                for item in obj:
                    nested = format_json_as_text(item, indent + 2)
                    lines.append(f"{nested}")
            else:
                # Scalars short-circuit: return directly, no joining needed.
                return f"{indent_str}{obj}"

            return "\n".join(lines)

        return format_json_as_text(remove_objects_with_null(data))
class HTMLParser(AsyncParser[DataType]):
    """A parser for HTML data."""

    async def ingest(self, data: DataType) -> AsyncGenerator[str, None]:
        """Ingest HTML data and yield its visible text content."""
        parsed = BeautifulSoup(data, "html.parser")
        text = parsed.get_text()
        yield text
class TextParser(AsyncParser[DataType]):
    """A parser for raw text data."""

    async def ingest(self, data: DataType) -> AsyncGenerator[DataType, None]:
        """Yield the input unchanged, decoding UTF-8 bytes first."""
        text = data.decode("utf-8") if isinstance(data, bytes) else data
        yield text
class IngestionPipeline(AsyncPipeline):
    """A pipeline for ingestion."""

    pipeline_type: str = "ingestion"

    def __init__(
        self,
        pipe_logger: Optional[KVLoggingSingleton] = None,
        run_manager: Optional[RunManager] = None,
    ):
        super().__init__(pipe_logger, run_manager)
        # Slots populated later via add_pipe().
        self.parsing_pipe = None
        self.embedding_pipeline = None
        self.kg_pipeline = None

    async def run(
        self,
        input: Any,
        state: Optional[AsyncState] = None,
        stream: bool = False,
        run_manager: Optional[RunManager] = None,
        log_run_info: bool = True,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Parse the input once, then fan the parsed documents out to the
        embedding and/or knowledge-graph sub-pipelines, which run in
        parallel. Returns a dict of the sub-pipeline outputs."""
        self.state = state or AsyncState()
        async with manage_run(run_manager, self.pipeline_type):
            if log_run_info:
                await run_manager.log_run_info(
                    key="pipeline_type",
                    value=self.pipeline_type,
                    is_info_log=True,
                )
            if self.parsing_pipe is None:
                raise ValueError(
                    "parsing_pipeline must be set before running the ingestion pipeline"
                )
            if self.embedding_pipeline is None and self.kg_pipeline is None:
                raise ValueError(
                    "At least one of embedding_pipeline or kg_pipeline must be set before running the ingestion pipeline"
                )

            # One queue per consumer so every document reaches each
            # active sub-pipeline.
            embedding_queue = Queue()
            kg_queue = Queue()

            async def enqueue_documents():
                # Duplicate each parsed document into the active queues,
                # then close both with a trailing None sentinel.
                async for document in await self.parsing_pipe.run(
                    self.parsing_pipe.Input(message=input),
                    state,
                    run_manager,
                    *args,
                    **kwargs,
                ):
                    if self.embedding_pipeline:
                        await embedding_queue.put(document)
                    if self.kg_pipeline:
                        await kg_queue.put(document)
                await embedding_queue.put(None)
                await kg_queue.put(None)

            # Kick off document production.
            enqueue_task = asyncio.create_task(enqueue_documents())

            # Start the consumers in parallel; run info was already
            # logged above, so the sub-pipelines skip it.
            embedding_task = None
            kg_task = None
            if self.embedding_pipeline:
                embedding_task = asyncio.create_task(
                    self.embedding_pipeline.run(
                        dequeue_requests(embedding_queue),
                        state,
                        stream,
                        run_manager,
                        log_run_info=False,
                        *args,
                        **kwargs,
                    )
                )
            if self.kg_pipeline:
                kg_task = asyncio.create_task(
                    self.kg_pipeline.run(
                        dequeue_requests(kg_queue),
                        state,
                        stream,
                        run_manager,
                        log_run_info=False,
                        *args,
                        **kwargs,
                    )
                )

            # Drain the producer, then collect consumer outputs.
            await enqueue_task

            results = {}
            if embedding_task is not None:
                results["embedding_pipeline_output"] = await embedding_task
            if kg_task is not None:
                results["kg_pipeline_output"] = await kg_task
            return results

    def add_pipe(
        self,
        pipe: AsyncPipe,
        add_upstream_outputs: Optional[list[dict[str, str]]] = None,
        parsing_pipe: bool = False,
        kg_pipe: bool = False,
        embedding_pipe: bool = False,
        *args,
        **kwargs,
    ) -> None:
        """Route the pipe into the parsing slot or a lazily created
        embedding/KG sub-pipeline, based on the boolean flags."""
        logger.debug(
            f"Adding pipe {pipe.config.name} to the IngestionPipeline"
        )

        if parsing_pipe:
            self.parsing_pipe = pipe
            return
        if kg_pipe:
            if not self.kg_pipeline:
                self.kg_pipeline = AsyncPipeline()
            self.kg_pipeline.add_pipe(
                pipe, add_upstream_outputs, *args, **kwargs
            )
            return
        if embedding_pipe:
            if not self.embedding_pipeline:
                self.embedding_pipeline = AsyncPipeline()
            self.embedding_pipeline.add_pipe(
                pipe, add_upstream_outputs, *args, **kwargs
            )
            return
        raise ValueError("Pipe must be a parsing, embedding, or KG pipe")
class SearchPipeline(AsyncPipeline):
    """A pipeline for search."""

    pipeline_type: str = "search"

    def __init__(
        self,
        pipe_logger: Optional[KVLoggingSingleton] = None,
        run_manager: Optional[RunManager] = None,
    ):
        super().__init__(pipe_logger, run_manager)
        # Sub-pipelines registered later via add_pipe().
        self._parsing_pipe = None
        self._vector_search_pipeline = None
        self._kg_search_pipeline = None

    async def run(
        self,
        input: Any,
        state: Optional[AsyncState] = None,
        stream: bool = False,
        run_manager: Optional[RunManager] = None,
        log_run_info: bool = True,
        vector_search_settings: VectorSearchSettings = VectorSearchSettings(),
        kg_search_settings: KGSearchSettings = KGSearchSettings(),
        *args: Any,
        **kwargs: Any,
    ):
        """Fan each incoming query out to the enabled vector and/or KG
        search sub-pipelines (running in parallel) and aggregate their
        results."""
        self.state = state or AsyncState()
        # A search branch runs only when its pipeline exists AND its
        # settings enable it.
        run_vector = (
            self._vector_search_pipeline is not None
            and vector_search_settings.use_vector_search
        )
        run_kg = (
            self._kg_search_pipeline is not None
            and kg_search_settings.use_kg_search
        )
        async with manage_run(run_manager, self.pipeline_type):
            if log_run_info:
                await run_manager.log_run_info(
                    key="pipeline_type",
                    value=self.pipeline_type,
                    is_info_log=True,
                )

            vector_queue = Queue()
            kg_queue = Queue()

            async def feed_queues():
                # Copy every incoming message into each active queue,
                # then close both with a None sentinel.
                async for message in input:
                    if run_vector:
                        await vector_queue.put(message)
                    if run_kg:
                        await kg_queue.put(message)

                await vector_queue.put(None)
                await kg_queue.put(None)

            feeder = asyncio.create_task(feed_queues())

            # Launch the enabled search branches concurrently; run info
            # was already logged above.
            vector_task = None
            kg_task = None
            if run_vector:
                vector_task = asyncio.create_task(
                    self._vector_search_pipeline.run(
                        dequeue_requests(vector_queue),
                        state,
                        stream,
                        run_manager,
                        log_run_info=False,
                        vector_search_settings=vector_search_settings,
                    )
                )
            if run_kg:
                kg_task = asyncio.create_task(
                    self._kg_search_pipeline.run(
                        dequeue_requests(kg_queue),
                        state,
                        stream,
                        run_manager,
                        log_run_info=False,
                        kg_search_settings=kg_search_settings,
                    )
                )

            await feeder

            vector_results = (
                await vector_task if vector_task is not None else None
            )
            kg_results = await kg_task if kg_task is not None else None

            return AggregateSearchResult(
                vector_search_results=vector_results,
                kg_search_results=kg_results,
            )

    def add_pipe(
        self,
        pipe: AsyncPipe,
        add_upstream_outputs: Optional[list[dict[str, str]]] = None,
        kg_pipe: bool = False,
        vector_search_pipe: bool = False,
        *args,
        **kwargs,
    ) -> None:
        """Attach the pipe to a lazily created KG or vector-search
        sub-pipeline, based on the boolean flags."""
        logger.debug(f"Adding pipe {pipe.config.name} to the SearchPipeline")

        if kg_pipe:
            if not self._kg_search_pipeline:
                self._kg_search_pipeline = AsyncPipeline()
            self._kg_search_pipeline.add_pipe(
                pipe, add_upstream_outputs, *args, **kwargs
            )
            return
        if vector_search_pipe:
            if not self._vector_search_pipeline:
                self._vector_search_pipeline = AsyncPipeline()
            self._vector_search_pipeline.add_pipe(
                pipe, add_upstream_outputs, *args, **kwargs
            )
            return
        raise ValueError("Pipe must be a vector search or KG pipe")
class GeneratorPipe(AsyncPipe):
    """Abstract pipe that produces output through an LLM completion call."""

    class Config(AsyncPipe.PipeConfig):
        # Prompt names resolved through the prompt provider at run time.
        name: str
        task_prompt: str
        system_prompt: str = "default_system"

    def __init__(
        self,
        llm_provider: LLMProvider,
        prompt_provider: PromptProvider,
        type: PipeType = PipeType.GENERATOR,
        config: Optional[Config] = None,
        pipe_logger: Optional[KVLoggingSingleton] = None,
        *args,
        **kwargs,
    ):
        """Store the LLM and prompt providers used by concrete subclasses."""
        super().__init__(
            type=type,
            config=config or self.Config(),
            pipe_logger=pipe_logger,
            *args,
            **kwargs,
        )
        self.llm_provider = llm_provider
        self.prompt_provider = prompt_provider

    @abstractmethod
    async def _run_logic(
        self,
        input: AsyncPipe.Input,
        state: AsyncState,
        run_id: uuid.UUID,
        rag_generation_config: GenerationConfig,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[Any, None]:
        """Produce the pipe's output stream; implemented by subclasses."""
        pass

    @abstractmethod
    def _get_message_payload(
        self, message: str, *args: Any, **kwargs: Any
    ) -> list:
        """Build the chat-message payload to send to the LLM."""
        pass
class EmbeddingPipe(AsyncPipe):
    """
    Embeds and stores documents using a specified embedding model and database.
    """

    class Input(AsyncPipe.Input):
        message: AsyncGenerator[
            Union[Extraction, R2RDocumentProcessingError], None
        ]

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        text_splitter: TextSplitter,
        embedding_batch_size: int = 1,
        id_prefix: str = "demo",
        pipe_logger: Optional[KVLoggingSingleton] = None,
        type: PipeType = PipeType.INGESTOR,
        config: Optional[AsyncPipe.PipeConfig] = None,
        *args,
        **kwargs,
    ):
        """
        Initializes the embedding pipe with necessary components and configurations.
        """
        super().__init__(
            pipe_logger=pipe_logger,
            type=type,
            config=config
            or AsyncPipe.PipeConfig(name="default_embedding_pipe"),
        )
        self.embedding_provider = embedding_provider
        self.text_splitter = text_splitter
        self.embedding_batch_size = embedding_batch_size
        self.id_prefix = id_prefix
        self.pipe_run_info = None

    async def fragment(
        self, extraction: Extraction, run_id: uuid.UUID
    ) -> AsyncGenerator[Fragment, None]:
        """
        Splits text into manageable chunks for embedding.

        Raises:
            ValueError: If ``extraction`` is not an Extraction or its
                ``data`` is not a string.
        """
        if not isinstance(extraction, Extraction):
            raise ValueError(
                f"Expected an Extraction, but received {type(extraction)}."
            )
        if not isinstance(extraction.data, str):
            raise ValueError(
                f"Expected a string, but received {type(extraction.data)}."
            )
        text_chunks = [
            ele.page_content
            for ele in self.text_splitter.create_documents([extraction.data])
        ]
        # NOTE: the original had a dead `iteration += 1` after the yield;
        # enumerate() already advances the counter, so it was removed.
        for iteration, chunk in enumerate(text_chunks):
            yield Fragment(
                id=generate_id_from_label(f"{extraction.id}-{iteration}"),
                type=FragmentType.TEXT,
                data=chunk,
                metadata=copy.deepcopy(extraction.metadata),
                extraction_id=extraction.id,
                document_id=extraction.document_id,
            )

    async def transform_fragments(
        self, fragments: list[Fragment], metadatas: list[dict]
    ) -> AsyncGenerator[Fragment, None]:
        """
        Transforms text chunks based on their metadata, e.g., adding prefixes.

        Fixed: the original used `async for ... in zip(...)`, but `zip`
        yields a synchronous iterator, so the call raised TypeError at
        runtime. A plain `for` iterates the paired lists correctly.
        """
        for fragment, metadata in zip(fragments, metadatas):
            if "chunk_prefix" in metadata:
                prefix = metadata.pop("chunk_prefix")
                fragment.data = f"{prefix}\n{fragment.data}"
            yield fragment

    async def embed(self, fragments: list[Fragment]) -> list[float]:
        """Embed a batch of fragment texts via the embedding provider."""
        return await self.embedding_provider.async_get_embeddings(
            [fragment.data for fragment in fragments],
            EmbeddingProvider.PipeStage.BASE,
        )

    async def _process_batch(
        self, fragment_batch: list[Fragment]
    ) -> list[VectorEntry]:
        """
        Embeds a batch of fragments and returns the corresponding vector entries.
        """
        vectors = await self.embed(fragment_batch)
        return [
            VectorEntry(
                id=fragment.id,
                vector=Vector(data=raw_vector),
                metadata={
                    "document_id": fragment.document_id,
                    "extraction_id": fragment.extraction_id,
                    "text": fragment.data,
                    **fragment.metadata,
                },
            )
            for raw_vector, fragment in zip(vectors, fragment_batch)
        ]

    async def _process_and_enqueue_batch(
        self, fragment_batch: list[Fragment], vector_entry_queue: asyncio.Queue
    ):
        """Embed one batch and push results (or an error) onto the queue,
        always terminating with a None completion sentinel."""
        try:
            batch_result = await self._process_batch(fragment_batch)
            for vector_entry in batch_result:
                await vector_entry_queue.put(vector_entry)
        except Exception as e:
            logger.error(f"Error processing batch: {e}")
            # Surface the failure to the consumer instead of dropping it.
            await vector_entry_queue.put(
                R2RDocumentProcessingError(
                    error_message=str(e),
                    document_id=fragment_batch[0].document_id,
                )
            )
        finally:
            await vector_entry_queue.put(None)  # Signal completion

    async def _run_logic(
        self,
        input: Input,
        state: AsyncState,
        run_id: uuid.UUID,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[Union[R2RDocumentProcessingError, VectorEntry], None]:
        """
        Executes the embedding pipe: chunking, transforming, embedding, and storing documents.

        Batches fragments, processes batches concurrently, and yields
        vector entries (or processing errors) as they complete.
        """
        vector_entry_queue = asyncio.Queue()
        fragment_batch = []
        active_tasks = 0

        # Per-document counter used for chunk ordering.
        fragment_info = {}
        async for extraction in input.message:
            if isinstance(extraction, R2RDocumentProcessingError):
                # Pass upstream errors through untouched.
                yield extraction
                continue

            async for fragment in self.fragment(extraction, run_id):
                if extraction.document_id in fragment_info:
                    fragment_info[extraction.document_id] += 1
                else:
                    fragment_info[extraction.document_id] = 0  # Start with 0
                fragment.metadata["chunk_order"] = fragment_info[
                    extraction.document_id
                ]

                version = fragment.metadata.get("version", "v0")

                # Ensure fragment ID is set correctly
                if not fragment.id:
                    fragment.id = generate_id_from_label(
                        f"{extraction.id}-{fragment_info[extraction.document_id]}-{version}"
                    )

                fragment_batch.append(fragment)
                if len(fragment_batch) >= self.embedding_batch_size:
                    asyncio.create_task(
                        self._process_and_enqueue_batch(
                            fragment_batch.copy(), vector_entry_queue
                        )
                    )
                    active_tasks += 1
                    fragment_batch.clear()

        logger.debug(
            f"Fragmented the input document ids into counts as shown: {fragment_info}"
        )

        # Flush any partial final batch.
        if fragment_batch:
            asyncio.create_task(
                self._process_and_enqueue_batch(
                    fragment_batch.copy(), vector_entry_queue
                )
            )
            active_tasks += 1

        # Drain the queue until every batch task has signalled completion.
        while active_tasks > 0:
            vector_entry = await vector_entry_queue.get()
            if vector_entry is None:  # Check for termination signal
                active_tasks -= 1
            elif isinstance(vector_entry, Exception):
                yield vector_entry  # Propagate the exception
                active_tasks -= 1
            else:
                yield vector_entry
FragmentType, + KGExtraction, + KGProvider, + KVLoggingSingleton, + LLMProvider, + PipeType, + PromptProvider, + TextSplitter, + extract_entities, + extract_triples, + generate_id_from_label, +) +from r2r.base.pipes.base_pipe import AsyncPipe + +logger = logging.getLogger(__name__) + + +class ClientError(Exception): + """Base class for client connection errors.""" + + pass + + +class KGExtractionPipe(AsyncPipe): + """ + Embeds and stores documents using a specified embedding model and database. + """ + + def __init__( + self, + kg_provider: KGProvider, + llm_provider: LLMProvider, + prompt_provider: PromptProvider, + text_splitter: TextSplitter, + kg_batch_size: int = 1, + id_prefix: str = "demo", + pipe_logger: Optional[KVLoggingSingleton] = None, + type: PipeType = PipeType.INGESTOR, + config: Optional[AsyncPipe.PipeConfig] = None, + *args, + **kwargs, + ): + """ + Initializes the embedding pipe with necessary components and configurations. + """ + super().__init__( + pipe_logger=pipe_logger, + type=type, + config=config + or AsyncPipe.PipeConfig(name="default_embedding_pipe"), + ) + + self.kg_provider = kg_provider + self.prompt_provider = prompt_provider + self.llm_provider = llm_provider + self.text_splitter = text_splitter + self.kg_batch_size = kg_batch_size + self.id_prefix = id_prefix + self.pipe_run_info = None + + async def fragment( + self, extraction: Extraction, run_id: uuid.UUID + ) -> AsyncGenerator[Fragment, None]: + """ + Splits text into manageable chunks for embedding. + """ + if not isinstance(extraction, Extraction): + raise ValueError( + f"Expected an Extraction, but received {type(extraction)}." + ) + if not isinstance(extraction.data, str): + raise ValueError( + f"Expected a string, but received {type(extraction.data)}." 
+ ) + text_chunks = [ + ele.page_content + for ele in self.text_splitter.create_documents([extraction.data]) + ] + for iteration, chunk in enumerate(text_chunks): + fragment = Fragment( + id=generate_id_from_label(f"{extraction.id}-{iteration}"), + type=FragmentType.TEXT, + data=chunk, + metadata=copy.deepcopy(extraction.metadata), + extraction_id=extraction.id, + document_id=extraction.document_id, + ) + yield fragment + + async def transform_fragments( + self, fragments: list[Fragment] + ) -> AsyncGenerator[Fragment, None]: + """ + Transforms text chunks based on their metadata, e.g., adding prefixes. + """ + async for fragment in fragments: + if "chunk_prefix" in fragment.metadata: + prefix = fragment.metadata.pop("chunk_prefix") + fragment.data = f"{prefix}\n{fragment.data}" + yield fragment + + async def extract_kg( + self, + fragment: Fragment, + retries: int = 3, + delay: int = 2, + ) -> KGExtraction: + """ + Extracts NER triples from a list of fragments with retries. + """ + task_prompt = self.prompt_provider.get_prompt( + self.kg_provider.config.kg_extraction_prompt, + inputs={"input": fragment.data}, + ) + messages = self.prompt_provider._get_message_payload( + self.prompt_provider.get_prompt("default_system"), task_prompt + ) + for attempt in range(retries): + try: + response = await self.llm_provider.aget_completion( + messages, self.kg_provider.config.kg_extraction_config + ) + + kg_extraction = response.choices[0].message.content + + # Parsing JSON from the response + kg_json = ( + json.loads( + kg_extraction.split("```json")[1].split("```")[0] + ) + if """```json""" in kg_extraction + else json.loads(kg_extraction) + ) + llm_payload = kg_json.get("entities_and_triples", {}) + + # Extract triples with detailed logging + entities = extract_entities(llm_payload) + triples = extract_triples(llm_payload, entities) + + # Create KG extraction object + return KGExtraction(entities=entities, triples=triples) + except ( + ClientError, + json.JSONDecodeError, + 
KeyError, + IndexError, + ) as e: + logger.error(f"Error in extract_kg: {e}") + if attempt < retries - 1: + await asyncio.sleep(delay) + else: + logger.error(f"Failed after retries with {e}") + # raise e # Ensure the exception is raised after the final attempt + + return KGExtraction(entities={}, triples=[]) + + async def _process_batch( + self, + fragment_batch: list[Fragment], + ) -> list[KGExtraction]: + """ + Embeds a batch of fragments and yields vector entries. + """ + tasks = [ + asyncio.create_task(self.extract_kg(fragment)) + for fragment in fragment_batch + ] + return await asyncio.gather(*tasks) + + async def _run_logic( + self, + input: AsyncPipe.Input, + state: AsyncState, + run_id: uuid.UUID, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[KGExtraction, None]: + """ + Executes the embedding pipe: chunking, transforming, embedding, and storing documents. + """ + batch_tasks = [] + fragment_batch = [] + + fragment_info = {} + async for extraction in input.message: + async for fragment in self.transform_fragments( + self.fragment(extraction, run_id) + ): + if extraction.document_id in fragment_info: + fragment_info[extraction.document_id] += 1 + else: + fragment_info[extraction.document_id] = 1 + extraction.metadata["chunk_order"] = fragment_info[ + extraction.document_id + ] + fragment_batch.append(fragment) + if len(fragment_batch) >= self.kg_batch_size: + # Here, ensure `_process_batch` is scheduled as a coroutine, not called directly + batch_tasks.append( + self._process_batch(fragment_batch.copy()) + ) # pass a copy if necessary + fragment_batch.clear() # Clear the batch for new fragments + + logger.debug( + f"Fragmented the input document ids into counts as shown: {fragment_info}" + ) + + if fragment_batch: # Process any remaining fragments + batch_tasks.append(self._process_batch(fragment_batch.copy())) + + # Process tasks as they complete + for task in asyncio.as_completed(batch_tasks): + batch_result = await task # Wait for the next task to 
class KGStoragePipe(AsyncPipe):
    """
    Async pipe that persists `KGExtraction` batches into a graph database,
    optionally attaching embeddings to each entity node.
    """

    class Input(AsyncPipe.Input):
        # Stream of knowledge-graph extractions to store.
        message: AsyncGenerator[KGExtraction, None]

    def __init__(
        self,
        kg_provider: KGProvider,
        embedding_provider: Optional[EmbeddingProvider] = None,
        storage_batch_size: int = 1,
        pipe_logger: Optional[KVLoggingSingleton] = None,
        type: PipeType = PipeType.INGESTOR,
        config: Optional[AsyncPipe.PipeConfig] = None,
        *args,
        **kwargs,
    ):
        """
        Initializes the async knowledge graph storage pipe with necessary components and configurations.
        """
        logger.info(
            "Initializing an `KGStoragePipe` to store knowledge graph extractions in a graph database."
        )

        super().__init__(
            pipe_logger=pipe_logger,
            type=type,
            config=config,
            *args,
            **kwargs,
        )
        self.kg_provider = kg_provider
        self.embedding_provider = embedding_provider
        self.storage_batch_size = storage_batch_size

    async def store(
        self,
        kg_extractions: list[KGExtraction],
    ) -> None:
        """
        Stores a batch of knowledge graph extractions in the graph database.

        Raises:
            ValueError: if the underlying provider fails; the original
                exception is chained as the cause.
        """
        try:
            nodes = []
            relations = []
            for extraction in kg_extractions:
                for entity in extraction.entities.values():
                    embedding = None
                    if self.embedding_provider:
                        # BUG FIX: this prompt was a plain string literal
                        # missing the `f` prefix, so every entity was embedded
                        # with the literal text "{entity.value}" rather than
                        # its actual value/category/subcategory.
                        embedding = self.embedding_provider.get_embedding(
                            f"Entity:\n{entity.value}\nLabel:\n{entity.category}\nSubcategory:\n{entity.subcategory}"
                        )
                    nodes.append(
                        EntityNode(
                            name=entity.value,
                            label=entity.category,
                            embedding=embedding,
                            properties=(
                                {"subcategory": entity.subcategory}
                                if entity.subcategory
                                else {}
                            ),
                        )
                    )
                for triple in extraction.triples:
                    relations.append(
                        Relation(
                            source_id=triple.subject,
                            target_id=triple.object,
                            label=triple.predicate,
                        )
                    )
            self.kg_provider.upsert_nodes(nodes)
            self.kg_provider.upsert_relations(relations)
        except Exception as e:
            error_message = f"Failed to store knowledge graph extractions in the database: {e}"
            logger.error(error_message)
            # Chain the cause so callers can see the provider-level failure.
            raise ValueError(error_message) from e

    async def _run_logic(
        self,
        input: Input,
        state: AsyncState,
        run_id: uuid.UUID,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[None, None]:
        """
        Executes the async knowledge graph storage pipe: storing knowledge graph extractions in the graph database.
        """
        batch_tasks = []
        kg_batch = []

        async for kg_extraction in input.message:
            kg_batch.append(kg_extraction)
            if len(kg_batch) >= self.storage_batch_size:
                # Schedule the storage task; pass a copy because the batch
                # list is cleared and reused immediately below.
                batch_tasks.append(
                    asyncio.create_task(
                        self.store(kg_batch.copy()),
                        name=f"kg-store-{self.config.name}",
                    )
                )
                kg_batch.clear()

        if kg_batch:  # Process any remaining extractions
            batch_tasks.append(
                asyncio.create_task(
                    self.store(kg_batch.copy()),
                    name=f"kg-store-{self.config.name}",
                )
            )

        # Wait for all storage tasks to complete
        await asyncio.gather(*batch_tasks)
        yield None
class ParsingPipe(AsyncPipe):
    """
    Processes incoming documents into plaintext based on their data type.
    Supports TXT, JSON, HTML, and PDF formats.
    """

    class Input(AsyncPipe.Input):
        # Stream of raw `Document` objects to be parsed into extractions.
        message: AsyncGenerator[Document, None]

    # Default parser class per document type; classes are instantiated
    # lazily in __init__ unless excluded or overridden.
    AVAILABLE_PARSERS = {
        DocumentType.CSV: CSVParser,
        DocumentType.DOCX: DOCXParser,
        DocumentType.HTML: HTMLParser,
        DocumentType.JSON: JSONParser,
        DocumentType.MD: MDParser,
        DocumentType.PDF: PDFParser,
        DocumentType.PPTX: PPTParser,
        DocumentType.TXT: TextParser,
        DocumentType.XLSX: XLSXParser,
        DocumentType.GIF: ImageParser,
        DocumentType.JPEG: ImageParser,
        DocumentType.JPG: ImageParser,
        DocumentType.PNG: ImageParser,
        DocumentType.SVG: ImageParser,
        DocumentType.MP3: AudioParser,
        DocumentType.MP4: MovieParser,
    }

    # Document types treated as images: parsed results are tagged
    # ExtractionType.IMG and the image type is recorded in metadata.
    IMAGE_TYPES = {
        DocumentType.GIF,
        DocumentType.JPG,
        DocumentType.JPEG,
        DocumentType.PNG,
        DocumentType.SVG,
    }

    def __init__(
        self,
        excluded_parsers: list[DocumentType],
        override_parsers: Optional[dict[DocumentType, AsyncParser]] = None,
        pipe_logger: Optional[KVLoggingSingleton] = None,
        type: PipeType = PipeType.INGESTOR,
        config: Optional[AsyncPipe.PipeConfig] = None,
        *args,
        **kwargs,
    ):
        """
        Build the parser registry: caller-supplied overrides win first, then
        every AVAILABLE_PARSERS entry not listed in `excluded_parsers` is
        instantiated with its default constructor.
        """
        super().__init__(
            pipe_logger=pipe_logger,
            type=type,
            config=config
            or AsyncPipe.PipeConfig(name="default_document_parsing_pipe"),
            *args,
            **kwargs,
        )

        self.parsers = {}

        if not override_parsers:
            override_parsers = {}

        # Apply overrides if specified
        for doc_type, parser in override_parsers.items():
            self.parsers[doc_type] = parser

        for doc_type, parser_info in self.AVAILABLE_PARSERS.items():
            if (
                doc_type not in excluded_parsers
                and doc_type not in self.parsers
            ):
                self.parsers[doc_type] = parser_info()

    @property
    def supported_types(self) -> list[DocumentType]:
        """
        Lists the data types supported by the pipe.

        NOTE(review): this returns every DocumentType, not only the types
        with a registered parser — confirm that is intended.
        """
        return [entry_type for entry_type in DocumentType]

    async def _parse(
        self,
        document: Document,
        run_id: uuid.UUID,
        version: str,
    ) -> AsyncGenerator[Union[R2RDocumentProcessingError, Extraction], None]:
        """
        Parse one document, yielding an Extraction per text chunk produced by
        the registered parser, or a single R2RDocumentProcessingError when no
        parser is registered for the document's type.
        """
        if document.type not in self.parsers:
            yield R2RDocumentProcessingError(
                document_id=document.id,
                error_message=f"Parser for {document.type} not found in `ParsingPipe`.",
            )
            return
        parser = self.parsers[document.type]
        texts = parser.ingest(document.data)
        extraction_type = ExtractionType.TXT
        t0 = time.time()
        if document.type in self.IMAGE_TYPES:
            extraction_type = ExtractionType.IMG
            document.metadata["image_type"] = document.type.value
            # SAVE IMAGE DATA
            # try:
            #     import base64
            #     sanitized_data = base64.b64encode(document.data).decode('utf-8')
            # except Exception as e:
            #     sanitized_data = document.data

            # document.metadata["image_data"] = sanitized_data
        elif document.type == DocumentType.MP4:
            extraction_type = ExtractionType.MOV
            document.metadata["audio_type"] = document.type.value

        iteration = 0
        async for text in texts:
            # Deterministic id: the same (document, chunk index, version)
            # always maps to the same extraction id.
            extraction_id = generate_id_from_label(
                f"{document.id}-{iteration}-{version}"
            )
            document.metadata["version"] = version
            extraction = Extraction(
                id=extraction_id,
                data=text,
                metadata=document.metadata,
                document_id=document.id,
                type=extraction_type,
            )
            yield extraction
            # TODO - Add settings to enable extraction logging
            # extraction_dict = extraction.dict()
            # await self.enqueue_log(
            #     run_id=run_id,
            #     key="extraction",
            #     value=json.dumps(
            #         {
            #             "data": extraction_dict["data"],
            #             "document_id": str(extraction_dict["document_id"]),
            #             "extraction_id": str(extraction_dict["id"]),
            #         }
            #     ),
            # )
            iteration += 1
        logger.debug(
            f"Parsed document with id={document.id}, title={document.metadata.get('title', None)}, user_id={document.metadata.get('user_id', None)}, metadata={document.metadata} into {iteration} extractions in t={time.time() - t0:.2f} seconds."
        )

    async def _run_logic(
        self,
        input: Input,
        state: AsyncState,
        run_id: uuid.UUID,
        versions: Optional[list[str]] = None,
        *args,
        **kwargs,
    ) -> AsyncGenerator[Extraction, None]:
        """
        Parse every incoming document concurrently and yield extractions as
        each document finishes.

        NOTE(review): `versions` is indexed positionally against the order
        documents arrive — assumes the caller supplies one version per
        document in the same order; confirm against callers.
        """
        parse_tasks = []

        iteration = 0
        async for document in input.message:
            version = versions[iteration] if versions else "v0"
            iteration += 1
            parse_tasks.append(
                self._handle_parse_task(document, version, run_id)
            )

        # Await all tasks and yield results concurrently
        for parse_task in asyncio.as_completed(parse_tasks):
            for extraction in await parse_task:
                yield extraction

    async def _handle_parse_task(
        self, document: Document, version: str, run_id: uuid.UUID
    ) -> AsyncGenerator[Extraction, None]:
        # Drain the per-document async generator into a list so that
        # asyncio.as_completed can await whole documents at a time.
        extractions = []
        async for extraction in self._parse(document, run_id, version):
            extractions.append(extraction)
        return extractions
pipe with necessary components and configurations. + """ + super().__init__( + pipe_logger=pipe_logger, + type=type, + config=config, + *args, + **kwargs, + ) + self.vector_db_provider = vector_db_provider + self.storage_batch_size = storage_batch_size + + async def store( + self, + vector_entries: list[VectorEntry], + do_upsert: bool = True, + ) -> None: + """ + Stores a batch of vector entries in the database. + """ + + try: + if do_upsert: + self.vector_db_provider.upsert_entries(vector_entries) + else: + self.vector_db_provider.copy_entries(vector_entries) + except Exception as e: + error_message = ( + f"Failed to store vector entries in the database: {e}" + ) + logger.error(error_message) + raise ValueError(error_message) + + async def _run_logic( + self, + input: Input, + state: AsyncState, + run_id: uuid.UUID, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[ + Tuple[uuid.UUID, Union[str, R2RDocumentProcessingError]], None + ]: + """ + Executes the async vector storage pipe: storing embeddings in the vector database. 
+ """ + batch_tasks = [] + vector_batch = [] + document_counts = {} + i = 0 + async for msg in input.message: + i += 1 + if isinstance(msg, R2RDocumentProcessingError): + yield (msg.document_id, msg) + continue + + document_id = msg.metadata.get("document_id", None) + if not document_id: + raise ValueError("Document ID not found in the metadata.") + if document_id not in document_counts: + document_counts[document_id] = 1 + else: + document_counts[document_id] += 1 + + vector_batch.append(msg) + if len(vector_batch) >= self.storage_batch_size: + # Schedule the storage task + batch_tasks.append( + asyncio.create_task( + self.store(vector_batch.copy(), input.do_upsert), + name=f"vector-store-{self.config.name}", + ) + ) + vector_batch.clear() + + if vector_batch: # Process any remaining vectors + batch_tasks.append( + asyncio.create_task( + self.store(vector_batch.copy(), input.do_upsert), + name=f"vector-store-{self.config.name}", + ) + ) + + # Wait for all storage tasks to complete + await asyncio.gather(*batch_tasks) + + for document_id, count in document_counts.items(): + yield ( + document_id, + f"Processed {count} vectors for document {document_id}.", + ) diff --git a/R2R/r2r/pipes/other/eval_pipe.py b/R2R/r2r/pipes/other/eval_pipe.py new file mode 100755 index 00000000..b1c60343 --- /dev/null +++ b/R2R/r2r/pipes/other/eval_pipe.py @@ -0,0 +1,54 @@ +import logging +import uuid +from typing import Any, AsyncGenerator, Optional + +from pydantic import BaseModel + +from r2r import AsyncState, EvalProvider, LLMChatCompletion, PipeType +from r2r.base.abstractions.llm import GenerationConfig +from r2r.base.pipes.base_pipe import AsyncPipe + +logger = logging.getLogger(__name__) + + +class EvalPipe(AsyncPipe): + class EvalPayload(BaseModel): + query: str + context: str + completion: str + + class Input(AsyncPipe.Input): + message: AsyncGenerator["EvalPipe.EvalPayload", None] + + def __init__( + self, + eval_provider: EvalProvider, + type: PipeType = PipeType.EVAL, + 
class WebSearchPipe(SearchPipe):
    """
    Search pipe backed by the Serper web-search API: issues the query,
    converts raw hits into `VectorSearchResult`s, and logs queries/results.
    """

    def __init__(
        self,
        serper_client: SerperClient,
        type: PipeType = PipeType.SEARCH,
        config: Optional[SearchPipe.SearchConfig] = None,
        *args,
        **kwargs,
    ):
        super().__init__(
            type=type,
            config=config or SearchPipe.SearchConfig(),
            *args,
            **kwargs,
        )
        self.serper_client = serper_client

    async def search(
        self,
        message: str,
        run_id: uuid.UUID,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[VectorSearchResult, None]:
        """
        Query Serper with `message` and yield one VectorSearchResult per hit
        that has a snippet; hits without a snippet are skipped.

        A `search_limit` kwarg, when present, overrides the configured limit.
        """
        search_limit_override = kwargs.get("search_limit", None)
        await self.enqueue_log(
            run_id=run_id, key="search_query", value=message
        )
        # TODO - Make more general in the future by creating a SearchProvider interface
        results = self.serper_client.get_raw(
            query=message,
            limit=search_limit_override or self.config.search_limit,
        )

        search_results = []
        for result in results:
            if result.get("snippet") is None:
                continue
            # Rename the provider's "snippet" field to the pipeline's "text".
            result["text"] = result.pop("snippet")
            search_result = VectorSearchResult(
                # Deterministic id derived from the full result payload.
                id=generate_id_from_label(str(result)),
                score=result.get(
                    "score", 0
                ),  # TODO - Consider dynamically generating scores based on similarity
                metadata=result,
            )
            search_results.append(search_result)
            yield search_result

        await self.enqueue_log(
            run_id=run_id,
            key="search_results",
            value=json.dumps([ele.json() for ele in search_results]),
        )

    async def _run_logic(
        self,
        input: AsyncPipe.Input,
        state: AsyncState,
        run_id: uuid.UUID,
        *args: Any,
        **kwargs,
    ) -> AsyncGenerator[VectorSearchResult, None]:
        """
        Run `search` for each incoming query, yielding results as they
        arrive and recording queries/results into pipeline state.
        """
        search_queries = []
        search_results = []
        async for search_request in input.message:
            search_queries.append(search_request)
            async for result in self.search(
                message=search_request, run_id=run_id, *args, **kwargs
            ):
                search_results.append(result)
                yield result

        # NOTE(review): the second update below appears to supersede this
        # one — whether it is redundant depends on AsyncState.update merge
        # semantics; confirm before removing.
        await state.update(
            self.config.name, {"output": {"search_results": search_results}}
        )

        await state.update(
            self.config.name,
            {
                "output": {
                    "search_queries": search_queries,
                    "search_results": search_results,
                }
            },
        )
+ """ + + def __init__( + self, + kg_provider: KGProvider, + llm_provider: LLMProvider, + prompt_provider: PromptProvider, + pipe_logger: Optional[KVLoggingSingleton] = None, + type: PipeType = PipeType.INGESTOR, + config: Optional[GeneratorPipe.PipeConfig] = None, + *args, + **kwargs, + ): + """ + Initializes the embedding pipe with necessary components and configurations. + """ + super().__init__( + llm_provider=llm_provider, + prompt_provider=prompt_provider, + type=type, + config=config + or GeneratorPipe.Config( + name="kg_rag_pipe", task_prompt="kg_agent" + ), + pipe_logger=pipe_logger, + *args, + **kwargs, + ) + self.kg_provider = kg_provider + self.llm_provider = llm_provider + self.prompt_provider = prompt_provider + self.pipe_run_info = None + + async def _run_logic( + self, + input: GeneratorPipe.Input, + state: AsyncState, + run_id: uuid.UUID, + kg_search_settings: KGSearchSettings, + *args: Any, + **kwargs: Any, + ): + async for message in input.message: + # TODO - Remove hard code + formatted_prompt = self.prompt_provider.get_prompt( + "kg_agent", {"input": message} + ) + messages = self._get_message_payload(formatted_prompt) + + result = await self.llm_provider.aget_completion( + messages=messages, + generation_config=kg_search_settings.agent_generation_config, + ) + + extraction = result.choices[0].message.content + query = extraction.split("```cypher")[1].split("```")[0] + result = self.kg_provider.structured_query(query) + yield (query, result) + + await self.enqueue_log( + run_id=run_id, + key="kg_agent_response", + value=extraction, + ) + + await self.enqueue_log( + run_id=run_id, + key="kg_agent_execution_result", + value=result, + ) + + def _get_message_payload(self, message: str) -> dict: + return [ + { + "role": "system", + "content": self.prompt_provider.get_prompt( + self.config.system_prompt, + ), + }, + {"role": "user", "content": message}, + ] diff --git a/R2R/r2r/pipes/retrieval/multi_search.py b/R2R/r2r/pipes/retrieval/multi_search.py 
new file mode 100755 index 00000000..6da2c34b --- /dev/null +++ b/R2R/r2r/pipes/retrieval/multi_search.py @@ -0,0 +1,79 @@ +import uuid +from copy import copy +from typing import Any, AsyncGenerator, Optional + +from r2r.base.abstractions.llm import GenerationConfig +from r2r.base.abstractions.search import VectorSearchResult +from r2r.base.pipes.base_pipe import AsyncPipe + +from ..abstractions.search_pipe import SearchPipe +from .query_transform_pipe import QueryTransformPipe + + +class MultiSearchPipe(AsyncPipe): + class PipeConfig(AsyncPipe.PipeConfig): + name: str = "multi_search_pipe" + + def __init__( + self, + query_transform_pipe: QueryTransformPipe, + inner_search_pipe: SearchPipe, + config: Optional[PipeConfig] = None, + *args, + **kwargs, + ): + self.query_transform_pipe = query_transform_pipe + self.vector_search_pipe = inner_search_pipe + if ( + not query_transform_pipe.config.name + == inner_search_pipe.config.name + ): + raise ValueError( + "The query transform pipe and search pipe must have the same name." + ) + if config and not config.name == query_transform_pipe.config.name: + raise ValueError( + "The pipe config name must match the query transform pipe name." 
class QueryTransformPipe(GeneratorPipe):
    """
    Rewrites an incoming query into one or more transformed queries using an
    LLM prompt (default: HyDE), yielding each non-empty output line.
    """

    class QueryTransformConfig(GeneratorPipe.PipeConfig):
        name: str = "default_query_transform"
        system_prompt: str = "default_system"
        task_prompt: str = "hyde"

    class Input(GeneratorPipe.Input):
        # Stream of raw query strings to transform.
        message: AsyncGenerator[str, None]

    def __init__(
        self,
        llm_provider: LLMProvider,
        prompt_provider: PromptProvider,
        type: PipeType = PipeType.TRANSFORM,
        config: Optional[QueryTransformConfig] = None,
        *args,
        **kwargs,
    ):
        # BUG FIX: message previously read "Initalizing" (typo); the
        # placeholder-free f-prefix was also dropped.
        logger.info("Initializing an `QueryTransformPipe` pipe.")
        super().__init__(
            llm_provider=llm_provider,
            prompt_provider=prompt_provider,
            type=type,
            config=config or QueryTransformPipe.QueryTransformConfig(),
            *args,
            **kwargs,
        )

    async def _run_logic(
        self,
        input: AsyncPipe.Input,
        state: AsyncState,
        run_id: uuid.UUID,
        query_transform_generation_config: GenerationConfig,
        num_query_xf_outputs: int = 3,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[str, None]:
        """
        For each incoming query, ask the LLM for `num_query_xf_outputs`
        rewrites, split the completion on newlines, record the non-empty
        lines in pipeline state, and yield them one at a time.
        """
        async for query in input.message:
            logger.info(
                f"Transforming query: {query} into {num_query_xf_outputs} outputs with {self.config.task_prompt}."
            )

            query_transform_request = self._get_message_payload(
                query, num_outputs=num_query_xf_outputs
            )

            response = await self.llm_provider.aget_completion(
                messages=query_transform_request,
                generation_config=query_transform_generation_config,
            )
            content = self.llm_provider.extract_content(response)
            # One transformed query per non-blank line of the completion.
            outputs = content.split("\n")
            outputs = [
                output.strip() for output in outputs if output.strip() != ""
            ]
            await state.update(
                self.config.name, {"output": {"outputs": outputs}}
            )

            for output in outputs:
                logger.info(f"Yielding transformed output: {output}")
                yield output

    def _get_message_payload(self, input: str, num_outputs: int) -> list[dict]:
        # Annotation corrected: returns a list of chat messages, not a dict.
        return [
            {
                "role": "system",
                "content": self.prompt_provider.get_prompt(
                    self.config.system_prompt,
                ),
            },
            {
                "role": "user",
                "content": self.prompt_provider.get_prompt(
                    self.config.task_prompt,
                    inputs={
                        "message": input,
                        "num_outputs": num_outputs,
                    },
                ),
            },
        ]
Tuple + +from r2r.base import ( + AggregateSearchResult, + AsyncPipe, + AsyncState, + LLMProvider, + PipeType, + PromptProvider, +) +from r2r.base.abstractions.llm import GenerationConfig, RAGCompletion + +from ..abstractions.generator_pipe import GeneratorPipe + +logger = logging.getLogger(__name__) + + +class SearchRAGPipe(GeneratorPipe): + class Input(AsyncPipe.Input): + message: AsyncGenerator[Tuple[str, AggregateSearchResult], None] + + def __init__( + self, + llm_provider: LLMProvider, + prompt_provider: PromptProvider, + type: PipeType = PipeType.GENERATOR, + config: Optional[GeneratorPipe] = None, + *args, + **kwargs, + ): + super().__init__( + llm_provider=llm_provider, + prompt_provider=prompt_provider, + type=type, + config=config + or GeneratorPipe.Config( + name="default_rag_pipe", task_prompt="default_rag" + ), + *args, + **kwargs, + ) + + async def _run_logic( + self, + input: Input, + state: AsyncState, + run_id: uuid.UUID, + rag_generation_config: GenerationConfig, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[RAGCompletion, None]: + context = "" + search_iteration = 1 + total_results = 0 + # must select a query if there are multiple + sel_query = None + async for query, search_results in input.message: + if search_iteration == 1: + sel_query = query + context_piece, total_results = await self._collect_context( + query, search_results, search_iteration, total_results + ) + context += context_piece + search_iteration += 1 + + messages = self._get_message_payload(sel_query, context) + + response = await self.llm_provider.aget_completion( + messages=messages, generation_config=rag_generation_config + ) + yield RAGCompletion(completion=response, search_results=search_results) + + await self.enqueue_log( + run_id=run_id, + key="llm_response", + value=response.choices[0].message.content, + ) + + def _get_message_payload(self, query: str, context: str) -> dict: + return [ + { + "role": "system", + "content": self.prompt_provider.get_prompt( + 
self.config.system_prompt, + ), + }, + { + "role": "user", + "content": self.prompt_provider.get_prompt( + self.config.task_prompt, + inputs={ + "query": query, + "context": context, + }, + ), + }, + ] + + async def _collect_context( + self, + query: str, + results: AggregateSearchResult, + iteration: int, + total_results: int, + ) -> Tuple[str, int]: + context = f"Query:\n{query}\n\n" + if results.vector_search_results: + context += f"Vector Search Results({iteration}):\n" + it = total_results + 1 + for result in results.vector_search_results: + context += f"[{it}]: {result.metadata['text']}\n\n" + it += 1 + total_results = ( + it - 1 + ) # Update total_results based on the last index used + if results.kg_search_results: + context += f"Knowledge Graph ({iteration}):\n" + it = total_results + 1 + for query, search_results in results.kg_search_results: # [1]: + context += f"Query: {query}\n\n" + context += f"Results:\n" + for search_result in search_results: + context += f"[{it}]: {search_result}\n\n" + it += 1 + total_results = ( + it - 1 + ) # Update total_results based on the last index used + return context, total_results diff --git a/R2R/r2r/pipes/retrieval/streaming_rag_pipe.py b/R2R/r2r/pipes/retrieval/streaming_rag_pipe.py new file mode 100755 index 00000000..b01f6445 --- /dev/null +++ b/R2R/r2r/pipes/retrieval/streaming_rag_pipe.py @@ -0,0 +1,131 @@ +import json +import logging +import uuid +from typing import Any, AsyncGenerator, Generator, Optional + +from r2r.base import ( + AsyncState, + LLMChatCompletionChunk, + LLMProvider, + PipeType, + PromptProvider, +) +from r2r.base.abstractions.llm import GenerationConfig + +from ..abstractions.generator_pipe import GeneratorPipe +from .search_rag_pipe import SearchRAGPipe + +logger = logging.getLogger(__name__) + + +class StreamingSearchRAGPipe(SearchRAGPipe): + SEARCH_STREAM_MARKER = "search" + COMPLETION_STREAM_MARKER = "completion" + + def __init__( + self, + llm_provider: LLMProvider, + prompt_provider: 
    async def _run_logic(
        self,
        input: SearchRAGPipe.Input,
        state: AsyncState,
        run_id: uuid.UUID,
        rag_generation_config: GenerationConfig,
        *args: Any,
        **kwargs: Any,
    ) -> AsyncGenerator[str, None]:
        """
        Stream a RAG response as text chunks: first the serialized search
        results between <search>...</search> markers, then the LLM completion
        chunks between <completion>...</completion> markers. The full
        completion text is logged once streaming finishes.
        """
        iteration = 0
        context = ""
        # dump the search results and construct the context
        async for query, search_results in input.message:
            yield f"<{self.SEARCH_STREAM_MARKER}>"
            if search_results.vector_search_results:
                context += "Vector Search Results:\n"
                for result in search_results.vector_search_results:
                    if iteration >= 1:
                        # Comma-separate consecutive serialized results.
                        yield ","
                    # NOTE(review): result.json() already returns a JSON
                    # string, so json.dumps wraps it as an escaped string
                    # value — confirm the consumer expects that encoding.
                    yield json.dumps(result.json())
                    context += (
                        f"{iteration + 1}:\n{result.metadata['text']}\n\n"
                    )
                    iteration += 1

            # if search_results.kg_search_results:
            #     for result in search_results.kg_search_results:
            #         if iteration >= 1:
            #             yield ","
            #         yield json.dumps(result.json())
            #         context += f"Result {iteration+1}:\n{result.metadata['text']}\n\n"
            #         iteration += 1

            yield f"</{self.SEARCH_STREAM_MARKER}>"

            messages = self._get_message_payload(query, context)
            yield f"<{self.COMPLETION_STREAM_MARKER}>"
            response = ""
            for chunk in self.llm_provider.get_completion_stream(
                messages=messages, generation_config=rag_generation_config
            ):
                chunk = StreamingSearchRAGPipe._process_chunk(chunk)
                response += chunk
                yield chunk

            yield f"</{self.COMPLETION_STREAM_MARKER}>"

            await self.enqueue_log(
                run_id=run_id,
                key="llm_response",
                value=response,
            )

    async def _yield_chunks(
        self,
        start_marker: str,
        chunks: Generator[str, None, None],
        end_marker: str,
    ) -> str:
        """Yield `start_marker`, every chunk, then `end_marker`."""
        yield start_marker
        for chunk in chunks:
            yield chunk
        yield end_marker

    def _get_message_payload(
        self, query: str, context: str
    ) -> list[dict[str, str]]:
        # Two-message chat payload: system prompt + task prompt with the
        # query and accumulated context substituted in.
        return [
            {
                "role": "system",
                "content": self.prompt_provider.get_prompt(
                    self.config.system_prompt
                ),
            },
            {
                "role": "user",
                "content": self.prompt_provider.get_prompt(
                    self.config.task_prompt,
                    inputs={"query": query, "context": context},
                ),
            },
        ]

    @staticmethod
    def _process_chunk(chunk: LLMChatCompletionChunk) -> str:
        # Extract the delta text from a streaming chunk; empty string when
        # the delta carries no content (e.g. the final chunk).
        return chunk.choices[0].delta.content or ""
+ ) + results = [] + query_vector = self.embedding_provider.get_embedding( + message, + ) + search_results = ( + self.vector_db_provider.hybrid_search( + query_vector=query_vector, + query_text=message, + filters=search_filters, + limit=search_limit, + ) + if vector_search_settings.do_hybrid_search + else self.vector_db_provider.search( + query_vector=query_vector, + filters=search_filters, + limit=search_limit, + ) + ) + reranked_results = self.embedding_provider.rerank( + query=message, results=search_results, limit=search_limit + ) + for result in reranked_results: + result.metadata["associatedQuery"] = message + results.append(result) + yield result + await self.enqueue_log( + run_id=run_id, + key="search_results", + value=json.dumps([ele.json() for ele in results]), + ) + + async def _run_logic( + self, + input: AsyncPipe.Input, + state: AsyncState, + run_id: uuid.UUID, + vector_search_settings: VectorSearchSettings = VectorSearchSettings(), + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[VectorSearchResult, None]: + search_queries = [] + search_results = [] + async for search_request in input.message: + search_queries.append(search_request) + async for result in self.search( + message=search_request, + run_id=run_id, + vector_search_settings=vector_search_settings, + *args, + **kwargs, + ): + search_results.append(result) + yield result + + await state.update( + self.config.name, {"output": {"search_results": search_results}} + ) + + await state.update( + self.config.name, + { + "output": { + "search_queries": search_queries, + "search_results": search_results, + } + }, + ) diff --git a/R2R/r2r/prompts/__init__.py b/R2R/r2r/prompts/__init__.py new file mode 100755 index 00000000..88ed0658 --- /dev/null +++ b/R2R/r2r/prompts/__init__.py @@ -0,0 +1,3 @@ +from .local.r2r_prompt_provider import R2RPromptProvider + +__all__ = ["R2RPromptProvider"] diff --git a/R2R/r2r/prompts/local/__init__.py b/R2R/r2r/prompts/local/__init__.py new file mode 100755 index 
00000000..e69de29b --- /dev/null +++ b/R2R/r2r/prompts/local/__init__.py diff --git a/R2R/r2r/prompts/local/defaults.jsonl b/R2R/r2r/prompts/local/defaults.jsonl new file mode 100755 index 00000000..042136f6 --- /dev/null +++ b/R2R/r2r/prompts/local/defaults.jsonl @@ -0,0 +1,12 @@ +{"name": "default_system", "template": "You are a helpful assistant.", "input_types": {}} +{"name": "default_rag", "template": "## Task:\n\nAnswer the query given immediately below given the context which follows later. Use line item references to like [1], [2], ... refer to specifically numbered items in the provided context. Pay close attention to the title of each given source to ensure it is consistent with the query.\n\n### Query:\n{query}\n\n### Context:\n{context}\n\n### Query:\n{query}\n\nREMINDER - Use line item references to like [1], [2], ... refer to specifically numbered items in the provided context.\n## Response:\n", "input_types": {"query": "str", "context": "str"}} +{"name": "hyde", "template": "### Instruction:\n\nGiven the query that follows write a double newline separated list of {num_outputs} single paragraph distinct attempted answers to the given query. \nDO NOT generate any single answer which is likely to require information from multiple distinct documents, \nEACH single answer will be used to carry out a cosine similarity semantic search over distinct indexed documents, such as varied medical documents. \nFOR EXAMPLE if asked `how do the key themes of Great Gatsby compare with 1984`, the two attempted answers would be \n`The key themes of Great Gatsby are ... ANSWER_CONTINUED` and `The key themes of 1984 are ... ANSWER_CONTINUED`, where `ANSWER_CONTINUED` IS TO BE COMPLETED BY YOU in your response. 
\nHere is the original user query to be transformed into answers:\n\n### Query:\n{message}\n\n### Response:\n", "input_types": {"num_outputs": "int", "message": "str"}} +{"name": "rag_fusion_prompt", "template": "### Instruction:\n\nGiven the following query that follows to write a double newline separated list of up to {num_outputs} queries meant to help answer the original query. \nDO NOT generate any single query which is likely to require information from multiple distinct documents, \nEACH single query will be used to carry out a cosine similarity semantic search over distinct indexed documents, such as varied medical documents. \nFOR EXAMPLE if asked `how do the key themes of Great Gatsby compare with 1984`, the two queries would be \n`What are the key themes of Great Gatsby?` and `What are the key themes of 1984?`.\nHere is the original user query to be transformed into answers:\n\n### Query:\n{message}\n\n### Response:\n", "input_types": {"num_outputs": "int", "message": "str"}} +{"name": "rag_answer_eval", "template": "### Instruction:\n\nYou are given a `query`, related `context` and an associated `answer`. Your task is to sequentially score each sentence in the given answer as either 1 or 0, based on whether or not the given sentence is relevant to the given query and supported in full by the given context.\n### Example:\n#### Input:\n\nQuery:\nWhy does Alice prefer spending her mornings in the garden?\n\nContext:\nAlice loves to read books in her garden. She has a large collection of mystery novels. Every morning, she spends an hour reading while drinking her favorite tea. Her garden is filled with various flowers, and she especially loves the roses. On weekends, Alice's friend, Bob, often joins her for tea and they discuss the books they've read.\n\nAnswer:\nAlice enjoys her mornings in the garden because she loves to read there. 
She often listens to music while reading.\n####### Response:\n\n([1,0], '1/2')### Input:\nQuery:\n{query}\n\nContext:\n{context}\n\nAnswer:\n{answer}\n\nResponse:\n\n", "input_types": {"query": "str", "context": "str", "answer": "str"}} +{"name": "rag_context_eval", "template": "### Instruction:\n\nYou are given a `query` and an associated `context`. Your task is to sequentially score each sentence in the context as either 1 or 0, based on the relevancy to the given query. For instance, if the query is \"What is the capital of France?\" then the sentence \"The capital of France is Paris\" would receive a +1 value, whereas \"The french enjoy wine\" would receive a 0. Return your response as a tuple containing a list of 1s and 0s, where each value corresponds to the respective sentence in the context, and then the rational fraction of 1's to the total number of sentences (e.g. '1/4'). NOTE - do not include ANY extra text other than the requested tuple.\n\nQuery:\n{query}\n\nContext:\n{context}\n\n###Response\n\n", "input_types": {"query": "str", "context": "str"}} +{"name": "few_shot_ner_kg_extraction", "template": "### Instruction\nYou will shortly be asked to perform Named Entity Recognition (NER) and knowledge graph triplet extraction on the text that follows. 
NER involves identifying named entities in a text, and knowledge graph triplet extraction involves identifying relationships between these entities and other attributes in the text.\n\nA knowledge graph triplet contains the three following pieces of information:\n- `subject`: The main entity.\n- `predicate`: The relationship type.\n- `object`: The related entity.\n\nThey are represented below as `[subject]:<predicate>:[object]`.\n\n#### Process \n**Identify Named Entities**: Extract entities based on the given entity types, ensuring they appear in the order they are mentioned in the text.\n**Establish Triplets**: Form triplets using the provided predicates, again in the order they appear in the text.\n\nYour final response should follow this format:\n\n**Output:**\n```json\n{{\n \"entities_and_triples\": [\n \"[1], entity_type:entity_name\",\n \"[1] predicate [2]\",\n \"[1] predicate [3]\",\n \"[2], entity_type:entity_name\",\n ...\n ]\n}}\n```\n\n### Example:\n\n**Entity Types:**\nORGANIZATION\nCOMPANY\nCITY\nSTATE\nCOUNTRY\nOTHER\nPERSON\nYEAR\nMONTH\nDAY\nOTHER\nQUANTITY\nEVENT\n\n**Predicates:**\nFOUNDED_BY\nHEADQUARTERED_IN\nOPERATES_IN\nOWNED_BY\nACQUIRED_BY\nHAS_EMPLOYEE_COUNT\nGENERATED_REVENUE\nLISTED_ON\nINCORPORATED\nHAS_DIVISION\nALIAS\nANNOUNCED\nHAS_QUANTITY\nAS_OF\n\n**Input:**\nWalmart Inc. (formerly Wal-Mart Stores, Inc.) is an American multinational retail corporation that operates a chain of hypermarkets (also called supercenters), discount department stores, and grocery stores in the United States, headquartered in Bentonville, Arkansas.[10] The company was founded by brothers Sam and James \"Bud\" Walton in nearby Rogers, Arkansas in 1962 and incorporated under Delaware General Corporation Law on October 31, 1969. 
It also owns and operates Sam's Club retail warehouses.[11][12]\n\nAs of October 31, 2022, Walmart has 10,586 stores and clubs in 24 countries, operating under 46 different names.[2][3][4] The company operates under the name Walmart in the United States and Canada, as Walmart de M\u00e9xico y Centroam\u00e9rica in Mexico and Central America, and as Flipkart Wholesale in India.\n\n**Output:**\n```json\n{{\n \"entities_and_triples\": [\n \"[1], company:Walmart Inc.\",\n \"[2], company:Wal-Mart Stores, Inc.\",\n \"[1] ALIAS [2]\",\n \"[3], location:country:United States\",\n \"[1] OPERATES_IN [3]\",\n \"[4], location:city:Bentonville\",\n \"[1] HEADQUARTERED_IN [4]\",\n \"[5], location:state:Arkansas\",\n \"[1] HEADQUARTERED_IN [5]\",\n \"[6], person:Sam Walton\",\n \"[1] FOUNDED_BY [6]\",\n \"[7], person:James Walton\",\n \"[8], person:Bud Walton\",\n \"[7] ALIAS [8]\",\n \"[1] FOUNDED_BY [7]\",\n \"[9], location:city:Rogers\",\n \"[10], date:year:1962\",\n \"[11], event:incorporated under Delaware General Corporation Law\",\n \"[1] INCORPORATED [11]\",\n \"[12], date:day:October 31\",\n \"[1] INCORPORATED [12]\",\n \"[13], date:year:1969\",\n \"[1] INCORPORATED [13]\",\n \"[14], company:Sam's Club\",\n \"[1] INCORPORATED [14]\",\n \"[15], date:day:October 31, 2022\",\n \"[16], quantity:10,586 stores and clubs\",\n \"[16] AS_OF [15]\",\n \"[1] HAS_QUANTITY [16]\",\n \"[17], quantity:24 countries\",\n \"[18], quantity:46 different names\",\n \"[1] HAS_QUANTITY [18]\",\n \"[18], organization:company:Walmart de M\u00e9xico y Centroam\u00e9rica\",\n \"[1] ALIAS [18]\",\n \"[19], location:country:Mexico\",\n \"[1] OPERATES_IN [19]\",\n \"[20], location:region:Central America\",\n \"[1] OPERATES_IN [20]\",\n \"[21], organization:company:Flipkart Wholesale\",\n \"[1] ALIAS [21]\",\n \"[22], location:country:India\",\n \"[1] OPERATES_IN [22]\"\n ]\n}}\n```\n\n### Task:\nYour task is to perform Named Entity Recognition (NER) and knowledge graph triplet extraction on the text 
that follows below.\n\n**Input:**\n{input}\n\n**Output:**\n","input_types": {"input" : "str"}} +{"name": "few_shot_ner_kg_extraction_with_spec", "template": "### Instruction\nYou will shortly be asked to perform Named Entity Recognition (NER) and knowledge graph triplet extraction on the text that follows. NER involves identifying named entities in a text, and knowledge graph triplet extraction involves identifying relationships between these entities and other attributes in the text.\n\nA knowledge graph triplet contains the three following pieces of information:\n- `subject`: The main entity.\n- `predicate`: The relationship type.\n- `object`: The related entity.\n\nThey are represented below as `[subject]:<predicate>:[object]`.\n\n#### Process \n**Identify Named Entities**: Extract entities based on the given entity types, ensuring they appear in the order they are mentioned in the text.\n**Establish Triplets**: Form triplets using the provided predicates, again in the order they appear in the text.\n\nYour final response should follow this format:\n\n**Output:**\n```json\n{{\n \"entities_and_triples\": [\n \"[1], ENTITY_TYPE:ENTITY_NAME\",\n \"[1] PREDICATE [2]\",\n \"[1] PREDICATE [3]\",\n \"[2], ENTITY_TYPE:ENTITY_NAME\",\n ...\n ]\n}}\n```\n\n### Example:\n\n**Entity Types:**\nORGANIZATION\nCOMPANY\nCITY, STATE, COUNTRY, OTHER\nPERSON\nYEAR, MONTH, DAY, OTHER\nQUANTITY\nEVENT\n\n**Predicates:**\nFOUNDED_BY\nHEADQUARTERED_IN\nOPERATES_IN\nOWNED_BY\nACQUIRED_BY\nHAS_EMPLOYEE_COUNT\nGENERATED_REVENUE\nLISTED_ON\nINCORPORATED\nHAS_DIVISION\nALIAS\nANNOUNCED\nHAS_QUANTITY\nAS_OF\n\n**Input:**\nWalmart Inc. (formerly Wal-Mart Stores, Inc.) 
is an American multinational retail corporation that operates a chain of hypermarkets (also called supercenters), discount department stores, and grocery stores in the United States, headquartered in Bentonville, Arkansas.[10] The company was founded by brothers Sam and James \"Bud\" Walton in nearby Rogers, Arkansas in 1962 and incorporated under Delaware General Corporation Law on October 31, 1969. It also owns and operates Sam's Club retail warehouses.[11][12]\n\nAs of October 31, 2022, Walmart has 10,586 stores and clubs in 24 countries, operating under 46 different names.[2][3][4] The company operates under the name Walmart in the United States and Canada, as Walmart de M\u00e9xico y Centroam\u00e9rica in Mexico and Central America, and as Flipkart Wholesale in India.\n\n**Output:**\n```json\n{{\n \"entities_and_triples\": [\n \"[1], ORGANIZATION:COMPANY:Walmart Inc.\",\n \"[2], ORGANIZATION:COMPANY:Wal-Mart Stores, Inc.\",\n \"[1] ALIAS [2]\",\n \"[3], LOCATION:COUNTRY:United States\",\n \"[1] OPERATES_IN [3]\",\n \"[4], LOCATION:CITY:Bentonville\",\n \"[1] HEADQUARTERED_IN [4]\",\n \"[5], LOCATION:STATE:Arkansas\",\n \"[1] HEADQUARTERED_IN [5]\",\n \"[6], PERSON:Sam Walton\",\n \"[1] FOUNDED_BY [6]\",\n \"[7], PERSON:James Walton\",\n \"[8], PERSON:Bud Walton\",\n \"[7] ALIAS [8]\",\n \"[1] FOUNDED_BY [7]\",\n \"[9], LOCATION:CITY:Rogers\",\n \"[10], DATE:YEAR:1962\",\n \"[11], EVENT:Incorporated under Delaware General Corporation Law\",\n \"[1] INCORPORATED [11]\",\n \"[12], DATE:DAY:October 31\",\n \"[1] INCORPORATED [12]\",\n \"[13], DATE:YEAR:1969\",\n \"[1] INCORPORATED [13]\",\n \"[14], ORGANIZATION:COMPANY:Sam's Club\",\n \"[1] INCORPORATED [14]\",\n \"[15], DATE:DAY:October 31, 2022\",\n \"[16], QUANTITY:10,586 stores and clubs\",\n \"[16] AS_OF [15]\",\n \"[1] HAS_QUANTITY [16]\",\n \"[17], QUANTITY:24 countries\",\n \"[18], QUANTITY:46 different names\",\n \"[1] HAS_QUANTITY [18]\",\n \"[18], ORGANIZATION:COMPANY:Walmart de M\u00e9xico y 
Centroam\u00e9rica\",\n \"[1] ALIAS [18]\",\n \"[19], LOCATION:COUNTRY:Mexico\",\n \"[1] OPERATES_IN [19]\",\n \"[20], LOCATION:REGION:Central America\",\n \"[1] OPERATES_IN [20]\",\n \"[21], ORGANIZATION:COMPANY:Flipkart Wholesale\",\n \"[1] ALIAS [21]\",\n \"[22], LOCATION:COUNTRY:India\",\n \"[1] OPERATES_IN [22]\"\n ]\n}}\n```\n\n### Task:\nYour task is to perform Named Entity Recognition (NER) and knowledge graph triplet extraction on the text that follows below. Use the provided entities and predicates as shown\n\n**Entity Types:**\n{entity_types}\n\n**Predicates:**\n{relations}\n\n**Input:**\n{input}\n\n**Output:**\n", "input_types": {"entity_types": "str", "relations": "str", "input" : "str"}} +{"name": "zero_shot_ner_kg_extraction", "template": "Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates.\n\n**Entity Types**:\n\n[\"PERSON\", \"ORGANIZATION\", \"LOCATION\", \"DATE\", \"TIME\", \"MONEY\", \"PERCENTAGE\", \"PRODUCT\", \"EVENT\", \"LANGUAGE\", \"NATIONALITY\", \"RELIGION\", \"TITLE\", \"PROFESSION\", \"ANIMAL\", \"PLANT\", \"DISEASE\", \"MEDICATION\", \"CHEMICAL\", \"MATERIAL\", \"COLOR\", \"SHAPE\", \"MEASUREMENT\", \"WEATHER\", \"NATURAL_DISASTER\", \"AWARD\", \"LAW\", \"CRIME\", \"TECHNOLOGY\", \"SOFTWARE\", \"HARDWARE\", \"VEHICLE\", \"FOOD\", \"DRINK\", \"SPORT\", \"MUSIC_GENRE\", \"INSTRUMENT\", \"ARTWORK\", \"BOOK\", \"MOVIE\", \"TV_SHOW\", \"ACADEMIC_SUBJECT\", \"SCIENTIFIC_THEORY\", \"POLITICAL_PARTY\", \"CURRENCY\", \"STOCK_SYMBOL\", \"FILE_TYPE\", \"PROGRAMMING_LANGUAGE\", \"MEDICAL_PROCEDURE\", \"CELESTIAL_BODY\"]\n\n**Predicates**\n[\"IS_EMPLOYED_BY\", \"LIVES_IN\", \"BORN_IN\", \"DIED_IN\", \"FOUNDED\", \"INVENTED\", \"WROTE\", \"DIRECTED\", \"STARRED_IN\", \"MARRIED_TO\", \"PARENT_OF\", \"CHILD_OF\", \"SIBLING_OF\", \"MEMBER_OF\", \"OWNER_OF\", \"CEO_OF\", 
\"STUDIED_AT\", \"GRADUATED_FROM\", \"TEACHES_AT\", \"SPEAKS\", \"CAPITAL_OF\", \"LOCATED_IN\", \"PART_OF\", \"CONTAINS\", \"PRODUCES\", \"CONSUMES\", \"EXPORTS\", \"IMPORTS\", \"ALLIES_WITH\", \"CONFLICTS_WITH\", \"PREDECESSOR_OF\", \"SUCCESSOR_OF\", \"DISCOVERED\", \"DEVELOPED\", \"FUNDED_BY\", \"INVESTED_IN\", \"COLLABORATES_WITH\", \"COMPETES_WITH\", \"ACQUIRED\", \"MERGED_WITH\", \"SPECIALIZES_IN\", \"PERFORMS\", \"AFFECTS\", \"CAUSES\", \"PREVENTS\", \"TREATS\", \"SYMPTOMS_OF\", \"BELONGS_TO\", \"DERIVED_FROM\", \"MEASURED_IN\"]\n\n**Text**{input}", "input_types": {"input" : "str"}} +{"name": "zero_shot_ner_kg_extraction_with_spec", "template": "Perform Named Entity Recognition (NER) and extract knowledge graph triplets from the text. NER identifies named entities of given entity types, and triple extraction identifies relationships between entities using specified predicates.\n\n**Entity Types**\n{entity_types}\n\n**Predicates**\n{relations}\n\n**Text**{input}", "input_types": {"entity_types": "str", "relations": "str", "input" : "str"}} +{"name": "kg_agent", "template": "**System Message:**\n\nYou are an AI assistant capable of generating Cypher queries to interact with a Neo4j knowledge graph. The knowledge graph contains information about organizations, people, locations, and their relationships, such as founders of companies, locations of companies, and products associated with companies.\n\n**Instructions:**\n\nWhen a user asks a question, you will generate a Cypher query to retrieve the relevant information from the Neo4j knowledge graph. Later, you will be given a schema which specifies the available relationships to help you construct the query. 
First, review the examples provided to understand the expected format of the queries.\n\n### Example(s) - User Questions and Cypher Queries for an Academic Knowledge Graph\n\n**User Question:**\n\"List all courses available in the computer science department.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (c:COURSE)-[:OFFERED_BY]->(d:DEPARTMENT)\nWHERE d.name CONTAINS 'Computer Science'\nRETURN c.id AS Course, d.name AS Department\nORDER BY c.id;\n```\n\n**User Question:**\n\"Retrieve all courses taught by professors who have published research on natural language processing.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (pr:PERSON)-[:PUBLISHED]->(p:PAPER)\nMATCH (p)-[:TOPIC]->(t:TOPIC)\nWHERE t.name CONTAINS 'Natural Language Processing'\nMATCH (c:COURSE)-[:TAUGHT_BY]->(pr)\nRETURN DISTINCT c.id AS Course, pr.name AS Professor, t.name AS Topic\nORDER BY c.id;\n```\n\n\n### Example(s) - User Questions and Cypher Queries for an Historical Events and Figures\n\n**User Question:**\n\"List all battles that occurred in the 19th century and the generals who participated in them.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (b:EVENT)-[:HAPPENED_AT]->(d:DATE)\nWHERE d.year >= 1800 AND d.year < 1900 AND b.type CONTAINS 'Battle'\nMATCH (g:PERSON)-[:PARTICIPATED_IN]->(b)\nRETURN b.name AS Battle, d.year AS Year, g.name AS General\nORDER BY d.year, b.name, g.name;\n```\n\n**User Question:**\n\"Find all treaties signed in Paris and the countries involved.\"\n\n\n**Generated Cypher Query:**\n```cypher\nMATCH (t:EVENT)-[:HAPPENED_AT]->(l:LOCATION)\nWHERE l.name CONTAINS 'Paris' AND t.type CONTAINS 'Treaty'\nMATCH (c:ORGANIZATION)-[:SIGNED]->(t)\nRETURN t.name AS Treaty, l.name AS Location, c.name AS Country\nORDER BY t.name, c.name;\n```\n\n\nNow, you will be provided with a schema for the entities and relationships in the Neo4j knowledge graph. 
Use this schema to construct Cypher queries based on user questions.\n\n- **Entities:**\n - `ORGANIZATION` (e.g.: `COMPANY`, `SCHOOL`, `NON-PROFIT`, `OTHER`)\n - `COMPANY`\n - `LOCATION` (e.g.: `CITY`, `STATE`, `COUNTRY`, `OTHER`)\n - `DATE` (e.g.: `YEAR`, `MONTH`, `DAY`, `BATCH`, `OTHER`)\n - `QUANTITY`\n - `EVENT` (e.g.: `INCORPORATION`, `FUNDING_ROUND`, `ACQUISITION`, `LAUNCH`, `OTHER`)\n\n- **Relationships:**\n - `FOUNDED_BY`\n - `HEADQUARTERED_IN`\n - `OPERATES_IN`\n - `RAISED`\n - `ACQUIRED_BY`\n - `HAS_EMPLOYEE_COUNT`\n - `GENERATED_REVENUE`\n - `LISTED_ON`\n - `INCORPORATED`\n - `HAS_DIVISION`\n - `ANNOUNCED`\n - `HAS_QUANTITY`\n\nUse the referenced examples and schema to help you construct an appropriate Cypher query based on the following question:\n\n**User Question:**\n{input}\n\n**Generated Cypher Query:**\n", "input_types": {"input" : "str"}} +{"name": "kg_agent_with_spec", "template": "**System Message:**\n\nYou are an AI assistant capable of generating Cypher queries to interact with a Neo4j knowledge graph. The knowledge graph contains information about organizations, people, locations, and their relationships, such as founders of companies, locations of companies, and products associated with companies.\n\n**Instructions:**\n\nWhen a user asks a question, you will generate a Cypher query to retrieve the relevant information from the Neo4j knowledge graph. Later, you will be given a schema which specifies the available relationships to help you construct the query. 
First, review the examples provided to understand the expected format of the queries.\n\n### Example(s) - User Questions and Cypher Queries for an Academic Knowledge Graph\n\n**User Question:**\n\"List all courses available in the computer science department.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (c:COURSE)-[:OFFERED_BY]->(d:DEPARTMENT)\nWHERE d.name CONTAINS 'Computer Science'\nRETURN c.id AS Course, d.name AS Department\nORDER BY c.id;\n```\n\n**User Question:**\n\"Retrieve all courses taught by professors who have published research on natural language processing.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (pr:PERSON)-[:PUBLISHED]->(p:PAPER)\nMATCH (p)-[:TOPIC]->(t:TOPIC)\nWHERE t.name CONTAINS 'Natural Language Processing'\nMATCH (c:COURSE)-[:TAUGHT_BY]->(pr)\nRETURN DISTINCT c.id AS Course, pr.name AS Professor, t.name AS Topic\nORDER BY c.id;\n```\n\n\n### Example(s) - User Questions and Cypher Queries for an Historical Events and Figures\n\n**User Question:**\n\"List all battles that occurred in the 19th century and the generals who participated in them.\"\n\n**Generated Cypher Query:**\n```cypher\nMATCH (b:EVENT)-[:HAPPENED_AT]->(d:DATE)\nWHERE d.year >= 1800 AND d.year < 1900 AND b.type CONTAINS 'Battle'\nMATCH (g:PERSON)-[:PARTICIPATED_IN]->(b)\nRETURN b.name AS Battle, d.year AS Year, g.name AS General\nORDER BY d.year, b.name, g.name;\n```\n\n**User Question:**\n\"Find all treaties signed in Paris and the countries involved.\"\n\n\n**Generated Cypher Query:**\n```cypher\nMATCH (t:EVENT)-[:HAPPENED_AT]->(l:LOCATION)\nWHERE l.name CONTAINS 'Paris' AND t.type CONTAINS 'Treaty'\nMATCH (c:ORGANIZATION)-[:SIGNED]->(t)\nRETURN t.name AS Treaty, l.name AS Location, c.name AS Country\nORDER BY t.name, c.name;\n```\n\n\nNow, you will be provided with a schema for the entities and relationships in the Neo4j knowledge graph. 
Use this schema to construct Cypher queries based on user questions.\n\n- **Entities:**\n{entity_types}\n\n- **Relationships:**\n{relations}\n\nUse the referenced examples and schema to help you construct an appropriate Cypher query based on the following question:\n\n**User Question:**\n{input}\n\n**Generated Cypher Query:**\n", "input_types": {"entity_types": "str", "relations": "str", "input" : "str"}}
class R2RPromptProvider(PromptProvider):
    """Prompt provider backed by a local JSONL file of named templates."""

    def __init__(self, file_path: Optional[str] = None):
        """Load prompts from ``file_path``, defaulting to the bundled
        ``defaults.jsonl`` next to this module."""
        self.prompts: dict[str, Prompt] = {}
        self._load_prompts_from_jsonl(file_path=file_path)

    def _load_prompts_from_jsonl(self, file_path: Optional[str] = None):
        """Parse one JSON object per line and register each as a prompt.

        Raises:
            ValueError: If any line is not valid JSON, or if a prompt name
                is duplicated (via :meth:`add_prompt`).
        """
        if not file_path:
            file_path = os.path.join(
                os.path.dirname(__file__), "defaults.jsonl"
            )
        try:
            with open(file_path, "r") as file:
                for line in file:
                    # Skip blank lines (e.g. a trailing newline).
                    if line.strip():
                        data = json.loads(line)
                        self.add_prompt(
                            data["name"],
                            data["template"],
                            data.get("input_types", {}),
                        )
        except json.JSONDecodeError as e:
            error_msg = f"Error loading prompts from JSONL file: {e}"
            logger.error(error_msg)
            # Chain the cause so the offending line/position is preserved.
            raise ValueError(error_msg) from e

    def add_prompt(
        self, name: str, template: str, input_types: dict[str, str]
    ) -> None:
        """Register a new prompt; duplicate names are rejected."""
        if name in self.prompts:
            raise ValueError(f"Prompt '{name}' already exists.")
        self.prompts[name] = Prompt(
            name=name, template=template, input_types=input_types
        )

    def get_prompt(
        self, prompt_name: str, inputs: Optional[dict[str, Any]] = None
    ) -> str:
        """Return the raw template, or the template formatted with ``inputs``
        when they are provided.

        Raises:
            ValueError: If ``prompt_name`` is unknown.
        """
        if prompt_name not in self.prompts:
            raise ValueError(f"Prompt '{prompt_name}' not found.")
        prompt = self.prompts[prompt_name]
        if inputs is None:
            return prompt.template
        return prompt.format_prompt(inputs)

    def update_prompt(
        self,
        name: str,
        template: Optional[str] = None,
        input_types: Optional[dict[str, str]] = None,
    ) -> None:
        """Update the template and/or input types of an existing prompt.

        Raises:
            ValueError: If ``name`` is unknown.
        """
        if name not in self.prompts:
            raise ValueError(f"Prompt '{name}' not found.")
        # BUGFIX: compare against None instead of relying on truthiness so an
        # empty template string or empty input_types dict can be assigned.
        if template is not None:
            self.prompts[name].template = template
        if input_types is not None:
            self.prompts[name].input_types = input_types

    def get_all_prompts(self) -> dict[str, Prompt]:
        """Return the live mapping of prompt name to Prompt object."""
        return self.prompts
class OllamaEmbeddingProvider(EmbeddingProvider):
    """Embedding provider backed by a local/remote Ollama server.

    Synchronous calls go through ``ollama.Client``; async batch embedding is
    funneled through an internal queue with bounded concurrency and
    exponential backoff. Reranking is not supported (pass-through)."""

    def __init__(self, config: EmbeddingConfig):
        """Validate config and construct the sync and async Ollama clients.

        Raises:
            ValueError: If the provider is unset, not ``ollama``, or a rerank
                model is configured (unsupported).
        """
        super().__init__(config)
        provider = config.provider
        if not provider:
            raise ValueError(
                "Must set provider in order to initialize `OllamaEmbeddingProvider`."
            )
        if provider != "ollama":
            raise ValueError(
                "OllamaEmbeddingProvider must be initialized with provider `ollama`."
            )
        if config.rerank_model:
            raise ValueError(
                "OllamaEmbeddingProvider does not support separate reranking."
            )

        self.base_model = config.base_model
        self.base_dimension = config.base_dimension
        # OLLAMA_API_BASE overrides the client's default host; when unset the
        # ollama client falls back to http://127.0.0.1:11434.
        self.base_url = os.getenv("OLLAMA_API_BASE")
        logger.info(
            f"Using Ollama API base URL: {self.base_url or 'http://127.0.0.1:11434'}"
        )
        self.client = Client(host=self.base_url)
        self.aclient = AsyncClient(host=self.base_url)

        # Async request pipeline: queue + retry/backoff + concurrency cap.
        self.request_queue = asyncio.Queue()
        self.max_retries = 2
        self.initial_backoff = 1
        self.max_backoff = 60
        self.concurrency_limit = 10
        self.semaphore = asyncio.Semaphore(self.concurrency_limit)

    async def process_queue(self):
        """Consume queued embedding tasks forever, resolving each task's
        future with its result or exception. Cancelled by the caller once
        the queue has been drained."""
        while True:
            task = await self.request_queue.get()
            try:
                result = await self.execute_task_with_backoff(task)
                task["future"].set_result(result)
            except Exception as e:
                task["future"].set_exception(e)
            finally:
                self.request_queue.task_done()

    async def execute_task_with_backoff(self, task: dict[str, Any]):
        """Embed ``task['text']`` with up to ``max_retries`` attempts,
        exponential backoff with jitter, a 30s per-attempt timeout, and at
        most ``concurrency_limit`` requests in flight."""
        retries = 0
        backoff = self.initial_backoff
        while retries < self.max_retries:
            try:
                async with self.semaphore:
                    response = await asyncio.wait_for(
                        self.aclient.embeddings(
                            prompt=task["text"], model=self.base_model
                        ),
                        timeout=30,
                    )
                return response["embedding"]
            except Exception as e:
                logger.warning(
                    f"Request failed (attempt {retries + 1}): {str(e)}"
                )
                retries += 1
                if retries == self.max_retries:
                    raise Exception(
                        f"Max retries reached. Last error: {str(e)}"
                    )
                await asyncio.sleep(backoff + random.uniform(0, 1))
                backoff = min(backoff * 2, self.max_backoff)

    def get_embedding(
        self,
        text: str,
        stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE,
    ) -> list[float]:
        """Synchronously embed a single string with the base model.

        Raises:
            ValueError: If a non-BASE pipeline stage is requested.
        """
        if stage != EmbeddingProvider.PipeStage.BASE:
            raise ValueError(
                "OllamaEmbeddingProvider only supports search stage."
            )

        try:
            response = self.client.embeddings(
                prompt=text, model=self.base_model
            )
            return response["embedding"]
        except Exception as e:
            logger.error(f"Error getting embedding: {str(e)}")
            raise

    def get_embeddings(
        self,
        texts: list[str],
        stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE,
    ) -> list[list[float]]:
        """Synchronously embed each string in turn (no batching)."""
        return [self.get_embedding(text, stage) for text in texts]

    async def async_get_embeddings(
        self,
        texts: list[str],
        stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE,
    ) -> list[list[float]]:
        """Embed ``texts`` concurrently via the internal queue.

        All texts are enqueued; a single queue-processor task drains them
        under the concurrency semaphore. Fails as a whole if any individual
        embedding fails.
        """
        if stage != EmbeddingProvider.PipeStage.BASE:
            raise ValueError(
                "OllamaEmbeddingProvider only supports search stage."
            )

        queue_processor = asyncio.create_task(self.process_queue())
        futures = []
        for text in texts:
            future = asyncio.Future()
            await self.request_queue.put({"text": text, "future": future})
            futures.append(future)

        try:
            results = await asyncio.gather(*futures, return_exceptions=True)
            # BUGFIX: previously raised a generic Exception with a
            # placeholder-free f-string, discarding the underlying error.
            # Surface and chain the first real failure instead.
            failures = [r for r in results if isinstance(r, Exception)]
            if failures:
                raise Exception(
                    f"Embedding generation failed for one or more embeddings: {failures[0]}"
                ) from failures[0]
            return results
        except Exception as e:
            logger.error(f"Embedding generation failed: {str(e)}")
            raise
        finally:
            await self.request_queue.join()
            queue_processor.cancel()

    def rerank(
        self,
        query: str,
        results: list[VectorSearchResult],
        stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK,
        limit: int = 10,
    ) -> list[VectorSearchResult]:
        """No-op rerank: truncate to ``limit`` without reordering."""
        return results[:limit]

    def tokenize_string(
        self, text: str, model: str, stage: EmbeddingProvider.PipeStage
    ) -> list[int]:
        """Tokenization is not available for Ollama-backed embeddings."""
        raise NotImplementedError(
            "Tokenization is not supported by OllamaEmbeddingProvider."
        )
+ ) + if ( + self.base_dimension + and self.base_dimension + not in OpenAIEmbeddingProvider.MODEL_TO_DIMENSIONS[self.base_model] + ): + raise ValueError( + f"Dimensions {self.dimension} for {self.base_model} are not supported" + ) + + if not self.base_model or not self.base_dimension: + raise ValueError( + "Must set base_model and base_dimension in order to initialize OpenAIEmbeddingProvider." + ) + + if config.rerank_model: + raise ValueError( + "OpenAIEmbeddingProvider does not support separate reranking." + ) + + def get_embedding( + self, + text: str, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[float]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OpenAIEmbeddingProvider only supports search stage." + ) + + try: + return ( + self.client.embeddings.create( + input=[text], + model=self.base_model, + dimensions=self.base_dimension + or OpenAIEmbeddingProvider.MODEL_TO_DIMENSIONS[ + self.base_model + ][-1], + ) + .data[0] + .embedding + ) + except AuthenticationError as e: + raise ValueError( + "Invalid OpenAI API key provided. Please check your OPENAI_API_KEY environment variable." + ) from e + + async def async_get_embedding( + self, + text: str, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[float]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OpenAIEmbeddingProvider only supports search stage." + ) + + try: + response = await self.async_client.embeddings.create( + input=[text], + model=self.base_model, + dimensions=self.base_dimension + or OpenAIEmbeddingProvider.MODEL_TO_DIMENSIONS[ + self.base_model + ][-1], + ) + return response.data[0].embedding + except AuthenticationError as e: + raise ValueError( + "Invalid OpenAI API key provided. Please check your OPENAI_API_KEY environment variable." 
+ ) from e + + def get_embeddings( + self, + texts: list[str], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[list[float]]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OpenAIEmbeddingProvider only supports search stage." + ) + + try: + return [ + ele.embedding + for ele in self.client.embeddings.create( + input=texts, + model=self.base_model, + dimensions=self.base_dimension + or OpenAIEmbeddingProvider.MODEL_TO_DIMENSIONS[ + self.base_model + ][-1], + ).data + ] + except AuthenticationError as e: + raise ValueError( + "Invalid OpenAI API key provided. Please check your OPENAI_API_KEY environment variable." + ) from e + + async def async_get_embeddings( + self, + texts: list[str], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[list[float]]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OpenAIEmbeddingProvider only supports search stage." + ) + + try: + response = await self.async_client.embeddings.create( + input=texts, + model=self.base_model, + dimensions=self.base_dimension + or OpenAIEmbeddingProvider.MODEL_TO_DIMENSIONS[ + self.base_model + ][-1], + ) + return [ele.embedding for ele in response.data] + except AuthenticationError as e: + raise ValueError( + "Invalid OpenAI API key provided. Please check your OPENAI_API_KEY environment variable." + ) from e + + def rerank( + self, + query: str, + results: list[VectorSearchResult], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, + limit: int = 10, + ): + return results[:limit] + + def tokenize_string(self, text: str, model: str) -> list[int]: + try: + import tiktoken + except ImportError: + raise ValueError( + "Must download tiktoken library to run `tokenize_string`." 
+ ) + # tiktoken encoding - + # cl100k_base - gpt-4, gpt-3.5-turbo, text-embedding-ada-002, text-embedding-3-small, text-embedding-3-large + if model not in OpenAIEmbeddingProvider.MODEL_TO_TOKENIZER: + raise ValueError(f"OpenAI embedding model {model} not supported.") + encoding = tiktoken.get_encoding( + OpenAIEmbeddingProvider.MODEL_TO_TOKENIZER[model] + ) + return encoding.encode(text) diff --git a/R2R/r2r/providers/embeddings/sentence_transformer/sentence_transformer_base.py b/R2R/r2r/providers/embeddings/sentence_transformer/sentence_transformer_base.py new file mode 100755 index 00000000..3316cb60 --- /dev/null +++ b/R2R/r2r/providers/embeddings/sentence_transformer/sentence_transformer_base.py @@ -0,0 +1,160 @@ +import logging + +from r2r.base import EmbeddingConfig, EmbeddingProvider, VectorSearchResult + +logger = logging.getLogger(__name__) + + +class SentenceTransformerEmbeddingProvider(EmbeddingProvider): + def __init__( + self, + config: EmbeddingConfig, + ): + super().__init__(config) + logger.info( + "Initializing `SentenceTransformerEmbeddingProvider` with separate models for search and rerank." + ) + provider = config.provider + if not provider: + raise ValueError( + "Must set provider in order to initialize SentenceTransformerEmbeddingProvider." + ) + if provider != "sentence-transformers": + raise ValueError( + "SentenceTransformerEmbeddingProvider must be initialized with provider `sentence-transformers`." + ) + try: + from sentence_transformers import CrossEncoder, SentenceTransformer + + self.SentenceTransformer = SentenceTransformer + # TODO - Modify this to be configurable, as `bge-reranker-large` is a `SentenceTransformer` model + self.CrossEncoder = CrossEncoder + except ImportError as e: + raise ValueError( + "Must download sentence-transformers library to run `SentenceTransformerEmbeddingProvider`." 
+ ) from e + + # Initialize separate models for search and rerank + self.do_search = False + self.do_rerank = False + + self.search_encoder = self._init_model( + config, EmbeddingProvider.PipeStage.BASE + ) + self.rerank_encoder = self._init_model( + config, EmbeddingProvider.PipeStage.RERANK + ) + + def _init_model(self, config: EmbeddingConfig, stage: str): + stage_name = stage.name.lower() + model = config.dict().get(f"{stage_name}_model", None) + dimension = config.dict().get(f"{stage_name}_dimension", None) + + transformer_type = config.dict().get( + f"{stage_name}_transformer_type", "SentenceTransformer" + ) + + if stage == EmbeddingProvider.PipeStage.BASE: + self.do_search = True + # Check if a model is set for the stage + if not (model and dimension and transformer_type): + raise ValueError( + f"Must set {stage.name.lower()}_model and {stage.name.lower()}_dimension for {stage} stage in order to initialize SentenceTransformerEmbeddingProvider." + ) + + if stage == EmbeddingProvider.PipeStage.RERANK: + # Check if a model is set for the stage + if not (model and dimension and transformer_type): + return None + + self.do_rerank = True + if transformer_type == "SentenceTransformer": + raise ValueError( + f"`SentenceTransformer` models are not yet supported for {stage} stage in SentenceTransformerEmbeddingProvider." 
+ ) + + # Save the model_key and dimension into instance variables + setattr(self, f"{stage_name}_model", model) + setattr(self, f"{stage_name}_dimension", dimension) + setattr(self, f"{stage_name}_transformer_type", transformer_type) + + # Initialize the model + encoder = ( + self.SentenceTransformer( + model, truncate_dim=dimension, trust_remote_code=True + ) + if transformer_type == "SentenceTransformer" + else self.CrossEncoder(model, trust_remote_code=True) + ) + return encoder + + def get_embedding( + self, + text: str, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[float]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError("`get_embedding` only supports `SEARCH` stage.") + if not self.do_search: + raise ValueError( + "`get_embedding` can only be called for the search stage if a search model is set." + ) + encoder = self.search_encoder + return encoder.encode([text]).tolist()[0] + + def get_embeddings( + self, + texts: list[str], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[list[float]]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError("`get_embeddings` only supports `SEARCH` stage.") + if not self.do_search: + raise ValueError( + "`get_embeddings` can only be called for the search stage if a search model is set." 
+ ) + encoder = ( + self.search_encoder + if stage == EmbeddingProvider.PipeStage.BASE + else self.rerank_encoder + ) + return encoder.encode(texts).tolist() + + def rerank( + self, + query: str, + results: list[VectorSearchResult], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, + limit: int = 10, + ) -> list[VectorSearchResult]: + if stage != EmbeddingProvider.PipeStage.RERANK: + raise ValueError("`rerank` only supports `RERANK` stage.") + if not self.do_rerank: + return results[:limit] + + from copy import copy + + texts = copy([doc.metadata["text"] for doc in results]) + # Use the rank method from the rerank_encoder, which is a CrossEncoder model + reranked_scores = self.rerank_encoder.rank( + query, texts, return_documents=False, top_k=limit + ) + # Map the reranked scores back to the original documents + reranked_results = [] + for score in reranked_scores: + corpus_id = score["corpus_id"] + new_result = results[corpus_id] + new_result.score = float(score["score"]) + reranked_results.append(new_result) + + # Sort the documents by the new scores in descending order + reranked_results.sort(key=lambda doc: doc.score, reverse=True) + return reranked_results + + def tokenize_string( + self, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + ) -> list[int]: + raise ValueError( + "SentenceTransformerEmbeddingProvider does not support tokenize_string." 
+ ) diff --git a/R2R/r2r/providers/eval/__init__.py b/R2R/r2r/providers/eval/__init__.py new file mode 100755 index 00000000..3f5e1b51 --- /dev/null +++ b/R2R/r2r/providers/eval/__init__.py @@ -0,0 +1,3 @@ +from .llm.base_llm_eval import LLMEvalProvider + +__all__ = ["LLMEvalProvider"] diff --git a/R2R/r2r/providers/eval/llm/base_llm_eval.py b/R2R/r2r/providers/eval/llm/base_llm_eval.py new file mode 100755 index 00000000..7c573a34 --- /dev/null +++ b/R2R/r2r/providers/eval/llm/base_llm_eval.py @@ -0,0 +1,84 @@ +from fractions import Fraction +from typing import Union + +from r2r import EvalConfig, EvalProvider, LLMProvider, PromptProvider +from r2r.base.abstractions.llm import GenerationConfig + + +class LLMEvalProvider(EvalProvider): + def __init__( + self, + config: EvalConfig, + llm_provider: LLMProvider, + prompt_provider: PromptProvider, + ): + super().__init__(config) + + self.llm_provider = llm_provider + self.prompt_provider = prompt_provider + + def _calc_query_context_relevancy(self, query: str, context: str) -> float: + system_prompt = self.prompt_provider.get_prompt("default_system") + eval_prompt = self.prompt_provider.get_prompt( + "rag_context_eval", {"query": query, "context": context} + ) + response = self.llm_provider.get_completion( + self.prompt_provider._get_message_payload( + system_prompt, eval_prompt + ), + self.eval_generation_config, + ) + response_text = response.choices[0].message.content + fraction = ( + response_text + # Get the fraction in the returned tuple + .split(",")[-1][:-1] + # Remove any quotes and spaces + .replace("'", "") + .replace('"', "") + .strip() + ) + return float(Fraction(fraction)) + + def _calc_answer_grounding( + self, query: str, context: str, answer: str + ) -> float: + system_prompt = self.prompt_provider.get_prompt("default_system") + eval_prompt = self.prompt_provider.get_prompt( + "rag_answer_eval", + {"query": query, "context": context, "answer": answer}, + ) + response = self.llm_provider.get_completion( 
def remove_empty_values(input_dict):
    """
    Remove entries with empty values from the dictionary.

    Parameters:
    input_dict (dict): The dictionary from which empty values need to be removed.

    Returns:
    dict: A new dictionary with all empty values removed.
    """
    # Truthiness filter: drops None, "", 0, False, and empty containers
    # while leaving the input dictionary untouched.
    filtered = {}
    for key, value in input_dict.items():
        if value:
            filtered[key] = value
    return filtered
+ + If you are using local Neo4j instead of aura, here's a helpful + command for launching the docker container: + + ```bash + docker run \ + -p 7474:7474 -p 7687:7687 \ + -v $PWD/data:/data -v $PWD/plugins:/plugins \ + --name neo4j-apoc \ + -e NEO4J_apoc_export_file_enabled=true \ + -e NEO4J_apoc_import_file_enabled=true \ + -e NEO4J_apoc_import_file_use__neo4j__config=true \ + -e NEO4JLABS_PLUGINS=\\[\"apoc\"\\] \ + neo4j:latest + ``` + + Args: + username (str): The username for the Neo4j database. + password (str): The password for the Neo4j database. + url (str): The URL for the Neo4j database. + database (Optional[str]): The name of the database to connect to. Defaults to "neo4j". + + Examples: + `pip install llama-index-graph-stores-neo4j` + + ```python + from llama_index.core.indices.property_graph import PropertyGraphIndex + from llama_index.graph_stores.neo4j import Neo4jKGProvider + + # Create a Neo4jKGProvider instance + graph_store = Neo4jKGProvider( + username="neo4j", + password="neo4j", + url="bolt://localhost:7687", + database="neo4j" + ) + + # create the index + index = PropertyGraphIndex.from_documents( + documents, + property_graph_store=graph_store, + ) + ``` + """ + + supports_structured_queries: bool = True + supports_vector_queries: bool = True + + def __init__( + self, + config: KGConfig, + refresh_schema: bool = True, + sanitize_query_output: bool = True, + enhanced_schema: bool = False, + *args: Any, + **kwargs: Any, + ) -> None: + if config.provider != "neo4j": + raise ValueError( + "Neo4jKGProvider must be initialized with config with `neo4j` provider." + ) + + try: + import neo4j + except ImportError: + raise ImportError("Please install neo4j: pip install neo4j") + + username = os.getenv("NEO4J_USER") + password = os.getenv("NEO4J_PASSWORD") + url = os.getenv("NEO4J_URL") + database = os.getenv("NEO4J_DATABASE", "neo4j") + + if not username or not password or not url: + raise ValueError( + "Neo4j configuration values are missing. 
Please set NEO4J_USER, NEO4J_PASSWORD, and NEO4J_URL environment variables." + ) + + self.sanitize_query_output = sanitize_query_output + self.enhcnaced_schema = enhanced_schema + self._driver = neo4j.GraphDatabase.driver( + url, auth=(username, password), **kwargs + ) + self._async_driver = neo4j.AsyncGraphDatabase.driver( + url, + auth=(username, password), + **kwargs, + ) + self._database = database + self.structured_schema = {} + if refresh_schema: + self.refresh_schema() + self.neo4j = neo4j + self.config = config + + @property + def client(self): + return self._driver + + def refresh_schema(self) -> None: + """Refresh the schema.""" + node_query_results = self.structured_query( + node_properties_query, + param_map={ + "EXCLUDED_LABELS": [*EXCLUDED_LABELS, BASE_ENTITY_LABEL] + }, + ) + node_properties = ( + [el["output"] for el in node_query_results] + if node_query_results + else [] + ) + + rels_query_result = self.structured_query( + rel_properties_query, param_map={"EXCLUDED_LABELS": EXCLUDED_RELS} + ) + rel_properties = ( + [el["output"] for el in rels_query_result] + if rels_query_result + else [] + ) + + rel_objs_query_result = self.structured_query( + rel_query, + param_map={ + "EXCLUDED_LABELS": [*EXCLUDED_LABELS, BASE_ENTITY_LABEL] + }, + ) + relationships = ( + [el["output"] for el in rel_objs_query_result] + if rel_objs_query_result + else [] + ) + + # Get constraints & indexes + try: + constraint = self.structured_query("SHOW CONSTRAINTS") + index = self.structured_query( + "CALL apoc.schema.nodes() YIELD label, properties, type, size, " + "valuesSelectivity WHERE type = 'RANGE' RETURN *, " + "size * valuesSelectivity as distinctValues" + ) + except ( + self.neo4j.exceptions.ClientError + ): # Read-only user might not have access to schema information + constraint = [] + index = [] + + self.structured_schema = { + "node_props": { + el["labels"]: el["properties"] for el in node_properties + }, + "rel_props": { + el["type"]: el["properties"] for el 
    def upsert_nodes(self, nodes: List[LabelledNode]) -> None:
        """Insert-or-update entity and chunk nodes with batched Cypher.

        Nodes are partitioned by concrete type; anything that is neither an
        ``EntityNode`` nor a ``ChunkNode`` is silently dropped.
        """
        # Lists to hold separated types
        entity_dicts: List[dict] = []
        chunk_dicts: List[dict] = []

        # Sort by type
        for item in nodes:
            if isinstance(item, EntityNode):
                entity_dicts.append({**item.dict(), "id": item.id})
            elif isinstance(item, ChunkNode):
                chunk_dicts.append({**item.dict(), "id": item.id})
            else:
                # Log that we do not support these types of nodes
                # Or raise an error?
                pass

        if chunk_dicts:
            # Chunks: MERGE on id, copy text and properties, and store the
            # embedding via the vector-property procedure only when present.
            self.structured_query(
                """
                UNWIND $data AS row
                MERGE (c:Chunk {id: row.id})
                SET c.text = row.text
                WITH c, row
                SET c += row.properties
                WITH c, row.embedding AS embedding
                WHERE embedding IS NOT NULL
                CALL db.create.setNodeVectorProperty(c, 'embedding', embedding)
                RETURN count(*)
                """,
                param_map={"data": chunk_dicts},
            )

        if entity_dicts:
            # Entities: MERGE on id under the __Entity__ base label, attach
            # the specific label, optionally store the embedding, and link
            # back to the originating chunk via MENTIONS when
            # triplet_source_id is known.
            self.structured_query(
                """
                UNWIND $data AS row
                MERGE (e:`__Entity__` {id: row.id})
                SET e += apoc.map.clean(row.properties, [], [])
                SET e.name = row.name
                WITH e, row
                CALL apoc.create.addLabels(e, [row.label])
                YIELD node
                WITH e, row
                CALL {
                    WITH e, row
                    WITH e, row
                    WHERE row.embedding IS NOT NULL
                    CALL db.create.setNodeVectorProperty(e, 'embedding', row.embedding)
                    RETURN count(*) AS count
                }
                WITH e, row WHERE row.properties.triplet_source_id IS NOT NULL
                MERGE (c:Chunk {id: row.properties.triplet_source_id})
                MERGE (e)<-[:MENTIONS]-(c)
                """,
                param_map={"data": entity_dicts},
            )
    def get_triplets(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> List[Triplet]:
        """Fetch (source, relation, target) triplets among entity nodes.

        Filters are AND-ed: entity names, node ids, and exact-match property
        equality. ``relation_names``, when given, restricts the traversed
        relationship types. Both directions are scanned via a UNION so the
        returned source/target always follow the relationship direction.
        """
        # TODO: handle ids of chunk nodes
        cypher_statement = "MATCH (e:`__Entity__`) "

        params = {}
        if entity_names or properties or ids:
            cypher_statement += "WHERE "

        if entity_names:
            cypher_statement += "e.name in $entity_names "
            params["entity_names"] = entity_names

        if ids:
            cypher_statement += "e.id in $ids "
            params["ids"] = ids

        if properties:
            # Parameterized equality predicates, one per property key.
            prop_list = []
            for i, prop in enumerate(properties):
                prop_list.append(f"e.`{prop}` = $property_{i}")
                params[f"property_{i}"] = properties[prop]
            cypher_statement += " AND ".join(prop_list)

        # NOTE(review): the embedded expression inlines relation names into
        # the Cypher type filter (`:`a`|`b``); empty when unrestricted.
        return_statement = f"""
        WITH e
        CALL {{
            WITH e
            MATCH (e)-[r{':`' + '`|`'.join(relation_names) + '`' if relation_names else ''}]->(t)
            RETURN e.name AS source_id, [l in labels(e) WHERE l <> '__Entity__' | l][0] AS source_type,
                   e{{.* , embedding: Null, name: Null}} AS source_properties,
                   type(r) AS type,
                   t.name AS target_id, [l in labels(t) WHERE l <> '__Entity__' | l][0] AS target_type,
                   t{{.* , embedding: Null, name: Null}} AS target_properties
            UNION ALL
            WITH e
            MATCH (e)<-[r{':`' + '`|`'.join(relation_names) + '`' if relation_names else ''}]-(t)
            RETURN t.name AS source_id, [l in labels(t) WHERE l <> '__Entity__' | l][0] AS source_type,
                   e{{.* , embedding: Null, name: Null}} AS source_properties,
                   type(r) AS type,
                   e.name AS target_id, [l in labels(e) WHERE l <> '__Entity__' | l][0] AS target_type,
                   t{{.* , embedding: Null, name: Null}} AS target_properties
        }}
        RETURN source_id, source_type, type, target_id, target_type, source_properties, target_properties"""
        cypher_statement += return_statement

        data = self.structured_query(cypher_statement, param_map=params)
        data = data if data else []

        # Rehydrate each record into [EntityNode, Relation, EntityNode].
        triples = []
        for record in data:
            source = EntityNode(
                name=record["source_id"],
                label=record["source_type"],
                properties=remove_empty_values(record["source_properties"]),
            )
            target = EntityNode(
                name=record["target_id"],
                label=record["target_type"],
                properties=remove_empty_values(record["target_properties"]),
            )
            rel = Relation(
                source_id=record["source_id"],
                target_id=record["target_id"],
                label=record["type"],
            )
            triples.append([source, rel, target])
        return triples
startNode(rel) AS source, + type(rel) AS type, + endNode(rel) AS endNode + RETURN source.id AS source_id, [l in labels(source) WHERE l <> '__Entity__' | l][0] AS source_type, + source{{.* , embedding: Null, id: Null}} AS source_properties, + type, + endNode.id AS target_id, [l in labels(endNode) WHERE l <> '__Entity__' | l][0] AS target_type, + endNode{{.* , embedding: Null, id: Null}} AS target_properties + LIMIT toInteger($limit) + """, + param_map={"ids": ids, "limit": limit}, + ) + response = response if response else [] + + ignore_rels = ignore_rels or [] + for record in response: + if record["type"] in ignore_rels: + continue + + source = EntityNode( + name=record["source_id"], + label=record["source_type"], + properties=remove_empty_values(record["source_properties"]), + ) + target = EntityNode( + name=record["target_id"], + label=record["target_type"], + properties=remove_empty_values(record["target_properties"]), + ) + rel = Relation( + source_id=record["source_id"], + target_id=record["target_id"], + label=record["type"], + ) + triples.append([source, rel, target]) + + return triples + + def structured_query( + self, query: str, param_map: Optional[Dict[str, Any]] = None + ) -> Any: + param_map = param_map or {} + + with self._driver.session(database=self._database) as session: + result = session.run(query, param_map) + full_result = [d.data() for d in result] + + if self.sanitize_query_output: + return value_sanitize(full_result) + + return full_result + + def vector_query( + self, query: VectorStoreQuery, **kwargs: Any + ) -> Tuple[List[LabelledNode], List[float]]: + """Query the graph store with a vector store query.""" + data = self.structured_query( + """MATCH (e:`__Entity__`) + WHERE e.embedding IS NOT NULL AND size(e.embedding) = $dimension + WITH e, vector.similarity.cosine(e.embedding, $embedding) AS score + ORDER BY score DESC LIMIT toInteger($limit) + RETURN e.id AS name, + [l in labels(e) WHERE l <> '__Entity__' | l][0] AS type, + e{.* , 
    def delete(
        self,
        entity_names: Optional[List[str]] = None,
        relation_names: Optional[List[str]] = None,
        properties: Optional[dict] = None,
        ids: Optional[List[str]] = None,
    ) -> None:
        """Delete matching data."""
        # Each filter is applied independently: name/id/property matches
        # DETACH DELETE whole nodes; relation names remove only the
        # relationships of those types.
        if entity_names:
            self.structured_query(
                "MATCH (n) WHERE n.name IN $entity_names DETACH DELETE n",
                param_map={"entity_names": entity_names},
            )

        if ids:
            self.structured_query(
                "MATCH (n) WHERE n.id IN $ids DETACH DELETE n",
                param_map={"ids": ids},
            )

        if relation_names:
            for rel in relation_names:
                self.structured_query(f"MATCH ()-[r:`{rel}`]->() DELETE r")

        if properties:
            # Build an AND-ed, parameterized equality filter over the
            # supplied property key/value pairs.
            cypher = "MATCH (e) WHERE "
            prop_list = []
            params = {}
            for i, prop in enumerate(properties):
                prop_list.append(f"e.`{prop}` = $property_{i}")
                params[f"property_{i}"] = properties[prop]
            cypher += " AND ".join(prop_list)
            self.structured_query(
                cypher + " DETACH DELETE e", param_map=params
            )
substring(toString(n.`{prop_name}`), 0, 50)) " + f"AS `{prop_name}_values`" + ) + return_clauses.append( + f"values:`{prop_name}_values`[..{DISTINCT_VALUE_LIMIT}]," + f" distinct_count: size(`{prop_name}_values`)" + ) + elif prop_type in [ + "INTEGER", + "FLOAT", + "DATE", + "DATE_TIME", + "LOCAL_DATE_TIME", + ]: + with_clauses.append( + f"min(n.`{prop_name}`) AS `{prop_name}_min`" + ) + with_clauses.append( + f"max(n.`{prop_name}`) AS `{prop_name}_max`" + ) + with_clauses.append( + f"count(distinct n.`{prop_name}`) AS `{prop_name}_distinct`" + ) + return_clauses.append( + f"min: toString(`{prop_name}_min`), " + f"max: toString(`{prop_name}_max`), " + f"distinct_count: `{prop_name}_distinct`" + ) + elif prop_type == "LIST": + with_clauses.append( + f"min(size(n.`{prop_name}`)) AS `{prop_name}_size_min`, " + f"max(size(n.`{prop_name}`)) AS `{prop_name}_size_max`" + ) + return_clauses.append( + f"min_size: `{prop_name}_size_min`, " + f"max_size: `{prop_name}_size_max`" + ) + elif prop_type in ["BOOLEAN", "POINT", "DURATION"]: + continue + output_dict[prop_name] = "{" + return_clauses.pop() + "}" + else: + # Just sample 5 random nodes + match_clause += " WITH n LIMIT 5" + for prop in properties: + prop_name = prop["property"] + prop_type = prop["type"] + + # Check if indexed property, we can still do exhaustive + prop_index = [ + el + for el in self.structured_schema["metadata"]["index"] + if el["label"] == label_or_type + and el["properties"] == [prop_name] + and el["type"] == "RANGE" + ] + if prop_type == "STRING": + if ( + prop_index + and prop_index[0].get("size") > 0 + and prop_index[0].get("distinctValues") + <= DISTINCT_VALUE_LIMIT + ): + distinct_values = self.query( + f"CALL apoc.schema.properties.distinct(" + f"'{label_or_type}', '{prop_name}') YIELD value" + )[0]["value"] + return_clauses.append( + f"values: {distinct_values}," + f" distinct_count: {len(distinct_values)}" + ) + else: + with_clauses.append( + f"collect(distinct substring(n.`{prop_name}`, 0, 
50)) " + f"AS `{prop_name}_values`" + ) + return_clauses.append(f"values: `{prop_name}_values`") + elif prop_type in [ + "INTEGER", + "FLOAT", + "DATE", + "DATE_TIME", + "LOCAL_DATE_TIME", + ]: + if not prop_index: + with_clauses.append( + f"collect(distinct toString(n.`{prop_name}`)) " + f"AS `{prop_name}_values`" + ) + return_clauses.append(f"values: `{prop_name}_values`") + else: + with_clauses.append( + f"min(n.`{prop_name}`) AS `{prop_name}_min`" + ) + with_clauses.append( + f"max(n.`{prop_name}`) AS `{prop_name}_max`" + ) + with_clauses.append( + f"count(distinct n.`{prop_name}`) AS `{prop_name}_distinct`" + ) + return_clauses.append( + f"min: toString(`{prop_name}_min`), " + f"max: toString(`{prop_name}_max`), " + f"distinct_count: `{prop_name}_distinct`" + ) + + elif prop_type == "LIST": + with_clauses.append( + f"min(size(n.`{prop_name}`)) AS `{prop_name}_size_min`, " + f"max(size(n.`{prop_name}`)) AS `{prop_name}_size_max`" + ) + return_clauses.append( + f"min_size: `{prop_name}_size_min`, " + f"max_size: `{prop_name}_size_max`" + ) + elif prop_type in ["BOOLEAN", "POINT", "DURATION"]: + continue + + output_dict[prop_name] = "{" + return_clauses.pop() + "}" + + with_clause = "WITH " + ",\n ".join(with_clauses) + return_clause = ( + "RETURN {" + + ", ".join(f"`{k}`: {v}" for k, v in output_dict.items()) + + "} AS output" + ) + + # Combine all parts of the Cypher query + return f"{match_clause}\n{with_clause}\n{return_clause}" + + def get_schema(self, refresh: bool = False) -> Any: + if refresh: + self.refresh_schema() + + return self.structured_schema + + def get_schema_str(self, refresh: bool = False) -> str: + schema = self.get_schema(refresh=refresh) + + formatted_node_props = [] + formatted_rel_props = [] + + if self.enhcnaced_schema: + # Enhanced formatting for nodes + for node_type, properties in schema["node_props"].items(): + formatted_node_props.append(f"- **{node_type}**") + for prop in properties: + example = "" + if prop["type"] == "STRING" and 
prop.get("values"): + if ( + prop.get("distinct_count", 11) + > DISTINCT_VALUE_LIMIT + ): + example = ( + f'Example: "{clean_string_values(prop["values"][0])}"' + if prop["values"] + else "" + ) + else: # If less than 10 possible values return all + example = ( + ( + "Available options: " + f'{[clean_string_values(el) for el in prop["values"]]}' + ) + if prop["values"] + else "" + ) + + elif prop["type"] in [ + "INTEGER", + "FLOAT", + "DATE", + "DATE_TIME", + "LOCAL_DATE_TIME", + ]: + if prop.get("min") is not None: + example = f'Min: {prop["min"]}, Max: {prop["max"]}' + else: + example = ( + f'Example: "{prop["values"][0]}"' + if prop.get("values") + else "" + ) + elif prop["type"] == "LIST": + # Skip embeddings + if ( + not prop.get("min_size") + or prop["min_size"] > LIST_LIMIT + ): + continue + example = f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}' + formatted_node_props.append( + f" - `{prop['property']}`: {prop['type']} {example}" + ) + + # Enhanced formatting for relationships + for rel_type, properties in schema["rel_props"].items(): + formatted_rel_props.append(f"- **{rel_type}**") + for prop in properties: + example = "" + if prop["type"] == "STRING": + if ( + prop.get("distinct_count", 11) + > DISTINCT_VALUE_LIMIT + ): + example = ( + f'Example: "{clean_string_values(prop["values"][0])}"' + if prop.get("values") + else "" + ) + else: # If less than 10 possible values return all + example = ( + ( + "Available options: " + f'{[clean_string_values(el) for el in prop["values"]]}' + ) + if prop.get("values") + else "" + ) + elif prop["type"] in [ + "INTEGER", + "FLOAT", + "DATE", + "DATE_TIME", + "LOCAL_DATE_TIME", + ]: + if prop.get("min"): # If we have min/max + example = ( + f'Min: {prop["min"]}, Max: {prop["max"]}' + ) + else: # return a single value + example = ( + f'Example: "{prop["values"][0]}"' + if prop.get("values") + else "" + ) + elif prop["type"] == "LIST": + # Skip embeddings + if prop["min_size"] > LIST_LIMIT: + continue + 
example = f'Min Size: {prop["min_size"]}, Max Size: {prop["max_size"]}' + formatted_rel_props.append( + f" - `{prop['property']}: {prop['type']}` {example}" + ) + else: + # Format node properties + for label, props in schema["node_props"].items(): + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in props] + ) + formatted_node_props.append(f"{label} {{{props_str}}}") + + # Format relationship properties using structured_schema + for type, props in schema["rel_props"].items(): + props_str = ", ".join( + [f"{prop['property']}: {prop['type']}" for prop in props] + ) + formatted_rel_props.append(f"{type} {{{props_str}}}") + + # Format relationships + formatted_rels = [ + f"(:{el['start']})-[:{el['type']}]->(:{el['end']})" + for el in schema["relationships"] + ] + + return "\n".join( + [ + "Node properties:", + "\n".join(formatted_node_props), + "Relationship properties:", + "\n".join(formatted_rel_props), + "The relationships:", + "\n".join(formatted_rels), + ] + ) + + def update_extraction_prompt( + self, + prompt_provider: PromptProvider, + entity_types: list[EntityType], + relations: list[Relation], + ): + # Fetch the kg extraction prompt with blank entity types and relations + # Note - Assumes that for given prompt there is a `_with_spec` that can have entities + relations specified + few_shot_ner_kg_extraction_with_spec = prompt_provider.get_prompt( + f"{self.config.kg_extraction_prompt}_with_spec" + ) + + # Format the prompt to include the desired entity types and relations + few_shot_ner_kg_extraction = ( + few_shot_ner_kg_extraction_with_spec.replace( + "{entity_types}", format_entity_types(entity_types) + ).replace("{relations}", format_relations(relations)) + ) + + # Update the "few_shot_ner_kg_extraction" prompt used in downstream KG construction + prompt_provider.update_prompt( + self.config.kg_extraction_prompt, + json.dumps(few_shot_ner_kg_extraction, ensure_ascii=False), + ) + + def update_kg_agent_prompt( + self, + 
prompt_provider: PromptProvider, + entity_types: list[EntityType], + relations: list[Relation], + ): + # Fetch the kg agent prompt with blank entity types and relations + # Note - Assumes that for given prompt there is a `_with_spec` that can have entities + relations specified + few_shot_ner_kg_extraction_with_spec = prompt_provider.get_prompt( + f"{self.config.kg_agent_prompt}_with_spec" + ) + + # Format the prompt to include the desired entity types and relations + few_shot_ner_kg_extraction = ( + few_shot_ner_kg_extraction_with_spec.replace( + "{entity_types}", + format_entity_types(entity_types, ignore_subcats=True), + ).replace("{relations}", format_relations(relations)) + ) + + # Update the kg agent prompt used by the downstream KG agent + prompt_provider.update_prompt( + self.config.kg_agent_prompt, + json.dumps(few_shot_ner_kg_extraction, ensure_ascii=False), + ) diff --git a/R2R/r2r/providers/llms/__init__.py b/R2R/r2r/providers/llms/__init__.py new file mode 100755 index 00000000..38a1c54a --- /dev/null +++ b/R2R/r2r/providers/llms/__init__.py @@ -0,0 +1,7 @@ +from .litellm.base_litellm import LiteLLM +from .openai.base_openai import OpenAILLM + +__all__ = [ + "LiteLLM", + "OpenAILLM", +] diff --git a/R2R/r2r/providers/llms/litellm/base_litellm.py b/R2R/r2r/providers/llms/litellm/base_litellm.py new file mode 100755 index 00000000..581cce9a --- /dev/null +++ b/R2R/r2r/providers/llms/litellm/base_litellm.py @@ -0,0 +1,142 @@ +import logging +from typing import Any, Generator, Union + +from r2r.base import ( + LLMChatCompletion, + LLMChatCompletionChunk, + LLMConfig, + LLMProvider, +) +from r2r.base.abstractions.llm import GenerationConfig + +logger = logging.getLogger(__name__) + + +class LiteLLM(LLMProvider): + """A concrete class for creating LiteLLM models.""" + + def __init__( + self, + config: LLMConfig, + *args, + **kwargs, + ) -> None: + try: + from litellm import acompletion, completion + + self.litellm_completion = 
completion + self.litellm_acompletion = acompletion + except ImportError: + raise ImportError( + "Error, `litellm` is required to run a LiteLLM. Please install it using `pip install litellm`." + ) + super().__init__(config) + + def get_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletion: + if generation_config.stream: + raise ValueError( + "Stream must be set to False to use the `get_completion` method." + ) + return self._get_completion(messages, generation_config, **kwargs) + + def get_completion_stream( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> Generator[LLMChatCompletionChunk, None, None]: + if not generation_config.stream: + raise ValueError( + "Stream must be set to True to use the `get_completion_stream` method." + ) + return self._get_completion(messages, generation_config, **kwargs) + + def extract_content(self, response: LLMChatCompletion) -> str: + return response.choices[0].message.content + + def _get_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> Union[ + LLMChatCompletion, Generator[LLMChatCompletionChunk, None, None] + ]: + # Create a dictionary with the default arguments + args = self._get_base_args(generation_config) + args["messages"] = messages + + # Conditionally add the 'functions' argument if it's not None + if generation_config.functions is not None: + args["functions"] = generation_config.functions + + args = {**args, **kwargs} + response = self.litellm_completion(**args) + + if not generation_config.stream: + return LLMChatCompletion(**response.dict()) + else: + return self._get_chat_completion(response) + + def _get_chat_completion( + self, + response: Any, + ) -> Generator[LLMChatCompletionChunk, None, None]: + for part in response: + yield LLMChatCompletionChunk(**part.dict()) + + def _get_base_args( + self, + generation_config: GenerationConfig, + prompt=None, + ) -> 
dict: + """Get the base arguments for the LiteLLM API.""" + args = { + "model": generation_config.model, + "temperature": generation_config.temperature, + "top_p": generation_config.top_p, + "stream": generation_config.stream, + # TODO - We need to cap this to avoid potential errors when exceed max allowable context + "max_tokens": generation_config.max_tokens_to_sample, + } + return args + + async def aget_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletion: + if generation_config.stream: + raise ValueError( + "Stream must be set to False to use the `aget_completion` method." + ) + return await self._aget_completion( + messages, generation_config, **kwargs + ) + + async def _aget_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> Union[LLMChatCompletion, LLMChatCompletionChunk]: + """Asynchronously get a completion from the LiteLLM API based on the provided messages.""" + + # Create a dictionary with the default arguments + args = self._get_base_args(generation_config) + + args["messages"] = messages + + # Conditionally add the 'functions' argument if it's not None + if generation_config.functions is not None: + args["functions"] = generation_config.functions + + args = {**args, **kwargs} + # Create the chat completion + return await self.litellm_acompletion(**args) diff --git a/R2R/r2r/providers/llms/openai/base_openai.py b/R2R/r2r/providers/llms/openai/base_openai.py new file mode 100755 index 00000000..460c0f0b --- /dev/null +++ b/R2R/r2r/providers/llms/openai/base_openai.py @@ -0,0 +1,144 @@ +"""A module for creating OpenAI model abstractions.""" + +import logging +import os +from typing import Union + +from r2r.base import ( + LLMChatCompletion, + LLMChatCompletionChunk, + LLMConfig, + LLMProvider, +) +from r2r.base.abstractions.llm import GenerationConfig + +logger = logging.getLogger(__name__) + + +class OpenAILLM(LLMProvider): + """A 
concrete class for creating OpenAI models.""" + + def __init__( + self, + config: LLMConfig, + *args, + **kwargs, + ) -> None: + if not isinstance(config, LLMConfig): + raise ValueError( + "The provided config must be an instance of OpenAIConfig." + ) + try: + from openai import OpenAI # noqa + except ImportError: + raise ImportError( + "Error, `openai` is required to run an OpenAILLM. Please install it using `pip install openai`." + ) + if config.provider != "openai": + raise ValueError( + "OpenAILLM must be initialized with config with `openai` provider." + ) + if not os.getenv("OPENAI_API_KEY"): + raise ValueError( + "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable." + ) + super().__init__(config) + self.config: LLMConfig = config + self.client = OpenAI() + + def get_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletion: + if generation_config.stream: + raise ValueError( + "Stream must be set to False to use the `get_completion` method." + ) + return self._get_completion(messages, generation_config, **kwargs) + + def get_completion_stream( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletionChunk: + if not generation_config.stream: + raise ValueError( + "Stream must be set to True to use the `get_completion_stream` method." 
+ ) + return self._get_completion(messages, generation_config, **kwargs) + + def _get_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> Union[LLMChatCompletion, LLMChatCompletionChunk]: + """Get a completion from the OpenAI API based on the provided messages.""" + + # Create a dictionary with the default arguments + args = self._get_base_args(generation_config) + + args["messages"] = messages + + # Conditionally add the 'functions' argument if it's not None + if generation_config.functions is not None: + args["functions"] = generation_config.functions + + args = {**args, **kwargs} + # Create the chat completion + return self.client.chat.completions.create(**args) + + def _get_base_args( + self, + generation_config: GenerationConfig, + ) -> dict: + """Get the base arguments for the OpenAI API.""" + + args = { + "model": generation_config.model, + "temperature": generation_config.temperature, + "top_p": generation_config.top_p, + "stream": generation_config.stream, + # TODO - We need to cap this to avoid potential errors when exceed max allowable context + "max_tokens": generation_config.max_tokens_to_sample, + } + + return args + + async def aget_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> LLMChatCompletion: + if generation_config.stream: + raise ValueError( + "Stream must be set to False to use the `aget_completion` method." 
+ ) + return await self._aget_completion( + messages, generation_config, **kwargs + ) + + async def _aget_completion( + self, + messages: list[dict], + generation_config: GenerationConfig, + **kwargs, + ) -> Union[LLMChatCompletion, LLMChatCompletionChunk]: + """Asynchronously get a completion from the OpenAI API based on the provided messages.""" + + # Create a dictionary with the default arguments + args = self._get_base_args(generation_config) + + args["messages"] = messages + + # Conditionally add the 'functions' argument if it's not None + if generation_config.functions is not None: + args["functions"] = generation_config.functions + + args = {**args, **kwargs} + # Create the chat completion + return await self.client.chat.completions.create(**args) diff --git a/R2R/r2r/providers/vector_dbs/__init__.py b/R2R/r2r/providers/vector_dbs/__init__.py new file mode 100755 index 00000000..38ea0890 --- /dev/null +++ b/R2R/r2r/providers/vector_dbs/__init__.py @@ -0,0 +1,5 @@ +from .pgvector.pgvector_db import PGVectorDB + +__all__ = [ + "PGVectorDB", +] diff --git a/R2R/r2r/providers/vector_dbs/pgvector/pgvector_db.py b/R2R/r2r/providers/vector_dbs/pgvector/pgvector_db.py new file mode 100755 index 00000000..8cf728d1 --- /dev/null +++ b/R2R/r2r/providers/vector_dbs/pgvector/pgvector_db.py @@ -0,0 +1,610 @@ +import json +import logging +import os +import time +from typing import Literal, Optional, Union + +from sqlalchemy import exc, text +from sqlalchemy.engine.url import make_url + +from r2r.base import ( + DocumentInfo, + UserStats, + VectorDBConfig, + VectorDBProvider, + VectorEntry, + VectorSearchResult, +) +from r2r.vecs.client import Client +from r2r.vecs.collection import Collection + +logger = logging.getLogger(__name__) + + +class PGVectorDB(VectorDBProvider): + def __init__(self, config: VectorDBConfig) -> None: + super().__init__(config) + try: + import r2r.vecs + except ImportError: + raise ValueError( + f"Error, PGVectorDB requires the vecs library. 
Please run `pip install vecs`." + ) + + # Check if a complete Postgres URI is provided + postgres_uri = self.config.extra_fields.get( + "postgres_uri" + ) or os.getenv("POSTGRES_URI") + + if postgres_uri: + # Log loudly that Postgres URI is being used + logger.warning("=" * 50) + logger.warning( + "ATTENTION: Using provided Postgres URI for connection" + ) + logger.warning("=" * 50) + + # Validate and use the provided URI + try: + parsed_uri = make_url(postgres_uri) + if not all([parsed_uri.username, parsed_uri.database]): + raise ValueError( + "The provided Postgres URI is missing required components." + ) + DB_CONNECTION = postgres_uri + + # Log the sanitized URI (without password) + sanitized_uri = parsed_uri.set(password="*****") + logger.info(f"Connecting using URI: {sanitized_uri}") + except Exception as e: + raise ValueError(f"Invalid Postgres URI provided: {e}") + else: + # Fall back to existing logic for individual connection parameters + user = self.config.extra_fields.get("user", None) or os.getenv( + "POSTGRES_USER" + ) + password = self.config.extra_fields.get( + "password", None + ) or os.getenv("POSTGRES_PASSWORD") + host = self.config.extra_fields.get("host", None) or os.getenv( + "POSTGRES_HOST" + ) + port = self.config.extra_fields.get("port", None) or os.getenv( + "POSTGRES_PORT" + ) + db_name = self.config.extra_fields.get( + "db_name", None + ) or os.getenv("POSTGRES_DBNAME") + + if not all([user, password, host, db_name]): + raise ValueError( + "Error, please set the POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_DBNAME environment variables or provide them in the config." 
+ ) + + # Check if it's a Unix socket connection + if host.startswith("/") and not port: + DB_CONNECTION = ( + f"postgresql://{user}:{password}@/{db_name}?host={host}" + ) + logger.info("Using Unix socket connection") + else: + DB_CONNECTION = ( + f"postgresql://{user}:{password}@{host}:{port}/{db_name}" + ) + logger.info("Using TCP connection") + + # The rest of the initialization remains the same + try: + self.vx: Client = r2r.vecs.create_client(DB_CONNECTION) + except Exception as e: + raise ValueError( + f"Error {e} occurred while attempting to connect to the pgvector provider with {DB_CONNECTION}." + ) + + self.collection_name = self.config.extra_fields.get( + "vecs_collection" + ) or os.getenv("POSTGRES_VECS_COLLECTION") + if not self.collection_name: + raise ValueError( + "Error, please set a valid POSTGRES_VECS_COLLECTION environment variable or set a 'vecs_collection' in the 'vector_database' settings of your `config.json`." + ) + + self.collection: Optional[Collection] = None + + logger.info( + f"Successfully initialized PGVectorDB with collection: {self.collection_name}" + ) + + def initialize_collection(self, dimension: int) -> None: + self.collection = self.vx.get_or_create_collection( + name=self.collection_name, dimension=dimension + ) + self._create_document_info_table() + self._create_hybrid_search_function() + + def _create_document_info_table(self): + with self.vx.Session() as sess: + with sess.begin(): + try: + # Enable uuid-ossp extension + sess.execute( + text('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') + ) + except exc.ProgrammingError as e: + logger.error(f"Error enabling uuid-ossp extension: {e}") + raise + + # Create the table if it doesn't exist + create_table_query = f""" + CREATE TABLE IF NOT EXISTS document_info_"{self.collection_name}" ( + document_id UUID PRIMARY KEY, + title TEXT, + user_id UUID NULL, + version TEXT, + size_in_bytes INT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + metadata 
JSONB, + status TEXT + ); + """ + sess.execute(text(create_table_query)) + + # Add the new column if it doesn't exist + add_column_query = f""" + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 + FROM information_schema.columns + WHERE table_name = 'document_info_"{self.collection_name}"' + AND column_name = 'status' + ) THEN + ALTER TABLE "document_info_{self.collection_name}" + ADD COLUMN status TEXT DEFAULT 'processing'; + END IF; + END $$; + """ + sess.execute(text(add_column_query)) + + sess.commit() + + def _create_hybrid_search_function(self): + hybrid_search_function = f""" + CREATE OR REPLACE FUNCTION hybrid_search_{self.collection_name}( + query_text TEXT, + query_embedding VECTOR(512), + match_limit INT, + full_text_weight FLOAT = 1, + semantic_weight FLOAT = 1, + rrf_k INT = 50, + filter_condition JSONB = NULL + ) + RETURNS SETOF vecs."{self.collection_name}" + LANGUAGE sql + AS $$ + WITH full_text AS ( + SELECT + id, + ROW_NUMBER() OVER (ORDER BY ts_rank(to_tsvector('english', metadata->>'text'), websearch_to_tsquery(query_text)) DESC) AS rank_ix + FROM vecs."{self.collection_name}" + WHERE to_tsvector('english', metadata->>'text') @@ websearch_to_tsquery(query_text) + AND (filter_condition IS NULL OR (metadata @> filter_condition)) + ORDER BY rank_ix + LIMIT LEAST(match_limit, 30) * 2 + ), + semantic AS ( + SELECT + id, + ROW_NUMBER() OVER (ORDER BY vec <#> query_embedding) AS rank_ix + FROM vecs."{self.collection_name}" + WHERE filter_condition IS NULL OR (metadata @> filter_condition) + ORDER BY rank_ix + LIMIT LEAST(match_limit, 30) * 2 + ) + SELECT + vecs."{self.collection_name}".* + FROM + full_text + FULL OUTER JOIN semantic + ON full_text.id = semantic.id + JOIN vecs."{self.collection_name}" + ON vecs."{self.collection_name}".id = COALESCE(full_text.id, semantic.id) + ORDER BY + COALESCE(1.0 / (rrf_k + full_text.rank_ix), 0.0) * full_text_weight + + COALESCE(1.0 / (rrf_k + semantic.rank_ix), 0.0) * semantic_weight + DESC + LIMIT + LEAST(match_limit, 
30); + $$; + """ + retry_attempts = 5 + for attempt in range(retry_attempts): + try: + with self.vx.Session() as sess: + # Acquire an advisory lock + sess.execute(text("SELECT pg_advisory_lock(123456789)")) + try: + sess.execute(text(hybrid_search_function)) + sess.commit() + finally: + # Release the advisory lock + sess.execute( + text("SELECT pg_advisory_unlock(123456789)") + ) + break # Break the loop if successful + except exc.InternalError as e: + if "tuple concurrently updated" in str(e): + time.sleep(2**attempt) # Exponential backoff + else: + raise # Re-raise the exception if it's not a concurrency issue + else: + raise RuntimeError( + "Failed to create hybrid search function after multiple attempts" + ) + + def copy(self, entry: VectorEntry, commit=True) -> None: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `copy`." + ) + + serializeable_entry = entry.to_serializable() + + self.collection.copy( + records=[ + ( + serializeable_entry["id"], + serializeable_entry["vector"], + serializeable_entry["metadata"], + ) + ] + ) + + def copy_entries( + self, entries: list[VectorEntry], commit: bool = True + ) -> None: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `copy_entries`." + ) + + self.collection.copy( + records=[ + ( + str(entry.id), + entry.vector.data, + entry.to_serializable()["metadata"], + ) + for entry in entries + ] + ) + + def upsert(self, entry: VectorEntry, commit=True) -> None: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `upsert`." 
+ ) + + self.collection.upsert( + records=[ + ( + str(entry.id), + entry.vector.data, + entry.to_serializable()["metadata"], + ) + ] + ) + + def upsert_entries( + self, entries: list[VectorEntry], commit: bool = True + ) -> None: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `upsert_entries`." + ) + + self.collection.upsert( + records=[ + ( + str(entry.id), + entry.vector.data, + entry.to_serializable()["metadata"], + ) + for entry in entries + ] + ) + + def search( + self, + query_vector: list[float], + filters: dict[str, Union[bool, int, str]] = {}, + limit: int = 10, + *args, + **kwargs, + ) -> list[VectorSearchResult]: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `search`." + ) + measure = kwargs.get("measure", "cosine_distance") + mapped_filters = { + key: {"$eq": value} for key, value in filters.items() + } + + return [ + VectorSearchResult(id=ele[0], score=float(1 - ele[1]), metadata=ele[2]) # type: ignore + for ele in self.collection.query( + data=query_vector, + limit=limit, + filters=mapped_filters, + measure=measure, + include_value=True, + include_metadata=True, + ) + ] + + def hybrid_search( + self, + query_text: str, + query_vector: list[float], + limit: int = 10, + filters: Optional[dict[str, Union[bool, int, str]]] = None, + # Hybrid search parameters + full_text_weight: float = 1.0, + semantic_weight: float = 1.0, + rrf_k: int = 20, # typical value is ~2x the number of results you want + *args, + **kwargs, + ) -> list[VectorSearchResult]: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `hybrid_search`." 
+ ) + + # Convert filters to a JSON-compatible format + filter_condition = None + if filters: + filter_condition = json.dumps(filters) + + query = text( + f""" + SELECT * FROM hybrid_search_{self.collection_name}( + cast(:query_text as TEXT), cast(:query_embedding as VECTOR), cast(:match_limit as INT), + cast(:full_text_weight as FLOAT), cast(:semantic_weight as FLOAT), cast(:rrf_k as INT), + cast(:filter_condition as JSONB) + ) + """ + ) + + params = { + "query_text": str(query_text), + "query_embedding": list(query_vector), + "match_limit": limit, + "full_text_weight": full_text_weight, + "semantic_weight": semantic_weight, + "rrf_k": rrf_k, + "filter_condition": filter_condition, + } + + with self.vx.Session() as session: + result = session.execute(query, params).fetchall() + return [ + VectorSearchResult(id=row[0], score=1.0, metadata=row[-1]) + for row in result + ] + + def create_index(self, index_type, column_name, index_options): + pass + + def delete_by_metadata( + self, + metadata_fields: list[str], + metadata_values: list[Union[bool, int, str]], + logic: Literal["AND", "OR"] = "AND", + ) -> list[str]: + if logic == "OR": + raise ValueError( + "OR logic is still being tested before official support for `delete_by_metadata` in pgvector." + ) + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `delete_by_metadata`." + ) + + if len(metadata_fields) != len(metadata_values): + raise ValueError( + "The number of metadata fields must match the number of metadata values." 
+ ) + + # Construct the filter + if logic == "AND": + filters = { + k: {"$eq": v} for k, v in zip(metadata_fields, metadata_values) + } + else: # OR logic + # TODO - Test 'or' logic and remove check above + filters = { + "$or": [ + {k: {"$eq": v}} + for k, v in zip(metadata_fields, metadata_values) + ] + } + return self.collection.delete(filters=filters) + + def get_metadatas( + self, + metadata_fields: list[str], + filter_field: Optional[str] = None, + filter_value: Optional[Union[bool, int, str]] = None, + ) -> list[dict]: + if self.collection is None: + raise ValueError( + "Please call `initialize_collection` before attempting to run `get_metadatas`." + ) + + results = {tuple(metadata_fields): {}} + for field in metadata_fields: + unique_values = self.collection.get_unique_metadata_values( + field=field, + filter_field=filter_field, + filter_value=filter_value, + ) + for value in unique_values: + if value not in results: + results[value] = {} + results[value][field] = value + + return [ + results[key] for key in results if key != tuple(metadata_fields) + ] + + def upsert_documents_overview( + self, documents_overview: list[DocumentInfo] + ) -> None: + for document_info in documents_overview: + db_entry = document_info.convert_to_db_entry() + + # Convert 'None' string to None type for user_id + if db_entry["user_id"] == "None": + db_entry["user_id"] = None + + query = text( + f""" + INSERT INTO "document_info_{self.collection_name}" (document_id, title, user_id, version, created_at, updated_at, size_in_bytes, metadata, status) + VALUES (:document_id, :title, :user_id, :version, :created_at, :updated_at, :size_in_bytes, :metadata, :status) + ON CONFLICT (document_id) DO UPDATE SET + title = EXCLUDED.title, + user_id = EXCLUDED.user_id, + version = EXCLUDED.version, + updated_at = EXCLUDED.updated_at, + size_in_bytes = EXCLUDED.size_in_bytes, + metadata = EXCLUDED.metadata, + status = EXCLUDED.status; + """ + ) + with self.vx.Session() as sess: + 
sess.execute(query, db_entry) + sess.commit() + + def delete_from_documents_overview( + self, document_id: str, version: Optional[str] = None + ) -> None: + query = f""" + DELETE FROM "document_info_{self.collection_name}" + WHERE document_id = :document_id + """ + params = {"document_id": document_id} + + if version is not None: + query += " AND version = :version" + params["version"] = version + + with self.vx.Session() as sess: + with sess.begin(): + sess.execute(text(query), params) + sess.commit() + + def get_documents_overview( + self, + filter_document_ids: Optional[list[str]] = None, + filter_user_ids: Optional[list[str]] = None, + ): + conditions = [] + params = {} + + if filter_document_ids: + placeholders = ", ".join( + f":doc_id_{i}" for i in range(len(filter_document_ids)) + ) + conditions.append(f"document_id IN ({placeholders})") + params.update( + { + f"doc_id_{i}": str(document_id) + for i, document_id in enumerate(filter_document_ids) + } + ) + if filter_user_ids: + placeholders = ", ".join( + f":user_id_{i}" for i in range(len(filter_user_ids)) + ) + conditions.append(f"user_id IN ({placeholders})") + params.update( + { + f"user_id_{i}": str(user_id) + for i, user_id in enumerate(filter_user_ids) + } + ) + + query = f""" + SELECT document_id, title, user_id, version, size_in_bytes, created_at, updated_at, metadata, status + FROM "document_info_{self.collection_name}" + """ + if conditions: + query += " WHERE " + " AND ".join(conditions) + + with self.vx.Session() as sess: + results = sess.execute(text(query), params).fetchall() + return [ + DocumentInfo( + document_id=row[0], + title=row[1], + user_id=row[2], + version=row[3], + size_in_bytes=row[4], + created_at=row[5], + updated_at=row[6], + metadata=row[7], + status=row[8], + ) + for row in results + ] + + def get_document_chunks(self, document_id: str) -> list[dict]: + if not self.collection: + raise ValueError("Collection is not initialized.") + + table_name = self.collection.table.name + 
class BaseTelemetryEvent:
    """Base shape shared by all telemetry events.

    Attributes:
        event_type: Name of the event category (e.g. "Error").
        properties: Payload dict describing the event.
        event_id: Random UUID4 string uniquely identifying this event instance.
    """

    def __init__(self, event_type: str, properties: Dict[str, Any]):
        self.event_type = event_type
        self.properties = properties
        self.event_id = str(uuid.uuid4())


class DailyActiveUserEvent(BaseTelemetryEvent):
    """Marks a user as active."""

    def __init__(self, user_id: str):
        super().__init__("DailyActiveUser", {"user_id": user_id})


class FeatureUsageEvent(BaseTelemetryEvent):
    """Records that a user exercised a particular feature."""

    def __init__(self, user_id: str, feature: str):
        payload = {"user_id": user_id, "feature": feature}
        super().__init__("FeatureUsage", payload)


class ErrorEvent(BaseTelemetryEvent):
    """Captures an error raised while serving a given endpoint."""

    def __init__(self, user_id: str, endpoint: str, error_message: str):
        payload = {
            "user_id": user_id,
            "endpoint": endpoint,
            "error_message": error_message,
        }
        super().__init__("Error", payload)


class RequestLatencyEvent(BaseTelemetryEvent):
    """Reports a latency measurement for an endpoint (units set by caller)."""

    def __init__(self, endpoint: str, latency: float):
        payload = {"endpoint": endpoint, "latency": latency}
        super().__init__("RequestLatency", payload)


class GeographicDistributionEvent(BaseTelemetryEvent):
    """Associates a user with a country."""

    def __init__(self, user_id: str, country: str):
        payload = {"user_id": user_id, "country": country}
        super().__init__("GeographicDistribution", payload)


class SessionDurationEvent(BaseTelemetryEvent):
    """Reports the duration of a user's session (units set by caller)."""

    def __init__(self, user_id: str, duration: float):
        payload = {"user_id": user_id, "duration": duration}
        super().__init__("SessionDuration", payload)


class UserPathEvent(BaseTelemetryEvent):
    """Records a path (page/route) visited by a user."""

    def __init__(self, user_id: str, path: str):
        super().__init__("UserPath", {"user_id": user_id, "path": path})
def telemetry_event(event_name):
    """Decorator that emits usage/error telemetry around a function call.

    On success a FeatureUsageEvent is captured; on failure an ErrorEvent is
    captured and the original exception is re-raised. Telemetry failures are
    only logged — they never mask the wrapped function's result or error.

    Args:
        event_name: Feature/endpoint label attached to the captured events.

    Returns:
        The decorated function (async functions stay async; sync functions
        are driven through the async wrapper).
    """

    def decorator(func):
        @wraps(func)
        async def async_wrapper(*args, **kwargs):
            # The caller's user_id (if any) is only discoverable via kwargs.
            user_id = kwargs.get("user_id", "unknown_user")
            try:
                result = await func(*args, **kwargs)
                try:
                    telemetry_client.capture(
                        FeatureUsageEvent(user_id=user_id, feature=event_name)
                    )
                except Exception as e:
                    logger.error(f"Error in telemetry event logging: {str(e)}")
                return result
            except Exception as e:
                try:
                    telemetry_client.capture(
                        ErrorEvent(
                            user_id=user_id,
                            endpoint=event_name,
                            error_message=str(e),
                        )
                    )
                except Exception as telemetry_error:
                    # Use a distinct name so the original exception `e` is
                    # not shadowed before the re-raise below.
                    logger.error(
                        f"Error in telemetry event logging: {str(telemetry_error)}"
                    )

                raise

        @wraps(func)
        def sync_wrapper(*args, **kwargs):
            # `asyncio.get_event_loop()` is deprecated and raises in non-main
            # threads that have no loop; detect a running loop explicitly.
            try:
                running_loop = asyncio.get_running_loop()
            except RuntimeError:
                running_loop = None

            if running_loop is not None:
                # NOTE(review): as in the original implementation, calling a
                # sync-wrapped function from the event-loop thread itself will
                # block on future.result(); this path is meant for worker
                # threads running alongside a loop.
                future = asyncio.run_coroutine_threadsafe(
                    async_wrapper(*args, **kwargs), running_loop
                )
                return future.result()
            # No running loop in this thread: run the coroutine to completion
            # on a fresh loop.
            return asyncio.run(async_wrapper(*args, **kwargs))

        return (
            async_wrapper
            if asyncio.iscoroutinefunction(func)
            else sync_wrapper
        )

    return decorator
class AdapterContext(str, Enum):
    """Phases in which an adapter pipeline can be invoked.

    Attributes:
        upsert (str): Invoked from the Collection.upsert method.
        query (str): Invoked from the Collection.query method.
    """

    upsert = "upsert"
    query = "query"


class AdapterStep(ABC):
    """One stage of an adapter pipeline.

    A step consumes records shaped as (id, media, optional metadata dict)
    and emits records shaped as (id, media, metadata dict). Ids or metadata
    supplied by the user override values a step would otherwise produce.
    """

    @property
    def exported_dimension(self) -> Optional[int]:
        """Output vector dimension of this step; None when not applicable.

        Subclasses that produce vectors should override this.
        """
        return None

    @abstractmethod
    def __call__(
        self,
        records: Iterable[Tuple[str, Any, Optional[Dict]]],
        adapter_context: AdapterContext,
    ) -> Generator[Tuple[str, Any, Dict], None, None]:
        """Transform each incoming record; must be implemented by subclasses."""
Returns the exported dimension of the last + AdapterStep that provides one (from end to start of the steps list). + """ + for step in reversed(self.steps): + step_dim = step.exported_dimension + if step_dim is not None: + return step_dim + return None + + def __call__( + self, + records: Iterable[Tuple[str, Any, Optional[Dict]]], + adapter_context: AdapterContext, + ) -> Generator[Tuple[str, Any, Dict], None, None]: + """ + Invokes the adapter pipeline on an iterable of records. + + Args: + records: Iterable of tuples each containing an id, a media and an optional dict. + adapter_context: Context of the adapter. + + Yields: + Tuples each containing an id, a media and a dict. + """ + pipeline = records + for step in self.steps: + pipeline = step(pipeline, adapter_context) + + yield from pipeline # type: ignore diff --git a/R2R/r2r/vecs/adapter/markdown.py b/R2R/r2r/vecs/adapter/markdown.py new file mode 100755 index 00000000..149573f4 --- /dev/null +++ b/R2R/r2r/vecs/adapter/markdown.py @@ -0,0 +1,88 @@ +import re +from typing import Any, Dict, Generator, Iterable, Optional, Tuple + +from flupy import flu + +from .base import AdapterContext, AdapterStep + + +class MarkdownChunker(AdapterStep): + """ + MarkdownChunker is an AdapterStep that splits a markdown string into chunks where a heading signifies the start of a chunk, and yields each chunk as a separate record. + """ + + def __init__(self, *, skip_during_query: bool): + """ + Initializes the MarkdownChunker adapter. + + Args: + skip_during_query (bool): Whether to skip chunking during querying. 
+ """ + self.skip_during_query = skip_during_query + + @staticmethod + def split_by_heading( + md: str, max_tokens: int + ) -> Generator[str, None, None]: + regex_split = r"^(#{1,6}\s+.+)$" + headings = [ + match.span()[0] + for match in re.finditer(regex_split, md, flags=re.MULTILINE) + ] + + if headings == [] or headings[0] != 0: + headings.insert(0, 0) + + sections = [md[i:j] for i, j in zip(headings, headings[1:] + [None])] + + for section in sections: + chunks = flu(section.split(" ")).chunk(max_tokens) + + is_not_useless_chunk = lambda i: not i in ["", "\n", []] + + joined_chunks = filter( + is_not_useless_chunk, [" ".join(chunk) for chunk in chunks] + ) + + for joined_chunk in joined_chunks: + yield joined_chunk + + def __call__( + self, + records: Iterable[Tuple[str, Any, Optional[Dict]]], + adapter_context: AdapterContext, + max_tokens: int = 99999999, + ) -> Generator[Tuple[str, Any, Dict], None, None]: + """ + Splits each markdown string in the records into chunks where each heading starts a new chunk, and yields each chunk + as a separate record. If the `skip_during_query` attribute is set to True, + this step is skipped during querying. + + Args: + records (Iterable[Tuple[str, Any, Optional[Dict]]]): Iterable of tuples each containing an id, a markdown string and an optional dict. + adapter_context (AdapterContext): Context of the adapter. + max_tokens (int): The maximum number of tokens per chunk + + Yields: + Tuple[str, Any, Dict]: The id appended with chunk index, the chunk, and the metadata. 
class NoOp(AdapterStep):
    """Pass-through adapter step.

    The default step when the caller already supplies vectors and no
    transformation is required: records are emitted unchanged.
    """

    def __init__(self, dimension: int):
        """Remember the vector dimension this step should report.

        Args:
            dimension (int): The dimension of the input vectors.
        """
        self._dimension = dimension

    @property
    def exported_dimension(self) -> Optional[int]:
        """The dimension advertised to the rest of the pipeline."""
        return self._dimension

    def __call__(
        self,
        records: Iterable[Tuple[str, Any, Optional[Dict]]],
        adapter_context: AdapterContext,
    ) -> Generator[Tuple[str, Any, Dict], None, None]:
        """Yield every record untouched, defaulting missing metadata to {}.

        Args:
            records: Iterable of (id, media, optional metadata) tuples.
            adapter_context: Context of the adapter (unused here).

        Yields:
            Tuple[str, Any, Dict]: The input record.
        """
        for record_id, media, metadata in records:
            yield (record_id, media, metadata or {})
class ParagraphChunker(AdapterStep):
    """Adapter step that splits text media on blank lines and emits one
    record per paragraph.
    """

    def __init__(self, *, skip_during_query: bool):
        """Create the chunker.

        Args:
            skip_during_query (bool): When True, records pass through
                unchanged while querying (chunking only happens on upsert).
        """
        self.skip_during_query = skip_during_query

    def __call__(
        self,
        records: Iterable[Tuple[str, Any, Optional[Dict]]],
        adapter_context: AdapterContext,
    ) -> Generator[Tuple[str, Any, Dict], None, None]:
        """Chunk each record's media into paragraphs, or pass records
        through untouched when querying with `skip_during_query` set.

        Args:
            records: Iterable of (id, media, optional metadata) tuples.
            adapter_context (AdapterContext): Context of the adapter.

        Yields:
            Tuple[str, Any, Dict]: (id suffixed with a zero-padded paragraph
            index, paragraph text, metadata).
        """
        passthrough = (
            self.skip_during_query
            and adapter_context == AdapterContext("query")
        )
        for record_id, media, metadata in records:
            meta = metadata or {}
            if passthrough:
                yield (record_id, media, meta)
                continue
            for index, paragraph in enumerate(media.split("\n\n")):
                yield (
                    f"{record_id}_para_{str(index).zfill(3)}",
                    paragraph,
                    meta,
                )
+""" + +from __future__ import annotations + +import logging +import time +from typing import TYPE_CHECKING, List, Optional + +import sqlalchemy +from deprecated import deprecated +from sqlalchemy import MetaData, create_engine, text +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import QueuePool + +from .adapter import Adapter +from .exc import CollectionNotFound + +if TYPE_CHECKING: + from r2r.vecs.collection import Collection + +logger = logging.getLogger(__name__) + + +class Client: + """ + The `vecs.Client` class serves as an interface to a PostgreSQL database with pgvector support. It facilitates + the creation, retrieval, listing and deletion of vector collections, while managing connections to the + database. + + A `Client` instance represents a connection to a PostgreSQL database. This connection can be used to create + and manipulate vector collections, where each collection is a group of vector records in a PostgreSQL table. + + The `vecs.Client` class can be also supports usage as a context manager to ensure the connection to the database + is properly closed after operations, or used directly. 
    def __init__(
        self,
        connection_string: str,
        pool_size: int = 1,
        max_retries: int = 3,
        retry_delay: int = 1,
    ):
        """Create a client and eagerly initialize the `vecs` schema.

        Args:
            connection_string: Postgres DSN, e.g.
                "postgresql://user:password@host:port/db_name".
            pool_size: Number of connections held by the QueuePool.
            max_retries: Attempts `_initialize_database` makes before giving up.
            retry_delay: Seconds to sleep between initialization attempts.

        Raises:
            RuntimeError: If the database cannot be initialized after
                `max_retries` attempts (raised by `_initialize_database`).
        """
        self.engine = create_engine(
            connection_string,
            pool_size=pool_size,
            poolclass=QueuePool,
            pool_recycle=300,  # Recycle connections after 5 min
        )
        # All collection tables live under the dedicated `vecs` schema.
        self.meta = MetaData(schema="vecs")
        self.Session = sessionmaker(self.engine)
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        # Populated by `_get_vector_version` during initialization.
        self.vector_version: Optional[str] = None
        self._initialize_database()
+ ) + retries += 1 + time.sleep(self.retry_delay) + error = e + + error_message = f"Failed to initialize database after {self.max_retries} retries with error: {str(error)}" + logger.error(error_message) + raise RuntimeError(error_message) + + def _create_schema(self, sess): + try: + sess.execute(text("CREATE SCHEMA IF NOT EXISTS vecs;")) + except Exception as e: + logger.warning(f"Failed to create schema: {str(e)}") + + def _create_extension(self, sess): + try: + sess.execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) + sess.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + sess.execute(text("CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;")) + except Exception as e: + logger.warning(f"Failed to create extension: {str(e)}") + + def _get_vector_version(self, sess): + try: + self.vector_version = sess.execute( + text( + "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;" + ) + ).scalar_one() + except sqlalchemy.exc.InternalError as e: + logger.error(f"Failed with internal alchemy error: {str(e)}") + + import psycopg2 + + if isinstance(e.orig, psycopg2.errors.InFailedSqlTransaction): + sess.rollback() + self.vector_version = sess.execute( + text( + "SELECT installed_version FROM pg_available_extensions WHERE name = 'vector' LIMIT 1;" + ) + ).scalar_one() + else: + raise e + except Exception as e: + logger.error(f"Failed to retrieve vector version: {str(e)}") + raise e + + def _supports_hnsw(self): + return ( + not self.vector_version.startswith("0.4") + and not self.vector_version.startswith("0.3") + and not self.vector_version.startswith("0.2") + and not self.vector_version.startswith("0.1") + and not self.vector_version.startswith("0.0") + ) + + def get_or_create_collection( + self, + name: str, + *, + dimension: Optional[int] = None, + adapter: Optional[Adapter] = None, + ) -> Collection: + """ + Get a vector collection by name, or create it if no collection with + *name* exists. 
    @deprecated("use Client.get_or_create_collection")
    def get_collection(self, name: str) -> Collection:
        """
        Retrieve an existing vector collection.

        Args:
            name (str): The name of the collection.

        Returns:
            Collection: The retrieved collection.

        Raises:
            CollectionNotFound: If no collection with the given name exists.
        """
        from r2r.vecs.collection import Collection

        # Look the table up in the Postgres catalog rather than keeping
        # client-side state: a collection is any regular table (relkind 'r')
        # in the `vecs` schema with a `vec` column, excluding reserved names
        # that start with "_" (`^@` is Postgres "starts with"). `atttypmod`
        # on the vector column carries its declared dimension.
        query = text(
            f"""
        select
            relname as table_name,
            atttypmod as embedding_dim
        from
            pg_class pc
            join pg_attribute pa
                on pc.oid = pa.attrelid
        where
            pc.relnamespace = 'vecs'::regnamespace
            and pc.relkind = 'r'
            and pa.attname = 'vec'
            and not pc.relname ^@ '_'
            and pc.relname = :name
        """
        ).bindparams(name=name)
        with self.Session() as sess:
            query_result = sess.execute(query).fetchone()

            if query_result is None:
                raise CollectionNotFound(
                    "No collection found with requested name"
                )

            # Row is (table_name, dimension); rebind `name` to the canonical
            # catalog value before constructing the Collection.
            name, dimension = query_result
            return Collection(
                name,
                dimension,
                self,
            )
class IndexMethod(str, Enum):
    """Supported strategies for building a vector index.

    Attributes:
        auto (str): Automatically choose the best index method available.
        ivfflat (str): Inverted-file flat (IVFFlat) index.
        hnsw (str): Hierarchical navigable small world (HNSW) index.
    """

    auto = "auto"
    ivfflat = "ivfflat"
    hnsw = "hnsw"
@dataclass
class IndexArgsIVFFlat:
    """Optional arguments for building an IVFFlat index.

    Attributes:
        n_lists (int): Number of IVF centroids (lists) the index should use.
    """

    n_lists: int


@dataclass
class IndexArgsHNSW:
    """Optional arguments for building an HNSW index.

    Ref: https://github.com/pgvector/pgvector#index-options

    Both fields default to pgvector's own defaults so a caller may override
    just one of them.

    Attributes:
        m (int): Maximum number of connections per node per layer
            (default: 16).
        ef_construction (int): Size of the dynamic candidate list used
            while constructing the graph (default: 64).
    """

    m: Optional[int] = 16
    ef_construction: Optional[int] = 64
bind_processor(self, dialect): + def process(value): + if value is None: + return value + if not isinstance(value, list): + raise ValueError("Expected a list") + if self.dim is not None and len(value) != self.dim: + raise ValueError( + f"Expected {self.dim} dimensions, not {len(value)}" + ) + return "[" + ",".join(str(float(v)) for v in value) + "]" + + return process + + def result_processor(self, dialect, coltype): + return lambda value: ( + value + if value is None + else [float(v) for v in value[1:-1].split(",")] + ) + + class comparator_factory(UserDefinedType.Comparator): + def l2_distance(self, other): + return self.op("<->", return_type=Float)(other) + + def max_inner_product(self, other): + return self.op("<#>", return_type=Float)(other) + + def cosine_distance(self, other): + return self.op("<=>", return_type=Float)(other) + + +class Collection: + """ + The `vecs.Collection` class represents a collection of vectors within a PostgreSQL database with pgvector support. + It provides methods to manage (create, delete, fetch, upsert), index, and perform similarity searches on these vector collections. + + The collections are stored in separate tables in the database, with each vector associated with an identifier and optional metadata. + + Example usage: + + with vecs.create_client(DB_CONNECTION) as vx: + collection = vx.create_collection(name="docs", dimension=3) + collection.upsert([("id1", [1, 1, 1], {"key": "value"})]) + # Further operations on 'collection' + + Public Attributes: + name: The name of the vector collection. + dimension: The dimension of vectors in the collection. + + Note: Some methods of this class can raise exceptions from the `vecs.exc` module if errors occur. + """ + + def __init__( + self, + name: str, + dimension: int, + client: Client, + adapter: Optional[Adapter] = None, + ): + """ + Initializes a new instance of the `Collection` class. 
+ + During expected use, developers initialize instances of `Collection` using the + `vecs.Client` with `vecs.Client.create_collection(...)` rather than directly. + + Args: + name (str): The name of the collection. + dimension (int): The dimension of the vectors in the collection. + client (Client): The client to use for interacting with the database. + """ + from r2r.vecs.adapter import Adapter + + self.client = client + self.name = name + self.dimension = dimension + self.table = build_table(name, client.meta, dimension) + self._index: Optional[str] = None + self.adapter = adapter or Adapter(steps=[NoOp(dimension=dimension)]) + + reported_dimensions = set( + [ + x + for x in [ + dimension, + adapter.exported_dimension if adapter else None, + ] + if x is not None + ] + ) + if len(reported_dimensions) == 0: + raise ArgError( + "One of dimension or adapter must provide a dimension" + ) + elif len(reported_dimensions) > 1: + raise MismatchedDimension( + "Mismatch in the reported dimensions of the selected vector collection and embedding model. Correct the selected embedding model or specify a new vector collection by modifying the `POSTGRES_VECS_COLLECTION` environment variable." + ) + + def __repr__(self): + """ + Returns a string representation of the `Collection` instance. + + Returns: + str: A string representation of the `Collection` instance. + """ + return ( + f'vecs.Collection(name="{self.name}", dimension={self.dimension})' + ) + + def __len__(self) -> int: + """ + Returns the number of vectors in the collection. + + Returns: + int: The number of vectors in the collection. + """ + with self.client.Session() as sess: + with sess.begin(): + stmt = select(func.count()).select_from(self.table) + return sess.execute(stmt).scalar() or 0 + + def _create_if_not_exists(self): + """ + PRIVATE + + Creates a new collection in the database if it doesn't already exist + + Returns: + Collection: The found or created collection. 
+ """ + query = text( + f""" + select + relname as table_name, + atttypmod as embedding_dim + from + pg_class pc + join pg_attribute pa + on pc.oid = pa.attrelid + where + pc.relnamespace = 'vecs'::regnamespace + and pc.relkind = 'r' + and pa.attname = 'vec' + and not pc.relname ^@ '_' + and pc.relname = :name + """ + ).bindparams(name=self.name) + with self.client.Session() as sess: + query_result = sess.execute(query).fetchone() + + if query_result: + _, collection_dimension = query_result + else: + collection_dimension = None + + reported_dimensions = set( + [ + x + for x in [self.dimension, collection_dimension] + if x is not None + ] + ) + if len(reported_dimensions) > 1: + raise MismatchedDimension( + "Dimensions reported by adapter, dimension, and collection do not match. The likely cause of this is a mismatch between the dimensions of the selected vector collection and embedding model. Select the correct embedding model, or specify a new vector collection by modifying your `POSTGRES_VECS_COLLECTION` environment variable. If the selected colelction does not exist then it will be automatically with dimensions that match the selected embedding model." + ) + + if not collection_dimension: + self.table.create(self.client.engine) + + return self + + def _create(self): + """ + PRIVATE + + Creates a new collection in the database. Raises a `vecs.exc.CollectionAlreadyExists` + exception if a collection with the specified name already exists. + + Returns: + Collection: The newly created collection. 
+ """ + + collection_exists = self.__class__._does_collection_exist( + self.client, self.name + ) + if collection_exists: + raise CollectionAlreadyExists( + "Collection with requested name already exists" + ) + self.table.create(self.client.engine) + + unique_string = str(uuid.uuid4()).replace("-", "_")[0:7] + with self.client.Session() as sess: + sess.execute( + text( + f""" + create index ix_meta_{unique_string} + on vecs."{self.table.name}" + using gin ( metadata jsonb_path_ops ) + """ + ) + ) + return self + + def _drop(self): + """ + PRIVATE + + Deletes the collection from the database. Raises a `vecs.exc.CollectionNotFound` + exception if no collection with the specified name exists. + + Returns: + Collection: The deleted collection. + """ + with self.client.Session() as sess: + sess.execute(text(f"DROP TABLE IF EXISTS {self.name} CASCADE")) + sess.commit() + + return self + + def get_unique_metadata_values( + self, + field: str, + filter_field: Optional[str] = None, + filter_value: Optional[MetadataValues] = None, + ) -> List[MetadataValues]: + """ + Fetches all unique metadata values of a specific field, optionally filtered by another metadata field. + Args: + field (str): The metadata field for which to fetch unique values. + filter_field (Optional[str], optional): The metadata field to filter on. Defaults to None. + filter_value (Optional[MetadataValues], optional): The value to filter the metadata field with. Defaults to None. + Returns: + List[MetadataValues]: A list of unique metadata values for the specified field. 
+ """ + with self.client.Session() as sess: + with sess.begin(): + stmt = select( + distinct(self.table.c.metadata[field].astext) + ).where(self.table.c.metadata[field] != None) + + if filter_field is not None and filter_value is not None: + stmt = stmt.where( + self.table.c.metadata[filter_field].astext + == str(filter_value) + ) + + result = sess.execute(stmt) + unique_values = result.scalars().all() + + return unique_values + + def copy( + self, + records: Iterable[Tuple[str, Any, Metadata]], + skip_adapter: bool = False, + ) -> None: + """ + Copies records into the collection. + + Args: + records (Iterable[Tuple[str, Any, Metadata]]): An iterable of content to copy. + Each record is a tuple where: + - the first element is a unique string identifier + - the second element is an iterable of numeric values or relevant input type for the + adapter assigned to the collection + - the third element is metadata associated with the vector + + skip_adapter (bool): Should the adapter be skipped while copying. i.e. 
if vectors are being + provided, rather than a media type that needs to be transformed + """ + import csv + import io + import json + import os + + pipeline = flu(records) + for record in pipeline: + with psycopg2.connect( + database=os.getenv("POSTGRES_DBNAME"), + user=os.getenv("POSTGRES_USER"), + password=os.getenv("POSTGRES_PASSWORD"), + host=os.getenv("POSTGRES_HOST"), + port=os.getenv("POSTGRES_PORT"), + ) as conn: + with conn.cursor() as cur: + f = io.StringIO() + id, vec, metadata = record + + writer = csv.writer(f, delimiter=",", quotechar='"') + writer.writerow( + [ + str(id), + [float(ele) for ele in vec], + json.dumps(metadata), + ] + ) + f.seek(0) + result = f.getvalue() + + writer_name = ( + f'vecs."{self.table.fullname.split(".")[-1]}"' + ) + g = io.StringIO(result) + cur.copy_expert( + f"COPY {writer_name}(id, vec, metadata) FROM STDIN WITH (FORMAT csv)", + g, + ) + conn.commit() + cur.close() + conn.close() + + def upsert( + self, + records: Iterable[Tuple[str, Any, Metadata]], + skip_adapter: bool = False, + ) -> None: + """ + Inserts or updates *vectors* records in the collection. + + Args: + records (Iterable[Tuple[str, Any, Metadata]]): An iterable of content to upsert. + Each record is a tuple where: + - the first element is a unique string identifier + - the second element is an iterable of numeric values or relevant input type for the + adapter assigned to the collection + - the third element is metadata associated with the vector + + skip_adapter (bool): Should the adapter be skipped while upserting. i.e. 
if vectors are being + provided, rather than a media type that needs to be transformed + """ + + chunk_size = 512 + + if skip_adapter: + pipeline = flu(records).chunk(chunk_size) + else: + # Construct a lazy pipeline of steps to transform and chunk user input + pipeline = flu( + self.adapter(records, AdapterContext("upsert")) + ).chunk(chunk_size) + + with self.client.Session() as sess: + with sess.begin(): + for chunk in pipeline: + stmt = postgresql.insert(self.table).values(chunk) + stmt = stmt.on_conflict_do_update( + index_elements=[self.table.c.id], + set_=dict( + vec=stmt.excluded.vec, + metadata=stmt.excluded.metadata, + ), + ) + sess.execute(stmt) + return None + + def fetch(self, ids: Iterable[str]) -> List[Record]: + """ + Fetches vectors from the collection by their identifiers. + + Args: + ids (Iterable[str]): An iterable of vector identifiers. + + Returns: + List[Record]: A list of the fetched vectors. + """ + if isinstance(ids, str): + raise ArgError("ids must be a list of strings") + + chunk_size = 12 + records = [] + with self.client.Session() as sess: + with sess.begin(): + for id_chunk in flu(ids).chunk(chunk_size): + stmt = select(self.table).where( + self.table.c.id.in_(id_chunk) + ) + chunk_records = sess.execute(stmt) + records.extend(chunk_records) + return records + + def delete( + self, + ids: Optional[Iterable[str]] = None, + filters: Optional[Dict[str, Any]] = None, + ) -> List[str]: + """ + Deletes vectors from the collection by matching filters or ids. + + Args: + ids (Iterable[str], optional): An iterable of vector identifiers. + filters (Optional[Dict], optional): Filters to apply to the search. Defaults to None. + + Returns: + List[str]: A list of the document IDs of the deleted vectors. 
+ """ + if ids is None and filters is None: + raise ArgError("Either ids or filters must be provided.") + + if ids is not None and filters is not None: + raise ArgError("Either ids or filters must be provided, not both.") + + if isinstance(ids, str): + raise ArgError("ids must be a list of strings") + + ids = ids or [] + filters = filters or {} + del_document_ids = set([]) + + with self.client.Session() as sess: + with sess.begin(): + if ids: + for id_chunk in flu(ids).chunk(12): + stmt = select(self.table.c.metadata).where( + self.table.c.id.in_(id_chunk) + ) + results = sess.execute(stmt).fetchall() + for result in results: + metadata_json = result[0] + document_id = metadata_json.get("document_id") + if document_id: + del_document_ids.add(document_id) + + delete_stmt = ( + delete(self.table) + .where(self.table.c.id.in_(id_chunk)) + .returning(self.table.c.id) + ) + sess.execute(delete_stmt) + + if filters: + meta_filter = build_filters(self.table.c.metadata, filters) + stmt = select(self.table.c.metadata).where(meta_filter) + results = sess.execute(stmt).fetchall() + for result in results: + metadata_json = result[0] + document_id = metadata_json.get("document_id") + if document_id: + del_document_ids.add(document_id) + + delete_stmt = ( + delete(self.table) + .where(meta_filter) + .returning(self.table.c.id) + ) + sess.execute(delete_stmt) + + return list(del_document_ids) + + def __getitem__(self, items): + """ + Fetches a vector from the collection by its identifier. + + Args: + items (str): The identifier of the vector. + + Returns: + Record: The fetched vector. 
+ """ + if not isinstance(items, str): + raise ArgError("items must be a string id") + + row = self.fetch([items]) + + if row == []: + raise KeyError("no item found with requested id") + return row[0] + + def query( + self, + data: Union[Iterable[Numeric], Any], + limit: int = 10, + filters: Optional[Dict] = None, + measure: Union[IndexMeasure, str] = IndexMeasure.cosine_distance, + include_value: bool = False, + include_metadata: bool = False, + *, + probes: Optional[int] = None, + ef_search: Optional[int] = None, + skip_adapter: bool = False, + ) -> Union[List[Record], List[str]]: + """ + Executes a similarity search in the collection. + + The return type is dependent on arguments *include_value* and *include_metadata* + + Args: + data (Any): The vector to use as the query. + limit (int, optional): The maximum number of results to return. Defaults to 10. + filters (Optional[Dict], optional): Filters to apply to the search. Defaults to None. + measure (Union[IndexMeasure, str], optional): The distance measure to use for the search. Defaults to 'cosine_distance'. + include_value (bool, optional): Whether to include the distance value in the results. Defaults to False. + include_metadata (bool, optional): Whether to include the metadata in the results. Defaults to False. + probes (Optional[Int], optional): Number of ivfflat index lists to query. Higher increases accuracy but decreases speed + ef_search (Optional[Int], optional): Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed + skip_adapter (bool, optional): When True, skips any associated adapter and queries using a literal vector provided to *data* + + Returns: + Union[List[Record], List[str]]: The result of the similarity search. 
+ """ + + if probes is None: + probes = 10 + + if ef_search is None: + ef_search = 40 + + if not isinstance(probes, int): + raise ArgError("probes must be an integer") + + if probes < 1: + raise ArgError("probes must be >= 1") + + if limit > 1000: + raise ArgError("limit must be <= 1000") + + # ValueError on bad input + try: + imeasure = IndexMeasure(measure) + except ValueError: + raise ArgError("Invalid index measure") + + if not self.is_indexed_for_measure(imeasure): + warnings.warn( + UserWarning( + f"Query does not have a covering index for {imeasure}. See Collection.create_index" + ) + ) + + if skip_adapter: + adapted_query = [("", data, {})] + else: + # Adapt the query using the pipeline + adapted_query = [ + x + for x in self.adapter( + records=[("", data, {})], + adapter_context=AdapterContext("query"), + ) + ] + + if len(adapted_query) != 1: + raise ArgError( + "Failed to produce exactly one query vector from input" + ) + + _, vec, _ = adapted_query[0] + + distance_lambda = INDEX_MEASURE_TO_SQLA_ACC.get(imeasure) + if distance_lambda is None: + # unreachable + raise ArgError("invalid distance_measure") # pragma: no cover + + distance_clause = distance_lambda(self.table.c.vec)(vec) + + cols = [self.table.c.id] + + if include_value: + cols.append(distance_clause) + + if include_metadata: + cols.append(self.table.c.metadata) + + stmt = select(*cols) + if filters: + stmt = stmt.filter( + build_filters(self.table.c.metadata, filters) # type: ignore + ) + + stmt = stmt.order_by(distance_clause) + stmt = stmt.limit(limit) + + with self.client.Session() as sess: + with sess.begin(): + # index ignored if greater than n_lists + sess.execute( + text("set local ivfflat.probes = :probes").bindparams( + probes=probes + ) + ) + if self.client._supports_hnsw(): + sess.execute( + text( + "set local hnsw.ef_search = :ef_search" + ).bindparams(ef_search=ef_search) + ) + if len(cols) == 1: + return [str(x) for x in sess.scalars(stmt).fetchall()] + return 
sess.execute(stmt).fetchall() or [] + + @classmethod + def _list_collections(cls, client: "Client") -> List["Collection"]: + """ + PRIVATE + + Retrieves all collections from the database. + + Args: + client (Client): The database client. + + Returns: + List[Collection]: A list of all existing collections. + """ + + query = text( + """ + select + relname as table_name, + atttypmod as embedding_dim + from + pg_class pc + join pg_attribute pa + on pc.oid = pa.attrelid + where + pc.relnamespace = 'vecs'::regnamespace + and pc.relkind = 'r' + and pa.attname = 'vec' + and not pc.relname ^@ '_' + """ + ) + xc = [] + with client.Session() as sess: + for name, dimension in sess.execute(query): + existing_collection = cls(name, dimension, client) + xc.append(existing_collection) + return xc + + @classmethod + def _does_collection_exist(cls, client: "Client", name: str) -> bool: + """ + PRIVATE + + Checks if a collection with a given name exists within the database + + Args: + client (Client): The database client. + name (str): The name of the collection + + Returns: + Exists: Whether the collection exists or not + """ + + try: + client.get_collection(name) + return True + except CollectionNotFound: + return False + + @property + def index(self) -> Optional[str]: + """ + PRIVATE + + Note: + The `index` property is private and expected to undergo refactoring. + Do not rely on it's output. + + Retrieves the SQL name of the collection's vector index, if it exists. + + Returns: + Optional[str]: The name of the index, or None if no index exists. 
+ """ + + if self._index is None: + query = text( + """ + select + relname as table_name + from + pg_class pc + where + pc.relnamespace = 'vecs'::regnamespace + and relname ilike 'ix_vector%' + and pc.relkind = 'i' + """ + ) + with self.client.Session() as sess: + ix_name = sess.execute(query).scalar() + self._index = ix_name + return self._index + + def is_indexed_for_measure(self, measure: IndexMeasure): + """ + Checks if the collection is indexed for a specific measure. + + Args: + measure (IndexMeasure): The measure to check for. + + Returns: + bool: True if the collection is indexed for the measure, False otherwise. + """ + + index_name = self.index + if index_name is None: + return False + + ops = INDEX_MEASURE_TO_OPS.get(measure) + if ops is None: + return False + + if ops in index_name: + return True + + return False + + def create_index( + self, + measure: IndexMeasure = IndexMeasure.cosine_distance, + method: IndexMethod = IndexMethod.auto, + index_arguments: Optional[ + Union[IndexArgsIVFFlat, IndexArgsHNSW] + ] = None, + replace=True, + ) -> None: + """ + Creates an index for the collection. + + Note: + When `vecs` creates an index on a pgvector column in PostgreSQL, it uses a multi-step + process that enables performant indexes to be built for large collections with low end + database hardware. + + Those steps are: + + - Creates a new table with a different name + - Randomly selects records from the existing table + - Inserts the random records from the existing table into the new table + - Creates the requested vector index on the new table + - Upserts all data from the existing table into the new table + - Drops the existing table + - Renames the new table to the existing tables name + + If you create dependencies (like views) on the table that underpins + a `vecs.Collection` the `create_index` step may require you to drop those dependencies before + it will succeed. + + Args: + measure (IndexMeasure, optional): The measure to index for. 
Defaults to 'cosine_distance'. + method (IndexMethod, optional): The indexing method to use. Defaults to 'auto'. + index_arguments: (IndexArgsIVFFlat | IndexArgsHNSW, optional): Index type specific arguments + replace (bool, optional): Whether to replace the existing index. Defaults to True. + + Raises: + ArgError: If an invalid index method is used, or if *replace* is False and an index already exists. + """ + + if method not in ( + IndexMethod.ivfflat, + IndexMethod.hnsw, + IndexMethod.auto, + ): + raise ArgError("invalid index method") + + if index_arguments: + # Disallow case where user submits index arguments but uses the + # IndexMethod.auto index (index build arguments should only be + # used with a specific index) + if method == IndexMethod.auto: + raise ArgError( + "Index build parameters are not allowed when using the IndexMethod.auto index." + ) + # Disallow case where user specifies one index type but submits + # index build arguments for the other index type + if ( + isinstance(index_arguments, IndexArgsHNSW) + and method != IndexMethod.hnsw + ) or ( + isinstance(index_arguments, IndexArgsIVFFlat) + and method != IndexMethod.ivfflat + ): + raise ArgError( + f"{index_arguments.__class__.__name__} build parameters were supplied but {method} index was specified." + ) + + if method == IndexMethod.auto: + if self.client._supports_hnsw(): + method = IndexMethod.hnsw + else: + method = IndexMethod.ivfflat + + if method == IndexMethod.hnsw and not self.client._supports_hnsw(): + raise ArgError( + "HNSW Unavailable. 
Upgrade your pgvector installation to > 0.5.0 to enable HNSW support" + ) + + ops = INDEX_MEASURE_TO_OPS.get(measure) + if ops is None: + raise ArgError("Unknown index measure") + + unique_string = str(uuid.uuid4()).replace("-", "_")[0:7] + + with self.client.Session() as sess: + with sess.begin(): + if self.index is not None: + if replace: + sess.execute(text(f'drop index vecs."{self.index}";')) + self._index = None + else: + raise ArgError( + "replace is set to False but an index exists" + ) + + if method == IndexMethod.ivfflat: + if not index_arguments: + n_records: int = sess.execute(func.count(self.table.c.id)).scalar() # type: ignore + + n_lists = ( + int(max(n_records / 1000, 30)) + if n_records < 1_000_000 + else int(math.sqrt(n_records)) + ) + else: + # The following mypy error is ignored because mypy + # complains that `index_arguments` is typed as a union + # of IndexArgsIVFFlat and IndexArgsHNSW types, + # which both don't necessarily contain the `n_lists` + # parameter, however we have validated that the + # correct type is being used above. + n_lists = index_arguments.n_lists # type: ignore + + sess.execute( + text( + f""" + create index ix_{ops}_ivfflat_nl{n_lists}_{unique_string} + on vecs."{self.table.name}" + using ivfflat (vec {ops}) with (lists={n_lists}) + """ + ) + ) + + if method == IndexMethod.hnsw: + if not index_arguments: + index_arguments = IndexArgsHNSW() + + # See above for explanation of why the following lines + # are ignored + m = index_arguments.m # type: ignore + ef_construction = index_arguments.ef_construction # type: ignore + + sess.execute( + text( + f""" + create index ix_{ops}_hnsw_m{m}_efc{ef_construction}_{unique_string} + on vecs."{self.table.name}" + using hnsw (vec {ops}) WITH (m={m}, ef_construction={ef_construction}); + """ + ) + ) + + return None + + +def build_filters(json_col: Column, filters: Dict): + """ + Builds filters for SQL query based on provided dictionary. 
def build_filters(json_col: Column, filters: Dict):
    """
    Builds filters for SQL query based on provided dictionary.

    Args:
        json_col (Column): The JSONB metadata column in the database table.
        filters (Dict): The dictionary specifying filter conditions. Each
            value must be a single-operator dict, e.g. {"key": {"$eq": 1}}.

    Raises:
        FilterError: If filter conditions are not correctly formatted.

    Returns:
        The filter clause for the SQL query.
    """
    if not isinstance(filters, dict):
        raise FilterError("filters must be a dict")

    filter_clauses = []

    for key, value in filters.items():
        if not isinstance(key, str):
            raise FilterError("*filters* keys must be strings")

        if isinstance(value, dict):
            if len(value) > 1:
                raise FilterError("only one operator permitted per key")
            for operator, clause in value.items():
                if operator not in (
                    "$eq",
                    "$ne",
                    "$lt",
                    "$lte",
                    "$gt",
                    "$gte",
                    "$in",
                ):
                    raise FilterError("unknown operator")

                if operator == "$eq" and not hasattr(clause, "__len__"):
                    # Scalar equality uses jsonb containment ("@>"), which
                    # can be served by the collection's GIN metadata index.
                    contains_value = cast({key: clause}, postgresql.JSONB)
                    filter_clauses.append(json_col.op("@>")(contains_value))
                elif operator == "$in":
                    if not isinstance(clause, list):
                        raise FilterError(
                            "argument to $in filter must be a list"
                        )
                    for elem in clause:
                        if not isinstance(elem, (int, str, float)):
                            raise FilterError(
                                "argument to $in filter must be a list of scalars"
                            )
                    contains_value = [
                        cast(elem, postgresql.JSONB) for elem in clause
                    ]
                    filter_clauses.append(
                        json_col.op("->")(key).in_(contains_value)
                    )
                else:
                    # Remaining comparisons operate on the raw jsonb value.
                    matches_value = cast(clause, postgresql.JSONB)
                    if operator == "$eq":
                        filter_clauses.append(
                            json_col.op("->")(key) == matches_value
                        )
                    elif operator == "$ne":
                        filter_clauses.append(
                            json_col.op("->")(key) != matches_value
                        )
                    elif operator == "$lt":
                        filter_clauses.append(
                            json_col.op("->")(key) < matches_value
                        )
                    elif operator == "$lte":
                        filter_clauses.append(
                            json_col.op("->")(key) <= matches_value
                        )
                    elif operator == "$gt":
                        filter_clauses.append(
                            json_col.op("->")(key) > matches_value
                        )
                    elif operator == "$gte":
                        filter_clauses.append(
                            json_col.op("->")(key) >= matches_value
                        )
                    else:
                        raise Unreachable()
        else:
            raise FilterError("Filter value must be a dict with an operator")

    if len(filter_clauses) == 1:
        return filter_clauses[0]
    else:
        return and_(*filter_clauses)


def build_table(name: str, meta: MetaData, dimension: int) -> Table:
    """
    PRIVATE

    Builds a SQLAlchemy model underpinning a `vecs.Collection`.

    Args:
        name (str): The name of the table.
        meta (MetaData): MetaData instance associated with the SQL database.
        dimension: The dimension of the vectors in the collection.

    Returns:
        Table: The constructed SQL table.
    """
    return Table(
        name,
        meta,
        Column("id", String, primary_key=True),
        Column("vec", Vector(dimension), nullable=False),
        Column(
            "metadata",
            postgresql.JSONB,
            server_default=text("'{}'::jsonb"),
            nullable=False,
        ),
        extend_existing=True,
    )


# --- r2r/vecs/exc.py ---

__all__ = [
    "VecsException",
    "CollectionAlreadyExists",
    "CollectionNotFound",
    "ArgError",
    # Added: these two public exception classes were defined but missing
    # from __all__, so `from vecs.exc import *` did not export them.
    "MismatchedDimension",
    "FilterError",
    "IndexNotFound",
    "Unreachable",
    "MissingDependency",
]


class VecsException(Exception):
    """
    Base exception class for the 'vecs' package.
    All custom exceptions in the 'vecs' package should derive from this class.
    """

    ...


class CollectionAlreadyExists(VecsException):
    """
    Exception raised when attempting to create a collection that already exists.
    """

    ...


class CollectionNotFound(VecsException):
    """
    Exception raised when attempting to access or manipulate a collection that does not exist.
    """

    ...


class ArgError(VecsException):
    """
    Exception raised for invalid arguments when calling a method.
    """

    ...


class MismatchedDimension(ArgError):
    """
    Exception raised when multiple sources of truth for a collection's embedding dimension do not match.
    """

    ...
+ + +class FilterError(VecsException): + """ + Exception raised when there's an error related to filter usage in a query. + """ + + ... + + +class IndexNotFound(VecsException): + """ + Exception raised when attempting to access an index that does not exist. + """ + + ... + + +class Unreachable(VecsException): + """ + Exception raised when an unreachable part of the code is executed. + This is typically used for error handling in cases that should be logically impossible. + """ + + ... + + +class MissingDependency(VecsException, ImportError): + """ + Exception raised when attempting to access a feature that requires an optional dependency when the optional dependency is not present. + """ + + ... diff --git a/R2R/rag_out_1.json b/R2R/rag_out_1.json new file mode 100755 index 00000000..48ca603c --- /dev/null +++ b/R2R/rag_out_1.json @@ -0,0 +1,28 @@ +{'vector_search_results': + [ + {'id': '7656b48b-d191-516e-9753-d34efedd4812', + 'score': 1.0, + 'metadata': {'text': 'gene interaction and high predictive value, PLoS One 3 (5) (2008) e2031,doi:10.1371/journal.pone.0002031 .\n[107] M. van Hoek, A. Dehghan, J.C. Witteman, C.M. van Duijn, A.G. Uitterlinden, B.A.\nOostra, A. Hofman, E.J. Sijbrands, A.C. Janssens, Predicting type 2 diabetes based\non polymorphisms from genome-wide association studies: a population-based\nstudy, Diabetes 57 (11) (Nov 2008) 3122 3128.\n[108] Q. Lu, Y. Song, X. Wang, S. Won, Y. Cui, R.C. Elston, The effect of multiple genetic', + 'title': '2011 - Annotating individual human genomes.pdf', + 'version': 'v0', + 'chunk_order': 160, + 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', + 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', + 'associatedQuery': 'List any factors that help predict early onset of diabetes.'} + }, + {'id': 'd1af5c82-d226-5980-b5d9-90d7558d1880', + 'score': 1.0, + 'metadata': + { + 'text': 'variants in predicting the risk of type 2 diabetes, BMC Proc 3 (Suppl 7) (Dec 15\n2009) S49.\n[109] K. Miyake, W. Yang, K. 
Hara, K. Yasuda, Y. Horikawa, H. Osawa, H. Furuta, et al.,\nConstruction of a prediction model for type 2 diabetes mellitus in the Japanese\npopulation based on 11 genes with strong evidence of the association, J. Hum.\nGenet. 54 (4) (Apr 2009) 236 241 [Epub 2009 Feb 27].\n[110] P.J. Talmud, A.D. Hingorani, J.A. Cooper, M.G. Marmot, E.J. Brunner, M. Kumari, M.', 'title': '2011 - Annotating individual human genomes.pdf', + 'version': 'v0', + 'chunk_order': 161, + 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', + 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', + 'associatedQuery': 'List any factors that help predict early onset of diabetes.' + } + }, + {'id': 'bb2a67ec-135b-5d55-b33d-74b1dc085685', 'score': 1.0, 'metadata': {'text': 'type 2 diabetes risk, Diabetes 57 (11) (Nov 2008) 3129 3135.\n[103] Q. Lu, R.C. Elston, Using the optimal receiver operating characteristic curve to\ndesign a predictive genetic test, exempli ed with type 2 diabetes, Am. J. Hum.\nGenet. 82 (3) (Mar 2008) 641 651.\n[104] V. Lyssenko, A. Jonsson, P. Almgren, N. Pulizzi, B. Isomaa, T. Tuomi, G. Berglund, D.\nAltshuler, P. Nilsson, L. Groop, Clinical risk factors, DNA variants, and the\ndevelopment of type 2 diabetes, N. Engl. J. Med. 359 (21) (Nov 20 2008)', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 158, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '11fc663d2-2833-51e7-ae6a-55b007a6e27c', 'score': 1.0, 'metadata': {'text': 'insulin resistance, hypertension, and dyslipidemia (Obesity Education Initiative Expert Panel, 1998 ). Insulin resist-ance increases with age, and the incidence of diabetes rises sharply in the elderly (American Diabetes Association, 2010a ). \n In a few patients, genetic mutations appear to be associ-\nated with T2D (Roche et al. 
, 2005 ; American Diabetes \nAssociation, 2010a ). For example, recent work using the DPP data has led to the identi cation of 27 single nucle-', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9596, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'bb55a705-7399-550e-8285-07c33654b909', 'score': 1.0, 'metadata': {'text': '19. Permutt MA, Wasson J, Cox N: Genetic epidemiology of diabe-\ntes. J Clin Invest 2005, 115:1431-1439.\n20. Barroso I: Genetics of Type 2 diabetes. Diabet Med 2005,\n22:517-535.\n21. Parikh H, Groop L: Candidate genes for type 2 diabetes. Rev\nEndocr Metab Disord 2004, 5:151-176.\n22. Lohmueller KE, Pearce CL, Pike M, Lander ES, Hirschhorn JN: Meta-\nanalysis of genetic association studies supports a contribu-\ntion of common variants to su sceptibility to common dis-\nease. Nat Genet 2003, 33:177-182.', 'title': '2006 - β2-adrenergic receptor and UCP3 variants modulate the relationship between age and type 2 diabetes mellitus.pdf', 'version': 'v0', 'chunk_order': 86, 'document_id': '0ea34c04-5d09-5a32-89a7-c3add179927a', 'extraction_id': 'acf69ed8-c7b0-5d9f-8005-de020c9cf699', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '9bff43c0-fd12-572e-9996-24957edd17d2', 'score': 1.0, 'metadata': {'text': 'insulin-dependent diabetes and schizophrenia, twin studies have demon-strated the existence of a significant genetic component (Kyvik et al., 1995;Plomin et al., 1994). Genetic factors also influence cardiovascular diseaseswhich occur in early or midlife, while for cardiovascular diseases occur-ring late in life there is little evidence of a genetic effect (Marenberg et al.,1994). 
Dementia has a very strong genetic component, not only withregard to early-onset monogenic types but also to late-onset', 'title': '2001 - Demography in the age of genomics.pdf', 'version': 'v0', 'chunk_order': 452, 'document_id': '0f07fa43-feb6-5656-b7e7-b8faa86f5623', 'extraction_id': '5f24a851-1de6-5b6e-8230-2da08806b01a', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '2df84ccc-0d32-582e-bda6-9cd46bee5378', 'score': 1.0, 'metadata': {'text': 'Three categories of increased risk of developing diabetes \nare currently recognized by the ADA: an FPG between 5.6 and 6.9 mmol/L (100 and 125 mg/dL), de ned as having \nimpaired fasting glucose (IFG); a 2 - h OGTT between 7.8 and 11 mmol/L (140 and 199 mg/dL), de ned as having \nimpaired glucose tolerance (IGT); an A1C between 5.7 and 6.4% with values between 6.0 and 6.4 considered very high risk (American Diabetes Association, 2010a ). \n It is estimated that approximately one - fourth of indi-', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9590, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'ff30f187-d5c3-5d01-8026-0588a77e9f44', 'score': 1.0, 'metadata': {'text': '20 90 D20S451 0.006 10.7 5.4 (34) 8.42 5.4 (61) 0.30 (long duration)\nInteraction with age at diagnosis of diabetes\n19 1 D1S1665 0.004 37.4 8.1 (66) 41.2 8.3 (81) 0.23 (early onset)\n2 159 D2S1399/D2S1353 0.023 40.8 8.2 (53) 38.8 8.5 (94) 0.16 (late onset)\n3 135 D3S2460 0.036 37.7 8.6 (66) 41.0 8.0 (81) 0.16 (early onset)\n4 146 D4S1625 0.005 37.9 7.4 (52) 40.4 8.9 (95) 0.23 (early onset)\n65 5 D6S2427 0.024 38.0 7.8 (70) 40.9 8.8 (77) 0.18 (early onset)', 'title': '2004 - A genome scan for diabetic nephropathy in African Americans.pdf', 'version': 'v0', 'chunk_order': 64, 'document_id': 
'5798fb6b-b3e6-57c4-9823-5428853dbfa1', 'extraction_id': '01ec7832-8a80-5f5e-aa26-3648f572c4a1', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '57dc1ee5-4252-52c3-92cb-e2ac36cdc4d6', 'score': 1.0, 'metadata': {'text': 'prevention, even though there are great concerns about how such\ninformation will be perceived and utilized [68]. Recent studies in\ndiabetes and cardiovascular disease suggest that the addition of\npreviously associated genetic risk loci in clinical risk models of those\ndiseases increases both discriminative and predictive accuracy, albeit\nonly marginally. Typically, the strongest predictors of disease onset\nare known clinical risk factors such as body mass index, age, or', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 66, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': 'c17c74fa-12a3-5072-bb48-c179055db14d', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'ff801099-e737-57b1-91af-a4cea20adb87', 'score': 1.0, 'metadata': {'text': 'and sex, weight for height greater than the 85th percentile, or weight greater than 120% of ideal for height) who also have any two of the following risk factors: family history of T2D ( rst - or second - degree relative), maternal history \nof diabetes or GDM during child s gestation, member of high - risk race/ethnicity group (Native American, African -(40 in) in men is considered an indicator of increased', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9595, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}], 'kg_search_results': None}, + +{'id': 'chatcmpl-9wP7fPKqhM05QleyuBp1k6DB74T0x', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': "The factors 
that help predict early onset of diabetes include:\n\n1. **Genetic Factors**: Specific genetic mutations and polymorphisms have been associated with an increased risk of developing type 2 diabetes (T2D) [1], [2], [4], [5], [6], [8].\n2. **Clinical Risk Factors**: Known clinical risk factors such as body mass index (BMI), age, and sex are strong predictors of disease onset [9].\n3. **Family History**: A family history of T2D, particularly in first- or second-degree relatives, is a significant risk factor [10].\n4. **Maternal History**: A maternal history of diabetes or gestational diabetes mellitus (GDM) during the child's gestation increases the risk [10].\n5. **Ethnicity**: Being a member of a high-risk race/ethnicity group, such as Native American or African American, is associated with a higher risk [10].\n6. **Impaired Glucose Levels**: Impaired fasting glucose (IFG), impaired glucose tolerance (IGT), and elevated A1C levels are indicators of increased risk [7].\n7. **Insulin Resistance and Related Conditions**: Conditions such as insulin resistance, hypertension, and dyslipidemia are linked to a higher risk of developing diabetes [4].\n\nThese factors collectively contribute to the prediction of early onset diabetes.", 'role': 'assistant'}}], 'created': 1723706623, 'model': 'gpt-4o-2024-05-13', 'object': 'chat.completion', 'system_fingerprint': 'fp_3aa7262c27', 'usage': {'completion_tokens': 284, 'prompt_tokens': 1723, 'total_tokens': 2007}} +Time taken for RAG: 14.26 seconds diff --git a/R2R/rag_out_2.json b/R2R/rag_out_2.json new file mode 100755 index 00000000..b14bfbfe --- /dev/null +++ b/R2R/rag_out_2.json @@ -0,0 +1,5 @@ +Search Results: +{'vector_search_results': [{'id': '7656b48b-d191-516e-9753-d34efedd4812', 'score': 0.6306634325184495, 'metadata': {'text': 'gene interaction and high predictive value, PLoS One 3 (5) (2008) e2031,doi:10.1371/journal.pone.0002031 .\n[107] M. van Hoek, A. Dehghan, J.C. Witteman, C.M. van Duijn, A.G. 
Uitterlinden, B.A.\nOostra, A. Hofman, E.J. Sijbrands, A.C. Janssens, Predicting type 2 diabetes based\non polymorphisms from genome-wide association studies: a population-based\nstudy, Diabetes 57 (11) (Nov 2008) 3122 3128.\n[108] Q. Lu, Y. Song, X. Wang, S. Won, Y. Cui, R.C. Elston, The effect of multiple genetic', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 160, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'd1af5c82-d226-5980-b5d9-90d7558d1880', 'score': 0.6210695956862499, 'metadata': {'text': 'variants in predicting the risk of type 2 diabetes, BMC Proc 3 (Suppl 7) (Dec 15\n2009) S49.\n[109] K. Miyake, W. Yang, K. Hara, K. Yasuda, Y. Horikawa, H. Osawa, H. Furuta, et al.,\nConstruction of a prediction model for type 2 diabetes mellitus in the Japanese\npopulation based on 11 genes with strong evidence of the association, J. Hum.\nGenet. 54 (4) (Apr 2009) 236 241 [Epub 2009 Feb 27].\n[110] P.J. Talmud, A.D. Hingorani, J.A. Cooper, M.G. Marmot, E.J. Brunner, M. Kumari, M.', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 161, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'bb2a67ec-135b-5d55-b33d-74b1dc085685', 'score': 0.6134476661682129, 'metadata': {'text': 'type 2 diabetes risk, Diabetes 57 (11) (Nov 2008) 3129 3135.\n[103] Q. Lu, R.C. Elston, Using the optimal receiver operating characteristic curve to\ndesign a predictive genetic test, exempli ed with type 2 diabetes, Am. J. Hum.\nGenet. 82 (3) (Mar 2008) 641 651.\n[104] V. Lyssenko, A. Jonsson, P. Almgren, N. Pulizzi, B. Isomaa, T. Tuomi, G. Berglund, D.\nAltshuler, P. Nilsson, L. 
Groop, Clinical risk factors, DNA variants, and the\ndevelopment of type 2 diabetes, N. Engl. J. Med. 359 (21) (Nov 20 2008)', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 158, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': '80d78615-8424-5478-a01b-73e220bc0345', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '9fc663d2-2833-51e7-ae6a-55b007a6e27c', 'score': 0.5998189449310303, 'metadata': {'text': 'insulin resistance, hypertension, and dyslipidemia (Obesity Education Initiative Expert Panel, 1998 ). Insulin resist-ance increases with age, and the incidence of diabetes rises sharply in the elderly (American Diabetes Association, 2010a ). \n In a few patients, genetic mutations appear to be associ-\nated with T2D (Roche et al. , 2005 ; American Diabetes \nAssociation, 2010a ). For example, recent work using the DPP data has led to the identi cation of 27 single nucle-', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9596, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'bb55a705-7399-550e-8285-07c33654b909', 'score': 0.5985058546066284, 'metadata': {'text': '19. Permutt MA, Wasson J, Cox N: Genetic epidemiology of diabe-\ntes. J Clin Invest 2005, 115:1431-1439.\n20. Barroso I: Genetics of Type 2 diabetes. Diabet Med 2005,\n22:517-535.\n21. Parikh H, Groop L: Candidate genes for type 2 diabetes. Rev\nEndocr Metab Disord 2004, 5:151-176.\n22. Lohmueller KE, Pearce CL, Pike M, Lander ES, Hirschhorn JN: Meta-\nanalysis of genetic association studies supports a contribu-\ntion of common variants to su sceptibility to common dis-\nease. 
Nat Genet 2003, 33:177-182.', 'title': '2006 - β2-adrenergic receptor and UCP3 variants modulate the relationship between age and type 2 diabetes mellitus.pdf', 'version': 'v0', 'chunk_order': 86, 'document_id': '0ea34c04-5d09-5a32-89a7-c3add179927a', 'extraction_id': 'acf69ed8-c7b0-5d9f-8005-de020c9cf699', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '9bff43c0-fd12-572e-9996-24957edd17d2', 'score': 0.5946860555001475, 'metadata': {'text': 'insulin-dependent diabetes and schizophrenia, twin studies have demon-strated the existence of a significant genetic component (Kyvik et al., 1995;Plomin et al., 1994). Genetic factors also influence cardiovascular diseaseswhich occur in early or midlife, while for cardiovascular diseases occur-ring late in life there is little evidence of a genetic effect (Marenberg et al.,1994). Dementia has a very strong genetic component, not only withregard to early-onset monogenic types but also to late-onset', 'title': '2001 - Demography in the age of genomics.pdf', 'version': 'v0', 'chunk_order': 452, 'document_id': '0f07fa43-feb6-5656-b7e7-b8faa86f5623', 'extraction_id': '5f24a851-1de6-5b6e-8230-2da08806b01a', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '2df84ccc-0d32-582e-bda6-9cd46bee5378', 'score': 0.5944506525993347, 'metadata': {'text': 'Three categories of increased risk of developing diabetes \nare currently recognized by the ADA: an FPG between 5.6 and 6.9 mmol/L (100 and 125 mg/dL), de ned as having \nimpaired fasting glucose (IFG); a 2 - h OGTT between 7.8 and 11 mmol/L (140 and 199 mg/dL), de ned as having \nimpaired glucose tolerance (IGT); an A1C between 5.7 and 6.4% with values between 6.0 and 6.4 considered very high risk (American Diabetes Association, 2010a ). 
\n It is estimated that approximately one - fourth of indi-', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9590, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'ff30f187-d5c3-5d01-8026-0588a77e9f44', 'score': 0.5909770727157593, 'metadata': {'text': '20 90 D20S451 0.006 10.7 5.4 (34) 8.42 5.4 (61) 0.30 (long duration)\nInteraction with age at diagnosis of diabetes\n19 1 D1S1665 0.004 37.4 8.1 (66) 41.2 8.3 (81) 0.23 (early onset)\n2 159 D2S1399/D2S1353 0.023 40.8 8.2 (53) 38.8 8.5 (94) 0.16 (late onset)\n3 135 D3S2460 0.036 37.7 8.6 (66) 41.0 8.0 (81) 0.16 (early onset)\n4 146 D4S1625 0.005 37.9 7.4 (52) 40.4 8.9 (95) 0.23 (early onset)\n65 5 D6S2427 0.024 38.0 7.8 (70) 40.9 8.8 (77) 0.18 (early onset)', 'title': '2004 - A genome scan for diabetic nephropathy in African Americans.pdf', 'version': 'v0', 'chunk_order': 64, 'document_id': '5798fb6b-b3e6-57c4-9823-5428853dbfa1', 'extraction_id': '01ec7832-8a80-5f5e-aa26-3648f572c4a1', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': '57dc1ee5-4252-52c3-92cb-e2ac36cdc4d6', 'score': 0.5907666927119091, 'metadata': {'text': 'prevention, even though there are great concerns about how such\ninformation will be perceived and utilized [68]. Recent studies in\ndiabetes and cardiovascular disease suggest that the addition of\npreviously associated genetic risk loci in clinical risk models of those\ndiseases increases both discriminative and predictive accuracy, albeit\nonly marginally. 
Typically, the strongest predictors of disease onset\nare known clinical risk factors such as body mass index, age, or', 'title': '2011 - Annotating individual human genomes.pdf', 'version': 'v0', 'chunk_order': 66, 'document_id': 'f7b5d738-3f0b-5074-9c21-f6b443b4e07f', 'extraction_id': 'c17c74fa-12a3-5072-bb48-c179055db14d', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}, {'id': 'ff801099-e737-57b1-91af-a4cea20adb87', 'score': 0.58806312084198, 'metadata': {'text': 'and sex, weight for height greater than the 85th percentile, or weight greater than 120% of ideal for height) who also have any two of the following risk factors: family history of T2D ( rst - or second - degree relative), maternal history \nof diabetes or GDM during child s gestation, member of high - risk race/ethnicity group (Native American, African -(40 in) in men is considered an indicator of increased', 'title': '2012 - Systems Biology Approaches to Nutrition.pdf', 'version': 'v0', 'chunk_order': 9595, 'document_id': '6955478b-950d-5d29-b24c-3a5ca656f3ae', 'extraction_id': 'eb3de845-98db-505c-bb7f-c0f3259875fc', 'associatedQuery': 'List any factors that help predict early onset of diabetes.'}}], 'kg_search_results': None} +Completion: +{'id': 'chatcmpl-9wRnHpkyuzTtAXaZGr0qT373xjaOO', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': "The factors that help predict the early onset of diabetes include:\n\n1. **Genetic Factors**: Specific genetic mutations and polymorphisms have been associated with an increased risk of developing diabetes. For example, studies have identified multiple genetic variants that can predict the risk of type 2 diabetes [1], [2], [4], [5], [6].\n\n2. **Clinical Risk Factors**: Known clinical risk factors such as body mass index (BMI), age, and sex are strong predictors of disease onset [9].\n\n3. 
**Family History**: A family history of type 2 diabetes (T2D), particularly in first- or second-degree relatives, is a significant risk factor [10].\n\n4. **Maternal History of Diabetes**: A maternal history of diabetes or gestational diabetes mellitus (GDM) during the child's gestation increases the risk [10].\n\n5. **Race/Ethnicity**: Being a member of a high-risk race/ethnicity group, such as Native American or African American, is associated with a higher risk of developing diabetes [10].\n\n6. **Impaired Glucose Tolerance and Impaired Fasting Glucose**: Individuals with impaired fasting glucose (IFG) or impaired glucose tolerance (IGT) are at increased risk of developing diabetes [7].\n\n7. **Insulin Resistance, Hypertension, and Dyslipidemia**: These conditions are associated with an increased risk of diabetes, particularly as they often occur together in metabolic syndrome [4].\n\n8. **Age at Diagnosis**: There is an interaction between genetic factors and the age at diagnosis, with certain genetic markers being more strongly associated with early-onset diabetes [8].\n\nThese factors collectively help in predicting the early onset of diabetes.", 'role': 'assistant'}}], 'created': 1723716891, 'model': 'gpt-4o-2024-05-13', 'object': 'chat.completion', 'system_fingerprint': 'fp_3aa7262c27', 'usage': {'completion_tokens': 353, 'prompt_tokens': 1723, 'total_tokens': 2076}} +Time taken for RAG: 9.47 seconds diff --git a/R2R/tests/test_abstractions.py b/R2R/tests/test_abstractions.py new file mode 100755 index 00000000..a360e952 --- /dev/null +++ b/R2R/tests/test_abstractions.py @@ -0,0 +1,162 @@ +import asyncio +import uuid + +import pytest + +from r2r import ( + AsyncPipe, + AsyncState, + Prompt, + Vector, + VectorEntry, + VectorSearchRequest, + VectorSearchResult, + VectorType, + generate_id_from_label, +) + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + 
+@pytest.fixture(scope="function") +def event_loop(): + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + asyncio.set_event_loop(None) + + +@pytest.fixture(scope="session", autouse=True) +async def cleanup_tasks(): + yield + for task in asyncio.all_tasks(): + if task is not asyncio.current_task(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_async_state_update_and_get(): + state = AsyncState() + outer_key = "test_key" + values = {"inner_key": "value"} + await state.update(outer_key, values) + result = await state.get(outer_key, "inner_key") + assert result == "value" + + +@pytest.mark.asyncio +async def test_async_state_delete(): + state = AsyncState() + outer_key = "test_key" + values = {"inner_key": "value"} + await state.update(outer_key, values) + await state.delete(outer_key, "inner_key") + result = await state.get(outer_key, "inner_key") + assert result == {}, "Expect empty result after deletion" + + +class MockAsyncPipe(AsyncPipe): + async def _run_logic(self, input, state, run_id, *args, **kwargs): + yield "processed" + + +@pytest.mark.asyncio +async def test_async_pipe_run(): + pipe = MockAsyncPipe() + + async def list_to_generator(lst): + for item in lst: + yield item + + input = pipe.Input(message=list_to_generator(["test"])) + state = AsyncState() + try: + async_generator = await pipe.run(input, state) + results = [result async for result in async_generator] + assert results == ["processed"] + except asyncio.CancelledError: + pass # Task cancelled as expected + + +def test_prompt_initialization_and_formatting(): + prompt = Prompt( + name="greet", template="Hello, {name}!", input_types={"name": "str"} + ) + formatted = prompt.format_prompt({"name": "Alice"}) + assert formatted == "Hello, Alice!" 
+ + +def test_prompt_missing_input(): + prompt = Prompt( + name="greet", template="Hello, {name}!", input_types={"name": "str"} + ) + with pytest.raises(ValueError): + prompt.format_prompt({}) + + +def test_prompt_invalid_input_type(): + prompt = Prompt( + name="greet", template="Hello, {name}!", input_types={"name": "int"} + ) + with pytest.raises(TypeError): + prompt.format_prompt({"name": "Alice"}) + + +def test_search_request_with_optional_filters(): + request = VectorSearchRequest( + query="test", limit=10, filters={"category": "books"} + ) + assert request.query == "test" + assert request.limit == 10 + assert request.filters == {"category": "books"} + + +def test_search_result_to_string(): + result = VectorSearchResult( + id=generate_id_from_label("1"), + score=9.5, + metadata={"author": "John Doe"}, + ) + result_str = str(result) + assert ( + result_str + == f"VectorSearchResult(id={str(generate_id_from_label('1'))}, score=9.5, metadata={{'author': 'John Doe'}})" + ) + + +def test_search_result_repr(): + result = VectorSearchResult( + id=generate_id_from_label("1"), + score=9.5, + metadata={"author": "John Doe"}, + ) + assert ( + repr(result) + == f"VectorSearchResult(id={str(generate_id_from_label('1'))}, score=9.5, metadata={{'author': 'John Doe'}})" + ) + + +def test_vector_fixed_length_validation(): + with pytest.raises(ValueError): + Vector(data=[1.0, 2.0], type=VectorType.FIXED, length=3) + + +def test_vector_entry_serialization(): + vector = Vector(data=[1.0, 2.0], type=VectorType.FIXED, length=2) + entry_id = uuid.uuid4() + entry = VectorEntry( + id=entry_id, vector=vector, metadata={"key": uuid.uuid4()} + ) + serializable = entry.to_serializable() + assert serializable["id"] == str(entry_id) + assert serializable["vector"] == [1.0, 2.0] + assert isinstance( + serializable["metadata"]["key"], str + ) # Check UUID conversion to string diff --git a/R2R/tests/test_config.py b/R2R/tests/test_config.py new file mode 100755 index 00000000..5e60833c --- 
/dev/null +++ b/R2R/tests/test_config.py @@ -0,0 +1,187 @@ +import asyncio +import json +from unittest.mock import Mock, mock_open, patch + +import pytest + +from r2r import DocumentType, R2RConfig + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function") +def event_loop(): + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + asyncio.set_event_loop(None) + + +@pytest.fixture(scope="session", autouse=True) +async def cleanup_tasks(): + yield + for task in asyncio.all_tasks(): + if task is not asyncio.current_task(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +@pytest.fixture +def mock_bad_file(): + mock_data = json.dumps({}) + with patch("builtins.open", mock_open(read_data=mock_data)) as m: + yield m + + +@pytest.fixture +def mock_file(): + mock_data = json.dumps( + { + "app": {"max_file_size_in_mb": 128}, + "embedding": { + "provider": "example_provider", + "base_model": "model", + "base_dimension": 128, + "batch_size": 16, + "text_splitter": "default", + }, + "kg": { + "provider": "None", + "batch_size": 1, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 2048, + "chunk_overlap": 0, + }, + }, + "eval": {"llm": {"provider": "local"}}, + "ingestion": {"excluded_parsers": {}}, + "completions": {"provider": "lm_provider"}, + "logging": { + "provider": "local", + "log_table": "logs", + "log_info_table": "log_info", + }, + "prompt": {"provider": "prompt_provider"}, + "vector_database": {"provider": "vector_db"}, + } + ) + with patch("builtins.open", mock_open(read_data=mock_data)) as m: + yield m + + +@pytest.mark.asyncio +async def test_r2r_config_loading_required_keys(mock_bad_file): + with pytest.raises(KeyError): + R2RConfig.from_json("config.json") + + +@pytest.mark.asyncio +async def test_r2r_config_loading(mock_file): + config = 
R2RConfig.from_json("config.json") + assert ( + config.embedding.provider == "example_provider" + ), "Provider should match the mock data" + + +@pytest.fixture +def mock_redis_client(): + client = Mock() + return client + + +def test_r2r_config_serialization(mock_file, mock_redis_client): + config = R2RConfig.from_json("config.json") + config.save_to_redis(mock_redis_client, "test_key") + mock_redis_client.set.assert_called_once() + saved_data = json.loads(mock_redis_client.set.call_args[0][1]) + assert saved_data["app"]["max_file_size_in_mb"] == 128 + + +def test_r2r_config_deserialization(mock_file, mock_redis_client): + config_data = { + "app": {"max_file_size_in_mb": 128}, + "embedding": { + "provider": "example_provider", + "base_model": "model", + "base_dimension": 128, + "batch_size": 16, + "text_splitter": "default", + }, + "kg": { + "provider": "None", + "batch_size": 1, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 2048, + "chunk_overlap": 0, + }, + }, + "eval": {"llm": {"provider": "local"}}, + "ingestion": {"excluded_parsers": ["pdf"]}, + "completions": {"provider": "lm_provider"}, + "logging": { + "provider": "local", + "log_table": "logs", + "log_info_table": "log_info", + }, + "prompt": {"provider": "prompt_provider"}, + "vector_database": {"provider": "vector_db"}, + } + mock_redis_client.get.return_value = json.dumps(config_data) + config = R2RConfig.load_from_redis(mock_redis_client, "test_key") + assert config.app["max_file_size_in_mb"] == 128 + assert DocumentType.PDF in config.ingestion["excluded_parsers"] + + +def test_r2r_config_missing_section(): + invalid_data = { + "embedding": { + "provider": "example_provider", + "base_model": "model", + "base_dimension": 128, + "batch_size": 16, + "text_splitter": "default", + } + } + with patch("builtins.open", mock_open(read_data=json.dumps(invalid_data))): + with pytest.raises(KeyError): + R2RConfig.from_json("config.json") + + +def test_r2r_config_missing_required_key(): + 
invalid_data = { + "app": {"max_file_size_in_mb": 128}, + "embedding": { + "base_model": "model", + "base_dimension": 128, + "batch_size": 16, + "text_splitter": "default", + }, + "kg": { + "provider": "None", + "batch_size": 1, + "text_splitter": { + "type": "recursive_character", + "chunk_size": 2048, + "chunk_overlap": 0, + }, + }, + "completions": {"provider": "lm_provider"}, + "logging": { + "provider": "local", + "log_table": "logs", + "log_info_table": "log_info", + }, + "prompt": {"provider": "prompt_provider"}, + "vector_database": {"provider": "vector_db"}, + } + with patch("builtins.open", mock_open(read_data=json.dumps(invalid_data))): + with pytest.raises(KeyError): + R2RConfig.from_json("config.json") diff --git a/R2R/tests/test_embedding.py b/R2R/tests/test_embedding.py new file mode 100755 index 00000000..7a3e760a --- /dev/null +++ b/R2R/tests/test_embedding.py @@ -0,0 +1,162 @@ +import asyncio + +import pytest + +from r2r import EmbeddingConfig, VectorSearchResult, generate_id_from_label +from r2r.providers.embeddings import ( + OpenAIEmbeddingProvider, + SentenceTransformerEmbeddingProvider, +) + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function") +def event_loop(): + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + asyncio.set_event_loop(None) + + +@pytest.fixture(scope="session", autouse=True) +async def cleanup_tasks(): + yield + for task in asyncio.all_tasks(): + if task is not asyncio.current_task(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +@pytest.fixture +def openai_provider(): + config = EmbeddingConfig( + provider="openai", + base_model="text-embedding-3-small", + base_dimension=1536, + ) + return OpenAIEmbeddingProvider(config) + + +def test_openai_initialization(openai_provider): + assert isinstance(openai_provider, 
OpenAIEmbeddingProvider) + assert openai_provider.base_model == "text-embedding-3-small" + assert openai_provider.base_dimension == 1536 + + +def test_openai_invalid_provider_initialization(): + config = EmbeddingConfig(provider="invalid_provider") + with pytest.raises(ValueError): + OpenAIEmbeddingProvider(config) + + +def test_openai_get_embedding(openai_provider): + embedding = openai_provider.get_embedding("test text") + assert len(embedding) == 1536 + assert isinstance(embedding, list) + + +@pytest.mark.asyncio +async def test_openai_async_get_embedding(openai_provider): + try: + embedding = await openai_provider.async_get_embedding("test text") + assert len(embedding) == 1536 + assert isinstance(embedding, list) + except asyncio.CancelledError: + pass # Task cancelled as expected + + +def test_openai_get_embeddings(openai_provider): + embeddings = openai_provider.get_embeddings(["text1", "text2"]) + assert len(embeddings) == 2 + assert all(len(emb) == 1536 for emb in embeddings) + + +@pytest.mark.asyncio +async def test_openai_async_get_embeddings(openai_provider): + try: + embeddings = await openai_provider.async_get_embeddings( + ["text1", "text2"] + ) + assert len(embeddings) == 2 + assert all(len(emb) == 1536 for emb in embeddings) + except asyncio.CancelledError: + pass # Task cancelled as expected + + +def test_openai_tokenize_string(openai_provider): + tokens = openai_provider.tokenize_string( + "test text", "text-embedding-3-small" + ) + assert isinstance(tokens, list) + assert all(isinstance(token, int) for token in tokens) + + +@pytest.fixture +def sentence_transformer_provider(): + config = EmbeddingConfig( + provider="sentence-transformers", + base_model="mixedbread-ai/mxbai-embed-large-v1", + base_dimension=512, + rerank_model="jinaai/jina-reranker-v1-turbo-en", + rerank_dimension=384, + ) + return SentenceTransformerEmbeddingProvider(config) + + +def test_sentence_transformer_initialization(sentence_transformer_provider): + assert isinstance( + 
sentence_transformer_provider, SentenceTransformerEmbeddingProvider + ) + assert sentence_transformer_provider.do_search + # assert sentence_transformer_provider.do_rerank + + +def test_sentence_transformer_invalid_provider_initialization(): + config = EmbeddingConfig(provider="invalid_provider") + with pytest.raises(ValueError): + SentenceTransformerEmbeddingProvider(config) + + +def test_sentence_transformer_get_embedding(sentence_transformer_provider): + embedding = sentence_transformer_provider.get_embedding("test text") + assert len(embedding) == 512 + assert isinstance(embedding, list) + + +def test_sentence_transformer_get_embeddings(sentence_transformer_provider): + embeddings = sentence_transformer_provider.get_embeddings( + ["text1", "text2"] + ) + assert len(embeddings) == 2 + assert all(len(emb) == 512 for emb in embeddings) + + +def test_sentence_transformer_rerank(sentence_transformer_provider): + results = [ + VectorSearchResult( + id=generate_id_from_label("x"), + score=0.9, + metadata={"text": "doc1"}, + ), + VectorSearchResult( + id=generate_id_from_label("y"), + score=0.8, + metadata={"text": "doc2"}, + ), + ] + reranked_results = sentence_transformer_provider.rerank("query", results) + assert len(reranked_results) == 2 + assert reranked_results[0].metadata["text"] == "doc1" + assert reranked_results[1].metadata["text"] == "doc2" + + +def test_sentence_transformer_tokenize_string(sentence_transformer_provider): + with pytest.raises(ValueError): + sentence_transformer_provider.tokenize_string("test text") diff --git a/R2R/tests/test_end_to_end.py b/R2R/tests/test_end_to_end.py new file mode 100755 index 00000000..5e13ab5c --- /dev/null +++ b/R2R/tests/test_end_to_end.py @@ -0,0 +1,375 @@ +import asyncio +import os +import uuid + +import pytest +from fastapi.datastructures import UploadFile + +from r2r import ( + Document, + KVLoggingSingleton, + R2RConfig, + R2REngine, + R2RPipeFactory, + R2RPipelineFactory, + R2RProviderFactory, + 
VectorSearchSettings, + generate_id_from_label, +) +from r2r.base.abstractions.llm import GenerationConfig + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function") +def event_loop(): + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + asyncio.set_event_loop(None) + + +@pytest.fixture(scope="session", autouse=True) +async def cleanup_tasks(): + yield + for task in asyncio.all_tasks(): + if task is not asyncio.current_task(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +@pytest.fixture(scope="function") +def app(request): + config = R2RConfig.from_json() + config.logging.provider = "local" + config.logging.logging_path = uuid.uuid4().hex + + vector_db_provider = request.param + if vector_db_provider == "pgvector": + config.vector_database.provider = "pgvector" + config.vector_database.extra_fields["vecs_collection"] = ( + config.logging.logging_path + ) + try: + providers = R2RProviderFactory(config).create_providers() + pipes = R2RPipeFactory(config, providers).create_pipes() + pipelines = R2RPipelineFactory(config, pipes).create_pipelines() + + r2r = R2REngine( + config=config, + providers=providers, + pipelines=pipelines, + ) + + try: + KVLoggingSingleton.configure(config.logging) + except: + KVLoggingSingleton._config.logging_path = ( + config.logging.logging_path + ) + + yield r2r + finally: + if os.path.exists(config.logging.logging_path): + os.remove(config.logging.logging_path) + + +@pytest.fixture +def logging_connection(): + return KVLoggingSingleton() + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_ingest_txt_document(app, logging_connection): + try: + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_1"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + 
metadata={"author": "John Doe"}, + ), + ] + ) + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_ingest_txt_file(app, logging_connection): + try: + # Prepare the test data + metadata = {"author": "John Doe"} + files = [ + UploadFile( + filename="test.txt", + file=open( + os.path.join( + os.path.dirname(__file__), + "..", + "r2r", + "examples", + "data", + "test.txt", + ), + "rb", + ), + ) + ] + # Set file size manually + for file in files: + file.file.seek(0, 2) # Move to the end of the file + file.size = file.file.tell() # Get the file size + file.file.seek(0) # Move back to the start of the file + + await app.aingest_files(metadatas=[metadata], files=files) + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_ingest_search_txt_file(app, logging_connection): + try: + # Prepare the test data + metadata = {} + files = [ + UploadFile( + filename="aristotle.txt", + file=open( + os.path.join( + os.path.dirname(__file__), + "..", + "r2r", + "examples", + "data", + "aristotle.txt", + ), + "rb", + ), + ), + ] + + # Set file size manually + for file in files: + file.file.seek(0, 2) # Move to the end of the file + file.size = file.file.tell() # Get the file size + file.file.seek(0) # Move back to the start of the file + + await app.aingest_files(metadatas=[metadata], files=files) + + search_results = await app.asearch("who was aristotle?") + assert len(search_results["vector_search_results"]) == 10 + assert ( + "was an Ancient Greek philosopher and polymath" + in search_results["vector_search_results"][0]["metadata"]["text"] + ) + + search_results = await app.asearch( + "who was aristotle?", + vector_search_settings=VectorSearchSettings(search_limit=20), + ) + assert len(search_results["vector_search_results"]) == 20 + assert ( + "was an Ancient Greek philosopher and polymath" + in 
search_results["vector_search_results"][0]["metadata"]["text"] + ) + run_info = await logging_connection.get_run_info( + log_type_filter="search" + ) + + assert len(run_info) == 2, f"Expected 2 runs, but got {len(run_info)}" + + logs = await logging_connection.get_logs( + [run.run_id for run in run_info], 100 + ) + assert len(logs) == 6, f"Expected 6 logs, but got {len(logs)}" + + ## test stream + response = await app.arag( + query="Who was aristotle?", + rag_generation_config=GenerationConfig( + **{"model": "gpt-3.5-turbo", "stream": True} + ), + ) + collector = "" + async for chunk in response: + collector += chunk + assert "Aristotle" in collector + assert "Greek" in collector + assert "philosopher" in collector + assert "polymath" in collector + assert "Ancient" in collector + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_ingest_search_then_delete(app, logging_connection): + try: + # Ingest a document + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_1"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + metadata={"author": "John Doe"}, + ), + ] + ) + + # Search for the document + search_results = await app.asearch("who was aristotle?") + + # Verify that the search results are not empty + assert ( + len(search_results["vector_search_results"]) > 0 + ), "Expected search results, but got none" + assert ( + search_results["vector_search_results"][0]["metadata"]["text"] + == "The quick brown fox jumps over the lazy dog." 
+ ) + + # Delete the document + delete_result = await app.adelete(["author"], ["John Doe"]) + + # Verify the deletion was successful + expected_deletion_message = "deleted successfully" + assert ( + expected_deletion_message in delete_result + ), f"Expected successful deletion message, but got {delete_result}" + + # Search for the document again + search_results_2 = await app.asearch("who was aristotle?") + + # Verify that the search results are empty + assert ( + len(search_results_2["vector_search_results"]) == 0 + ), f"Expected no search results, but got {search_results_2['results']}" + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["local", "pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_ingest_user_documents(app, logging_connection): + try: + user_id_0 = generate_id_from_label("user_0") + user_id_1 = generate_id_from_label("user_1") + + try: + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_01"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + metadata={"author": "John Doe", "user_id": user_id_0}, + ), + Document( + id=generate_id_from_label("doc_11"), + data="The lazy dog jumps over the quick brown fox.", + type="txt", + metadata={"author": "John Doe", "user_id": user_id_1}, + ), + ] + ) + user_id_results = await app.ausers_overview([user_id_0, user_id_1]) + assert set([stats.user_id for stats in user_id_results]) == set( + [user_id_0, user_id_1] + ), f"Expected user ids {user_id_0} and {user_id_1}, but got {user_id_results}" + + user_0_docs = await app.adocuments_overview(user_ids=[user_id_0]) + user_1_docs = await app.adocuments_overview(user_ids=[user_id_1]) + + assert ( + len(user_0_docs) == 1 + ), f"Expected 1 document for user {user_id_0}, but got {len(user_0_docs)}" + assert ( + len(user_1_docs) == 1 + ), f"Expected 1 document for user {user_id_1}, but got {len(user_1_docs)}" + assert user_0_docs[0].document_id == generate_id_from_label( + "doc_01" + ), 
f"Expected document id {str(generate_id_from_label('doc_0'))} for user {user_id_0}, but got {user_0_docs[0].document_id}" + assert user_1_docs[0].document_id == generate_id_from_label( + "doc_11" + ), f"Expected document id {str(generate_id_from_label('doc_1'))} for user {user_id_1}, but got {user_1_docs[0].document_id}" + finally: + await app.adelete( + ["document_id", "document_id"], + [ + str(generate_id_from_label("doc_01")), + str(generate_id_from_label("doc_11")), + ], + ) + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_delete_by_id(app, logging_connection): + try: + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_1"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + metadata={"author": "John Doe"}, + ), + ] + ) + search_results = await app.asearch("who was aristotle?") + + assert len(search_results["vector_search_results"]) > 0 + await app.adelete( + ["document_id"], [str(generate_id_from_label("doc_1"))] + ) + search_results = await app.asearch("who was aristotle?") + assert len(search_results["vector_search_results"]) == 0 + except asyncio.CancelledError: + pass + + +@pytest.mark.parametrize("app", ["pgvector"], indirect=True) +@pytest.mark.asyncio +async def test_double_ingest(app, logging_connection): + try: + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_1"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + metadata={"author": "John Doe"}, + ), + ] + ) + search_results = await app.asearch("who was aristotle?") + + assert len(search_results["vector_search_results"]) == 1 + with pytest.raises(Exception): + await app.aingest_documents( + [ + Document( + id=generate_id_from_label("doc_1"), + data="The quick brown fox jumps over the lazy dog.", + type="txt", + metadata={"author": "John Doe"}, + ), + ] + ) + except asyncio.CancelledError: + pass diff --git 
# --- R2R/tests/test_ingestion_service.py ---
import asyncio
import io
import uuid
from datetime import datetime
from unittest.mock import AsyncMock, MagicMock, Mock

import pytest
from fastapi import UploadFile

from r2r.base import (
    Document,
    DocumentInfo,
    R2RDocumentProcessingError,
    R2RException,
    generate_id_from_label,
)
from r2r.main import R2RPipelines, R2RProviders
from r2r.main.services.ingestion_service import IngestionService


@pytest.fixture(scope="session", autouse=True)
def event_loop_policy():
    """Pin the default event-loop policy for the whole test session."""
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())


@pytest.fixture(scope="function")
def event_loop():
    """Provide each test with a fresh event loop, torn down afterwards."""
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()
    asyncio.set_event_loop(None)


@pytest.fixture(scope="session", autouse=True)
async def cleanup_tasks():
    """Cancel any tasks still pending when the session finishes."""
    yield
    for task in asyncio.all_tasks():
        if task is not asyncio.current_task():
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass


@pytest.fixture
def mock_vector_db():
    """Vector-DB stub whose documents-overview query defaults to empty."""
    db = MagicMock()
    db.get_documents_overview.return_value = []
    return db


@pytest.fixture
def mock_embedding_model():
    """Opaque embedding-model stand-in."""
    return Mock()


@pytest.fixture
def ingestion_service(mock_vector_db, mock_embedding_model):
    """Assemble an IngestionService wired entirely to mocks."""
    config = MagicMock()
    config.app.get.return_value = 32  # default max file size
    providers = Mock(spec=R2RProviders)
    providers.vector_db = mock_vector_db
    providers.embedding_model = mock_embedding_model
    pipelines = Mock(spec=R2RPipelines)
    pipelines.ingestion_pipeline = AsyncMock()
    pipelines.ingestion_pipeline.run.return_value = {
        "embedding_pipeline_output": []
    }
    run_manager = Mock()
    run_manager.run_info = {"mock_run_id": {}}
    logging_connection = AsyncMock()

    return IngestionService(
        config, providers, pipelines, run_manager, logging_connection
    )


@pytest.mark.asyncio
async def test_ingest_single_document(ingestion_service, mock_vector_db):
    """A brand-new document is processed with no failures or skips."""
    try:
        document = Document(
            id=generate_id_from_label("test_id"),
            data="Test content",
            type="txt",
            metadata={},
        )
        ingestion_service.pipelines.ingestion_pipeline.run.return_value = {
            "embedding_pipeline_output": [(document.id, None)]
        }
        # No pre-existing documents in the overview table.
        mock_vector_db.get_documents_overview.return_value = []

        outcome = await ingestion_service.ingest_documents([document])

        assert outcome["processed_documents"] == [
            f"Document '{document.id}' processed successfully."
        ]
        assert not outcome["failed_documents"]
        assert not outcome["skipped_documents"]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_ingest_duplicate_document(ingestion_service, mock_vector_db):
    """Re-ingesting an id already marked 'success' raises R2RException."""
    try:
        document = Document(
            id=generate_id_from_label("test_id"),
            data="Test content",
            type="txt",
            metadata={},
        )
        existing = DocumentInfo(
            document_id=document.id,
            version="v0",
            size_in_bytes=len(document.data),
            metadata={},
            title=str(document.id),
            user_id=None,
            created_at=datetime.now(),
            updated_at=datetime.now(),
            status="success",
        )
        mock_vector_db.get_documents_overview.return_value = [existing]

        with pytest.raises(R2RException) as exc_info:
            await ingestion_service.ingest_documents([document])

        assert (
            f"Document with ID {document.id} was already successfully processed"
            in str(exc_info.value)
        )
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_ingest_file(ingestion_service):
    """A well-formed UploadFile ingests successfully."""
    try:
        payload = b"Test content"
        upload = UploadFile(filename="test.txt", file=io.BytesIO(payload))
        upload.file.seek(0)
        upload.size = len(payload)  # UploadFile does not compute this itself

        ingestion_service.pipelines.ingestion_pipeline.run.return_value = {
            "embedding_pipeline_output": [
                (generate_id_from_label("test.txt"), None)
            ]
        }

        outcome = await ingestion_service.ingest_files([upload])

        assert len(outcome["processed_documents"]) == 1
        assert not outcome["failed_documents"]
        assert not outcome["skipped_documents"]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_ingest_mixed_success_and_failure(
    ingestion_service, mock_vector_db
):
    """One success plus one pipeline error are reported separately."""
    try:
        documents = [
            Document(
                id=generate_id_from_label("success_id"),
                data="Success content",
                type="txt",
                metadata={},
            ),
            Document(
                id=generate_id_from_label("failure_id"),
                data="Failure content",
                type="txt",
                metadata={},
            ),
        ]

        ingestion_service.pipelines.ingestion_pipeline.run.return_value = {
            "embedding_pipeline_output": [
                (
                    documents[0].id,
                    f"Processed 1 vectors for document {documents[0].id}.",
                ),
                (
                    documents[1].id,
                    R2RDocumentProcessingError(
                        error_message="Embedding failed",
                        document_id=documents[1].id,
                    ),
                ),
            ]
        }

        outcome = await ingestion_service.ingest_documents(documents)

        assert len(outcome["processed_documents"]) == 1
        assert len(outcome["failed_documents"]) == 1
        assert str(documents[0].id) in outcome["processed_documents"][0]
        assert str(documents[1].id) in outcome["failed_documents"][0]
        assert "Embedding failed" in outcome["failed_documents"][0]

        # Overview is upserted once before and once after processing;
        # the final call carries the per-document statuses.
        assert mock_vector_db.upsert_documents_overview.call_count == 2
        upserts = mock_vector_db.upsert_documents_overview.call_args[0][0]
        assert len(upserts) == 2
        assert upserts[0].document_id == documents[0].id
        assert upserts[0].status == "success"
        assert upserts[1].document_id == documents[1].id
        assert upserts[1].status == "failure"
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_ingest_unsupported_file_type(ingestion_service):
    """An unrecognised file extension is rejected with R2RException."""
    try:
        upload = UploadFile(
            filename="test.unsupported", file=io.BytesIO(b"Test content")
        )
        upload.file.seek(0)
        upload.size = 12  # set manually; UploadFile does not compute it

        with pytest.raises(R2RException) as exc_info:
            await ingestion_service.ingest_files([upload])

        assert "is not a valid DocumentType" in str(exc_info.value)
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_ingest_large_file(ingestion_service):
    """A file above the configured size cap is rejected."""
    try:
        big_payload = b"Large content" * 1000000  # ~12MB of content
        upload = UploadFile(
            filename="large_file.txt", file=io.BytesIO(big_payload)
        )
        upload.file.seek(0)
        upload.size = len(big_payload)

        # Cap uploads at 10MB for this test.
        ingestion_service.config.app.get.return_value = 10

        with pytest.raises(R2RException) as exc_info:
            await ingestion_service.ingest_files([upload])

        assert "File size exceeds maximum allowed size" in str(exc_info.value)
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_partial_ingestion_success(ingestion_service, mock_vector_db):
    """Two successes surrounding one failure are all accounted for."""
    try:
        documents = [
            Document(
                id=generate_id_from_label("success_1"),
                data="Success content 1",
                type="txt",
                metadata={},
            ),
            Document(
                id=generate_id_from_label("fail"),
                data="Fail content",
                type="txt",
                metadata={},
            ),
            Document(
                id=generate_id_from_label("success_2"),
                data="Success content 2",
                type="txt",
                metadata={},
            ),
        ]

        ingestion_service.pipelines.ingestion_pipeline.run.return_value = {
            "embedding_pipeline_output": [
                (documents[0].id, None),
                (
                    documents[1].id,
                    R2RDocumentProcessingError(
                        error_message="Embedding failed",
                        document_id=documents[1].id,
                    ),
                ),
                (documents[2].id, None),
            ]
        }

        outcome = await ingestion_service.ingest_documents(documents)

        assert len(outcome["processed_documents"]) == 2
        assert len(outcome["failed_documents"]) == 1
        assert str(documents[1].id) in outcome["failed_documents"][0]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_version_increment(ingestion_service, mock_vector_db):
    """Updating a v2 document bumps the stored version to v3."""
    try:
        document = Document(
            id=generate_id_from_label("test_id"),
            data="Test content",
            type="txt",
            metadata={},
        )
        mock_vector_db.get_documents_overview.return_value = [
            DocumentInfo(
                document_id=document.id,
                version="v2",
                status="success",
                size_in_bytes=0,
                metadata={},
            )
        ]

        upload = UploadFile(
            filename="test.txt", file=io.BytesIO(b"Updated content")
        )
        await ingestion_service.update_files([upload], [document.id])

        calls = mock_vector_db.upsert_documents_overview.call_args_list
        assert len(calls) == 2
        # The second upsert carries the incremented version.
        assert calls[1][0][0][0].version == "v3"
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_process_ingestion_results_error_handling(ingestion_service):
    """Pipeline errors surface in the failed_documents list."""
    try:
        document_infos = [
            DocumentInfo(
                document_id=uuid.uuid4(),
                version="v0",
                status="processing",
                size_in_bytes=0,
                metadata={},
            )
        ]
        ingestion_results = {
            "embedding_pipeline_output": [
                (
                    document_infos[0].document_id,
                    R2RDocumentProcessingError(
                        "Unexpected error",
                        document_id=document_infos[0].document_id,
                    ),
                )
            ]
        }

        outcome = await ingestion_service._process_ingestion_results(
            ingestion_results,
            document_infos,
            [],
            {document_infos[0].document_id: "test"},
        )

        assert len(outcome["failed_documents"]) == 1
        assert "Unexpected error" in outcome["failed_documents"][0]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_file_size_limit_edge_cases(ingestion_service):
    """Files below and exactly at the cap pass; one byte over is rejected."""
    try:
        ingestion_service.config.app.get.return_value = 1  # 1MB limit
        one_mb = 1024 * 1024

        file_under = UploadFile(
            filename="under.txt",
            file=io.BytesIO(b"x" * (one_mb - 1)),
            size=one_mb - 1,
        )
        file_at = UploadFile(
            filename="at.txt", file=io.BytesIO(b"x" * one_mb), size=one_mb
        )
        file_over = UploadFile(
            filename="over.txt",
            file=io.BytesIO(b"x" * (one_mb + 1)),
            size=one_mb + 1,
        )

        await ingestion_service.ingest_files([file_under])  # should succeed
        await ingestion_service.ingest_files([file_at])  # should succeed

        with pytest.raises(
            R2RException, match="File size exceeds maximum allowed size"
        ):
            await ingestion_service.ingest_files([file_over])
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_document_status_update_after_ingestion(
    ingestion_service, mock_vector_db
):
    """The second overview upsert records status 'success'."""
    try:
        document = Document(
            id=generate_id_from_label("test_id"),
            data="Test content",
            type="txt",
            metadata={},
        )

        ingestion_service.pipelines.ingestion_pipeline.run.return_value = {
            "embedding_pipeline_output": [(document.id, None)]
        }
        mock_vector_db.get_documents_overview.return_value = []

        await ingestion_service.ingest_documents([document])

        # One upsert before processing, one after (the status update).
        assert mock_vector_db.upsert_documents_overview.call_count == 2

        status_update = (
            mock_vector_db.upsert_documents_overview.call_args_list[1][0][0]
        )
        assert len(status_update) == 1
        assert status_update[0].document_id == document.id
        assert status_update[0].status == "success"
    except asyncio.CancelledError:
        pass


# --- R2R/tests/test_llms.py ---
import pytest

from r2r import LLMConfig
from r2r.base.abstractions.llm import GenerationConfig
from r2r.providers.llms import LiteLLM


@pytest.fixture
def lite_llm():
    """LiteLLM provider built from a bare litellm config."""
    return LiteLLM(LLMConfig(provider="litellm"))


@pytest.mark.parametrize("llm_fixture", ["lite_llm"])
def test_get_completion_ollama(request, llm_fixture):
    """A deterministic prompt against an Ollama model returns 'True'."""
    llm = request.getfixturevalue(llm_fixture)

    messages = [
        {
            "role": "user",
            "content": "This is a test, return only the word `True`",
        }
    ]
    generation_config = GenerationConfig(
        model="ollama/llama2",
        temperature=0.0,
        top_p=0.9,
        max_tokens_to_sample=50,
        stream=False,
    )

    completion = llm.get_completion(messages, generation_config)
    # assert isinstance(completion, LLMChatCompletion)
    assert completion.choices[0].message.role == "assistant"
    assert completion.choices[0].message.content.strip() == "True"
@pytest.mark.parametrize("llm_fixture", ["lite_llm"])
def test_get_completion_openai(request, llm_fixture):
    """A deterministic prompt against gpt-3.5-turbo returns 'True'."""
    llm = request.getfixturevalue(llm_fixture)

    messages = [
        {
            "role": "user",
            "content": "This is a test, return only the word `True`",
        }
    ]
    generation_config = GenerationConfig(
        model="gpt-3.5-turbo",
        temperature=0.0,
        top_p=0.9,
        max_tokens_to_sample=50,
        stream=False,
    )

    completion = llm.get_completion(messages, generation_config)
    # assert isinstance(completion, LLMChatCompletion)
    assert completion.choices[0].message.role == "assistant"
    assert completion.choices[0].message.content.strip() == "True"


# --- R2R/tests/test_logging.py ---
import asyncio
import logging
import os
import uuid

import pytest

from r2r import (
    LocalKVLoggingProvider,
    LoggingConfig,
    PostgresKVLoggingProvider,
    PostgresLoggingConfig,
    RedisKVLoggingProvider,
    RedisLoggingConfig,
    generate_run_id,
)

logger = logging.getLogger(__name__)


@pytest.fixture(scope="session", autouse=True)
def event_loop_policy():
    """Pin the default event-loop policy for the whole test session."""
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())


@pytest.fixture(scope="function")
def event_loop():
    """Provide each test with a fresh event loop, torn down afterwards."""
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()
    asyncio.set_event_loop(None)


@pytest.fixture(scope="session", autouse=True)
async def cleanup_tasks():
    """Cancel any tasks still pending when the session finishes."""
    yield
    for task in asyncio.all_tasks():
        if task is not asyncio.current_task():
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass


@pytest.fixture(scope="function")
def local_provider():
    """Fixture to create and tear down the LocalKVLoggingProvider with a unique database file."""
    # Generate a unique file name for the SQLite database
    unique_id = str(uuid.uuid4())
    logging_path = f"test_{unique_id}.sqlite"

    # Setup the LocalKVLoggingProvider with the unique file
    provider = LocalKVLoggingProvider(LoggingConfig(logging_path=logging_path))

    # Provide the setup provider to the test
    yield provider

    # Cleanup: Remove the SQLite file after test completes
    provider.close()
    if os.path.exists(logging_path):
        os.remove(logging_path)


@pytest.mark.asyncio
async def test_local_logging(local_provider):
    """Test logging and retrieving from the local logging provider."""
    try:
        run_id = generate_run_id()
        await local_provider.init()
        await local_provider.log(run_id, "key", "value")
        logs = await local_provider.get_logs([run_id])
        assert len(logs) == 1
        assert logs[0]["key"] == "key"
        assert logs[0]["value"] == "value"
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_multiple_log_entries(local_provider):
    """Test logging multiple entries and retrieving them."""
    try:
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()
        run_id_2 = generate_run_id()
        await local_provider.init()

        entries = [
            (run_id_0, "key_0", "value_0"),
            (run_id_1, "key_1", "value_1"),
            (run_id_2, "key_2", "value_2"),
        ]
        for run_id, key, value in entries:
            await local_provider.log(run_id, key, value)

        logs = await local_provider.get_logs([run_id_0, run_id_1, run_id_2])
        assert len(logs) == 3

        # Match each returned log back to the entry that produced it.
        for log in logs:
            selected_entry = [
                entry for entry in entries if entry[0] == log["log_id"]
            ][0]
            assert log["log_id"] == selected_entry[0]
            assert log["key"] == selected_entry[1]
            assert log["value"] == selected_entry[2]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_log_retrieval_limit(local_provider):
    """Test that retrieval scoped to a subset of run ids returns only that subset.

    Fixed docstring: the original claimed this exercised a ``max_logs``
    parameter, but no limit is passed — the test logs 10 runs and fetches
    logs for the first 5 run ids only.
    """
    try:
        await local_provider.init()

        run_ids = []
        for i in range(10):  # Add 10 entries
            run_ids.append(generate_run_id())
            await local_provider.log(run_ids[-1], f"key_{i}", f"value_{i}")

        logs = await local_provider.get_logs(run_ids[0:5])
        assert len(logs) == 5  # Only the 5 requested runs are returned
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_specific_run_type_retrieval(local_provider):
    """Test retrieving logs for a specific run type works correctly."""
    try:
        await local_provider.init()
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()

        await local_provider.log(
            run_id_0, "pipeline_type", "search", is_info_log=True
        )
        await local_provider.log(run_id_0, "key_0", "value_0")
        await local_provider.log(
            run_id_1, "pipeline_type", "rag", is_info_log=True
        )
        await local_provider.log(run_id_1, "key_1", "value_1")

        run_info = await local_provider.get_run_info(log_type_filter="search")
        logs = await local_provider.get_logs([run.run_id for run in run_info])
        assert len(logs) == 1
        assert logs[0]["log_id"] == run_id_0
        assert logs[0]["key"] == "key_0"
        assert logs[0]["value"] == "value_0"
    except asyncio.CancelledError:
        pass


@pytest.fixture(scope="function")
def postgres_provider():
    """Fixture to create and tear down the PostgresKVLoggingProvider.

    NOTE(review): unlike the local fixture, nothing drops the uniquely-named
    tables after the test — confirm whether the test database is disposable.
    """
    log_table = f"logs_{str(uuid.uuid4()).replace('-', '_')}"
    log_info_table = f"log_info_{str(uuid.uuid4()).replace('-', '_')}"

    provider = PostgresKVLoggingProvider(
        PostgresLoggingConfig(
            log_table=log_table, log_info_table=log_info_table
        )
    )
    yield provider


@pytest.mark.asyncio
async def test_postgres_logging(postgres_provider):
    """Test logging and retrieving from the postgres logging provider."""
    try:
        await postgres_provider.init()
        run_id = generate_run_id()
        await postgres_provider.log(run_id, "key", "value")
        logs = await postgres_provider.get_logs([run_id])
        assert len(logs) == 1
        assert logs[0]["key"] == "key"
        assert logs[0]["value"] == "value"
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_postgres_multiple_log_entries(postgres_provider):
    """Test logging multiple entries and retrieving them."""
    try:
        await postgres_provider.init()
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()
        run_id_2 = generate_run_id()

        entries = [
            (run_id_0, "key_0", "value_0"),
            (run_id_1, "key_1", "value_1"),
            (run_id_2, "key_2", "value_2"),
        ]
        for run_id, key, value in entries:
            await postgres_provider.log(run_id, key, value)

        logs = await postgres_provider.get_logs([run_id_0, run_id_1, run_id_2])
        assert len(logs) == 3

        # Match each returned log back to the entry that produced it.
        for log in logs:
            selected_entry = [
                entry for entry in entries if entry[0] == log["log_id"]
            ][0]
            assert log["log_id"] == selected_entry[0]
            assert log["key"] == selected_entry[1]
            assert log["value"] == selected_entry[2]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_postgres_log_retrieval_limit(postgres_provider):
    """Test that retrieval scoped to a subset of run ids returns only that subset.

    Fixed docstring: no ``max_logs`` parameter is exercised here — the test
    logs 10 runs and fetches logs for the first 5 run ids only.
    """
    try:
        await postgres_provider.init()
        run_ids = []
        for i in range(10):  # Add 10 entries
            run_ids.append(generate_run_id())
            await postgres_provider.log(run_ids[-1], f"key_{i}", f"value_{i}")

        logs = await postgres_provider.get_logs(run_ids[:5])
        assert len(logs) == 5  # Only the 5 requested runs are returned
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_postgres_specific_run_type_retrieval(postgres_provider):
    """Test retrieving logs for a specific run type works correctly."""
    try:
        await postgres_provider.init()
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()

        await postgres_provider.log(
            run_id_0, "pipeline_type", "search", is_info_log=True
        )
        await postgres_provider.log(run_id_0, "key_0", "value_0")
        await postgres_provider.log(
            run_id_1, "pipeline_type", "rag", is_info_log=True
        )
        await postgres_provider.log(run_id_1, "key_1", "value_1")

        run_info = await postgres_provider.get_run_info(
            log_type_filter="search"
        )
        logs = await postgres_provider.get_logs(
            [run.run_id for run in run_info]
        )
        assert len(logs) == 1
        assert logs[0]["log_id"] == run_id_0
        assert logs[0]["key"] == "key_0"
        assert logs[0]["value"] == "value_0"
    except asyncio.CancelledError:
        pass


@pytest.fixture(scope="function")
def redis_provider():
    """Fixture to create and tear down the RedisKVLoggingProvider."""
    log_table = f"logs_{str(uuid.uuid4()).replace('-', '_')}"
    log_info_table = f"log_info_{str(uuid.uuid4()).replace('-', '_')}"

    provider = RedisKVLoggingProvider(
        RedisLoggingConfig(log_table=log_table, log_info_table=log_info_table)
    )
    yield provider
    provider.close()


@pytest.mark.asyncio
async def test_redis_logging(redis_provider):
    """Test logging and retrieving from the Redis logging provider."""
    try:
        run_id = generate_run_id()
        await redis_provider.log(run_id, "key", "value")
        logs = await redis_provider.get_logs([run_id])
        assert len(logs) == 1
        assert logs[0]["key"] == "key"
        assert logs[0]["value"] == "value"
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_redis_multiple_log_entries(redis_provider):
    """Test logging multiple entries and retrieving them."""
    try:
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()
        run_id_2 = generate_run_id()

        entries = [
            (run_id_0, "key_0", "value_0"),
            (run_id_1, "key_1", "value_1"),
            (run_id_2, "key_2", "value_2"),
        ]
        for run_id, key, value in entries:
            await redis_provider.log(run_id, key, value)

        logs = await redis_provider.get_logs([run_id_0, run_id_1, run_id_2])
        assert len(logs) == 3

        # Match each returned log back to the entry that produced it.
        for log in logs:
            selected_entry = [
                entry for entry in entries if entry[0] == log["log_id"]
            ][0]
            assert log["log_id"] == selected_entry[0]
            assert log["key"] == selected_entry[1]
            assert log["value"] == selected_entry[2]
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_redis_log_retrieval_limit(redis_provider):
    """Test that retrieval scoped to a subset of run ids returns only that subset.

    Fixed docstring: no ``max_logs`` parameter is exercised here — the test
    logs 10 runs and fetches logs for the first 5 run ids only.
    """
    try:
        run_ids = []
        for i in range(10):  # Add 10 entries
            run_ids.append(generate_run_id())
            await redis_provider.log(run_ids[-1], f"key_{i}", f"value_{i}")

        logs = await redis_provider.get_logs(run_ids[0:5])
        assert len(logs) == 5  # Only the 5 requested runs are returned
    except asyncio.CancelledError:
        pass


@pytest.mark.asyncio
async def test_redis_specific_run_type_retrieval(redis_provider):
    """Test retrieving logs for a specific run type works correctly."""
    try:
        run_id_0 = generate_run_id()
        run_id_1 = generate_run_id()

        await redis_provider.log(
            run_id_0, "pipeline_type", "search", is_info_log=True
        )
        await redis_provider.log(run_id_0, "key_0", "value_0")
        await redis_provider.log(
            run_id_1, "pipeline_type", "rag", is_info_log=True
        )
        await redis_provider.log(run_id_1, "key_1", "value_1")

        run_info = await redis_provider.get_run_info(log_type_filter="search")
        logs = await redis_provider.get_logs([run.run_id for run in run_info])
        assert len(logs) == 1
        assert logs[0]["log_id"] == run_id_0
        assert logs[0]["key"] == "key_0"
        assert logs[0]["value"] == "value_0"
    except asyncio.CancelledError:
        pass
# --- end of R2R/tests/test_logging.py ---
import MagicMock, patch + +import pytest + +from r2r.parsers.media.docx_parser import DOCXParser +from r2r.parsers.media.pdf_parser import PDFParser +from r2r.parsers.media.ppt_parser import PPTParser +from r2r.parsers.structured.csv_parser import CSVParser +from r2r.parsers.structured.json_parser import JSONParser +from r2r.parsers.structured.xlsx_parser import XLSXParser +from r2r.parsers.text.html_parser import HTMLParser +from r2r.parsers.text.md_parser import MDParser +from r2r.parsers.text.text_parser import TextParser + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function") +def event_loop(): + loop = asyncio.get_event_loop_policy().new_event_loop() + yield loop + loop.close() + asyncio.set_event_loop(None) + + +@pytest.fixture(scope="session", autouse=True) +async def cleanup_tasks(): + yield + for task in asyncio.all_tasks(): + if task is not asyncio.current_task(): + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_text_parser(): + try: + parser = TextParser() + data = "Simple text" + async for result in parser.ingest(data): + assert result == "Simple text" + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_json_parser(): + try: + parser = JSONParser() + data = json.dumps({"key": "value", "null_key": None}) + async for result in parser.ingest(data): + assert "key: value" in result + assert "null_key" not in result + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_html_parser(): + try: + parser = HTMLParser() + data = "<html><body><p>Hello World</p></body></html>" + async for result in parser.ingest(data): + assert result.strip() == "Hello World" + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +@patch("pypdf.PdfReader") +async def test_pdf_parser(mock_pdf_reader): + try: + parser = 
PDFParser() + mock_pdf_reader.return_value.pages = [ + MagicMock(extract_text=lambda: "Page text") + ] + data = b"fake PDF data" + async for result in parser.ingest(data): + assert result == "Page text" + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +@patch("pptx.Presentation") +async def test_ppt_parser(mock_presentation): + try: + mock_slide = MagicMock() + mock_shape = MagicMock(text="Slide text") + mock_slide.shapes = [mock_shape] + mock_presentation.return_value.slides = [mock_slide] + parser = PPTParser() + data = b"fake PPT data" + async for result in parser.ingest(data): + assert result == "Slide text" + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +@patch("docx.Document") +async def test_docx_parser(mock_document): + try: + mock_paragraph = MagicMock(text="Paragraph text") + mock_document.return_value.paragraphs = [mock_paragraph] + parser = DOCXParser() + data = b"fake DOCX data" + async for result in parser.ingest(data): + assert result == "Paragraph text" + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_csv_parser(): + try: + parser = CSVParser() + data = "col1,col2\nvalue1,value2" + async for result in parser.ingest(data): + assert result == "col1, col2" + break + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +@patch("openpyxl.load_workbook") +async def test_xlsx_parser(mock_load_workbook): + try: + mock_sheet = MagicMock() + mock_sheet.iter_rows.return_value = [(1, 2), (3, 4)] + mock_workbook = MagicMock(worksheets=[mock_sheet]) + mock_load_workbook.return_value = mock_workbook + parser = XLSXParser() + data = b"fake XLSX data" + async for result in parser.ingest(data): + assert result == "1, 2" + break + except asyncio.CancelledError: + pass + + +@pytest.mark.asyncio +async def test_markdown_parser(): + try: + parser = MDParser() + data = "# Header\nContent" + async for result in parser.ingest(data): + assert result.strip() == "Header\nContent" + except 
asyncio.CancelledError: + pass diff --git a/R2R/tests/test_pipeline.py b/R2R/tests/test_pipeline.py new file mode 100755 index 00000000..1811de0f --- /dev/null +++ b/R2R/tests/test_pipeline.py @@ -0,0 +1,291 @@ +import asyncio +from typing import Any, AsyncGenerator + +import pytest + +from r2r import AsyncPipe, AsyncPipeline, PipeType + + +class MultiplierPipe(AsyncPipe): + def __init__(self, multiplier=1, delay=0, name="multiplier_pipe"): + super().__init__( + type=PipeType.OTHER, + config=self.PipeConfig(name=name), + ) + self.multiplier = multiplier + self.delay = delay + + async def _run_logic( + self, + input: AsyncGenerator[Any, None], + state, + run_id=None, + *args, + **kwargs, + ) -> AsyncGenerator[Any, None]: + async for item in input.message: + if self.delay > 0: + await asyncio.sleep(self.delay) # Simulate processing delay + if isinstance(item, list): + processed = [x * self.multiplier for x in item] + elif isinstance(item, int): + processed = item * self.multiplier + else: + raise ValueError(f"Unsupported type: {type(item)}") + yield processed + + +class FanOutPipe(AsyncPipe): + def __init__(self, multiplier=1, delay=0, name="fan_out_pipe"): + super().__init__( + type=PipeType.OTHER, + config=self.PipeConfig(name=name), + ) + self.multiplier = multiplier + self.delay = delay + + async def _run_logic( + self, + input: AsyncGenerator[Any, None], + state, + run_id=None, + *args, + **kwargs, + ) -> AsyncGenerator[Any, None]: + inputs = [] + async for item in input.message: + inputs.append(item) + for it in range(self.multiplier): + if self.delay > 0: + await asyncio.sleep(self.delay) + yield [(it + 1) * ele for ele in inputs] + + +class FanInPipe(AsyncPipe): + def __init__(self, delay=0, name="fan_in_pipe"): + super().__init__( + type=PipeType.OTHER, + config=self.PipeConfig(name=name), + ) + self.delay = delay + + async def _run_logic( + self, + input: AsyncGenerator[Any, None], + state, + run_id=None, + *args, + **kwargs, + ) -> AsyncGenerator[Any, 
None]: + total_sum = 0 + async for batch in input.message: + if self.delay > 0: + await asyncio.sleep(self.delay) # Simulate processing delay + total_sum += sum( + batch + ) # Assuming batch is iterable and contains numeric values + yield total_sum + + +@pytest.fixture +def pipe_factory(): + def create_pipe(type, **kwargs): + if type == "multiplier": + return MultiplierPipe(**kwargs) + elif type == "fan_out": + return FanOutPipe(**kwargs) + elif type == "fan_in": + return FanInPipe(**kwargs) + else: + raise ValueError("Unsupported pipe type") + + return create_pipe + + +@pytest.mark.asyncio +@pytest.mark.parametrize("multiplier, delay, name", [(2, 0.1, "pipe")]) +async def test_single_multiplier(pipe_factory, multiplier, delay, name): + pipe = pipe_factory( + "multiplier", multiplier=multiplier, delay=delay, name=name + ) + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(pipe) + + result = [] + for output in await pipeline.run(input_generator()): + result.append(output) + + expected_result = [i * multiplier for i in [1, 2, 3]] + assert ( + result == expected_result + ), "Pipeline output did not match expected multipliers" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b", + [(2, 0.1, "pipe_a", 2, 0.1, "pipe_b")], +) +async def test_double_multiplier( + pipe_factory, multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b +): + pipe_a = pipe_factory( + "multiplier", multiplier=multiplier_a, delay=delay_a, name=name_a + ) + pipe_b = pipe_factory( + "multiplier", multiplier=multiplier_b, delay=delay_b, name=name_b + ) + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(pipe_a) + pipeline.add_pipe(pipe_b) + + result = [] + for output in await pipeline.run(input_generator()): + result.append(output) + + expected_result = [i * multiplier_a * multiplier_b for i in [1, 2, 
3]] + assert ( + result == expected_result + ), "Pipeline output did not match expected multipliers" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("multiplier, delay, name", [(3, 0.1, "pipe")]) +async def test_fan_out(pipe_factory, multiplier, delay, name): + pipe = pipe_factory( + "fan_out", multiplier=multiplier, delay=delay, name=name + ) + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(pipe) + + result = [] + for output in await pipeline.run(input_generator()): + result.append(output) + + expected_result = [ + [i + 1, 2 * (i + 1), 3 * (i + 1)] for i in range(multiplier) + ] + assert ( + result == expected_result + ), "Pipeline output did not match expected multipliers" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b", + [ + (2, 0.1, "pipe_a", 2, 0.1, "pipe_b"), + (4, 0.1, "pipe_a", 3, 0.1, "pipe_b"), + ], +) +async def multiply_then_fan_out( + pipe_factory, multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b +): + pipe_a = pipe_factory( + "multiplier", multiplier=multiplier_a, delay=delay_a, name=name_a + ) + pipe_b = pipe_factory( + "fan_out", multiplier=multiplier_b, delay=delay_b, name=name_b + ) + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(pipe_a) + pipeline.add_pipe(pipe_b) + + result = [] + async for output in await pipeline.run(input_generator()): + result.append(output) + + expected_result = [[i * multiplier_a] async for i in input_generator()] + assert ( + result[0] == expected_result + ), "Pipeline output did not match expected multipliers" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("multiplier, delay, name", [(3, 0.1, "pipe")]) +async def test_fan_in_sum(pipe_factory, multiplier, delay, name): + # Create fan-out to generate multiple streams + fan_out_pipe = pipe_factory( + "fan_out", multiplier=multiplier, delay=delay, 
name=name + "_a" + ) + # Summing fan-in pipe + fan_in_sum_pipe = pipe_factory("fan_in", delay=delay, name=name + "_b") + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(fan_out_pipe) + pipeline.add_pipe(fan_in_sum_pipe) + + result = await pipeline.run(input_generator()) + + # Calculate expected results based on the multiplier and the sum of inputs + expected_result = sum( + [sum([j * i for j in [1, 2, 3]]) for i in range(1, multiplier + 1)] + ) + assert ( + result[0] == expected_result + ), "Pipeline output did not match expected sums" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b", + [ + (3, 0.1, "pipe_a", 2, 0.1, "pipe_b"), + (4, 0.1, "pipe_a", 3, 0.1, "pipe_b"), + ], +) +async def test_fan_out_then_multiply( + pipe_factory, multiplier_a, delay_a, name_a, multiplier_b, delay_b, name_b +): + pipe_a = pipe_factory( + "multiplier", multiplier=multiplier_a, delay=delay_a, name=name_a + ) + pipe_b = pipe_factory( + "fan_out", multiplier=multiplier_b, delay=delay_b, name=name_b + ) + pipe_c = pipe_factory("fan_in", delay=0.1, name="pipe_c") + + async def input_generator(): + for i in [1, 2, 3]: + yield i + + pipeline = AsyncPipeline() + pipeline.add_pipe(pipe_a) + pipeline.add_pipe(pipe_b) + pipeline.add_pipe(pipe_c) + + result = await pipeline.run(input_generator()) + + expected_result = sum( + [ + sum([j * i * multiplier_a for j in [1, 2, 3]]) + for i in range(1, multiplier_b + 1) + ] + ) + assert ( + result[0] == expected_result + ), "Pipeline output did not match expected multipliers" diff --git a/R2R/tests/test_vector_db.py b/R2R/tests/test_vector_db.py new file mode 100755 index 00000000..023145ce --- /dev/null +++ b/R2R/tests/test_vector_db.py @@ -0,0 +1,160 @@ +import random + +import pytest +from dotenv import load_dotenv + +from r2r import ( + Vector, + VectorDBConfig, + VectorDBProvider, + VectorEntry, + 
generate_id_from_label, +) +from r2r.providers.vector_dbs import PGVectorDB + +load_dotenv() + + +# Sample vector entries +def generate_random_vector_entry(id: str, dimension: int) -> VectorEntry: + vector = [random.random() for _ in range(dimension)] + metadata = {"key": f"value_{id}"} + return VectorEntry( + id=generate_id_from_label(id), vector=Vector(vector), metadata=metadata + ) + + +dimension = 3 +num_entries = 100 +sample_entries = [ + generate_random_vector_entry(f"id_{i}", dimension) + for i in range(num_entries) +] + + +# Fixture for PGVectorDB +@pytest.fixture +def pg_vector_db(): + random_collection_name = ( + f"test_collection_{random.randint(0, 1_000_000_000)}" + ) + config = VectorDBConfig.create( + provider="pgvector", vecs_collection=random_collection_name + ) + db = PGVectorDB(config) + db.initialize_collection(dimension=dimension) + yield db + # Teardown + db.vx.delete_collection( + db.config.extra_fields.get("vecs_collection", None) + ) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_get_metadatas(request, db_fixture): + db = request.getfixturevalue(db_fixture) + for entry in sample_entries: + db.upsert(entry) + + unique_metadatas = db.get_metadatas(metadata_fields=["key"]) + unique_values = set([ele["key"] for ele in unique_metadatas]) + assert len(unique_values) == num_entries + assert all(f"value_id_{i}" in unique_values for i in range(num_entries)) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_db_initialization(request, db_fixture): + db = request.getfixturevalue(db_fixture) + assert isinstance(db, VectorDBProvider) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_db_copy_and_search(request, db_fixture): + db = request.getfixturevalue(db_fixture) + db.upsert(sample_entries[0]) + results = db.search(query_vector=sample_entries[0].vector.data) + assert len(results) == 1 + assert results[0].id == sample_entries[0].id + assert results[0].score == pytest.approx(1.0, 
rel=1e-3) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_db_upsert_and_search(request, db_fixture): + db = request.getfixturevalue(db_fixture) + db.upsert(sample_entries[0]) + results = db.search(query_vector=sample_entries[0].vector.data) + assert len(results) == 1 + assert results[0].id == sample_entries[0].id + assert results[0].score == pytest.approx(1.0, rel=1e-3) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_imperfect_match(request, db_fixture): + db = request.getfixturevalue(db_fixture) + db.upsert(sample_entries[0]) + query_vector = [val + 0.1 for val in sample_entries[0].vector.data] + results = db.search(query_vector=query_vector) + assert len(results) == 1 + assert results[0].id == sample_entries[0].id + assert results[0].score < 1.0 + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_bulk_insert_and_search(request, db_fixture): + db = request.getfixturevalue(db_fixture) + for entry in sample_entries: + db.upsert(entry) + + query_vector = sample_entries[0].vector.data + results = db.search(query_vector=query_vector, limit=5) + assert len(results) == 5 + assert results[0].id == sample_entries[0].id + assert results[0].score == pytest.approx(1.0, rel=1e-3) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_search_with_filters(request, db_fixture): + db = request.getfixturevalue(db_fixture) + for entry in sample_entries: + db.upsert(entry) + + filtered_id = sample_entries[0].metadata["key"] + query_vector = sample_entries[0].vector.data + results = db.search( + query_vector=query_vector, filters={"key": filtered_id} + ) + assert len(results) == 1 + assert results[0].id == sample_entries[0].id + assert results[0].metadata["key"] == filtered_id + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_delete_by_metadata(request, db_fixture): + db = request.getfixturevalue(db_fixture) + for entry in sample_entries: + db.upsert(entry) + + key_to_delete = 
sample_entries[0].metadata["key"] + db.delete_by_metadata( + metadata_fields=["key"], metadata_values=[key_to_delete] + ) + + results = db.search(query_vector=sample_entries[0].vector.data) + assert all(result.metadata["key"] != key_to_delete for result in results) + + +@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) +def test_upsert(request, db_fixture): + db = request.getfixturevalue(db_fixture) + db.upsert(sample_entries[0]) + modified_entry = VectorEntry( + id=sample_entries[0].id, + vector=Vector([0.5, 0.5, 0.5]), + metadata={"key": "new_value"}, + ) + db.upsert(modified_entry) + + results = db.search(query_vector=[0.5, 0.5, 0.5]) + assert len(results) == 1 + assert results[0].id == sample_entries[0].id + assert results[0].metadata["key"] == "new_value" |