R2R/r2r/parsers/media/audio_parser.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

import os
import tempfile
from typing import AsyncGenerator

from r2r.base.parsers.base_parser import AsyncParser
from r2r.parsers.media.openai_helpers import process_audio_with_openai


class AudioParser(AsyncParser[bytes]):
    """Parser that turns raw audio bytes into a text transcription.

    The audio is written to a temporary ``.wav`` file and handed to
    ``process_audio_with_openai`` together with the OpenAI API key read
    from the environment at construction time.
    """

    def __init__(
        self, api_base: str = "https://api.openai.com/v1/audio/transcriptions"
    ):
        """Store the endpoint URL and load the OpenAI API key.

        Args:
            api_base: URL of the transcription endpoint. It is stored on the
                instance; ``ingest`` itself does not read it.

        Raises:
            ValueError: If the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        self.api_base = api_base
        self.openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError(
                "Error, environment variable `OPENAI_API_KEY` is required to run `AudioParser`."
            )

    async def ingest(self, data: bytes) -> AsyncGenerator[str, None]:
        """Transcribe ``data`` and yield the resulting text once.

        Args:
            data: Raw audio bytes; they are written verbatim to a temporary
                file with a ``.wav`` suffix.

        Yields:
            The value returned by ``process_audio_with_openai``.
        """
        # A unique temp file per call: a fixed file name in the working
        # directory would let concurrent ingests clobber or delete each
        # other's audio.
        fd, temp_audio_path = tempfile.mkstemp(suffix=".wav")
        try:
            with os.fdopen(fd, "wb") as audio_out:
                audio_out.write(data)
            # Context manager guarantees the read handle is closed before the
            # file is removed (an open handle blocks deletion on Windows).
            with open(temp_audio_path, "rb") as audio_in:
                transcription_text = process_audio_with_openai(
                    audio_in, self.openai_api_key
                )
        finally:
            # Clean up before yielding so removal does not depend on the
            # consumer exhausting or closing this generator.
            os.remove(temp_audio_path)
        yield transcription_text