about summary refs log tree commit diff
path: root/R2R/r2r/parsers/media/movie_parser.py
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /R2R/r2r/parsers/media/movie_parser.py
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to 'R2R/r2r/parsers/media/movie_parser.py')
-rwxr-xr-x R2R/r2r/parsers/media/movie_parser.py 108
1 files changed, 108 insertions, 0 deletions
diff --git a/R2R/r2r/parsers/media/movie_parser.py b/R2R/r2r/parsers/media/movie_parser.py
new file mode 100755
index 00000000..c00b80d9
--- /dev/null
+++ b/R2R/r2r/parsers/media/movie_parser.py
@@ -0,0 +1,108 @@
import base64
import os
import tempfile
from typing import AsyncGenerator

from r2r.base.parsers.base_parser import AsyncParser
from r2r.parsers.media.openai_helpers import (
    process_audio_with_openai,
    process_frame_with_openai,
)


class MovieParser(AsyncParser):
    """Turn a movie into text: frame descriptions plus an audio transcript.

    Frames are sampled with OpenCV and passed one by one to
    ``process_frame_with_openai``; when the movie has an audio track it is
    extracted with moviepy and passed to ``process_audio_with_openai``.
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        max_tokens: int = 2048,
        seconds_per_frame: int = 2,
        max_frames: int = 10,
    ):
        """Load the optional media dependencies and record the configuration.

        Args:
            model: Stored as ``self.model``. This class does not itself
                forward it to the OpenAI helpers.
            max_tokens: Stored as ``self.max_tokens``. This class does not
                itself forward it to the OpenAI helpers.
            seconds_per_frame: Desired spacing, in seconds of video, between
                two sampled frames.
            max_frames: Upper bound on the number of sampled frames. A falsy
                value (``0`` / ``None``) disables the bound.

        Raises:
            ValueError: If ``cv2`` or ``moviepy.editor`` cannot be imported,
                or if the ``OPENAI_API_KEY`` environment variable is unset
                or empty.
        """
        # The heavy media libraries are imported lazily so that merely
        # importing this module does not require them to be installed.
        try:
            import cv2
        except ImportError as err:
            raise ValueError(
                "Error, `opencv-python` is required to run `MovieParser`. Please install it using `pip install opencv-python`."
            ) from err
        self.cv2 = cv2

        try:
            import moviepy.editor as mp
        except ImportError as err:
            raise ValueError(
                "Error, `moviepy` is required to run `MovieParser`. Please install it using `pip install moviepy`."
            ) from err
        self.mp = mp

        self.model = model
        self.max_tokens = max_tokens
        self.seconds_per_frame = seconds_per_frame
        self.max_frames = max_frames
        self.openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not self.openai_api_key:
            raise ValueError(
                "Error, environment variable `OPENAI_API_KEY` is required to run `MovieParser`."
            )

    async def ingest(self, data: bytes) -> AsyncGenerator[str, None]:
        """Ingest movie data and yield a description.

        Args:
            data: Raw bytes of the movie file.

        Yields:
            The result of ``process_frame_with_openai`` for each sampled
            frame, followed by the result of ``process_audio_with_openai``
            when the movie has an audio track.
        """
        audio_file = None
        # Each call works in its own scratch directory. A fixed file name in
        # the current working directory would let two concurrent ingests
        # overwrite or delete each other's video.
        with tempfile.TemporaryDirectory(prefix="movie_parser_") as workdir:
            temp_video_path = os.path.join(workdir, "temp_movie.mp4")
            with open(temp_video_path, "wb") as f:
                f.write(data)
            try:
                raw_frames, audio_file = self.process_video(temp_video_path)
                for frame in raw_frames:
                    frame_text = process_frame_with_openai(
                        frame, self.openai_api_key
                    )
                    yield frame_text

                if audio_file:
                    transcription_text = process_audio_with_openai(
                        audio_file, self.openai_api_key
                    )
                    yield transcription_text
            finally:
                # Close the handle before the scratch directory is removed:
                # a WAV that could not be unlinked while open (Windows) is
                # then deleted together with the directory.
                if audio_file is not None:
                    audio_file.close()

    def process_video(self, video_path):
        """Sample frames from a video file and extract its audio track.

        Args:
            video_path: Path of the video file to read.

        Returns:
            A ``(frames, audio_file)`` tuple. ``frames`` is a list of
            base64-encoded JPEG strings. ``audio_file`` is a binary file
            object opened on a 16 kHz PCM WAV rendering of the audio track,
            or ``None`` when the clip has no audio. The caller is
            responsible for closing it.
        """
        base64_frames = []
        base_video_path, _ = os.path.splitext(video_path)

        video = self.cv2.VideoCapture(video_path)
        try:
            total_frames = int(video.get(self.cv2.CAP_PROP_FRAME_COUNT))
            fps = video.get(self.cv2.CAP_PROP_FPS)
            # Clamp to one frame: a reported fps of 0 (or a product below 1)
            # would otherwise give a zero step, i.e. a division by zero
            # below or a loop that never advances.
            frames_to_skip = max(int(fps * self.seconds_per_frame), 1)

            # Widen the step when the regular spacing would produce more
            # than max_frames samples, so the samples span the whole video.
            if (
                self.max_frames
                and self.max_frames < total_frames / frames_to_skip
            ):
                frames_to_skip = max(total_frames // self.max_frames, 1)

            curr_frame = 0
            frame_count = 0
            while curr_frame < total_frames - 1 and (
                not self.max_frames or frame_count < self.max_frames
            ):
                video.set(self.cv2.CAP_PROP_POS_FRAMES, curr_frame)
                success, frame = video.read()
                if not success:
                    break
                _, buffer = self.cv2.imencode(".jpg", frame)
                base64_frames.append(
                    base64.b64encode(buffer).decode("utf-8")
                )
                curr_frame += frames_to_skip
                frame_count += 1
        finally:
            # Release the capture even if reading or encoding raised.
            video.release()

        audio_path = f"{base_video_path}.wav"
        audio_file = None
        with self.mp.VideoFileClip(video_path) as clip:
            if clip.audio is not None:
                clip.audio.write_audiofile(
                    audio_path, codec="pcm_s16le", fps=16000
                )
                audio_file = open(audio_path, "rb")
                # Unlink right away: on POSIX the open handle stays
                # readable and nothing is left on disk.
                try:
                    os.remove(audio_path)
                except PermissionError:
                    # Windows refuses to delete a file that is still open;
                    # leave it for the caller to remove after closing.
                    pass

        return base64_frames, audio_file