vision-agent 0.2.125__tar.gz → 0.2.127__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. {vision_agent-0.2.125 → vision_agent-0.2.127}/PKG-INFO +3 -2
  2. {vision_agent-0.2.125 → vision_agent-0.2.127}/pyproject.toml +3 -4
  3. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/vision_agent_coder.py +1 -1
  4. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/tools.py +15 -14
  5. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/__init__.py +1 -1
  6. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/image_utils.py +0 -20
  7. vision_agent-0.2.127/vision_agent/utils/video.py +126 -0
  8. vision_agent-0.2.125/vision_agent/utils/video.py +0 -215
  9. {vision_agent-0.2.125 → vision_agent-0.2.127}/LICENSE +0 -0
  10. {vision_agent-0.2.125 → vision_agent-0.2.127}/README.md +0 -0
  11. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/__init__.py +0 -0
  12. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/__init__.py +0 -0
  13. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/agent.py +0 -0
  14. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/agent_utils.py +0 -0
  15. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/vision_agent.py +0 -0
  16. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  17. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/agent/vision_agent_prompts.py +0 -0
  18. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/clients/__init__.py +0 -0
  19. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/clients/http.py +0 -0
  20. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/clients/landing_public_api.py +0 -0
  21. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/fonts/__init__.py +0 -0
  22. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  23. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/lmm/__init__.py +0 -0
  24. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/lmm/lmm.py +0 -0
  25. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/lmm/types.py +0 -0
  26. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/__init__.py +0 -0
  27. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/meta_tools.py +0 -0
  28. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/prompts.py +0 -0
  29. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/tool_utils.py +0 -0
  30. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/tools/tools_types.py +0 -0
  31. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/exceptions.py +0 -0
  32. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.125 → vision_agent-0.2.127}/vision_agent/utils/type_defs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.125
3
+ Version: 0.2.127
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -10,11 +10,12 @@ Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
12
  Requires-Dist: anthropic (>=0.31.0,<0.32.0)
13
+ Requires-Dist: av (>=11.0.0,<12.0.0)
13
14
  Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
14
15
  Requires-Dist: e2b-code-interpreter (==0.0.11a37)
16
+ Requires-Dist: eva-decord (>=0.6.1,<0.7.0)
15
17
  Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
16
18
  Requires-Dist: langsmith (>=0.1.58,<0.2.0)
17
- Requires-Dist: moviepy (>=1.0.0,<2.0.0)
18
19
  Requires-Dist: nbclient (>=0.10.0,<0.11.0)
19
20
  Requires-Dist: nbformat (>=5.10.4,<6.0.0)
20
21
  Requires-Dist: numpy (>=1.21.0,<2.0.0)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.125"
7
+ version = "0.2.127"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -25,7 +25,6 @@ tqdm = ">=4.64.0,<5.0.0"
25
25
  pandas = "2.*"
26
26
  openai = "1.*"
27
27
  typing_extensions = "4.*"
28
- moviepy = "1.*"
29
28
  opencv-python = "4.*"
30
29
  tabulate = "^0.9.0"
31
30
  pydantic-settings = "^2.2.1"
@@ -42,6 +41,8 @@ pillow-heif = "^0.16.0"
42
41
  pytube = "15.0.0"
43
42
  anthropic = "^0.31.0"
44
43
  pydantic = "2.7.4"
44
+ eva-decord = "^0.6.1"
45
+ av = "^11.0.0"
45
46
 
46
47
  [tool.poetry.group.dev.dependencies]
47
48
  autoflake = "1.*"
@@ -100,10 +101,8 @@ show_error_codes = true
100
101
  ignore_missing_imports = true
101
102
  module = [
102
103
  "cv2.*",
103
- "faiss.*",
104
104
  "openai.*",
105
105
  "sentence_transformers.*",
106
- "moviepy.*",
107
106
  "e2b_code_interpreter.*",
108
107
  "e2b.*"
109
108
  ]
@@ -173,7 +173,7 @@ def pick_plan(
173
173
 
174
174
  if verbosity == 2:
175
175
  _print_code("Initial code and tests:", code)
176
- _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
176
+ _LOGGER.info(f"Initial code execution result:\n{tool_output_str}")
177
177
 
178
178
  log_progress(
179
179
  {
@@ -12,7 +12,6 @@ from uuid import UUID
12
12
  import cv2
13
13
  import numpy as np
14
14
  import requests
15
- from moviepy.editor import ImageSequenceClip
16
15
  from PIL import Image, ImageDraw, ImageEnhance, ImageFont
17
16
  from pillow_heif import register_heif_opener # type: ignore
18
17
  from pytube import YouTube # type: ignore
@@ -35,7 +34,6 @@ from vision_agent.tools.tools_types import (
35
34
  ODResponseData,
36
35
  PromptTask,
37
36
  )
38
- from vision_agent.utils import extract_frames_from_video
39
37
  from vision_agent.utils.exceptions import FineTuneModelIsNotReady
40
38
  from vision_agent.utils.execute import FileSerializer, MimeType
41
39
  from vision_agent.utils.image_utils import (
@@ -44,13 +42,17 @@ from vision_agent.utils.image_utils import (
44
42
  convert_to_b64,
45
43
  denormalize_bbox,
46
44
  encode_image_bytes,
47
- frames_to_bytes,
48
45
  get_image_size,
49
46
  normalize_bbox,
50
47
  numpy_to_bytes,
51
48
  rle_decode,
52
49
  rle_decode_array,
53
50
  )
51
+ from vision_agent.utils.video import (
52
+ extract_frames_from_video,
53
+ frames_to_bytes,
54
+ video_writer,
55
+ )
54
56
 
55
57
  register_heif_opener()
56
58
 
@@ -1513,17 +1515,16 @@ def save_video(
1513
1515
  "/tmp/tmpvideo123.mp4"
1514
1516
  """
1515
1517
  if fps <= 0:
1516
- _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
1517
- fps = 4
1518
- with ImageSequenceClip(frames, fps=fps) as video:
1519
- if output_video_path:
1520
- f = open(output_video_path, "wb")
1521
- else:
1522
- f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) # type: ignore
1523
- video.write_videofile(f.name, codec="libx264")
1524
- f.close()
1525
- _save_video_to_result(f.name)
1526
- return f.name
1518
+ raise ValueError(f"fps must be greater than 0 got {fps}")
1519
+
1520
+ if output_video_path is None:
1521
+ output_video_path = tempfile.NamedTemporaryFile(
1522
+ delete=False, suffix=".mp4"
1523
+ ).name
1524
+
1525
+ output_video_path = video_writer(frames, fps, output_video_path)
1526
+ _save_video_to_result(output_video_path)
1527
+ return output_video_path
1527
1528
 
1528
1529
 
1529
1530
  def _save_video_to_result(video_uri: str) -> None:
@@ -7,4 +7,4 @@ from .execute import (
7
7
  Result,
8
8
  )
9
9
  from .sim import AzureSim, OllamaSim, Sim, load_sim, merge_sim
10
- from .video import extract_frames_from_video
10
+ from .video import extract_frames_from_video, video_writer
@@ -2,14 +2,12 @@
2
2
 
3
3
  import base64
4
4
  import io
5
- import tempfile
6
5
  from importlib import resources
7
6
  from io import BytesIO
8
7
  from pathlib import Path
9
8
  from typing import Dict, List, Tuple, Union
10
9
 
11
10
  import numpy as np
12
- from moviepy.editor import ImageSequenceClip
13
11
  from PIL import Image, ImageDraw, ImageFont
14
12
  from PIL.Image import Image as ImageType
15
13
 
@@ -90,24 +88,6 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
90
88
  return binary_mask
91
89
 
92
90
 
93
- def frames_to_bytes(
94
- frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
95
- ) -> bytes:
96
- r"""Convert a list of frames to a video file encoded into a byte string.
97
-
98
- Parameters:
99
- frames: the list of frames
100
- fps: the frames per second of the video
101
- file_ext: the file extension of the video file
102
- """
103
- with tempfile.NamedTemporaryFile(delete=True) as temp_file:
104
- clip = ImageSequenceClip(frames, fps=fps)
105
- clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
106
- with open(temp_file.name + f".{file_ext}", "rb") as f:
107
- buffer_bytes = f.read()
108
- return buffer_bytes
109
-
110
-
111
91
  def b64_to_pil(b64_str: str) -> ImageType:
112
92
  r"""Convert a base64 string to a PIL Image.
113
93
 
@@ -0,0 +1,126 @@
1
+ import base64
2
+ import logging
3
+ import tempfile
4
+ from functools import lru_cache
5
+ from typing import List, Optional, Tuple
6
+
7
+ import cv2
8
+ import av # type: ignore
9
+ import numpy as np
10
+ from decord import VideoReader # type: ignore
11
+
12
+ _LOGGER = logging.getLogger(__name__)
13
+ # NOTE: the maximum clip length (in seconds) for frame extraction is no longer defined in this module
14
+
15
+
16
+ def play_video(video_base64: str) -> None:
17
+ """Play a video file"""
18
+ video_data = base64.b64decode(video_base64)
19
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
20
+ temp_video.write(video_data)
21
+ temp_video_path = temp_video.name
22
+
23
+ cap = cv2.VideoCapture(temp_video_path)
24
+ if not cap.isOpened():
25
+ _LOGGER.error("Error: Could not open video.")
26
+ return
27
+
28
+ # Display the first frame and wait for any key press to start the video
29
+ ret, frame = cap.read()
30
+ if ret:
31
+ cv2.imshow("Video Player", frame)
32
+ _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
33
+ cv2.waitKey(0) # Wait for any key press
34
+
35
+ while cap.isOpened():
36
+ ret, frame = cap.read()
37
+ if not ret:
38
+ break
39
+ cv2.imshow("Video Player", frame)
40
+ # Press 'q' to exit the video
41
+ if cv2.waitKey(200) & 0xFF == ord("q"):
42
+ break
43
+ cap.release()
44
+ cv2.destroyAllWindows()
45
+
46
+
47
+ def _resize_frame(frame: np.ndarray) -> np.ndarray:
48
+ height, width = frame.shape[:2]
49
+ new_width = width - (width % 2)
50
+ new_height = height - (height % 2)
51
+ return cv2.resize(frame, (new_width, new_height))
52
+
53
+
54
+ def video_writer(
55
+ frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
56
+ ) -> str:
57
+ if filename is None:
58
+ filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
59
+ container = av.open(filename, mode="w")
60
+ stream = container.add_stream("h264", rate=fps)
61
+ height, width = frames[0].shape[:2]
62
+ stream.height = height - (height % 2)
63
+ stream.width = width - (width % 2)
64
+ stream.pix_fmt = "yuv420p"
65
+ for frame in frames:
66
+ # Remove the alpha channel (convert RGBA to RGB)
67
+ frame_rgb = frame[:, :, :3]
68
+ # Resize the frame to make dimensions divisible by 2
69
+ frame_rgb = _resize_frame(frame_rgb)
70
+ av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
71
+ for packet in stream.encode(av_frame):
72
+ container.mux(packet)
73
+
74
+ for packet in stream.encode():
75
+ container.mux(packet)
76
+ container.close()
77
+ return filename
78
+
79
+
80
+ def frames_to_bytes(
81
+ frames: List[np.ndarray], fps: float = 10, file_ext: str = ".mp4"
82
+ ) -> bytes:
83
+ r"""Convert a list of frames to a video file encoded into a byte string.
84
+
85
+ Parameters:
86
+ frames: the list of frames
87
+ fps: the frames per second of the video
88
+ file_ext: the file extension of the video file
89
+ """
90
+ with tempfile.NamedTemporaryFile(delete=True, suffix=file_ext) as temp_file:
91
+ video_writer(frames, fps, temp_file.name)
92
+
93
+ with open(temp_file.name, "rb") as f:
94
+ buffer_bytes = f.read()
95
+ return buffer_bytes
96
+
97
+
98
+ # WARNING: this cache is a little dangerous: if the underlying video contents
99
+ # change but the filename remains the same, it will return the old file contents.
100
+ # For vision agent, however, it's unlikely that the file contents change while the
101
+ # file name stays the same, and the time savings are very large.
102
+ @lru_cache(maxsize=8)
103
+ def extract_frames_from_video(
104
+ video_uri: str, fps: float = 1.0
105
+ ) -> List[Tuple[np.ndarray, float]]:
106
+ """Extract frames from a video
107
+
108
+ Parameters:
109
+ video_uri (str): the path to the video file or a video file url
110
+ fps (float): the frame rate per second to extract the frames
111
+
112
+ Returns:
113
+ a list of tuples containing the extracted frame and the timestamp in seconds.
114
+ E.g. [(frame1, 0.0), (frame2, 0.5), ...]. The timestamp is the time in seconds
115
+ from the start of the video. E.g. 12.125 means 12.125 seconds from the start of
116
+ the video. The frames are sorted by the timestamp in ascending order.
117
+ """
118
+ vr = VideoReader(video_uri)
119
+ orig_fps = vr.get_avg_fps()
120
+ if fps > orig_fps:
121
+ fps = orig_fps
122
+
123
+ s = orig_fps / fps
124
+ samples = [(int(i * s), int(i * s) / orig_fps) for i in range(int(len(vr) / s))]
125
+ frames = vr.get_batch([s[0] for s in samples]).asnumpy()
126
+ return [(frames[i, :, :, :], samples[i][1]) for i in range(len(samples))]
@@ -1,215 +0,0 @@
1
- import base64
2
- import logging
3
- import math
4
- import os
5
- import tempfile
6
- from concurrent.futures import ProcessPoolExecutor, as_completed
7
- from typing import List, Tuple, cast
8
-
9
- import cv2
10
- import numpy as np
11
- from moviepy.video.io.VideoFileClip import VideoFileClip
12
- from tqdm import tqdm
13
-
14
- _LOGGER = logging.getLogger(__name__)
15
- # The maximum length of the clip to extract frames from, in seconds
16
- _CLIP_LENGTH = 30.0
17
-
18
-
19
- def play_video(video_base64: str) -> None:
20
- """Play a video file"""
21
- video_data = base64.b64decode(video_base64)
22
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
23
- temp_video.write(video_data)
24
- temp_video_path = temp_video.name
25
-
26
- cap = cv2.VideoCapture(temp_video_path)
27
- if not cap.isOpened():
28
- _LOGGER.error("Error: Could not open video.")
29
- return
30
-
31
- # Display the first frame and wait for any key press to start the video
32
- ret, frame = cap.read()
33
- if ret:
34
- cv2.imshow("Video Player", frame)
35
- _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
36
- cv2.waitKey(0) # Wait for any key press
37
-
38
- while cap.isOpened():
39
- ret, frame = cap.read()
40
- if not ret:
41
- break
42
- cv2.imshow("Video Player", frame)
43
- # Press 'q' to exit the video
44
- if cv2.waitKey(200) & 0xFF == ord("q"):
45
- break
46
- cap.release()
47
- cv2.destroyAllWindows()
48
-
49
-
50
- def extract_frames_from_video(
51
- video_uri: str, fps: float = 0.5, motion_detection_threshold: float = 0.0
52
- ) -> List[Tuple[np.ndarray, float]]:
53
- """Extract frames from a video
54
-
55
- Parameters:
56
- video_uri: the path to the video file or a video file url
57
- fps: the frame rate per second to extract the frames
58
- motion_detection_threshold: The threshold to detect motion between
59
- changes/frames. A value between 0-1, which represents the percentage change
60
- required for the frames to be considered in motion. For example, a lower
61
- value means more frames will be extracted. A non-positive value will disable
62
- motion detection and extract all frames.
63
-
64
- Returns:
65
- a list of tuples containing the extracted frame and the timestamp in seconds.
66
- E.g. [(frame1, 0.0), (frame2, 0.5), ...]. The timestamp is the time in seconds
67
- from the start of the video. E.g. 12.125 means 12.125 seconds from the start of
68
- the video. The frames are sorted by the timestamp in ascending order.
69
- """
70
- with VideoFileClip(video_uri) as video:
71
- video_duration: float = video.duration
72
- num_workers = os.cpu_count()
73
- clip_length: float = min(video_duration, _CLIP_LENGTH)
74
- start_times = list(range(0, math.ceil(video_duration), math.ceil(clip_length)))
75
- assert start_times, f"No frames to extract from the input video: {video_uri}"
76
- segment_args = [
77
- {
78
- "video_uri": video_uri,
79
- "start": start,
80
- "end": (
81
- start + clip_length if i < len(start_times) - 1 else video_duration
82
- ),
83
- "fps": fps,
84
- "motion_detection_threshold": motion_detection_threshold,
85
- }
86
- for i, start in enumerate(start_times)
87
- ]
88
- if (
89
- cast(float, segment_args[-1]["end"])
90
- - cast(float, segment_args[-1]["start"])
91
- < 1
92
- ):
93
- # If the last segment is less than 1s, merge it with the previous segment
94
- # This is to avoid the failure of the last segment extraction
95
- assert (
96
- len(segment_args) > 1
97
- ), "Development bug - Expect at least 2 segments."
98
- segment_args[-2]["end"] = video_duration
99
- segment_args.pop(-1)
100
- _LOGGER.info(
101
- f"""Created {len(segment_args)} segments from the input video {video_uri} of length {video.duration}s, with clip size: {clip_length}s and {num_workers} workers.
102
- Segments: {segment_args}
103
- """
104
- )
105
- frames = []
106
- with tqdm(total=len(segment_args)) as pbar:
107
- with ProcessPoolExecutor(max_workers=num_workers) as executor:
108
- futures = [
109
- executor.submit(_extract_frames_by_clip, **kwargs) # type: ignore
110
- for kwargs in segment_args
111
- ]
112
- for future in as_completed(futures):
113
- result = future.result()
114
- frames.extend(result)
115
- pbar.update(1)
116
- frames.sort(key=lambda x: x[1])
117
- _LOGGER.info(f"Extracted {len(frames)} frames from video {video_uri}")
118
- return frames
119
-
120
-
121
- def _extract_frames_by_clip(
122
- video_uri: str,
123
- start: int = 0,
124
- end: float = -1,
125
- fps: int = 2,
126
- motion_detection_threshold: float = 0.06,
127
- ) -> List[Tuple[np.ndarray, float]]:
128
- """Extract frames from a video clip with start and end time in seconds.
129
-
130
- Parameters:
131
- video_uri: the path to the video file or a video file url
132
- start: the start time (in seconds) of the clip to extract
133
- end: the end time (in seconds, up to millisecond level precision) of the clip to extract, if -1, extract the whole video
134
- fps: the frame rate to extract the frames
135
- motion_detection_threshold: the threshold to detect the motion between frames
136
- """
137
- with VideoFileClip(video_uri) as video:
138
- source_fps = video.fps
139
- if end <= 0:
140
- end = video.duration
141
- _LOGGER.info(
142
- f"Extracting frames from video {video_uri} ({video.duration}s) with start={start}s and end={end}s"
143
- )
144
- clip = video.subclip(start, end)
145
- processable_frames = int(clip.duration * fps)
146
- _LOGGER.info(
147
- f"Extracting frames from video clip of length {clip.duration}s with FPS={fps} and start_time={start}s. Total number of frames in clip: {processable_frames}"
148
- )
149
- frames = []
150
- total_count, skipped_count = 0, 0
151
- prev_processed_frame = None
152
- pbar = tqdm(
153
- total=processable_frames, desc=f"Extracting frames from clip {start}-{end}"
154
- )
155
- for i, frame in enumerate(clip.iter_frames(fps=fps, dtype="uint8")):
156
- total_count += 1
157
- pbar.update(1)
158
- if motion_detection_threshold > 0:
159
- curr_processed_frame = _preprocess_frame(frame)
160
- # Skip the frame if it is similar to the previous one
161
- if prev_processed_frame is not None and _similar_frame(
162
- prev_processed_frame,
163
- curr_processed_frame,
164
- threshold=motion_detection_threshold,
165
- ):
166
- skipped_count += 1
167
- continue
168
- prev_processed_frame = curr_processed_frame
169
- ts = round(clip.reader.pos / source_fps, 3)
170
- frames.append((frame, ts))
171
-
172
- _LOGGER.info(
173
- f"""Finished!
174
- Frames extracted: {len(frames)}
175
- Extracted frame timestamp: {[f[1] for f in frames]}
176
- Total processed frames: {total_count}
177
- Skipped frames: {skipped_count}
178
- Scan FPS: {fps}
179
- Clip start time: {start}s, {clip.pos}
180
- Clip end time: {end}s
181
- Clip duration: {clip.duration}s
182
- Clip total frames: {clip.duration * source_fps}
183
- Video duration: {video.duration}s
184
- Video FPS: {video.fps}
185
- Video total frames: {video.reader.nframes}"""
186
- )
187
- return frames
188
-
189
-
190
- def _preprocess_frame(frame: np.ndarray) -> np.ndarray:
191
- # Convert to grayscale
192
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
193
- frame = cv2.GaussianBlur(src=frame, ksize=(5, 5), sigmaX=0)
194
- return frame
195
-
196
-
197
- def _similar_frame(
198
- prev_frame: np.ndarray, curr_frame: np.ndarray, threshold: float
199
- ) -> bool:
200
- """Detect two frames are similar or not
201
-
202
- Parameters:
203
- threshold: similarity threshold, a value between 0-1, the percentage change that is considered a different frame.
204
- """
205
- # calculate difference and update previous frame TODO: don't assume the processed image is cached
206
- diff_frame = cv2.absdiff(src1=prev_frame, src2=curr_frame)
207
- # Only take different areas that are different enough (>20 / 255)
208
- thresh_frame = cv2.threshold(
209
- src=diff_frame, thresh=20, maxval=255, type=cv2.THRESH_BINARY
210
- )[1]
211
- change_percentage = cv2.countNonZero(thresh_frame) / (
212
- curr_frame.shape[0] * curr_frame.shape[1]
213
- )
214
- _LOGGER.debug(f"Image diff: {change_percentage}")
215
- return change_percentage < threshold
File without changes
File without changes