vision-agent 0.2.124__tar.gz → 0.2.126__tar.gz

Files changed (34)
  1. {vision_agent-0.2.124 → vision_agent-0.2.126}/PKG-INFO +2 -2
  2. {vision_agent-0.2.124 → vision_agent-0.2.126}/pyproject.toml +2 -4
  3. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/vision_agent_coder.py +1 -1
  4. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/tools.py +15 -16
  5. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/__init__.py +1 -1
  6. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/image_utils.py +0 -20
  7. vision_agent-0.2.126/vision_agent/utils/video.py +107 -0
  8. vision_agent-0.2.124/vision_agent/utils/video.py +0 -215
  9. {vision_agent-0.2.124 → vision_agent-0.2.126}/LICENSE +0 -0
  10. {vision_agent-0.2.124 → vision_agent-0.2.126}/README.md +0 -0
  11. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/__init__.py +0 -0
  12. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/__init__.py +0 -0
  13. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/agent.py +0 -0
  14. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/agent_utils.py +0 -0
  15. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/vision_agent.py +0 -0
  16. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  17. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/vision_agent_prompts.py +0 -0
  18. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/clients/__init__.py +0 -0
  19. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/clients/http.py +0 -0
  20. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/clients/landing_public_api.py +0 -0
  21. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/fonts/__init__.py +0 -0
  22. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  23. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/lmm/__init__.py +0 -0
  24. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/lmm/lmm.py +0 -0
  25. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/lmm/types.py +0 -0
  26. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/__init__.py +0 -0
  27. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/meta_tools.py +0 -0
  28. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/prompts.py +0 -0
  29. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/tool_utils.py +0 -0
  30. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/tools_types.py +0 -0
  31. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/exceptions.py +0 -0
  32. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/type_defs.py +0 -0
{vision_agent-0.2.124 → vision_agent-0.2.126}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.124
+ Version: 0.2.126
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
@@ -12,9 +12,9 @@ Classifier: Programming Language :: Python :: 3.11
  Requires-Dist: anthropic (>=0.31.0,<0.32.0)
  Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
  Requires-Dist: e2b-code-interpreter (==0.0.11a37)
+ Requires-Dist: eva-decord (>=0.6.1,<0.7.0)
  Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
  Requires-Dist: langsmith (>=0.1.58,<0.2.0)
- Requires-Dist: moviepy (>=1.0.0,<2.0.0)
  Requires-Dist: nbclient (>=0.10.0,<0.11.0)
  Requires-Dist: nbformat (>=5.10.4,<6.0.0)
  Requires-Dist: numpy (>=1.21.0,<2.0.0)
{vision_agent-0.2.124 → vision_agent-0.2.126}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
  [tool.poetry]
  name = "vision-agent"
- version = "0.2.124"
+ version = "0.2.126"
  description = "Toolset for Vision Agent"
  authors = ["Landing AI <dev@landing.ai>"]
  readme = "README.md"
@@ -25,7 +25,6 @@ tqdm = ">=4.64.0,<5.0.0"
  pandas = "2.*"
  openai = "1.*"
  typing_extensions = "4.*"
- moviepy = "1.*"
  opencv-python = "4.*"
  tabulate = "^0.9.0"
  pydantic-settings = "^2.2.1"
@@ -42,6 +41,7 @@ pillow-heif = "^0.16.0"
  pytube = "15.0.0"
  anthropic = "^0.31.0"
  pydantic = "2.7.4"
+ eva-decord = "^0.6.1"
 
  [tool.poetry.group.dev.dependencies]
  autoflake = "1.*"
@@ -100,10 +100,8 @@ show_error_codes = true
  ignore_missing_imports = true
  module = [
      "cv2.*",
-     "faiss.*",
      "openai.*",
      "sentence_transformers.*",
-     "moviepy.*",
      "e2b_code_interpreter.*",
      "e2b.*"
  ]
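
Net effect of the dependency changes: moviepy is removed and eva-decord (imported as decord) takes over video decoding, with OpenCV handling encoding. A minimal sketch of the decord calls the new vision_agent/utils/video.py relies on, assuming eva-decord is installed and a local clip named sample.mp4 exists:

    from decord import VideoReader

    vr = VideoReader("sample.mp4")           # open the clip
    print(len(vr), vr.get_avg_fps())         # total frame count and average FPS
    batch = vr.get_batch([0, 10]).asnumpy()  # decode selected frame indices into one array
    print(batch.shape)                       # (2, height, width, 3), RGB order
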
{vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/agent/vision_agent_coder.py
@@ -173,7 +173,7 @@ def pick_plan(
 
      if verbosity == 2:
          _print_code("Initial code and tests:", code)
-         _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
+         _LOGGER.info(f"Initial code execution result:\n{tool_output_str}")
 
      log_progress(
          {
{vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/tools/tools.py
@@ -12,7 +12,6 @@ from uuid import UUID
  import cv2
  import numpy as np
  import requests
- from moviepy.editor import ImageSequenceClip
  from PIL import Image, ImageDraw, ImageEnhance, ImageFont
  from pillow_heif import register_heif_opener  # type: ignore
  from pytube import YouTube  # type: ignore
@@ -35,7 +34,6 @@ from vision_agent.tools.tools_types import (
      ODResponseData,
      PromptTask,
  )
- from vision_agent.utils import extract_frames_from_video
  from vision_agent.utils.exceptions import FineTuneModelIsNotReady
  from vision_agent.utils.execute import FileSerializer, MimeType
  from vision_agent.utils.image_utils import (
@@ -44,13 +42,17 @@ from vision_agent.utils.image_utils import (
      convert_to_b64,
      denormalize_bbox,
      encode_image_bytes,
-     frames_to_bytes,
      get_image_size,
      normalize_bbox,
      numpy_to_bytes,
      rle_decode,
      rle_decode_array,
  )
+ from vision_agent.utils.video import (
+     extract_frames_from_video,
+     frames_to_bytes,
+     video_writer,
+ )
 
  register_heif_opener()
 
@@ -1513,17 +1515,14 @@
          "/tmp/tmpvideo123.mp4"
      """
      if fps <= 0:
-         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
-         fps = 4
-     with ImageSequenceClip(frames, fps=fps) as video:
-         if output_video_path:
-             f = open(output_video_path, "wb")
-         else:
-             f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)  # type: ignore
-         video.write_videofile(f.name, codec="libx264")
-         f.close()
-         _save_video_to_result(f.name)
-         return f.name
+         raise ValueError(f"fps must be greater than 0 got {fps}")
+
+     if output_video_path is None:
+         output_video_path = tempfile.NamedTemporaryFile(delete=False).name
+
+     output_video_path = video_writer(frames, fps, output_video_path)
+     _save_video_to_result(output_video_path)
+     return output_video_path
 
 
  def _save_video_to_result(video_uri: str) -> None:
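
Illustrative usage of the reworked save_video above, a sketch assuming save_video is imported from vision_agent.tools and the frames are uint8 RGB arrays:

    import numpy as np
    from vision_agent.tools import save_video

    frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(10)]

    # Encodes through the OpenCV-backed video_writer and returns the output path.
    path = save_video(frames, output_video_path="out.mp4", fps=10)

    # fps <= 0 now raises ValueError instead of silently falling back to fps=4.
    # save_video(frames, fps=0)
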
@@ -1820,7 +1819,6 @@ def overlay_counting_results(
 
  FUNCTION_TOOLS = [
      owl_v2,
-     extract_frames,
      ocr,
      clip,
      vit_image_classification,
@@ -1841,6 +1839,7 @@ FUNCTION_TOOLS = [
  ]
 
  UTIL_TOOLS = [
+     extract_frames,
      save_json,
      load_image,
      save_image,
@@ -1856,7 +1855,7 @@ TOOLS = FUNCTION_TOOLS + UTIL_TOOLS
  TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
  TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
  TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
- TOOLS_INFO = get_tools_info(TOOLS)  # type: ignore
+ TOOLS_INFO = get_tools_info(FUNCTION_TOOLS)  # type: ignore
  UTILITIES_DOCSTRING = get_tool_documentation(
      [
          save_json,
{vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/__init__.py
@@ -7,4 +7,4 @@ from .execute import (
      Result,
  )
  from .sim import AzureSim, OllamaSim, Sim, load_sim, merge_sim
- from .video import extract_frames_from_video
+ from .video import extract_frames_from_video, video_writer
{vision_agent-0.2.124 → vision_agent-0.2.126}/vision_agent/utils/image_utils.py
@@ -2,14 +2,12 @@
 
  import base64
  import io
- import tempfile
  from importlib import resources
  from io import BytesIO
  from pathlib import Path
  from typing import Dict, List, Tuple, Union
 
  import numpy as np
- from moviepy.editor import ImageSequenceClip
  from PIL import Image, ImageDraw, ImageFont
  from PIL.Image import Image as ImageType
 
@@ -90,24 +88,6 @@ def rle_decode_array(rle: Dict[str, List[int]]) -> np.ndarray:
      return binary_mask
 
 
- def frames_to_bytes(
-     frames: List[np.ndarray], fps: float = 10, file_ext: str = "mp4"
- ) -> bytes:
-     r"""Convert a list of frames to a video file encoded into a byte string.
-
-     Parameters:
-         frames: the list of frames
-         fps: the frames per second of the video
-         file_ext: the file extension of the video file
-     """
-     with tempfile.NamedTemporaryFile(delete=True) as temp_file:
-         clip = ImageSequenceClip(frames, fps=fps)
-         clip.write_videofile(temp_file.name + f".{file_ext}", fps=fps, codec="libx264")
-         with open(temp_file.name + f".{file_ext}", "rb") as f:
-             buffer_bytes = f.read()
-         return buffer_bytes
-
-
  def b64_to_pil(b64_str: str) -> ImageType:
      r"""Convert a base64 string to a PIL Image.
 
vision_agent-0.2.126/vision_agent/utils/video.py (new file)
@@ -0,0 +1,107 @@
+ import base64
+ import logging
+ import tempfile
+ from functools import lru_cache
+ from typing import List, Optional, Tuple
+
+ import cv2
+ import numpy as np
+ from decord import VideoReader  # type: ignore
+
+ _LOGGER = logging.getLogger(__name__)
+ # The maximum length of the clip to extract frames from, in seconds
+
+
+ def play_video(video_base64: str) -> None:
+     """Play a video file"""
+     video_data = base64.b64decode(video_base64)
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
+         temp_video.write(video_data)
+         temp_video_path = temp_video.name
+
+     cap = cv2.VideoCapture(temp_video_path)
+     if not cap.isOpened():
+         _LOGGER.error("Error: Could not open video.")
+         return
+
+     # Display the first frame and wait for any key press to start the video
+     ret, frame = cap.read()
+     if ret:
+         cv2.imshow("Video Player", frame)
+         _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
+         cv2.waitKey(0)  # Wait for any key press
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         cv2.imshow("Video Player", frame)
+         # Press 'q' to exit the video
+         if cv2.waitKey(200) & 0xFF == ord("q"):
+             break
+     cap.release()
+     cv2.destroyAllWindows()
+
+
+ def video_writer(
+     frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
+ ) -> str:
+     if filename is None:
+         filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
+
+     fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore
+     height, width = frames[0].shape[:2]
+     writer = cv2.VideoWriter(filename, fourcc, fps, (width, height))
+     for frame in frames:
+         writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
+     writer.release()
+     return filename
+
+
+ def frames_to_bytes(
+     frames: List[np.ndarray], fps: float = 10, file_ext: str = ".mp4"
+ ) -> bytes:
+     r"""Convert a list of frames to a video file encoded into a byte string.
+
+     Parameters:
+         frames: the list of frames
+         fps: the frames per second of the video
+         file_ext: the file extension of the video file
+     """
+     with tempfile.NamedTemporaryFile(delete=True, suffix=file_ext) as temp_file:
+         video_writer(frames, fps, temp_file.name)
+
+         with open(temp_file.name, "rb") as f:
+             buffer_bytes = f.read()
+     return buffer_bytes
+
+
+ # WARNING: this cache is a little dangerous because if the underlying video
+ # contents change but the filename remains the same it will return the old file contents,
+ # but for vision agent it's unlikely to change the file contents while keeping the
+ # same file name and the time savings are very large.
+ @lru_cache(maxsize=8)
+ def extract_frames_from_video(
+     video_uri: str, fps: float = 1.0
+ ) -> List[Tuple[np.ndarray, float]]:
+     """Extract frames from a video
+
+     Parameters:
+         video_uri (str): the path to the video file or a video file url
+         fps (float): the frame rate per second to extract the frames
+
+     Returns:
+         a list of tuples containing the extracted frame and the timestamp in seconds.
+         E.g. [(frame1, 0.0), (frame2, 0.5), ...]. The timestamp is the time in seconds
+         from the start of the video. E.g. 12.125 means 12.125 seconds from the start of
+         the video. The frames are sorted by the timestamp in ascending order.
+     """
+     vr = VideoReader(video_uri)
+     orig_fps = vr.get_avg_fps()
+     if fps > orig_fps:
+         fps = orig_fps
+
+     s = orig_fps / fps
+     samples = [(int(i * s), int(i * s) / orig_fps) for i in range(int(len(vr) / s))]
+     frames = vr.get_batch([s[0] for s in samples]).asnumpy()
+     return [(frames[i, :, :, :], samples[i][1]) for i in range(len(samples))]
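
A sketch of how the new helpers compose (clip.mp4 is a hypothetical local file): decord-backed extraction, then OpenCV-backed re-encoding.

    from vision_agent.utils.video import (
        extract_frames_from_video,
        frames_to_bytes,
        video_writer,
    )

    # Decode roughly one frame per second; results are cached per (video_uri, fps)
    # by the lru_cache shown above.
    frames_and_ts = extract_frames_from_video("clip.mp4", fps=1.0)  # [(frame, ts), ...]
    frames = [frame for frame, _ in frames_and_ts]

    out_path = video_writer(frames, fps=1.0)        # writes a temporary .mp4, returns its path
    video_bytes = frames_to_bytes(frames, fps=1.0)  # same encoding, returned as raw bytes
    print(out_path, len(video_bytes))
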
vision_agent-0.2.124/vision_agent/utils/video.py (removed)
@@ -1,215 +0,0 @@
- import base64
- import logging
- import math
- import os
- import tempfile
- from concurrent.futures import ProcessPoolExecutor, as_completed
- from typing import List, Tuple, cast
-
- import cv2
- import numpy as np
- from moviepy.video.io.VideoFileClip import VideoFileClip
- from tqdm import tqdm
-
- _LOGGER = logging.getLogger(__name__)
- # The maximum length of the clip to extract frames from, in seconds
- _CLIP_LENGTH = 30.0
-
-
- def play_video(video_base64: str) -> None:
-     """Play a video file"""
-     video_data = base64.b64decode(video_base64)
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
-         temp_video.write(video_data)
-         temp_video_path = temp_video.name
-
-     cap = cv2.VideoCapture(temp_video_path)
-     if not cap.isOpened():
-         _LOGGER.error("Error: Could not open video.")
-         return
-
-     # Display the first frame and wait for any key press to start the video
-     ret, frame = cap.read()
-     if ret:
-         cv2.imshow("Video Player", frame)
-         _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
-         cv2.waitKey(0)  # Wait for any key press
-
-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-         cv2.imshow("Video Player", frame)
-         # Press 'q' to exit the video
-         if cv2.waitKey(200) & 0xFF == ord("q"):
-             break
-     cap.release()
-     cv2.destroyAllWindows()
-
-
- def extract_frames_from_video(
-     video_uri: str, fps: float = 0.5, motion_detection_threshold: float = 0.0
- ) -> List[Tuple[np.ndarray, float]]:
-     """Extract frames from a video
-
-     Parameters:
-         video_uri: the path to the video file or a video file url
-         fps: the frame rate per second to extract the frames
-         motion_detection_threshold: The threshold to detect motion between
-             changes/frames. A value between 0-1, which represents the percentage change
-             required for the frames to be considered in motion. For example, a lower
-             value means more frames will be extracted. A non-positive value will disable
-             motion detection and extract all frames.
-
-     Returns:
-         a list of tuples containing the extracted frame and the timestamp in seconds.
-         E.g. [(frame1, 0.0), (frame2, 0.5), ...]. The timestamp is the time in seconds
-         from the start of the video. E.g. 12.125 means 12.125 seconds from the start of
-         the video. The frames are sorted by the timestamp in ascending order.
-     """
-     with VideoFileClip(video_uri) as video:
-         video_duration: float = video.duration
-         num_workers = os.cpu_count()
-         clip_length: float = min(video_duration, _CLIP_LENGTH)
-         start_times = list(range(0, math.ceil(video_duration), math.ceil(clip_length)))
-         assert start_times, f"No frames to extract from the input video: {video_uri}"
-         segment_args = [
-             {
-                 "video_uri": video_uri,
-                 "start": start,
-                 "end": (
-                     start + clip_length if i < len(start_times) - 1 else video_duration
-                 ),
-                 "fps": fps,
-                 "motion_detection_threshold": motion_detection_threshold,
-             }
-             for i, start in enumerate(start_times)
-         ]
-         if (
-             cast(float, segment_args[-1]["end"])
-             - cast(float, segment_args[-1]["start"])
-             < 1
-         ):
-             # If the last segment is less than 1s, merge it with the previous segment
-             # This is to avoid the failure of the last segment extraction
-             assert (
-                 len(segment_args) > 1
-             ), "Development bug - Expect at least 2 segments."
-             segment_args[-2]["end"] = video_duration
-             segment_args.pop(-1)
-         _LOGGER.info(
-             f"""Created {len(segment_args)} segments from the input video {video_uri} of length {video.duration}s, with clip size: {clip_length}s and {num_workers} workers.
-             Segments: {segment_args}
-             """
-         )
-         frames = []
-         with tqdm(total=len(segment_args)) as pbar:
-             with ProcessPoolExecutor(max_workers=num_workers) as executor:
-                 futures = [
-                     executor.submit(_extract_frames_by_clip, **kwargs)  # type: ignore
-                     for kwargs in segment_args
-                 ]
-                 for future in as_completed(futures):
-                     result = future.result()
-                     frames.extend(result)
-                     pbar.update(1)
-         frames.sort(key=lambda x: x[1])
-         _LOGGER.info(f"Extracted {len(frames)} frames from video {video_uri}")
-         return frames
-
-
- def _extract_frames_by_clip(
-     video_uri: str,
-     start: int = 0,
-     end: float = -1,
-     fps: int = 2,
-     motion_detection_threshold: float = 0.06,
- ) -> List[Tuple[np.ndarray, float]]:
-     """Extract frames from a video clip with start and end time in seconds.
-
-     Parameters:
-         video_uri: the path to the video file or a video file url
-         start: the start time (in seconds) of the clip to extract
-         end: the end time (in seconds, up to millisecond level precision) of the clip to extract, if -1, extract the whole video
-         fps: the frame rate to extract the frames
-         motion_detection_threshold: the threshold to detect the motion between frames
-     """
-     with VideoFileClip(video_uri) as video:
-         source_fps = video.fps
-         if end <= 0:
-             end = video.duration
-         _LOGGER.info(
-             f"Extracting frames from video {video_uri} ({video.duration}s) with start={start}s and end={end}s"
-         )
-         clip = video.subclip(start, end)
-         processable_frames = int(clip.duration * fps)
-         _LOGGER.info(
-             f"Extracting frames from video clip of length {clip.duration}s with FPS={fps} and start_time={start}s. Total number of frames in clip: {processable_frames}"
-         )
-         frames = []
-         total_count, skipped_count = 0, 0
-         prev_processed_frame = None
-         pbar = tqdm(
-             total=processable_frames, desc=f"Extracting frames from clip {start}-{end}"
-         )
-         for i, frame in enumerate(clip.iter_frames(fps=fps, dtype="uint8")):
-             total_count += 1
-             pbar.update(1)
-             if motion_detection_threshold > 0:
-                 curr_processed_frame = _preprocess_frame(frame)
-                 # Skip the frame if it is similar to the previous one
-                 if prev_processed_frame is not None and _similar_frame(
-                     prev_processed_frame,
-                     curr_processed_frame,
-                     threshold=motion_detection_threshold,
-                 ):
-                     skipped_count += 1
-                     continue
-                 prev_processed_frame = curr_processed_frame
-             ts = round(clip.reader.pos / source_fps, 3)
-             frames.append((frame, ts))
-
-         _LOGGER.info(
-             f"""Finished!
-             Frames extracted: {len(frames)}
-             Extracted frame timestamp: {[f[1] for f in frames]}
-             Total processed frames: {total_count}
-             Skipped frames: {skipped_count}
-             Scan FPS: {fps}
-             Clip start time: {start}s, {clip.pos}
-             Clip end time: {end}s
-             Clip duration: {clip.duration}s
-             Clip total frames: {clip.duration * source_fps}
-             Video duration: {video.duration}s
-             Video FPS: {video.fps}
-             Video total frames: {video.reader.nframes}"""
-         )
-         return frames
-
-
- def _preprocess_frame(frame: np.ndarray) -> np.ndarray:
-     # Convert to grayscale
-     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-     frame = cv2.GaussianBlur(src=frame, ksize=(5, 5), sigmaX=0)
-     return frame
-
-
- def _similar_frame(
-     prev_frame: np.ndarray, curr_frame: np.ndarray, threshold: float
- ) -> bool:
-     """Detect two frames are similar or not
-
-     Parameters:
-         threshold: similarity threshold, a value between 0-1, the percentage change that is considered a different frame.
-     """
-     # calculate difference and update previous frame TODO: don't assume the processed image is cached
-     diff_frame = cv2.absdiff(src1=prev_frame, src2=curr_frame)
-     # Only take different areas that are different enough (>20 / 255)
-     thresh_frame = cv2.threshold(
-         src=diff_frame, thresh=20, maxval=255, type=cv2.THRESH_BINARY
-     )[1]
-     change_percentage = cv2.countNonZero(thresh_frame) / (
-         curr_frame.shape[0] * curr_frame.shape[1]
-     )
-     _LOGGER.debug(f"Image diff: {change_percentage}")
-     return change_percentage < threshold