vidformer 0.11.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
vidformer/cv2/__init__.py CHANGED
@@ -19,10 +19,10 @@ except Exception:
19
19
  _opencv2 = None
20
20
 
21
21
  import re
22
- import uuid
23
22
  import zlib
24
23
  from bisect import bisect_right
25
24
  from fractions import Fraction
25
+ import os
26
26
 
27
27
  import numpy as np
28
28
 
@@ -80,21 +80,36 @@ _global_cv2_server = None
80
80
  def _server():
81
81
  global _global_cv2_server
82
82
  if _global_cv2_server is None:
83
- _global_cv2_server = vf.YrdenServer()
83
+ if "VF_IGNI_ENDPOINT" in os.environ:
84
+ server_endpoint = os.environ["VF_IGNI_ENDPOINT"]
85
+ if "VF_IGNI_API_KEY" not in os.environ:
86
+ raise Exception("VF_IGNI_API_KEY must be set")
87
+ api_key = os.environ["VF_IGNI_API_KEY"]
88
+ _global_cv2_server = vf.Server(server_endpoint, api_key)
89
+ else:
90
+ raise Exception(
91
+ "No server set for the cv2 frontend. Set VF_IGNI_ENDPOINT and VF_IGNI_API_KEY environment variables or use cv2.set_server() before use."
92
+ )
84
93
  return _global_cv2_server
85
94
 
86
95
 
87
96
  def set_server(server):
88
97
  """Set the server to use for the cv2 frontend."""
89
98
  global _global_cv2_server
90
- assert isinstance(server, vf.YrdenServer) or isinstance(server, vf.IgniServer)
99
+ assert isinstance(server, vf.Server)
91
100
  _global_cv2_server = server
92
101
 
93
102
 
103
+ def get_server():
104
+ """Get the server used by the cv2 frontend."""
105
+ return _server()
106
+
107
+
94
108
  _PIX_FMT_MAP = {
95
109
  "rgb24": "rgb24",
96
110
  "yuv420p": "rgb24",
97
111
  "yuv422p": "rgb24",
112
+ "yuv422p10le": "rgb24",
98
113
  "yuv444p": "rgb24",
99
114
  "yuvj420p": "rgb24",
100
115
  "yuvj422p": "rgb24",
@@ -149,28 +164,16 @@ class Frame:
149
164
 
150
165
  self._mut()
151
166
  server = _server()
152
- if type(server) is vf.YrdenServer:
153
- spec = vf.YrdenSpec([Fraction(0, 1)], lambda t, i: self._f, self._fmt)
154
- loader = spec.load(_server())
155
-
156
- frame_raster_rgb24 = loader[0]
157
- assert type(frame_raster_rgb24) is bytes
158
- assert len(frame_raster_rgb24) == self.shape[0] * self.shape[1] * 3
159
- raw_data_array = np.frombuffer(frame_raster_rgb24, dtype=np.uint8)
160
- frame = raw_data_array.reshape(self.shape)
167
+ frame = server.frame(
168
+ self.shape[1], self.shape[0], self._fmt["pix_fmt"], self._f
169
+ )
170
+ assert type(frame) is bytes
171
+ assert len(frame) == self.shape[0] * self.shape[1] * self.shape[2]
172
+ raw_data_array = np.frombuffer(frame, dtype=np.uint8)
173
+ frame = raw_data_array.reshape(self.shape)
174
+ if self.shape[2] == 3:
161
175
  frame = frame[:, :, ::-1] # convert RGB to BGR
162
- return frame
163
- else:
164
- frame = server.frame(
165
- self.shape[1], self.shape[0], self._fmt["pix_fmt"], self._f
166
- )
167
- assert type(frame) is bytes
168
- assert len(frame) == self.shape[0] * self.shape[1] * self.shape[2]
169
- raw_data_array = np.frombuffer(frame, dtype=np.uint8)
170
- frame = raw_data_array.reshape(self.shape)
171
- if self.shape[2] == 3:
172
- frame = frame[:, :, ::-1] # convert RGB to BGR
173
- return frame
176
+ return frame
174
177
 
175
178
  def __getitem__(self, key):
176
179
  if not isinstance(key, tuple):
@@ -265,8 +268,12 @@ class Frame:
265
268
  raise NotImplementedError("Only 1-channel mask frames are supported")
266
269
 
267
270
  # Value should be a bgr or bgra color
268
- if type(value) is not list or len(value) not in [3, 4]:
269
- raise NotImplementedError("Value should be a 3 or 4 element list")
271
+ if (type(value) is not list and type(value) is not tuple) or len(
272
+ value
273
+ ) not in [3, 4]:
274
+ raise NotImplementedError(
275
+ "Value should be a 3 or 4 element list or tuple"
276
+ )
270
277
  value = [float(x) for x in value]
271
278
  if len(value) == 3:
272
279
  value.append(255.0)
@@ -312,27 +319,19 @@ class VideoCapture:
312
319
  def __init__(self, path: str):
313
320
  server = _server()
314
321
  if type(path) is str:
315
- if isinstance(server, vf.YrdenServer):
322
+ match = re.match(r"(http|https)://([^/]+)(.*)", path)
323
+ if match is not None:
324
+ endpoint = f"{match.group(1)}://{match.group(2)}"
325
+ path = match.group(3)
326
+ if path.startswith("/"):
327
+ path = path[1:]
316
328
  self._path = path
317
- self._source = vf.YrdenSource(server, str(uuid.uuid4()), path, 0)
329
+ self._source = server.source(path, 0, "http", {"endpoint": endpoint})
318
330
  else:
319
- assert isinstance(server, vf.IgniServer)
320
- match = re.match(r"(http|https)://([^/]+)(.*)", path)
321
- if match is not None:
322
- endpoint = f"{match.group(1)}://{match.group(2)}"
323
- path = match.group(3)
324
- if path.startswith("/"):
325
- path = path[1:]
326
- self._path = path
327
- self._source = server.source(
328
- path, 0, "http", {"endpoint": endpoint}
329
- )
330
- else:
331
- raise Exception(
332
- "Using a VideoCapture source by name only works with http(s) URLs. You need to pass an IgniSource instead."
333
- )
334
- elif isinstance(path, vf.IgniSource):
335
- assert isinstance(server, vf.IgniServer)
331
+ self._path = path
332
+ self._source = server.source(path, 0, "fs", {"root": "."})
333
+ elif isinstance(path, vf.Source):
334
+ assert isinstance(server, vf.Server)
336
335
  self._path = path._name
337
336
  self._source = path
338
337
  self._next_frame_idx = 0
@@ -348,7 +347,7 @@ class VideoCapture:
348
347
  elif prop == CAP_PROP_FRAME_HEIGHT:
349
348
  return self._source.fmt()["height"]
350
349
  elif prop == CAP_PROP_FRAME_COUNT:
351
- return len(self._source.ts())
350
+ return len(self._source)
352
351
  elif prop == CAP_PROP_POS_FRAMES:
353
352
  return self._next_frame_idx
354
353
 
@@ -374,31 +373,25 @@ class VideoCapture:
374
373
  frame = Frame(frame, self._source.fmt())
375
374
  return True, frame
376
375
 
376
+ def __getitem__(self, key):
377
+ if not isinstance(key, int):
378
+ raise NotImplementedError("Only integer indexing is supported")
379
+ if key < 0:
380
+ key = len(self._source) + key
381
+ if key < 0 or key >= len(self._source):
382
+ raise IndexError("Index out of bounds")
383
+ frame = self._source.iloc[key]
384
+ frame = Frame(frame, self._source.fmt())
385
+ return frame
386
+
387
+ def __len__(self):
388
+ return len(self._source)
389
+
377
390
  def release(self):
378
391
  pass
379
392
 
380
393
 
381
394
  class VideoWriter:
382
- def __init__(self, *args, **kwargs):
383
- server = _server()
384
- if isinstance(server, vf.YrdenServer):
385
- self._writer = _YrdenVideoWriter(*args, **kwargs)
386
- elif isinstance(server, vf.IgniServer):
387
- self._writer = _IgniVideoWriter(*args, **kwargs)
388
- else:
389
- raise Exception("Unsupported server type")
390
-
391
- def write(self, *args, **kwargs):
392
- return self._writer.write(*args, **kwargs)
393
-
394
- def release(self, *args, **kwargs):
395
- return self._writer.release(*args, **kwargs)
396
-
397
- def spec(self, *args, **kwargs):
398
- return self._writer.spec(*args, **kwargs)
399
-
400
-
401
- class _IgniVideoWriter:
402
395
  def __init__(
403
396
  self,
404
397
  path,
@@ -408,14 +401,13 @@ class _IgniVideoWriter:
408
401
  batch_size=1024,
409
402
  compression="gzip",
410
403
  ttl=3600,
404
+ pix_fmt="yuv420p",
411
405
  vod_segment_length=Fraction(2, 1),
412
406
  ):
413
407
  server = _server()
414
- assert isinstance(server, vf.IgniServer)
415
- if path is not None:
416
- raise Exception(
417
- "Igni does not support writing to a file. VideoWriter path must be None"
418
- )
408
+ assert isinstance(server, vf.Server)
409
+ assert path is None or type(path) is str
410
+ self._path = path
419
411
  if isinstance(fps, int):
420
412
  self._f_time = Fraction(1, fps)
421
413
  elif isinstance(fps, Fraction):
@@ -425,10 +417,10 @@ class _IgniVideoWriter:
425
417
 
426
418
  assert isinstance(size, tuple) or isinstance(size, list)
427
419
  assert len(size) == 2
428
- height, width = size
420
+ width, height = size
429
421
  assert ttl is None or isinstance(ttl, int)
430
422
  self._spec = server.create_spec(
431
- width, height, "yuv420p", vod_segment_length, 1 / self._f_time, ttl=ttl
423
+ width, height, pix_fmt, vod_segment_length, 1 / self._f_time, ttl=ttl
432
424
  )
433
425
  self._batch_size = batch_size
434
426
  assert compression is None or compression in ["gzip"]
@@ -480,47 +472,9 @@ class _IgniVideoWriter:
480
472
 
481
473
  def release(self):
482
474
  self._flush(True)
483
-
484
-
485
- class _YrdenVideoWriter:
486
- def __init__(self, path, fourcc, fps, size):
487
- assert isinstance(fourcc, VideoWriter_fourcc)
488
- if path is not None and not isinstance(path, str):
489
- raise Exception("path must be a string or None")
490
- self._path = path
491
- self._fourcc = fourcc
492
- self._fps = fps
493
- self._size = size
494
-
495
- self._frames = []
496
- self._pix_fmt = "yuv420p"
497
-
498
- def write(self, frame):
499
- frame = frameify(frame, "frame")
500
-
501
- if frame._fmt["pix_fmt"] != self._pix_fmt:
502
- f_obj = _filter_scale(frame._f, pix_fmt=self._pix_fmt)
503
- self._frames.append(f_obj)
504
- else:
505
- self._frames.append(frame._f)
506
-
507
- def release(self):
508
- if self._path is None:
509
- return
510
-
511
- spec = self.spec()
512
- server = _server()
513
- spec.save(server, self._path)
514
-
515
- def spec(self) -> vf.YrdenSpec:
516
- fmt = {
517
- "width": self._size[1],
518
- "height": self._size[0],
519
- "pix_fmt": self._pix_fmt,
520
- }
521
- domain = _fps_to_ts(self._fps, len(self._frames))
522
- spec = vf.YrdenSpec(domain, lambda t, i: self._frames[i], fmt)
523
- return spec
475
+ if self._path is not None:
476
+ server = _server()
477
+ server.export_spec(self._spec.id(), self._path)
524
478
 
525
479
 
526
480
  class VideoWriter_fourcc:
@@ -552,82 +506,57 @@ def imread(path, *args):
552
506
  assert path.lower().endswith((".jpg", ".jpeg", ".png"))
553
507
  server = _server()
554
508
 
555
- if type(server) is vf.YrdenServer:
556
- source = vf.YrdenSource(server, str(uuid.uuid4()), path, 0)
557
- frame = Frame(source.iloc[0], source.fmt())
558
- return frame
559
- else:
560
- cap = VideoCapture(path)
561
- assert cap.isOpened()
562
- assert len(cap._source) == 1
563
- ret, frame = cap.read()
564
- assert ret
565
- cap.release()
566
- return frame
509
+ cap = VideoCapture(path)
510
+ assert cap.isOpened()
511
+ assert len(cap._source) == 1
512
+ ret, frame = cap.read()
513
+ assert ret
514
+ cap.release()
515
+ return frame
567
516
 
568
517
 
569
518
  def imwrite(path, img, *args):
570
519
  if len(args) > 0:
571
520
  raise NotImplementedError("imwrite does not support additional arguments")
572
521
 
573
- server = _server()
574
- if type(server) is vf.IgniServer:
575
- raise NotImplementedError(
576
- "imwrite is only supported with YrdenServer, not IgniServer"
577
- )
578
-
579
522
  img = frameify(img)
580
-
581
523
  fmt = img._fmt.copy()
582
524
  width = fmt["width"]
583
525
  height = fmt["height"]
584
- f = img._f
585
-
586
- domain = [Fraction(0, 1)]
587
526
 
588
527
  if path.lower().endswith(".png"):
589
- img._mut() # Make sure it's in rgb24
590
- spec = vf.YrdenSpec(
591
- domain,
592
- lambda t, i: img._f,
593
- {"width": width, "height": height, "pix_fmt": "rgb24"},
594
- )
595
- spec.save(_server(), path, encoder="png")
528
+ out_pix_fmt = "rgb24"
529
+ encoder = "png"
596
530
  elif path.lower().endswith((".jpg", ".jpeg")):
597
- if img._modified:
598
- # it's rgb24, we need to convert to something jpeg can handle
599
- f = _filter_scale(img._f, pix_fmt="yuv420p")
600
- fmt["pix_fmt"] = "yuv420p"
531
+ encoder = "mjpeg"
532
+ if img._fmt["pix_fmt"] not in ["yuvj420p", "yuvj422p", "yuvj444p"]:
533
+ out_pix_fmt = "yuvj420p"
601
534
  else:
602
- if fmt["pix_fmt"] not in ["yuvj420p", "yuvj422p", "yuvj444p"]:
603
- f = _filter_scale(img._f, pix_fmt="yuvj420p")
604
- fmt["pix_fmt"] = "yuvj420p"
605
-
606
- spec = vf.YrdenSpec(domain, lambda t, i: f, fmt)
607
- spec.save(server, path, encoder="mjpeg")
535
+ out_pix_fmt = img._fmt["pix_fmt"]
608
536
  else:
609
537
  raise Exception("Unsupported image format")
610
538
 
539
+ if img._fmt["pix_fmt"] != out_pix_fmt:
540
+ f = _filter_scale(img._f, pix_fmt=out_pix_fmt)
541
+ img = Frame(f, {"width": width, "height": height, "pix_fmt": out_pix_fmt})
542
+
543
+ writer = VideoWriter(None, None, 1, (width, height), pix_fmt=out_pix_fmt)
544
+ writer.write(img)
545
+ writer.release()
546
+
547
+ spec = writer.spec()
548
+ server = _server()
549
+ server.export_spec(spec.id(), path, encoder=encoder)
550
+
611
551
 
612
- def vidplay(video, *args, **kwargs):
552
+ def vidplay(video, method=None):
613
553
  """
614
554
  Play a vidformer video specification.
615
-
616
- Args:
617
- video: one of [vidformer.Spec, vidformer.Source, vidformer.cv2.VideoWriter]
618
555
  """
619
- if isinstance(video, vf.YrdenSpec):
620
- return video.play(_server(), *args, **kwargs)
621
- elif isinstance(video, vf.YrdenSource):
622
- return video.play(_server(), *args, **kwargs)
623
- elif isinstance(video, VideoWriter):
624
- return vidplay(video._writer, *args, **kwargs)
625
- elif isinstance(video, _YrdenVideoWriter):
626
- return video.spec().play(_server(), *args, **kwargs)
627
- elif isinstance(video, _IgniVideoWriter):
628
- return video._spec.play(*args, **kwargs)
629
- elif isinstance(video, vf.IgniSpec):
630
- return video.play(*args, **kwargs)
556
+ if isinstance(video, VideoWriter):
557
+ return video.spec().play(method=method)
558
+ elif isinstance(video, vf.Spec):
559
+ return video.play(method=method)
631
560
  else:
632
561
  raise Exception("Unsupported video type to vidplay")
633
562
 
@@ -658,7 +587,7 @@ def resize(src, dsize):
658
587
 
659
588
  assert isinstance(dsize, tuple) or isinstance(dsize, list)
660
589
  assert len(dsize) == 2
661
- height, width = dsize
590
+ width, height = dsize
662
591
 
663
592
  f = _filter_scale(src._f, width=width, height=height)
664
593
  fmt = {"width": width, "height": height, "pix_fmt": src._fmt["pix_fmt"]}
@@ -14,6 +14,11 @@ from supervision.geometry.core import Position
14
14
 
15
15
  import vidformer.cv2 as vf_cv2
16
16
 
17
+ try:
18
+ import cv2 as ocv_cv2
19
+ except ImportError:
20
+ ocv_cv2 = None
21
+
17
22
  CV2_FONT = vf_cv2.FONT_HERSHEY_SIMPLEX
18
23
 
19
24
 
@@ -272,7 +277,6 @@ class DotAnnotator:
272
277
  outline_thickness: int = 0,
273
278
  outline_color=Color.BLACK,
274
279
  ):
275
-
276
280
  self.color = color
277
281
  self.radius: int = radius
278
282
  self.position: Position = position
@@ -537,3 +541,89 @@ class LabelAnnotator:
537
541
  thickness=-1,
538
542
  )
539
543
  return scene
544
+
545
+
546
+ class MaskAnnotator:
547
+ def __init__(
548
+ self,
549
+ color=ColorPalette.DEFAULT,
550
+ opacity: float = 0.5,
551
+ color_lookup: ColorLookup = ColorLookup.CLASS,
552
+ ):
553
+ self.color = color
554
+ self.opacity = opacity
555
+ self.color_lookup: ColorLookup = color_lookup
556
+
557
+ def annotate(
558
+ self,
559
+ scene,
560
+ detections: Detections,
561
+ custom_color_lookup=None,
562
+ ):
563
+ if detections.mask is None:
564
+ return scene
565
+
566
+ colored_mask = scene.copy()
567
+
568
+ for detection_idx in np.flip(np.argsort(detections.box_area)):
569
+ color = resolve_color(
570
+ color=self.color,
571
+ detections=detections,
572
+ detection_idx=detection_idx,
573
+ color_lookup=(
574
+ self.color_lookup
575
+ if custom_color_lookup is None
576
+ else custom_color_lookup
577
+ ),
578
+ )
579
+ mask = detections.mask[detection_idx]
580
+ colored_mask[mask] = color.as_bgr()
581
+
582
+ vf_cv2.addWeighted(
583
+ colored_mask, self.opacity, scene, 1 - self.opacity, 0, dst=scene
584
+ )
585
+ return scene
586
+
587
+
588
+ class MaskStreamWriter:
589
+ def __init__(self, path: str, shape: tuple):
590
+ # Shape should be (width, height)
591
+ assert ocv_cv2 is not None, "OpenCV cv2 is required for ExternDetectionsBuilder"
592
+ assert type(shape) is tuple, "shape must be a tuple"
593
+ assert len(shape) == 2, "shape must be a tuple of length 2"
594
+ self._shape = (shape[1], shape[0])
595
+ self._writer = ocv_cv2.VideoWriter(
596
+ path, ocv_cv2.VideoWriter_fourcc(*"FFV1"), 1, shape, isColor=False
597
+ )
598
+ assert self._writer.isOpened(), f"Failed to open video writer at {path}"
599
+ self._i = 0
600
+
601
+ def write_detections(self, detections: Detections):
602
+ if len(detections) == 0:
603
+ return self._i
604
+
605
+ mask = detections.mask
606
+ assert (
607
+ mask.shape[1:] == self._shape
608
+ ), f"mask shape ({mask.shape[:1]}) must match the shape of the video ({self._shape})"
609
+ for i in range(mask.shape[0]):
610
+ frame_uint8 = detections.mask[i].astype(np.uint8)
611
+ self._writer.write(frame_uint8)
612
+ self._i += 1
613
+ return self._i
614
+
615
+ def release(self):
616
+ self._writer.release()
617
+
618
+
619
+ def populate_mask(
620
+ detections: Detections, mask_stream: vf_cv2.VideoCapture, frame_idx: int
621
+ ):
622
+ assert type(detections) is Detections
623
+ assert detections.mask is None
624
+ detections.mask = []
625
+ assert len(detections) + frame_idx <= len(mask_stream)
626
+ for i in range(len(detections)):
627
+ mask = mask_stream[frame_idx + i]
628
+ assert mask.shape[2] == 1, "mask must be a single channel image"
629
+ detections.mask.append(mask)
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: vidformer
3
- Version: 0.11.0
3
+ Version: 1.0.0
4
4
  Summary: vidformer-py is a Python 🐍 interface for [vidformer](https://github.com/ixlab/vidformer).
5
5
  Author-email: Dominik Winecki <dominikwinecki@gmail.com>
6
6
  Requires-Python: >=3.8
@@ -8,7 +8,6 @@ Description-Content-Type: text/markdown
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: Operating System :: OS Independent
10
10
  Requires-Dist: requests
11
- Requires-Dist: msgpack
12
11
  Requires-Dist: numpy
13
12
  Project-URL: Documentation, https://ixlab.github.io/vidformer/vidformer-py/
14
13
  Project-URL: Homepage, https://ixlab.github.io/vidformer/
@@ -0,0 +1,6 @@
1
+ vidformer/__init__.py,sha256=kL_qU6iIV-XOrw4yk2SwCYA2YJIck3fS6Nqonnj55uA,29972
2
+ vidformer/cv2/__init__.py,sha256=yBlWAQpbIbL4RgIDN6T_p-7JLERuKOs_m_8mqC55jak,26827
3
+ vidformer/supervision/__init__.py,sha256=dRHAcHiZN68gUH_2m3o7Ohsv3NBGxF4XGPeI0pn2_K4,20346
4
+ vidformer-1.0.0.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
5
+ vidformer-1.0.0.dist-info/METADATA,sha256=zSQ1w9B6S4cToRYM3cpVQ3MG8dY7oop09YtS9aI-Z2E,1776
6
+ vidformer-1.0.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: flit 3.10.1
2
+ Generator: flit 3.11.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,6 +0,0 @@
1
- vidformer/__init__.py,sha256=lbbyaiV57QsaXmvHfrz_RXLaRnFMfm5ulK2dN701X-E,55465
2
- vidformer/cv2/__init__.py,sha256=9J_PV306rHYlf4FgBeQqJnlJJ6d2Mcb9s0TfiH8fASA,29528
3
- vidformer/supervision/__init__.py,sha256=KR-keBgDG29TSyIFU4Czgd8Yc5qckJKlSaMcPj_z-Zc,17490
4
- vidformer-0.11.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
5
- vidformer-0.11.0.dist-info/METADATA,sha256=K3-g51c1iXRrkmqRwoYLUN8uJThtSCkjMs7kzr2SvNw,1800
6
- vidformer-0.11.0.dist-info/RECORD,,