vuer-cli 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

@@ -0,0 +1,434 @@
+ """Point cloud utilities for RGB-D visualization.
+
+ Provides classes and functions for:
+ - Loading camera intrinsics and transforms from extracted MCAP data
+ - Interpolating robot poses over time
+ - Backprojecting RGB-D images to 3D point clouds
+ - Coordinate system transformations (OpenCV/ROS to Three.js)
+ """
+
+ import json
+ from pathlib import Path
+ from typing import Dict, Optional, Tuple
+
+ import numpy as np
+ import pandas as pd
+
+
+ def matrix_to_three_js(mat: np.ndarray) -> list:
+     """Convert a 4x4 transformation matrix to Three.js column-major format."""
+     CONVERSION_INDICES = [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15]
+     return mat.flatten()[CONVERSION_INDICES].tolist()
+
+
+ class CoordinateTransform:
+     """Handle coordinate system transformations between OpenCV/ROS and Three.js.
+
+     OpenCV/ROS camera frame:
+         +X: right, +Y: down, +Z: forward
+
+     Three.js with up=[0, 0, 1]:
+         +X: right, +Y: backward, +Z: up
+
+     The transformation flips the X and Z axes to convert between these systems.
+     """
+
+     def __init__(self):
+         """Initialize the coordinate transformation matrices."""
+         self.flip_xz_3x3 = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, -1]], dtype=np.float64)
+         self.flip_xz_4x4 = np.eye(4, dtype=np.float64)
+         self.flip_xz_4x4[:3, :3] = self.flip_xz_3x3
+
+     def transform_points(self, points: np.ndarray) -> np.ndarray:
+         """Transform 3D points from camera frame to Three.js frame."""
+         return points @ self.flip_xz_3x3.T
+
+     def transform_pose(self, pose: np.ndarray, rotation_only: bool = False) -> np.ndarray:
+         """Transform a 4x4 pose matrix from camera frame to Three.js frame."""
+         if rotation_only:
+             result = pose.copy()
+             result[:3, :3] = self.flip_xz_3x3 @ pose[:3, :3]
+             return result
+         return self.flip_xz_4x4 @ pose @ self.flip_xz_4x4
+
+     def to_threejs_matrix(self, pose: np.ndarray, rotation_only: bool = False) -> list:
+         """Transform pose and convert to Three.js column-major format."""
+         transformed = self.transform_pose(pose, rotation_only=rotation_only)
+         return matrix_to_three_js(transformed)
+
+
+ def load_camera_intrinsics(data_dir: Path, camera_name: str) -> Dict:
+     """Load camera intrinsics for a specific camera.
+
+     Args:
+         data_dir: Path to extracted data directory
+         camera_name: Name of the camera (e.g., 'nav_front_d455')
+
+     Returns:
+         dict: Camera intrinsics containing:
+             - width, height: int - Image resolution
+             - fx, fy: float - Focal lengths in pixels
+             - cx, cy: float - Principal point in pixels
+             - K: np.ndarray (3, 3) - Camera calibration matrix
+             - D: np.ndarray or None - Distortion coefficients
+     """
+     with open(data_dir / "camera_intrinsics.json") as f:
+         all_intrinsics = json.load(f)
+
+     key = f"{camera_name}_color_camera_info"
+     if key not in all_intrinsics:
+         raise ValueError(f"Camera {key} not found in intrinsics")
+
+     intr = all_intrinsics[key]
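+     # ROS CameraInfo stores K as a flat, row-major list of 9 values
+     # [fx, 0, cx, 0, fy, cy, 0, 0, 1]; reshape recovers the 3x3 matrix.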
83
+ K = np.array(intr["K"]).reshape(3, 3)
84
+
85
+ return {
86
+ "width": intr["width"],
87
+ "height": intr["height"],
88
+ "fx": K[0, 0],
89
+ "fy": K[1, 1],
90
+ "cx": K[0, 2],
91
+ "cy": K[1, 2],
92
+ "K": K,
93
+ "D": np.array(intr["D"]) if intr["D"] else None,
94
+ }
95
+
96
+
97
+ def load_robot_poses(data_dir: Path) -> "TransformInterpolator":
98
+ """Load robot pose transforms (shared by all cameras).
99
+
100
+ Args:
101
+ data_dir: Path to extracted data directory
102
+
103
+ Returns:
104
+ TransformInterpolator: Time-varying robot poses
105
+ """
+     transforms_path = data_dir / "transforms" / "transforms.csv"
+     odometry_path = data_dir / "odometry.csv"
+
+     robot_poses = None
+
+     if transforms_path.exists() and transforms_path.stat().st_size > 0:
+         tf = pd.read_csv(transforms_path)
+         robot_poses = tf[(tf["parent_frame"] == "localization") & (tf["child_frame"] == "base_link")].copy()
+         if len(robot_poses) == 0:
+             robot_poses = None
+
+     if robot_poses is None and odometry_path.exists():
+         odom = pd.read_csv(odometry_path)
+         robot_poses = pd.DataFrame(
+             {
+                 "timestamp_s": odom["timestamp_s"],
+                 "parent_frame": odom["frame_id"],
+                 "child_frame": odom["child_frame_id"],
+                 "translation_x": odom["position_x"],
+                 "translation_y": odom["position_y"],
+                 "translation_z": odom["position_z"],
+                 "rotation_x": odom["orientation_x"],
+                 "rotation_y": odom["orientation_y"],
+                 "rotation_z": odom["orientation_z"],
+                 "rotation_w": odom["orientation_w"],
+             }
+         )
+
+     if robot_poses is None or len(robot_poses) == 0:
+         raise FileNotFoundError(
+             f"Could not find robot pose data. Looked for:\n"
+             f" - {transforms_path} (with 'localization -> base_link' transforms)\n"
+             f" - {odometry_path}\n"
+             f"Please ensure you have extracted the MCAP data correctly."
+         )
+
+     return TransformInterpolator(robot_poses)
+
+
+ def load_static_transforms(data_dir: Path, camera_name: str) -> "StaticTransformChain":
+     """Load static camera mounting transforms for a specific camera.
+
+     Args:
+         data_dir: Path to extracted data directory
+         camera_name: Name of the camera (e.g., 'nav_front_d455')
+
+     Returns:
+         StaticTransformChain: Chain of static transforms from base_link to camera
+     """
+     transforms_path = data_dir / "transforms" / "transforms.csv"
+
+     if transforms_path.exists() and transforms_path.stat().st_size > 0:
+         tf = pd.read_csv(transforms_path)
+         static_tf = tf[tf["topic"] == "/tf_static"].copy()
+         if len(static_tf) == 0:
+             static_tf = tf.copy()
+     else:
+         static_tf = pd.DataFrame(
+             columns=[
+                 "timestamp_s",
+                 "topic",
+                 "parent_frame",
+                 "child_frame",
+                 "translation_x",
+                 "translation_y",
+                 "translation_z",
+                 "rotation_x",
+                 "rotation_y",
+                 "rotation_z",
+                 "rotation_w",
+             ]
+         )
+
+     return StaticTransformChain(static_tf, camera_name)
+
+
+ class TransformInterpolator:
+     """Interpolate robot poses between transform messages."""
+
+     def __init__(self, transforms_df: pd.DataFrame):
+         self.df = transforms_df.sort_values("timestamp_s").copy()
+         self.times = self.df["timestamp_s"].values
+
+     def interpolate_pose(self, timestamp_s: float) -> Optional[np.ndarray]:
+         """Interpolate pose at a given timestamp.
+
+         Returns:
+             4x4 transformation matrix, or None if the timestamp is out of range
+         """
+         if timestamp_s < self.times[0] or timestamp_s > self.times[-1]:
+             return None
+
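+         # np.searchsorted returns the first index whose time is >= timestamp_s;
+         # clamping it to 1 keeps (idx_before, idx_after) a valid bracket.
+         # Assumes the pose table has at least two rows.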
+         idx_after = np.searchsorted(self.times, timestamp_s)
+         if idx_after == 0:
+             idx_after = 1
+         idx_before = idx_after - 1
+
+         t_before = self.times[idx_before]
+         t_after = self.times[idx_after]
+
+         if t_after == t_before:
+             alpha = 0.0
+         else:
+             alpha = (timestamp_s - t_before) / (t_after - t_before)
+
+         pose_before = self.df.iloc[idx_before]
+         pose_after = self.df.iloc[idx_after]
+
+         trans_before = np.array(
+             [pose_before["translation_x"], pose_before["translation_y"], pose_before["translation_z"]]
+         )
+         trans_after = np.array([pose_after["translation_x"], pose_after["translation_y"], pose_after["translation_z"]])
+         translation = (1 - alpha) * trans_before + alpha * trans_after
+
+         quat_before = np.array(
+             [pose_before["rotation_x"], pose_before["rotation_y"], pose_before["rotation_z"], pose_before["rotation_w"]]
+         )
+         quat_after = np.array(
+             [pose_after["rotation_x"], pose_after["rotation_y"], pose_after["rotation_z"], pose_after["rotation_w"]]
+         )
+
+         quat = self.slerp(quat_before, quat_after, alpha)
+
+         T = np.eye(4)
+         T[:3, :3] = self.quat_to_matrix(quat)
+         T[:3, 3] = translation
+
+         return T
+
+     @staticmethod
+     def slerp(q1: np.ndarray, q2: np.ndarray, t: float) -> np.ndarray:
+         """Spherical linear interpolation between quaternions."""
+         q1 = q1 / np.linalg.norm(q1)
+         q2 = q2 / np.linalg.norm(q2)
+
+         dot = np.dot(q1, q2)
+
+         if dot < 0.0:
+             q2 = -q2
+             dot = -dot
+
+         if dot > 0.9995:
+             result = q1 + t * (q2 - q1)
+             return result / np.linalg.norm(result)
+
+         theta_0 = np.arccos(np.clip(dot, -1.0, 1.0))
+         theta = theta_0 * t
+
+         q3 = q2 - q1 * dot
+         q3 = q3 / np.linalg.norm(q3)
+
+         return q1 * np.cos(theta) + q3 * np.sin(theta)
+
+     @staticmethod
+     def quat_to_matrix(q: np.ndarray) -> np.ndarray:
+         """Convert quaternion (x, y, z, w) to rotation matrix."""
+         x, y, z, w = q
+         return np.array(
+             [
+                 [1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y)],
+                 [2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x)],
+                 [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)],
+             ]
+         )
+
+
+ class StaticTransformChain:
+     """Compose static transforms from base_link to camera optical frame."""
+
+     def __init__(self, transforms_df: pd.DataFrame, camera_name: str):
+         self.camera_name = camera_name
+         self.transforms = {}
+
+         for _, row in transforms_df.iterrows():
+             parent = row["parent_frame"]
+             child = row["child_frame"]
+
+             T = np.eye(4)
+             T[:3, 3] = [row["translation_x"], row["translation_y"], row["translation_z"]]
+
+             quat = np.array([row["rotation_x"], row["rotation_y"], row["rotation_z"], row["rotation_w"]])
+             T[:3, :3] = TransformInterpolator.quat_to_matrix(quat)
+
+             self.transforms[(parent, child)] = T
+
+         self.chain = self._build_chain(camera_name)
+
+     def _build_chain(self, camera_name: str) -> np.ndarray:
+         """Build the complete transform chain from base_link to the camera's color optical frame."""
+         target_frame = f"{camera_name}_color_optical_frame"
+
+         if ("base_link", target_frame) in self.transforms:
+             return self.transforms[("base_link", target_frame)]
+
+         chain_frames = [
+             "base_link",
+             f"{camera_name}_bottom_screw_link",
+             f"{camera_name}_link",
+             f"{camera_name}_color_frame",
+             f"{camera_name}_color_optical_frame",
+         ]
+
+         T_total = np.eye(4)
+         missing_transforms = []
+
+         for i in range(len(chain_frames) - 1):
+             parent = chain_frames[i]
+             child = chain_frames[i + 1]
+
+             if (parent, child) in self.transforms:
+                 T_total = T_total @ self.transforms[(parent, child)]
+             else:
+                 missing_transforms.append(f"{parent} -> {child}")
+
+         if missing_transforms:
+             return np.eye(4)
+
+         return T_total
+
+     def get_transform(self) -> np.ndarray:
+         """Get the composed static transform."""
+         return self.chain
+
+
+ class PointCloudGenerator:
+     """Generate point clouds from RGB-D images with camera poses."""
+
+     def __init__(self, data_dir: str = "demcap_data", camera_name: str = "nav_front_d455"):
+         self.data_dir = Path(data_dir)
+         self.camera_name = camera_name
+
+         self.intrinsics = load_camera_intrinsics(self.data_dir, camera_name)
+         self.robot_pose_interpolator = load_robot_poses(self.data_dir)
+         self.static_camera_transform = load_static_transforms(self.data_dir, camera_name)
+
+     def _find_rgb_depth_pairs(self) -> list:
+         """Find matching RGB and depth image pairs."""
+         rgb_dir = None
+         images_base = self.data_dir / "images"
+
+         possible_rgb_dirs = [
+             images_base / f"{self.camera_name}_color_image_compressed",
+             images_base / f"{self.camera_name}_color_image_raw",
+             images_base / self.camera_name,
+         ]
+
+         for candidate in possible_rgb_dirs:
+             if candidate.exists():
+                 rgb_dir = candidate
+                 break
+
+         if rgb_dir is None:
+             raise ValueError(f"No RGB directory found for {self.camera_name}. Tried: {[str(p) for p in possible_rgb_dirs]}")
+
+         rgb_files = sorted(rgb_dir.glob("*.jpg"))
+
+         depth_dir = self.data_dir / "depth"
+         depth_dirs = list(depth_dir.glob(f"{self.camera_name}*"))
+         if not depth_dirs:
+             raise ValueError(f"No depth directory found for {self.camera_name}")
+         depth_dir = depth_dirs[0]
+         depth_files = sorted(depth_dir.glob("*.png"))
+
+         rgb_times = {int(f.stem): f for f in rgb_files}
+         depth_times = {int(f.stem): f for f in depth_files}
+
+         pairs = []
+         depth_timestamps = np.array(list(depth_times.keys()))
+
+         for rgb_ts, rgb_file in rgb_times.items():
+             idx = np.argmin(np.abs(depth_timestamps - rgb_ts))
+             depth_ts = depth_timestamps[idx]
+             depth_file = depth_times[depth_ts]
+
+             time_diff_ms = abs(rgb_ts - depth_ts) / 1e6
+             if time_diff_ms < 50:
+                 pairs.append(
+                     {
+                         "rgb_file": rgb_file,
+                         "depth_file": depth_file,
+                         "rgb_timestamp_ns": rgb_ts,
+                         "depth_timestamp_ns": depth_ts,
+                         "timestamp_s": rgb_ts / 1e9,
+                         "time_diff_ms": time_diff_ms,
+                     }
+                 )
+
+         return pairs
+
+     def backproject_rgbd(
+         self, rgb: np.ndarray, depth: np.ndarray, max_depth: float = 10.0, pixel_step: int = 1
+     ) -> Tuple[np.ndarray, np.ndarray]:
+         """Backproject an RGB-D image to 3D points in the camera frame.
+
+         Args:
+             rgb: RGB image (H, W, 3)
+             depth: Depth image (H, W) in mm
+             max_depth: Maximum depth in meters
+             pixel_step: Sample every Nth pixel
+
+         Returns:
+             points: (N, 3) array of 3D points in camera frame
+             colors: (N, 3) array of RGB colors normalized to [0, 1]
+         """
+         depth_ds = depth[::pixel_step, ::pixel_step]
+         rgb_ds = rgb[::pixel_step, ::pixel_step]
+
+         h_ds, w_ds = depth_ds.shape
+
+         u, v = np.meshgrid(np.arange(w_ds), np.arange(h_ds))
+
+         depth_m = depth_ds.astype(np.float32) / 1000.0
+
+         valid = (depth_m > 0) & (depth_m < max_depth)
+
+         u = u[valid]
+         v = v[valid]
+         z = depth_m[valid]
+
+         u_original = u * pixel_step
+         v_original = v * pixel_step
+
+         x = (u_original - self.intrinsics["cx"]) * z / self.intrinsics["fx"]
+         y = (v_original - self.intrinsics["cy"]) * z / self.intrinsics["fy"]
+
+         points = np.stack([x, y, z], axis=-1)
+         colors = rgb_ds[valid] / 255.0
+
+         return points, colors
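+
+
+ # Minimal usage sketch (hypothetical data layout; imageio is an assumed
+ # reader, any uint8/uint16 image loader works):
+ #
+ #     import imageio.v3 as iio
+ #
+ #     gen = PointCloudGenerator(data_dir="demcap_data", camera_name="nav_front_d455")
+ #     pair = gen._find_rgb_depth_pairs()[0]
+ #     rgb = iio.imread(pair["rgb_file"])      # (H, W, 3) uint8
+ #     depth = iio.imread(pair["depth_file"])  # (H, W) uint16 depth in mm
+ #     points, colors = gen.backproject_rgbd(rgb, depth, pixel_step=4)
+ #     pose = gen.robot_pose_interpolator.interpolate_pose(pair["timestamp_s"])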