wunderscout-0.1.11-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wunderscout/__init__.py +15 -0
- wunderscout/core.py +167 -0
- wunderscout/data.py +33 -0
- wunderscout/exporters.py +42 -0
- wunderscout/geometry.py +74 -0
- wunderscout/heatmap.py +115 -0
- wunderscout/heatmaps.py +271 -0
- wunderscout/pass_network.py +103 -0
- wunderscout/teams.py +76 -0
- wunderscout/vision.py +155 -0
- wunderscout-0.1.11.dist-info/METADATA +87 -0
- wunderscout-0.1.11.dist-info/RECORD +13 -0
- wunderscout-0.1.11.dist-info/WHEEL +4 -0
wunderscout/__init__.py
ADDED
@@ -0,0 +1,15 @@
from .vision import VisionEngine
from .geometry import PitchMapper
from .teams import TeamClassifier
from .core import ScoutingPipeline
from .exporters import DataExporter
from .heatmaps import HeatmapGenerator

__all__ = [
    "VisionEngine",
    "PitchMapper",
    "TeamClassifier",
    "ScoutingPipeline",
    "DataExporter",
    "HeatmapGenerator",
]

wunderscout/core.py
ADDED
@@ -0,0 +1,167 @@
import cv2
import supervision as sv
import numpy as np
from pathlib import Path
from .vision import VisionEngine
from .geometry import PitchMapper
from .teams import TeamClassifier
from .exporters import DataExporter
from .data import TrackingResult


class ScoutingPipeline:
    def __init__(self, player_weights, field_weights):
        self.engine = VisionEngine(player_weights, field_weights)
        self.mapper = PitchMapper()
        self.classifier = TeamClassifier()

    def run(self, video_path, output_video_path=None):
        # 1. Warm-up (Calibration)
        print("WORKER: Calibrating teams...")
        crops = self.engine.get_calibration_crops(video_path)
        if len(crops) > 0:
            embeddings = self.engine.get_embeddings(crops)
            self.classifier.fit(embeddings)
        else:
            print("WARNING: No player crops found for calibration.")

        # 2. Setup Video I/O
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        out = None
        if output_video_path:
            output_path_obj = Path(output_video_path)
            output_path_obj.parent.mkdir(parents=True, exist_ok=True)
            out = cv2.VideoWriter(
                output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
            )
            if not out.isOpened():
                print(f"ERROR: Could not create video file at {output_video_path}")
                out = None

        tracker = sv.ByteTrack()
        tracking_results = {}

        # ID Constants
        BALL_ID = 0
        GOALKEEPER_ID = 1
        PLAYER_ID = 2
        REFEREE_ID = 3

        # 3. Main Processing Loop
        print(f"WORKER: Starting processing: {video_path}")
        frame_generator = sv.get_video_frames_generator(video_path)

        frame_idx = -1
        for frame_idx, frame in enumerate(frame_generator):
            print(f"WORKER: Processing frame {frame_idx}")

            # --- A. DETECTION ---
            all_dets = self.engine.detect_players(frame)
            f_res = self.engine.detect_field(frame)

            # --- B. FIELD HOMOGRAPHY ---
            H = None
            if f_res.keypoints is not None and len(f_res.keypoints.xy) > 0:
                H = self.mapper.get_matrix(
                    f_res.keypoints.xy[0].cpu().numpy(),
                    f_res.keypoints.conf[0].cpu().numpy(),
                )
            else:
                H = self.mapper.last_h

            # --- C. SEPARATE BALL & OTHERS ---
            ball_detections = all_dets[all_dets.class_id == BALL_ID]
            ball_detections.xyxy = sv.pad_boxes(xyxy=ball_detections.xyxy, px=10)

            other_detections = all_dets[all_dets.class_id != BALL_ID]
            other_detections = other_detections.with_nms(threshold=0.5)

            # --- D. TRACKING ---
            tracked_objects = tracker.update_with_detections(other_detections)

            # Split tracked objects
            tracked_players = tracked_objects[tracked_objects.class_id == PLAYER_ID]
            tracked_gks = tracked_objects[tracked_objects.class_id == GOALKEEPER_ID]
            tracked_refs = tracked_objects[tracked_objects.class_id == REFEREE_ID]

            # --- E. TEAM CLASSIFICATION ---

            # 1. Players
            if len(tracked_players) > 0:
                p_crops = [sv.crop_image(frame, xyxy) for xyxy in tracked_players.xyxy]
                p_pil = [sv.cv2_to_pillow(c) for c in p_crops]
                p_embeddings = self.engine.get_embeddings(p_pil)

                final_team_ids = []
                for i, tid in enumerate(tracked_players.tracker_id):
                    team_id = self.classifier.get_consensus_team(tid, p_embeddings[i])
                    final_team_ids.append(team_id)

                tracked_players.class_id = np.array(final_team_ids)

            # 2. Goalkeepers
            if len(tracked_gks) > 0 and len(tracked_players) > 0:
                tracked_gks.class_id = self.classifier.resolve_goalkeepers_team_id(
                    tracked_players, tracked_gks
                )

            # 3. Referees (Shift ID 3 -> 2)
            if len(tracked_refs) > 0:
                tracked_refs.class_id -= 1

            # --- F. DATA STORAGE ---
            tracking_results[frame_idx] = {"players": {}, "ball": None}
            data_targets = sv.Detections.merge([tracked_players, tracked_gks])

            if H is not None:
                if len(data_targets) > 0:
                    feet_coords = data_targets.get_anchors_coordinates(
                        sv.Position.BOTTOM_CENTER
                    )
                    transformed_feet = self.mapper.transform(feet_coords, H)

                    for i, tid in enumerate(data_targets.tracker_id):
                        px, py = transformed_feet[i]
                        tracking_results[frame_idx]["players"][tid] = (
                            max(0.0, min(1.0, px)),
                            max(0.0, min(1.0, py)),
                        )

                if len(ball_detections) > 0:
                    ball_coords = ball_detections.get_anchors_coordinates(
                        sv.Position.CENTER
                    )
                    transformed_ball = self.mapper.transform([ball_coords[0]], H)
                    bx, by = transformed_ball[0]
                    tracking_results[frame_idx]["ball"] = (
                        max(0.0, min(1.0, bx)),
                        max(0.0, min(1.0, by)),
                    )

            # --- G. DRAW & WRITE VIDEO ---
            if out:
                all_tracked = sv.Detections.merge(
                    [tracked_players, tracked_gks, tracked_refs]
                )
                annotated_frame = self.engine.draw_annotations(
                    frame, all_tracked, ball_detections
                )
                out.write(annotated_frame)

        # 4. Cleanup
        if out:
            out.release()
            print(f"WORKER: Video saved to {output_video_path}")
        cap.release()

        # 5. Return data
        return TrackingResult(
            frames=tracking_results,
            team_assignments=self.classifier.get_final_assignments(),
            total_frames=frame_idx + 1,
            fps=fps,
        )

wunderscout/data.py
ADDED
@@ -0,0 +1,33 @@
from dataclasses import dataclass


@dataclass
class TrackingResult:
    frames: dict[int, dict]
    team_assignments: dict[int, int]
    total_frames: int
    fps: float

    def get_team_players(self, team: int) -> list[int]:
        """Get player IDs for a specific team (0 or 1)."""
        return [tid for tid, t in self.team_assignments.items() if t == team]

    def get_all_player_ids(self) -> list[int]:
        """Get all player IDs."""
        return list(self.team_assignments.keys())

    def get_player_trajectory(self, player_id: int) -> list[tuple[float, float]]:
        """Get all positions for one player."""
        return [
            self.frames[f]["players"][player_id]
            for f in sorted(self.frames.keys())
            if player_id in self.frames[f]["players"]
        ]

    def get_ball_trajectory(self) -> list[tuple[float, float]]:
        """Get all ball positions."""
        return [
            self.frames[f]["ball"]
            for f in sorted(self.frames.keys())
            if self.frames[f]["ball"] is not None
        ]

wunderscout/exporters.py
ADDED
@@ -0,0 +1,42 @@
import csv
from pathlib import Path
from .data import TrackingResult


class DataExporter:
    @staticmethod
    def save_csvs(result: TrackingResult, output_path: str):
        """Export tracking data to CSV files (one per team)."""
        path_obj = Path(output_path)
        path_obj.parent.mkdir(parents=True, exist_ok=True)
        base_name = str(path_obj.with_suffix(""))

        team1_ids = result.get_team_players(0)
        team2_ids = result.get_team_players(1)

        def write_file(filename, team_name, ids):
            with open(filename, "w", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(
                    ["", "", ""] + [team_name for _ in ids for _ in (0, 1)] + ["", ""]
                )
                writer.writerow(
                    ["", "", ""] + [str(pid) for pid in ids for _ in (0, 1)] + ["", ""]
                )
                writer.writerow(
                    ["Period", "Frame", "Time [s]"]
                    + [f"Player{pid}_{axis}" for pid in ids for axis in ("X", "Y")]
                    + ["Ball_X", "Ball_Y"]
                )

                for f_idx in range(result.total_frames):
                    data = result.frames.get(f_idx, {"ball": None, "players": {}})
                    row = [1, f_idx, f"{f_idx / result.fps:.2f}"]
                    for tid in ids:
                        coords = data["players"].get(tid, ("NaN", "NaN"))
                        row.extend(coords)
                    row.extend(data["ball"] if data["ball"] else ("NaN", "NaN"))
                    writer.writerow(row)

        write_file(f"{base_name}_Team1.csv", "Team1", sorted(team1_ids))
        write_file(f"{base_name}_Team2.csv", "Team2", sorted(team2_ids))

wunderscout/geometry.py
ADDED
@@ -0,0 +1,74 @@
import cv2
import numpy as np

PITCH_CONFIG = {
    # --- LEFT GOAL LINE ---
    0: (0.000, 0.000),  # Top-Left Corner
    1: (0.000, 0.204),  # Top Edge of Penalty Box
    2: (0.000, 0.365),  # Top Edge of Goal Area
    3: (0.000, 0.635),  # Bottom Edge of Goal Area
    4: (0.000, 0.796),  # Bottom Edge of Penalty Box
    5: (0.000, 1.000),  # Bottom-Left Corner
    # --- LEFT PENALTY AREA ---
    6: (0.052, 0.365),
    7: (0.052, 0.635),
    8: (0.105, 0.500),  # Penalty Spot (Left)
    9: (0.157, 0.204),
    10: (0.157, 0.392),
    11: (0.157, 0.608),
    12: (0.157, 0.796),
    # --- MIDFIELD ---
    13: (0.413, 0.500),
    14: (0.500, 0.000),
    15: (0.500, 0.365),
    16: (0.500, 0.635),
    17: (0.500, 1.000),
    18: (0.587, 0.500),
    # --- RIGHT PENALTY AREA ---
    19: (0.843, 0.204),
    20: (0.843, 0.392),
    21: (0.843, 0.608),
    22: (0.843, 0.796),
    23: (0.895, 0.500),  # Penalty Spot (Right)
    24: (0.948, 0.365),
    25: (0.948, 0.635),
    # --- RIGHT GOAL LINE ---
    26: (1.000, 0.000),
    27: (1.000, 0.204),
    28: (1.000, 0.365),
    29: (1.000, 0.635),
    30: (1.000, 0.796),
    31: (1.000, 1.000),
}


class PitchMapper:
    def __init__(self, pitch_config=PITCH_CONFIG):
        self.pitch_config = pitch_config
        self.last_h = None

    def get_matrix(self, keypoints_xy, keypoints_conf):
        src_points = []
        dst_points = []

        for i, (xy, conf) in enumerate(zip(keypoints_xy, keypoints_conf)):
            if conf > 0.5 and i in self.pitch_config:
                src_points.append(xy)
                dst_points.append(self.pitch_config[i])

        if len(src_points) >= 4:
            H, _ = cv2.findHomography(
                np.array(src_points), np.array(dst_points), cv2.RANSAC
            )
            self.last_h = H

        return self.last_h

    def transform(self, points, H=None):
        target_h = H if H is not None else self.last_h
        if target_h is None or len(points) == 0:
            return []

        points_reshaped = np.array(points).reshape(-1, 1, 2).astype(np.float32)
        projected = cv2.perspectiveTransform(points_reshaped, target_h)
        return projected.reshape(-1, 2)

wunderscout/heatmap.py
ADDED
@@ -0,0 +1,115 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import json
from scipy.stats import gaussian_kde

# === Load the raw tracking data CSV ===
# NOTE: "header=2" → skip the first two rows (team/labels) and use row 3 as header
df = pd.read_csv(
    "./data/Sample_Game_1_RawTrackingData_Away_Team.csv",
    header=2,
)

# === Clean column names so each player has _X and _Y ===
cleaned_columns = []
colnames = df.columns.tolist()
i = 0
while i < len(colnames):
    col = colnames[i]
    if col.startswith("Player") or col.startswith("Ball"):
        cleaned_columns.append(f"{col}_X")
        cleaned_columns.append(f"{col}_Y")
        i += 2
    else:
        cleaned_columns.append(col)
        i += 1
df.columns = cleaned_columns

print("Columns cleaned. First few rows:")
print(df.head())

# === Extract Player17 (drop NaN values where tracking failed) ===
player17 = df[["Player17_X", "Player17_Y"]].dropna()
x = player17["Player17_X"].to_numpy()
y = player17["Player17_Y"].to_numpy()

# === Detect scale (normalized [0,1] or real meters) ===
if x.max() <= 1.5 and y.max() <= 1.5:
    print("Scaling Player17 data from normalized [0,1] to meters...")
    x = x * 105  # pitch length in meters
    y = y * 68  # pitch width in meters
else:
    print("Data appears to already be in meters, leaving as is.")

print("First 10 points:", list(zip(x[:10], y[:10])))

# =============================================================================
# 1. Scatter Plot (sanity check, raw positions)
# =============================================================================
fig, ax = plt.subplots(figsize=(10, 7))
# Pitch outline
ax.plot([0, 105, 105, 0, 0], [0, 0, 68, 68, 0], color="black")
ax.plot([52.5, 52.5], [0, 68], color="black")  # halfway line
# Player positions
ax.scatter(x, y, s=1, alpha=0.3, color="blue")
ax.set_xlim(0, 105)
ax.set_ylim(0, 68)
ax.set_title("Player17 Movement Scatter (raw positions)")
plt.savefig("./heatmap/player17_scatter.png", dpi=150, bbox_inches="tight")

# =============================================================================
# 2. Histogram Heatmap (occupancy grid)
# =============================================================================
heatmap, xedges, yedges = np.histogram2d(x, y, bins=(50, 34), range=[[0, 105], [0, 68]])

fig, ax = plt.subplots(figsize=(10, 7))
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
im = ax.imshow(
    heatmap.T, origin="lower", extent=extent, cmap="Blues", alpha=0.7, aspect="auto"
)
ax.plot([0, 105, 105, 0, 0], [0, 0, 68, 68, 0], color="black")
ax.plot([52.5, 52.5], [0, 68], color="black")
fig.colorbar(im, ax=ax, label="Frames")
ax.set_title("Player17 Heatmap (Histogram)")
plt.savefig("./heatmap/player17_histogram.png", dpi=150, bbox_inches="tight")

# === Export histogram data as JSON for three.js ===
heatmap_data = {
    "xedges": xedges.tolist(),
    "yedges": yedges.tolist(),
    "values": heatmap.T.tolist(),  # transpose so rows correspond to y-axis correctly
}
with open("./heatmap/player17_histogram.json", "w") as f:
    json.dump(heatmap_data, f)

# =============================================================================
# 3. KDE Heatmap (smoothed density field)
# =============================================================================
values = np.vstack([x, y])
kde = gaussian_kde(values)

# Define mesh grid
X, Y = np.meshgrid(np.linspace(0, 105, 100), np.linspace(0, 68, 68))
Z = kde(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)

fig, ax = plt.subplots(figsize=(10, 7))
sns.kdeplot(x=x, y=y, fill=True, cmap="Blues", alpha=0.7, thresh=0.05, levels=50, ax=ax)
ax.plot([0, 105, 105, 0, 0], [0, 0, 68, 68, 0], color="black")
ax.plot([52.5, 52.5], [0, 68], color="black")
ax.set_xlim(0, 105)
ax.set_ylim(0, 68)
ax.set_title("Player17 Heatmap (KDE Smoothed)")
plt.savefig("./heatmap/player17_kde.png", dpi=150, bbox_inches="tight")

# === Export KDE density field for three.js ===
kde_data = {
    "x": X[0].tolist(),  # x grid coordinates
    "y": Y[:, 0].tolist(),  # y grid coordinates
    "values": Z.tolist(),  # density values
}
with open("./heatmap/player17_kde.json", "w") as f:
    json.dump(kde_data, f)

print("Outputs saved: scatter, histogram PNG+JSON, KDE PNG+JSON for Player17")

wunderscout/heatmaps.py
ADDED
@@ -0,0 +1,271 @@
import numpy as np
import json
from scipy.stats import gaussian_kde
from pathlib import Path
from typing import Optional, Literal, Any
from .data import TrackingResult


class HeatmapGenerator:
    def __init__(
        self,
        pitch_length: float = 105.0,
        pitch_width: float = 68.0,
        histogram_bins: tuple[int, int] = (50, 34),
        kde_grid_size: tuple[int, int] = (100, 68),
        min_samples_for_kde: int = 10,  # Minimum samples needed for KDE
    ):
        """
        Initialize heatmap generator with pitch dimensions and resolution.

        Args:
            pitch_length: Length of pitch in meters (default 105m)
            pitch_width: Width of pitch in meters (default 68m)
            histogram_bins: (x_bins, y_bins) for histogram heatmap
            kde_grid_size: (x_points, y_points) for KDE grid resolution
            min_samples_for_kde: Minimum number of samples required for KDE
        """
        self.pitch_length = pitch_length
        self.pitch_width = pitch_width
        self.histogram_bins = histogram_bins
        self.kde_grid_size = kde_grid_size
        self.min_samples_for_kde = min_samples_for_kde

    def _scale_to_meters(self, positions: np.ndarray) -> np.ndarray:
        """Convert normalized [0, 1] coordinates to meters."""
        scaled = positions.copy()
        scaled[:, 0] *= self.pitch_length
        scaled[:, 1] *= self.pitch_width
        return scaled

    def _has_sufficient_variation(self, x: np.ndarray, y: np.ndarray) -> bool:
        """Check if data has sufficient spatial variation for KDE."""
        if len(x) < 2:
            return False

        # Check if all points are identical
        x_range = np.ptp(x)  # peak-to-peak (max - min)
        y_range = np.ptp(y)

        # Need at least some variation in both dimensions
        # Using 1cm as minimum threshold
        return x_range > 0.01 and y_range > 0.01

    def generate_player_heatmap(
        self,
        result: TrackingResult,
        player_id: int,
        method: Literal["histogram", "kde", "both"] = "both",
    ) -> dict[str, Any]:
        """
        Generate heatmap for a single player.

        Args:
            result: TrackingResult from pipeline
            player_id: Player tracker ID
            method: "histogram", "kde", or "both"

        Returns:
            Dictionary with heatmap data in format ready for JSON export
        """
        trajectory = result.get_player_trajectory(player_id)

        if len(trajectory) == 0:
            raise ValueError(f"No trajectory data found for player {player_id}")

        positions = np.array(trajectory)
        positions_meters = self._scale_to_meters(positions)

        x, y = positions_meters[:, 0], positions_meters[:, 1]

        output: dict[str, Any] = {
            "player_id": player_id,
            "sample_count": len(trajectory),
        }

        # Always try histogram (works with any amount of data)
        if method in ["histogram", "both"]:
            try:
                histogram_result = self._compute_histogram(x, y)
                output["histogram"] = histogram_result
            except Exception as e:
                print(f"Warning: Histogram failed for player {player_id}: {e}")
                # Don't include histogram key at all if it fails

        # Only attempt KDE if we have enough quality data
        if method in ["kde", "both"]:
            if len(trajectory) < self.min_samples_for_kde:
                print(
                    f"Info: Player {player_id} has only {len(trajectory)} samples "
                    f"(minimum {self.min_samples_for_kde} required for KDE). "
                    f"Skipping KDE, histogram only."
                )
                # Don't include kde key at all
            elif not self._has_sufficient_variation(x, y):
                print(
                    f"Info: Player {player_id} has insufficient spatial variation "
                    f"for KDE. Skipping KDE, histogram only."
                )
                # Don't include kde key at all
            else:
                try:
                    kde_result = self._compute_kde(x, y)
                    output["kde"] = kde_result
                except Exception as e:
                    print(f"Warning: KDE failed for player {player_id}: {e}")
                    # Don't include kde key at all if it fails

        return output

    def _compute_histogram(self, x: np.ndarray, y: np.ndarray) -> dict[str, Any]:
        """Compute 2D histogram heatmap."""
        heatmap, xedges, yedges = np.histogram2d(
            x,
            y,
            bins=self.histogram_bins,
            range=[[0, self.pitch_length], [0, self.pitch_width]],
        )

        return {
            "xedges": xedges.tolist(),
            "yedges": yedges.tolist(),
            "values": heatmap.T.tolist(),
        }

    def _compute_kde(self, x: np.ndarray, y: np.ndarray) -> dict[str, Any]:
        """
        Compute KDE smoothed density field.

        Returns dict with:
        - x: 1D list of x coordinates
        - y: 1D list of y coordinates
        - values: 2D list where values[i][j] = density at [x[j], y[i]]
        """
        # Add small jitter to prevent perfect collinearity
        # This helps with edge cases where points are nearly identical
        jitter_amount = 0.01  # 1cm jitter
        x_jittered = x + np.random.normal(0, jitter_amount, size=x.shape)
        y_jittered = y + np.random.normal(0, jitter_amount, size=y.shape)

        values = np.vstack([x_jittered, y_jittered])
        kde = gaussian_kde(values)

        # Create coordinate grids
        x_coords = np.linspace(0, self.pitch_length, self.kde_grid_size[0])
        y_coords = np.linspace(0, self.pitch_width, self.kde_grid_size[1])
        X, Y = np.meshgrid(x_coords, y_coords)

        # Evaluate KDE on grid
        positions = np.vstack([X.ravel(), Y.ravel()])
        Z = kde(positions).reshape(X.shape)

        return {
            "x": x_coords.tolist(),
            "y": y_coords.tolist(),
            "values": Z.tolist(),
        }

    def generate_team_heatmap(
        self,
        result: TrackingResult,
        team: int,
        method: Literal["histogram", "kde", "both"] = "both",
    ) -> dict[str, Any]:
        """
        Generate aggregated heatmap for entire team.

        Args:
            result: TrackingResult from pipeline
            team: Team ID (0 or 1)
            method: "histogram", "kde", or "both"
        """
        player_ids = result.get_team_players(team)

        if len(player_ids) == 0:
            raise ValueError(f"No players found for team {team}")

        # Collect all positions from all players
        all_positions = []
        for pid in player_ids:
            trajectory = result.get_player_trajectory(pid)
            all_positions.extend(trajectory)

        if len(all_positions) == 0:
            raise ValueError(f"No position data found for team {team}")

        positions = np.array(all_positions)
        positions_meters = self._scale_to_meters(positions)
        x, y = positions_meters[:, 0], positions_meters[:, 1]

        output: dict[str, Any] = {
            "team_id": team,
            "player_count": len(player_ids),
            "sample_count": len(all_positions),
        }

        # Histogram (always attempt)
        if method in ["histogram", "both"]:
            try:
                histogram_result = self._compute_histogram(x, y)
                output["histogram"] = histogram_result
            except Exception as e:
                print(f"Warning: Team histogram failed for team {team}: {e}")
                # Don't include histogram key at all if it fails

        # KDE (with quality checks)
        if method in ["kde", "both"]:
            if len(all_positions) < self.min_samples_for_kde:
                print(
                    f"Info: Team {team} has only {len(all_positions)} samples. "
                    f"Skipping KDE."
                )
                # Don't include kde key at all
            elif not self._has_sufficient_variation(x, y):
                print(f"Info: Team {team} has insufficient variation. Skipping KDE.")
                # Don't include kde key at all
            else:
                try:
                    kde_result = self._compute_kde(x, y)
                    output["kde"] = kde_result
                except Exception as e:
                    print(f"Warning: KDE failed for team {team}: {e}")
                    # Don't include kde key at all if it fails

        return output

    def generate_all_players_heatmaps(
        self,
        result: TrackingResult,
        method: Literal["histogram", "kde", "both"] = "both",
    ) -> dict[int, dict[str, Any]]:
        """
        Generate heatmaps for all players.

        Returns:
            Dictionary mapping player_id -> heatmap data
        """
        all_heatmaps = {}

        for player_id in result.get_all_player_ids():
            try:
                all_heatmaps[player_id] = self.generate_player_heatmap(
                    result, player_id, method
                )
            except ValueError as e:
                print(f"Warning: Skipping player {player_id}: {e}")

        return all_heatmaps

    def save_heatmap(
        self,
        heatmap_data: dict[str, Any],
        output_path: str,
        pretty: bool = False,
    ):
        """Save heatmap data to JSON file."""
        path_obj = Path(output_path)
        path_obj.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, "w") as f:
            json.dump(heatmap_data, f, indent=2 if pretty else None)

wunderscout/pass_network.py
ADDED
@@ -0,0 +1,103 @@
import json
import networkx as nx
import matplotlib.pyplot as plt
from collections import defaultdict
import os
from pathlib import Path

# Load match events JSON (replace with your actual file path)
with open(
    "./data/3825818.json",
    "r",
) as f:
    events = json.load(f)

# Build mapping from player ID -> player name from the Starting XI event
player_id_to_name = {}

for ev in events:
    if ev["type"]["name"] == "Starting XI":
        for lineup in ev["tactics"]["lineup"]:
            pid = lineup["player"]["id"]
            name = lineup["player"]["name"]
            player_id_to_name[pid] = name

# Data structures for passes and positions
edges = defaultdict(int)  # (passer, recipient) -> count of passes
player_positions = defaultdict(list)  # player_id -> list of [x, y] positions

TEAM_NAME = "Real Sociedad"

# Extract completed passes
for ev in events:
    if ev["type"]["name"] == "Pass" and ev["team"]["name"] == TEAM_NAME:
        passer = ev["player"]["id"]
        recipient = ev.get("pass", {}).get("recipient", {}).get("id")
        outcome = ev.get("pass", {}).get("outcome", {"name": "Complete"})["name"]

        if outcome == "Complete" and recipient is not None:
            edges[(passer, recipient)] += 1
            start = ev["location"]
            end = ev["pass"]["end_location"]
            player_positions[passer].append(start)
            player_positions[recipient].append(end)

# Calculate avg positions
avg_positions = {}
for player_id, coords in player_positions.items():
    xs = [pt[0] for pt in coords]
    ys = [pt[1] for pt in coords]
    avg_positions[player_id] = [sum(xs) / len(xs), sum(ys) / len(ys)]

# Build a JSON-friendly structure for export (nodes + links)
nodes = [{"id": pid, "x": pos[0], "y": pos[1]} for pid, pos in avg_positions.items()]
links = [
    {"source": src, "target": tgt, "value": count}
    for (src, tgt), count in edges.items()
]

network = {"nodes": nodes, "links": links}
os.makedirs("pass_network", exist_ok=True)
with open("./pass_network/pass_network.json", "w") as f:
    json.dump(network, f, indent=2)

# Build NetworkX graph
G = nx.DiGraph()

# Add nodes with positions
for pid, pos in avg_positions.items():
    G.add_node(pid, pos=(pos[0], pos[1]))

# Add edges with weights
for (src, tgt), count in edges.items():
    G.add_edge(src, tgt, weight=count)

# Draw graph
pos = nx.get_node_attributes(G, "pos")
labels = {pid: player_id_to_name.get(pid, str(pid)) for pid in G.nodes()}

fig, ax = plt.subplots(figsize=(10, 7))

# Draw pitch outline
ax.set_xlim(0, 120)
ax.set_ylim(0, 80)
ax.plot([0, 120, 120, 0, 0], [0, 0, 80, 80, 0], color="black")

# Draw nodes
nx.draw_networkx_nodes(G, pos, ax=ax, node_color="skyblue", node_size=500)

# Draw edges
nx.draw_networkx_edges(
    G,
    pos,
    ax=ax,
    width=[d["weight"] * 0.2 for _, _, d in G.edges(data=True)],
    alpha=0.7,
    arrowsize=10,
)

# Draw player names
nx.draw_networkx_labels(G, pos, labels=labels, ax=ax, font_size=8)

plt.title("Team Pass Network")
plt.savefig("./pass_network/pass_network_viz.png", dpi=150, bbox_inches="tight")

wunderscout/teams.py
ADDED
@@ -0,0 +1,76 @@
import numpy as np
import umap
from sklearn.cluster import KMeans
import supervision as sv


class TeamClassifier:
    def __init__(self):
        self.reducer = umap.UMAP(n_components=3)
        self.clusterer = KMeans(n_clusters=2, n_init=10, random_state=42)
        self.history = {}

    def fit(self, embeddings):
        projections = self.reducer.fit_transform(embeddings)
        self.clusterer.fit(projections)

    def get_consensus_team(self, tracker_id, embedding):
        proj = self.reducer.transform(embedding.reshape(1, -1))
        pred = self.clusterer.predict(proj)[0]

        if tracker_id not in self.history:
            self.history[tracker_id] = []
        self.history[tracker_id].append(pred)
        if len(self.history[tracker_id]) > 50:
            self.history[tracker_id].pop(0)

        return (
            1
            if (sum(self.history[tracker_id]) / len(self.history[tracker_id])) > 0.5
            else 0
        )

    def resolve_goalkeepers_team_id(self, players, goalkeepers):
        """
        Assigns goalkeepers to the team whose centroid is closest.
        players: sv.Detections (already classified with class_id 0 or 1)
        goalkeepers: sv.Detections
        """
        if len(players) == 0 or len(goalkeepers) == 0:
            return np.array([0] * len(goalkeepers))

        players_xy = players.get_anchors_coordinates(sv.Position.BOTTOM_CENTER)
        goalkeepers_xy = goalkeepers.get_anchors_coordinates(sv.Position.BOTTOM_CENTER)

        # Calculate centroids for Team 0 and Team 1
        team_0_mask = players.class_id == 0
        team_1_mask = players.class_id == 1

        # Handle cases where one team might not be detected yet
        if np.any(team_0_mask):
            team_0_centroid = players_xy[team_0_mask].mean(axis=0)
        else:
            team_0_centroid = np.array([0, 0])

        if np.any(team_1_mask):
            team_1_centroid = players_xy[team_1_mask].mean(axis=0)
        else:
            team_1_centroid = np.array([10000, 10000])  # Far away

        goalkeepers_team_id = []

        for gk_xy in goalkeepers_xy:
            dist_0 = np.linalg.norm(gk_xy - team_0_centroid)
            dist_1 = np.linalg.norm(gk_xy - team_1_centroid)
            goalkeepers_team_id.append(0 if dist_0 < dist_1 else 1)

        return np.array(goalkeepers_team_id)

    def get_final_assignments(self):
        assignments = {}
        for tid, votes in self.history.items():
            if len(votes) > 0:
                avg = sum(votes) / len(votes)
                assignments[tid] = 1 if avg > 0.5 else 0
        return assignments

wunderscout/vision.py
ADDED
@@ -0,0 +1,155 @@
import torch
from ultralytics import YOLO
import supervision as sv
from transformers import AutoProcessor, SiglipVisionModel, data
from roboflow import Roboflow
from tqdm import tqdm
from more_itertools import chunked
import numpy as np
from pathlib import Path


class VisionEngine:
    def __init__(self, player_weights, field_weights, device=None):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.player_model = YOLO(player_weights)
        self.field_model = YOLO(field_weights)

        # Siglip for embeddings
        siglip_path = "google/siglip-base-patch16-224"
        self.siglip_model = SiglipVisionModel.from_pretrained(siglip_path).to(
            self.device
        )
        self.siglip_processor = AutoProcessor.from_pretrained(siglip_path)

        # --- Annotators ---
        # Palette: 0=Blue, 1=Pink, 2=Yellow (Referee)
        self.palette = sv.ColorPalette.from_hex(["#00BFFF", "#FF1493", "#FFD700"])

        self.ellipse_annotator = sv.EllipseAnnotator(
            color=self.palette,
            thickness=2,
        )
        self.label_annotator = sv.LabelAnnotator(
            color=self.palette,
            text_color=sv.Color.from_hex("#000000"),
            text_position=sv.Position.BOTTOM_CENTER,
        )
        self.triangle_annotator = sv.TriangleAnnotator(
            color=sv.Color.from_hex("#FFD700"), base=25, height=21, outline_thickness=1
        )

    def get_calibration_crops(self, video_path, stride=30):
        PLAYER_ID = 2
        frame_generator = sv.get_video_frames_generator(
            source_path=video_path, stride=stride
        )

        crops = []
        for frame in frame_generator:
            detections = self.detect_players(frame)
            # Filter for players only for calibration
            players = detections[detections.class_id == PLAYER_ID]
            frame_crops = [sv.crop_image(frame, xyxy) for xyxy in players.xyxy]
            crops += [sv.cv2_to_pillow(c) for c in frame_crops]

        print(f"VisionEngine: Collected {len(crops)} calibration crops.")
        return crops

    def get_embeddings(self, pil_crops, batch_size=32):
        batches = chunked(pil_crops, batch_size)
        data_list = []

        with torch.no_grad():
            for batch in batches:
                inputs = self.siglip_processor(images=batch, return_tensors="pt").to(
                    self.device
                )
                outputs = self.siglip_model(**inputs)
                embeddings = torch.mean(outputs.last_hidden_state, dim=1).cpu().numpy()
                data_list.append(embeddings)

        return np.concatenate(data_list) if data_list else np.array([])

    def detect_players(self, frame, conf=0.3):
        result = self.player_model.predict(frame, conf=conf, verbose=False)[0]
        return sv.Detections.from_ultralytics(result)

    def detect_field(self, frame, conf=0.3):
        result = self.field_model.predict(frame, conf=conf, verbose=False)[0]
        return result

    def draw_annotations(self, frame, all_detections, ball_detections):
        annotated_frame = frame.copy()

        # 1. Draw Ball
        annotated_frame = self.triangle_annotator.annotate(
            scene=annotated_frame, detections=ball_detections
        )

        # 2. Draw People (Players, GKs, Refs)
        if len(all_detections) > 0:
            # Ensure class_id is int for color mapping
            all_detections.class_id = all_detections.class_id.astype(int)

            labels = [f"#{tracker_id}" for tracker_id in all_detections.tracker_id]

            annotated_frame = self.ellipse_annotator.annotate(
                scene=annotated_frame, detections=all_detections
            )
            annotated_frame = self.label_annotator.annotate(
                scene=annotated_frame, detections=all_detections, labels=labels
            )

        return annotated_frame


class ScoutingTrainer:
    def __init__(self, api_key):
        self.rf = Roboflow(api_key=api_key)

    def train_players(
        self,
        workspace,
        project,
        version,
        epochs=300,
        output_dir="../runs/training/player",
    ):
        project = self.rf.workspace(workspace).project(project)
        dataset = project.version(version).download("yolov11")
        model = YOLO("../data/base_models/yolo11m.pt")

        return model.train(
            data=f"{dataset.location}/data.yaml",
            epochs=epochs,
            imgsz=1280,
            plots=True,
            device=0,
            batch=2,
            project=output_dir,
        )

    def train_field(
        self,
        workspace,
        project,
        version,
        epochs=300,
        output_dir="../runs/training/field",
    ):
        project = self.rf.workspace(workspace).project(project)
        version = project.version(15)
        dataset = version.download("yolov8", location="../data/data_sets/")
        model = YOLO("yolo11m-pose.pt")

        return model.train(
            data=f"{dataset.location}/data.yaml",
            save=True,
            epochs=epochs,
            plots=True,
            imgsz=1080,
            device=0,
            batch=2,
            project=output_dir,
        )

wunderscout-0.1.11.dist-info/METADATA
ADDED
@@ -0,0 +1,87 @@
Metadata-Version: 2.4
Name: wunderscout
Version: 0.1.11
Summary: Scouting and vision tools for YOLO and sports analytics.
Project-URL: Homepage, https://github.com/qhuboo/wunderscout
Project-URL: Issues, https://github.com/qhuboo/wunderscout/issues
Keywords: scouting,sports-analytics,vision,yolo
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Requires-Python: >=3.11
Requires-Dist: matplotlib>=3.10.6
Requires-Dist: more-itertools>=10.8.0
Requires-Dist: networkx>=3.5
Requires-Dist: numpy>=2.3.2
Requires-Dist: opencv-python>=4.11.0.86
Requires-Dist: pandas>=2.3.2
Requires-Dist: python-dotenv>=1.1.1
Requires-Dist: roboflow>=1.2.7
Requires-Dist: scikit-learn>=1.7.2
Requires-Dist: scipy>=1.16.2
Requires-Dist: seaborn>=0.13.2
Requires-Dist: supervision>=0.26.1
Requires-Dist: torch>=2.8.0
Requires-Dist: tqdm>=4.67.1
Requires-Dist: transformers>=4.56.1
Requires-Dist: ultralytics>=8.3.193
Requires-Dist: umap-learn>=0.5.9.post2
Description-Content-Type: text/markdown

# wunderscout

A Python library for extracting player and ball tracking data from soccer match footage using YOLO, Siglip embeddings, and homography.

## Features

- **Detection & Tracking**: Uses YOLO for player/ball/pitch-keypoint detection and ByteTrack for temporal consistency.
- **Automated Team Clustering**: Groups players into teams using Siglip vision transformer embeddings, reduced with UMAP and clustered with K-Means.
- **Pitch Mapping**: Transforms 2D image coordinates to a normalized 0-1 coordinate system using pitch keypoint homography.
- **Goalkeeper Attribution**: Assigns goalkeepers to teams based on proximity to team centroids.
- **Data Export**: Generates Home and Away CSV files containing frame-by-frame XY coordinates (a unit-conversion sketch follows this list).
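
Exported positions use the normalized 0-1 convention, so converting them to real-world units is left to the consumer. A minimal sketch, assuming the 105 m x 68 m defaults that `HeatmapGenerator` uses; the pitch dimensions are an assumption, not part of the export format:

```python
PITCH_LENGTH_M = 105.0  # assumed pitch length (HeatmapGenerator default)
PITCH_WIDTH_M = 68.0    # assumed pitch width (HeatmapGenerator default)


def to_meters(x_norm: float, y_norm: float) -> tuple[float, float]:
    """Convert a normalized (0-1, 0-1) pitch position to meters."""
    return x_norm * PITCH_LENGTH_M, y_norm * PITCH_WIDTH_M


# The center spot (0.5, 0.5) maps to (52.5, 34.0) meters.
print(to_meters(0.5, 0.5))
```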

## Installation

```bash
uv add wunderscout
```

## Usage

The `ScoutingPipeline` class manages calibration, tracking, and data export.

```python
from wunderscout import ScoutingPipeline

# Initialize with paths to trained YOLO weights
pipeline = ScoutingPipeline(
    player_weights="players.pt",
    field_weights="pitch.pt"
)

# Run processing
pipeline.run(
    video_path="input_match.mp4",
    output_video_path="output_match.mp4"
)
```
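
`run()` returns the `TrackingResult` defined in `data.py`, which the exporter and heatmap generator consume. A sketch of that follow-up step; the output paths here are illustrative:

```python
from wunderscout import DataExporter, HeatmapGenerator

# Capture the TrackingResult instead of discarding it
result = pipeline.run(
    video_path="input_match.mp4",
    output_video_path="output_match.mp4"
)

# Writes <base>_Team1.csv and <base>_Team2.csv next to the given path
DataExporter.save_csvs(result, "./exports/match.csv")

# Per-player heatmaps (histogram plus KDE where enough samples exist), saved as JSON
generator = HeatmapGenerator()
for player_id, data in generator.generate_all_players_heatmaps(result).items():
    generator.save_heatmap(data, f"./exports/heatmaps/player_{player_id}.json")
```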

## Internal Components

- **VisionEngine**: Manages YOLO models and generates Siglip embeddings for player crops.
- **PitchMapper**: Computes homography matrices based on a 32-point pitch configuration. Handles RANSAC-based perspective transforms (a standalone sketch follows this list).
- **TeamClassifier**: Performs unsupervised clustering on player embeddings. Uses a rolling consensus buffer to stabilize team assignments across frames.
- **DataExporter**: Formats tracking results into CSV files with frame indices and normalized pitch coordinates.
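
The components can also be used individually. A minimal sketch of `PitchMapper` driven by the field model on a single frame, mirroring what `ScoutingPipeline.run()` does internally; the weight paths and the pixel points passed to `transform()` are placeholders:

```python
import cv2
from wunderscout import PitchMapper, VisionEngine

engine = VisionEngine("players.pt", "pitch.pt")  # same weights as in the Usage example
mapper = PitchMapper()

# Grab one frame from the match video
cap = cv2.VideoCapture("input_match.mp4")
ok, frame = cap.read()
cap.release()

if ok:
    f_res = engine.detect_field(frame)
    if f_res.keypoints is not None and len(f_res.keypoints.xy) > 0:
        H = mapper.get_matrix(
            f_res.keypoints.xy[0].cpu().numpy(),
            f_res.keypoints.conf[0].cpu().numpy(),
        )
        if H is not None:
            # Image-space pixel points (e.g. player feet) -> normalized pitch coords
            print(mapper.transform([[350.0, 280.0], [500.0, 330.0]], H))
```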

## Dependencies

- `ultralytics`
- `supervision`
- `transformers`
- `umap-learn`
- `scikit-learn`
- `opencv-python`

## License

MIT

wunderscout-0.1.11.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
wunderscout/__init__.py,sha256=QPNgcOVyGGefsTeA8LyHsfcPGc2Q4M28J6zU5RYhzr0,355
wunderscout/core.py,sha256=icdSLesum0cZV0MqIlwoxA2L1bfP5nnAGAjiLZy9bSg,6409
wunderscout/data.py,sha256=l6wRBd6WZGskqIguPjzFTfW47zWZYf8qPwzLVbsesyA,1069
wunderscout/exporters.py,sha256=FduXc4q065uF3Hp4G1LMO7xXVRC1DFewPBWnFHmS1bI,1756
wunderscout/geometry.py,sha256=I5lt00O9jOiEoVpPGy5iVglzA7cgaAdUvzfuFBcJbRA,2197
wunderscout/heatmap.py,sha256=5R8Zw5Bnk-8eHTcWudo-1a3Mt83WrSbqYX9MkUoht_8,4268
wunderscout/heatmaps.py,sha256=_c3pKkkvDMfdhHaxG5S9bqOSEdpyxcgozxYogB8xaak,9695
wunderscout/pass_network.py,sha256=QC859Pi5VKSgHu1qrE3Zybvu97lNorsb03UAf1IrSbs,3099
wunderscout/teams.py,sha256=y0IclDACo3F8buVdpqqMCSmZJeWx2uqMkGNbZ6YToVc,2628
wunderscout/vision.py,sha256=fVX3wtwCwe6AiiGxZjH8u4q2gk3t4gCb4MNvmQf7Lhs,5257
wunderscout-0.1.11.dist-info/METADATA,sha256=TT0uNp1KrZWIDpuib26X2Eqx6k-nTSPe2RN6wu0bMI0,2922
wunderscout-0.1.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
wunderscout-0.1.11.dist-info/RECORD,,