unityflow 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1687 @@
1
+ """Unity Asset Reference Tracker.
2
+
3
+ Tracks references to binary assets (textures, meshes, etc.) in Unity YAML files.
4
+ Provides dependency analysis and reverse reference lookup.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import re
12
+ import sqlite3
13
+ from collections.abc import Callable, Iterator
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+ from threading import Lock
18
+ from typing import Any
19
+
20
+ from unityflow.git_utils import UNITY_EXTENSIONS
21
+
22
+ # Common binary asset extensions in Unity
23
+ BINARY_ASSET_EXTENSIONS = {
24
+ # Textures
25
+ ".png",
26
+ ".jpg",
27
+ ".jpeg",
28
+ ".tga",
29
+ ".psd",
30
+ ".tiff",
31
+ ".tif",
32
+ ".gif",
33
+ ".bmp",
34
+ ".exr",
35
+ ".hdr",
36
+ # 3D Models
37
+ ".fbx",
38
+ ".obj",
39
+ ".dae",
40
+ ".3ds",
41
+ ".blend",
42
+ ".max",
43
+ ".ma",
44
+ ".mb",
45
+ # Audio
46
+ ".wav",
47
+ ".mp3",
48
+ ".ogg",
49
+ ".aiff",
50
+ ".aif",
51
+ ".flac",
52
+ ".m4a",
53
+ # Video
54
+ ".mp4",
55
+ ".mov",
56
+ ".avi",
57
+ ".webm",
58
+ # Fonts
59
+ ".ttf",
60
+ ".otf",
61
+ ".fon",
62
+ # Other
63
+ ".dll",
64
+ ".so",
65
+ ".dylib", # Native plugins
66
+ ".shader",
67
+ ".cginc",
68
+ ".hlsl",
69
+ ".glsl", # Shaders
70
+ ".compute", # Compute shaders
71
+ ".bytes",
72
+ ".txt",
73
+ ".json",
74
+ ".xml",
75
+ ".csv", # Data files
76
+ }
77
+
78
+ # Pattern to extract GUID from .meta files
79
+ META_GUID_PATTERN = re.compile(r"^guid:\s*([a-f0-9]{32})\s*$", re.MULTILINE)
80
+
81
+
82
@dataclass
class AssetReference:
    """A single ``{fileID, guid, type}`` reference found in a Unity YAML file.

    Identity is defined by ``(guid, file_id)`` only; the ``source_*`` and
    ``property_path`` fields are contextual metadata and do not take part
    in equality or hashing, so references to the same target de-duplicate
    in sets regardless of where they were found.
    """

    file_id: int
    guid: str
    ref_type: int | None = None
    source_path: str | None = None
    source_file_id: int | None = None
    property_path: str | None = None

    def __eq__(self, other: object) -> bool:
        if isinstance(other, AssetReference):
            return (self.guid, self.file_id) == (other.guid, other.file_id)
        return False

    def __hash__(self) -> int:
        return hash((self.guid, self.file_id))
100
+
101
+
102
@dataclass
class AssetDependency:
    """A resolved (or unresolved) asset dependency, aggregated by GUID."""

    guid: str
    # None when the GUID could not be found in the project's GUID index.
    path: Path | None
    # Coarse classification derived from the file extension (e.g. "Texture").
    asset_type: str | None = None
    # Every individual reference pointing at this GUID.
    references: list[AssetReference] = field(default_factory=list)

    @property
    def is_resolved(self) -> bool:
        """True when the GUID was mapped to an actual file in the project."""
        return self.path is not None

    @property
    def is_binary(self) -> bool:
        """True when the resolved file carries a known binary-asset extension."""
        if self.path is None:
            return False
        suffix = self.path.suffix.lower()
        return suffix in BINARY_ASSET_EXTENSIONS
122
+
123
+
124
@dataclass
class GUIDIndex:
    """Bidirectional mapping between Unity GUIDs and asset paths.

    Offers resolution in both directions (GUID -> path and path -> GUID),
    which makes Unity asset metadata accessible without re-reading .meta
    files on every lookup.

    Example:
        >>> guid_index = build_guid_index("/path/to/unity/project")
        >>> path = guid_index.get_path("f4afdcb1cbadf954ba8b1cf465429e17")
        >>> print(path)  # Assets/Scripts/PlayerController.cs
        >>> name = guid_index.resolve_name("f4afdcb1cbadf954ba8b1cf465429e17")
        >>> print(name)  # PlayerController
    """

    guid_to_path: dict[str, Path] = field(default_factory=dict)
    path_to_guid: dict[Path, str] = field(default_factory=dict)
    project_root: Path | None = None

    def __len__(self) -> int:
        return len(self.guid_to_path)

    def get_path(self, guid: str) -> Path | None:
        """Return the asset path registered for *guid*, or None."""
        return self.guid_to_path.get(guid)

    def get_guid(self, path: Path) -> str | None:
        """Return the GUID for *path*, trying the project-relative form too."""
        guid = self.path_to_guid.get(path)
        if guid is not None:
            return guid

        # An absolute path may be stored under its project-relative form.
        if self.project_root is not None:
            try:
                relative = path.relative_to(self.project_root)
            except ValueError:
                return None
            return self.path_to_guid.get(relative)

        return None

    def resolve_name(self, guid: str) -> str | None:
        """Resolve a GUID to an asset name (filename without extension).

        Particularly useful for turning a MonoBehaviour's m_Script GUID
        into the script class name.

        Args:
            guid: The GUID to resolve

        Returns:
            The asset name (stem), or None if GUID is not found

        Example:
            >>> name = guid_index.resolve_name("f4afdcb1cbadf954ba8b1cf465429e17")
            >>> print(name)  # "PlayerController"
        """
        path = self.guid_to_path.get(guid)
        return None if path is None else path.stem

    def resolve_path(self, guid: str) -> Path | None:
        """Resolve a GUID to an asset path.

        Alias for get_path() with a more descriptive name for LLM usage.

        Args:
            guid: The GUID to resolve

        Returns:
            The asset path, or None if GUID is not found

        Example:
            >>> path = guid_index.resolve_path("f4afdcb1cbadf954ba8b1cf465429e17")
            >>> print(path)  # Path("Assets/Scripts/PlayerController.cs")
        """
        return self.get_path(guid)

    def batch_resolve_names(self, guids: set[str]) -> dict[str, str]:
        """Batch resolve multiple GUIDs to asset names.

        More efficient than repeated resolve_name() calls when many
        components are processed at once; unresolvable GUIDs are simply
        omitted from the result.

        Args:
            guids: Set of GUIDs to resolve

        Returns:
            Dict mapping each resolvable GUID to its asset name (file stem).

        Example:
            >>> names = guid_index.batch_resolve_names({"abc123...", "def456..."})
            >>> print(names)  # {"abc123...": "PlayerController", "def456...": "EnemyAI"}
        """
        lookup = self.guid_to_path
        return {guid: lookup[guid].stem for guid in guids if guid in lookup}
229
+
230
+
231
def find_unity_project_root(start_path: Path) -> Path | None:
    """Locate the Unity project root by walking upward looking for Assets/.

    A directory containing an ``Assets`` folder is treated as the root;
    a sibling ``ProjectSettings`` folder confirms it, but an Assets folder
    alone is already accepted as a strong enough indicator.

    Args:
        start_path: File or directory to start the upward search from.

    Returns:
        The project root directory (parent of Assets), or None when no
        Assets folder is found within 20 parent levels.
    """
    candidate = start_path.resolve()

    # Files cannot contain an Assets folder; begin at the enclosing directory.
    if candidate.is_file():
        candidate = candidate.parent

    for _ in range(20):  # bounded upward walk
        if (candidate / "Assets").is_dir():
            return candidate

        if candidate.parent == candidate:  # reached the filesystem root
            return None
        candidate = candidate.parent

    return None
263
+
264
+
265
def get_local_package_paths(project_root: Path) -> list[Path]:
    """Collect local package directories declared with "file:" in manifest.json.

    Unity allows dependency values of the form
    ``"file:../../NK.Packages/com.domybest.mybox@1.7.0"``; such paths are
    relative to the ``Packages`` directory (where manifest.json lives).

    Args:
        project_root: Path to Unity project root

    Returns:
        Resolved absolute paths of the referenced directories that actually
        exist on disk. Missing or malformed manifests yield an empty list
        (best effort, never raises).
    """
    packages_dir = project_root / "Packages"
    manifest_path = packages_dir / "manifest.json"
    if not manifest_path.exists():
        return []

    local_paths: list[Path] = []
    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
        for dep_value in manifest.get("dependencies", {}).values():
            if not (isinstance(dep_value, str) and dep_value.startswith("file:")):
                continue

            # "file:../../Pkgs/foo" -> "../../Pkgs/foo", resolved against Packages/.
            relative = dep_value[len("file:"):]
            candidate = (packages_dir / relative).resolve()

            # Only keep entries that point at an existing directory.
            if candidate.is_dir():
                local_paths.append(candidate)
    except (OSError, json.JSONDecodeError, KeyError):
        pass  # best effort: a broken manifest simply contributes nothing

    return local_paths
306
+
307
+
308
def build_guid_index(
    project_root: Path,
    include_packages: bool = False,
    progress_callback: Callable[[int, int], None] | None = None,
) -> GUIDIndex:
    """Build an index of all GUIDs in a Unity project.

    Scans:
    - Assets/ folder (always)
    - Packages/ folder (when include_packages=True, for embedded packages)
    - Library/PackageCache/ (when include_packages=True, for registry packages)
    - Local packages from manifest.json file: references (when include_packages=True)

    Args:
        project_root: Path to Unity project root
        include_packages: Whether to include Packages/ and Library/PackageCache/
        progress_callback: Optional callable invoked as callback(current, total)
            once per .meta file processed. (Annotation fixed: was the builtin
            ``callable``, which is not a valid type.)

    Returns:
        GUIDIndex mapping GUIDs to asset paths (project-relative when
        possible, absolute otherwise).
    """
    index = GUIDIndex(project_root=project_root)

    # Determine which directories to scan for .meta files.
    search_paths = [project_root / "Assets"]
    if include_packages:
        # Embedded packages live directly in Packages/.
        packages_dir = project_root / "Packages"
        if packages_dir.is_dir():
            search_paths.append(packages_dir)

        # Registry packages are downloaded into Library/PackageCache/.
        package_cache_dir = project_root / "Library" / "PackageCache"
        if package_cache_dir.is_dir():
            search_paths.append(package_cache_dir)

        # Local packages referenced via "file:" entries in manifest.json.
        search_paths.extend(get_local_package_paths(project_root))

    meta_files: list[Path] = []
    for search_path in search_paths:
        if search_path.is_dir():
            meta_files.extend(search_path.rglob("*.meta"))

    total = len(meta_files)

    for i, meta_path in enumerate(meta_files):
        if progress_callback:
            progress_callback(i + 1, total)

        try:
            content = meta_path.read_text(encoding="utf-8", errors="replace")
        except (OSError, UnicodeDecodeError):
            continue  # skip unreadable files

        match = META_GUID_PATTERN.search(content)
        if not match:
            continue

        guid = match.group(1)
        # The asset lives next to its .meta file: "Foo.png.meta" -> "Foo.png".
        asset_path = meta_path.with_suffix("")

        # Prefer project-relative paths; local packages may live outside the
        # project tree, in which case the absolute path is stored instead.
        try:
            asset_path = asset_path.relative_to(project_root)
        except ValueError:
            pass
        index.guid_to_path[guid] = asset_path
        index.path_to_guid[asset_path] = guid

    return index
381
+
382
+
383
def extract_guid_references(data: Any, source_path: str | None = None) -> Iterator[AssetReference]:
    """Recursively walk parsed YAML data and yield every external reference.

    A Unity reference node is a mapping that carries both a "guid" and a
    "fileID" key; everything else is traversed looking for nested nodes.

    Args:
        data: Parsed YAML value (dict, list, or scalar)
        source_path: Dotted property path accumulated during recursion

    Yields:
        AssetReference objects for each external reference found
    """
    if isinstance(data, dict):
        guid = data.get("guid")
        # Only a non-empty string GUID alongside a fileID counts as a reference.
        if "fileID" in data and isinstance(guid, str) and guid:
            file_id = data.get("fileID", 0)
            ref_type = data.get("type")
            yield AssetReference(
                file_id=int(file_id) if file_id else 0,
                guid=guid,
                ref_type=int(ref_type) if ref_type else None,
                property_path=source_path,
            )

        # Descend into every value, extending the property path with the key.
        for key, value in data.items():
            prefix = f"{source_path}.{key}" if source_path else key
            yield from extract_guid_references(value, prefix)

    elif isinstance(data, list):
        for position, element in enumerate(data):
            prefix = f"{source_path}[{position}]" if source_path else f"[{position}]"
            yield from extract_guid_references(element, prefix)
417
+
418
+
419
def get_file_dependencies(
    file_path: Path,
    guid_index: GUIDIndex | None = None,
) -> list[AssetDependency]:
    """Collect every external asset dependency of a Unity YAML file.

    Args:
        file_path: Path to the Unity YAML file
        guid_index: Optional pre-built GUID index used to resolve GUIDs
            to paths and classify asset types

    Returns:
        AssetDependency objects, one per distinct GUID, sorted with
        resolved dependencies first, then by path (or GUID when unresolved).
    """
    from unityflow.parser import UnityYAMLDocument

    doc = UnityYAMLDocument.load_auto(file_path)

    # Group every raw reference by the GUID it points at, tagging each with
    # the object and file it came from.
    refs_by_guid: dict[str, list[AssetReference]] = {}
    for obj in doc.objects:
        for ref in extract_guid_references(obj.data):
            ref.source_file_id = obj.file_id
            ref.source_path = str(file_path)
            refs_by_guid.setdefault(ref.guid, []).append(ref)

    dependencies: list[AssetDependency] = []
    for guid, refs in refs_by_guid.items():
        resolved_path: Path | None = None
        asset_type: str | None = None

        if guid_index:
            found = guid_index.get_path(guid)
            if found:
                resolved_path = found
                asset_type = _classify_asset_type(found)

        dependencies.append(
            AssetDependency(
                guid=guid,
                path=resolved_path,
                asset_type=asset_type,
                references=refs,
            )
        )

    # Unresolved entries sort last; ties break on path/GUID text.
    dependencies.sort(key=lambda dep: (not dep.is_resolved, str(dep.path or dep.guid)))

    return dependencies
474
+
475
+
476
+ def find_references_to_asset(
477
+ asset_path: Path,
478
+ search_paths: list[Path],
479
+ guid_index: GUIDIndex | None = None,
480
+ extensions: set[str] | None = None,
481
+ progress_callback: callable | None = None,
482
+ ) -> list[tuple[Path, list[AssetReference]]]:
483
+ """Find all files that reference a specific asset.
484
+
485
+ Args:
486
+ asset_path: Path to the asset to search for
487
+ search_paths: Directories to search in
488
+ guid_index: Optional pre-built GUID index
489
+ extensions: File extensions to search (default: Unity YAML extensions)
490
+ progress_callback: Optional callback for progress (current, total)
491
+
492
+ Returns:
493
+ List of (file_path, references) tuples
494
+ """
495
+ from unityflow.parser import UnityYAMLDocument
496
+
497
+ if extensions is None:
498
+ extensions = UNITY_EXTENSIONS
499
+
500
+ # Get the GUID for the asset
501
+ target_guid = None
502
+
503
+ if guid_index:
504
+ target_guid = guid_index.get_guid(asset_path)
505
+
506
+ if not target_guid:
507
+ # Try to read from .meta file
508
+ meta_path = Path(str(asset_path) + ".meta")
509
+ if meta_path.is_file():
510
+ try:
511
+ content = meta_path.read_text(encoding="utf-8")
512
+ match = META_GUID_PATTERN.search(content)
513
+ if match:
514
+ target_guid = match.group(1)
515
+ except OSError:
516
+ pass
517
+
518
+ if not target_guid:
519
+ return []
520
+
521
+ # Collect all Unity YAML files to search
522
+ files_to_search: list[Path] = []
523
+ for search_path in search_paths:
524
+ if search_path.is_file():
525
+ if search_path.suffix.lower() in extensions:
526
+ files_to_search.append(search_path)
527
+ elif search_path.is_dir():
528
+ for ext in extensions:
529
+ files_to_search.extend(search_path.rglob(f"*{ext}"))
530
+
531
+ # Remove duplicates
532
+ files_to_search = list(set(files_to_search))
533
+ total = len(files_to_search)
534
+
535
+ results: list[tuple[Path, list[AssetReference]]] = []
536
+
537
+ for i, file_path in enumerate(files_to_search):
538
+ if progress_callback:
539
+ progress_callback(i + 1, total)
540
+
541
+ try:
542
+ doc = UnityYAMLDocument.load_auto(file_path)
543
+
544
+ refs_found: list[AssetReference] = []
545
+ for obj in doc.objects:
546
+ for ref in extract_guid_references(obj.data):
547
+ if ref.guid == target_guid:
548
+ ref.source_file_id = obj.file_id
549
+ ref.source_path = str(file_path)
550
+ refs_found.append(ref)
551
+
552
+ if refs_found:
553
+ results.append((file_path, refs_found))
554
+ except Exception:
555
+ # Skip files that can't be parsed
556
+ continue
557
+
558
+ # Sort by file path
559
+ results.sort(key=lambda r: str(r[0]))
560
+
561
+ return results
562
+
563
+
564
+ def _classify_asset_type(path: Path) -> str:
565
+ """Classify an asset by its file extension.
566
+
567
+ Args:
568
+ path: Path to the asset
569
+
570
+ Returns:
571
+ Asset type classification string
572
+ """
573
+ ext = path.suffix.lower()
574
+
575
+ # Textures
576
+ texture_exts = {".png", ".jpg", ".jpeg", ".tga", ".psd", ".tiff", ".tif", ".gif", ".bmp", ".exr", ".hdr"}
577
+ if ext in texture_exts:
578
+ return "Texture"
579
+
580
+ # 3D Models
581
+ if ext in {".fbx", ".obj", ".dae", ".3ds", ".blend", ".max", ".ma", ".mb"}:
582
+ return "Model"
583
+
584
+ # Audio
585
+ if ext in {".wav", ".mp3", ".ogg", ".aiff", ".aif", ".flac", ".m4a"}:
586
+ return "Audio"
587
+
588
+ # Video
589
+ if ext in {".mp4", ".mov", ".avi", ".webm"}:
590
+ return "Video"
591
+
592
+ # Fonts
593
+ if ext in {".ttf", ".otf", ".fon"}:
594
+ return "Font"
595
+
596
+ # Shaders
597
+ if ext in {".shader", ".cginc", ".hlsl", ".glsl", ".compute"}:
598
+ return "Shader"
599
+
600
+ # Scripts
601
+ if ext in {".cs", ".js"}:
602
+ return "Script"
603
+
604
+ # Unity YAML assets
605
+ if ext in UNITY_EXTENSIONS:
606
+ return "UnityAsset"
607
+
608
+ # Native plugins
609
+ if ext in {".dll", ".so", ".dylib"}:
610
+ return "Plugin"
611
+
612
+ # Data files
613
+ if ext in {".bytes", ".txt", ".json", ".xml", ".csv"}:
614
+ return "Data"
615
+
616
+ return "Unknown"
617
+
618
+
619
@dataclass
class DependencyReport:
    """Aggregated dependency report for one or more Unity YAML files."""

    source_files: list[Path]
    dependencies: list[AssetDependency]
    guid_index: GUIDIndex | None = None

    @property
    def total_dependencies(self) -> int:
        """Number of distinct dependencies in the report."""
        return len(self.dependencies)

    @property
    def resolved_count(self) -> int:
        """Dependencies whose GUID was mapped to an actual file."""
        return sum(dep.is_resolved for dep in self.dependencies)

    @property
    def unresolved_count(self) -> int:
        """Dependencies whose GUID could not be mapped to a file."""
        return sum(not dep.is_resolved for dep in self.dependencies)

    @property
    def binary_count(self) -> int:
        """Dependencies that resolve to binary assets (textures, meshes, ...)."""
        return sum(dep.is_binary for dep in self.dependencies)

    def get_by_type(self, asset_type: str) -> list[AssetDependency]:
        """Return dependencies whose classification equals *asset_type*."""
        return [dep for dep in self.dependencies if dep.asset_type == asset_type]

    def get_binary_dependencies(self) -> list[AssetDependency]:
        """Return only the binary asset dependencies."""
        return [dep for dep in self.dependencies if dep.is_binary]

    def get_unresolved(self) -> list[AssetDependency]:
        """Return the dependencies that could not be resolved."""
        return [dep for dep in self.dependencies if not dep.is_resolved]

    def to_dict(self) -> dict[str, Any]:
        """Convert the report to plain JSON-serializable data."""
        return {
            "source_files": [str(source) for source in self.source_files],
            "summary": {
                "total": self.total_dependencies,
                "resolved": self.resolved_count,
                "unresolved": self.unresolved_count,
                "binary": self.binary_count,
            },
            "dependencies": [
                {
                    "guid": dep.guid,
                    "path": str(dep.path) if dep.path else None,
                    "type": dep.asset_type,
                    "resolved": dep.is_resolved,
                    "binary": dep.is_binary,
                    "reference_count": len(dep.references),
                }
                for dep in self.dependencies
            ],
        }
679
+
680
+
681
def analyze_dependencies(
    files: list[Path],
    project_root: Path | None = None,
    include_packages: bool = False,
    progress_callback: Callable[[int, int], None] | None = None,
) -> DependencyReport:
    """Analyze dependencies for one or more Unity YAML files.

    Args:
        files: List of Unity YAML files to analyze
        project_root: Optional project root for GUID resolution; auto-detected
            from the first file when omitted
        include_packages: Whether to include Packages folder in GUID index
        progress_callback: Optional callable invoked as callback(current, total)
            while the GUID index is built. (Fixed: previously this parameter
            was accepted but never forwarded, so it was silently ignored;
            the annotation also used the builtin ``callable``.)

    Returns:
        DependencyReport with all dependencies, de-duplicated by GUID
    """
    # Find project root if not provided.
    if project_root is None and files:
        project_root = find_unity_project_root(files[0])

    # Build GUID index, forwarding progress so callers can display status.
    guid_index = None
    if project_root:
        guid_index = build_guid_index(
            project_root,
            include_packages=include_packages,
            progress_callback=progress_callback,
        )

    # Merge per-file dependencies, de-duplicating by GUID.
    all_deps: dict[str, AssetDependency] = {}
    for file_path in files:
        for dep in get_file_dependencies(file_path, guid_index):
            existing = all_deps.get(dep.guid)
            if existing is not None:
                # Same target seen before: accumulate its references.
                existing.references.extend(dep.references)
            else:
                all_deps[dep.guid] = dep

    # Unresolved last, then grouped by type, then by path/GUID text.
    sorted_deps = sorted(
        all_deps.values(), key=lambda d: (not d.is_resolved, d.asset_type or "", str(d.path or d.guid))
    )

    return DependencyReport(
        source_files=files,
        dependencies=sorted_deps,
        guid_index=guid_index,
    )
732
+
733
+
734
+ # ============================================================================
735
+ # GUID Cache System (SQLite-based)
736
+ # ============================================================================
737
+
738
+ CACHE_DIR_NAME = ".unityflow"
739
+ CACHE_DB_NAME = "guid_cache.db"
740
+ CACHE_VERSION = 2 # Bumped for SQLite migration
741
+
742
+ # Type alias for progress callback
743
+ ProgressCallback = Callable[[int, int], None] | None
744
+
745
+
746
def _parse_meta_file(meta_path: Path, project_root: Path) -> tuple[str, Path, float] | None:
    """Parse one .meta file and return its GUID, asset path, and mtime.

    Args:
        meta_path: Path to the .meta file
        project_root: Project root used to relativize the asset path

    Returns:
        (guid, path, mtime) where path is project-relative when possible,
        or None when the file is unreadable or carries no GUID.
    """
    try:
        # stat() alongside the read so mtime matches the content just parsed.
        mtime = meta_path.stat().st_mtime
        content = meta_path.read_text(encoding="utf-8", errors="replace")
    except (OSError, UnicodeDecodeError):
        return None

    match = META_GUID_PATTERN.search(content)
    if match is None:
        return None

    asset_path = meta_path.with_suffix("")  # "Foo.png.meta" -> "Foo.png"

    # Prefer a project-relative path; fall back to the absolute one for
    # assets outside the project tree (e.g. local packages).
    try:
        asset_path = asset_path.relative_to(project_root)
    except ValueError:
        pass
    return (match.group(1), asset_path, mtime)
774
+
775
+
776
+ @dataclass
777
+ class CachedGUIDIndex:
778
+ """GUID index with SQLite-based caching for performance.
779
+
780
+ Caches GUID mappings using SQLite with WAL mode for:
781
+ - Faster queries for large projects (170k+ assets)
782
+ - Better concurrent read/write access
783
+ - Incremental updates at file level (mtime tracking)
784
+
785
+ Automatically invalidates cache when:
786
+ - Package versions change
787
+ - Cache file is missing or corrupted
788
+ - Cache version mismatch
789
+ - Individual file mtime changes (incremental update)
790
+ """
791
+
792
+ project_root: Path
793
+ _index: GUIDIndex | None = field(default=None, repr=False)
794
+ _cache_dir: Path | None = field(default=None, repr=False)
795
+ _db_lock: Lock = field(default_factory=Lock, repr=False)
796
+
797
    def __post_init__(self) -> None:
        # The cache lives in <project_root>/.unityflow/ next to the project.
        self._cache_dir = self.project_root / CACHE_DIR_NAME
799
+
800
    @property
    def cache_db(self) -> Path:
        """Path to the cache database (``<project_root>/.unityflow/guid_cache.db``)."""
        return self._cache_dir / CACHE_DB_NAME
804
+
805
    def get_index(
        self,
        include_packages: bool = True,
        progress_callback: ProgressCallback = None,
        max_workers: int | None = None,
    ) -> GUIDIndex:
        """Get GUID index, using cache if available.

        The index is memoized per instance. On a cache miss this either
        performs a full rebuild (cache missing/stale per
        ``_needs_full_rebuild``) or a cheaper incremental mtime-based update.

        Args:
            include_packages: Whether to include Library/PackageCache/
            progress_callback: Optional callback for progress (current, total)
            max_workers: Max threads for parallel processing (default: min(32, cpu_count + 4))

        Returns:
            GUIDIndex with GUID to path mappings
        """
        if self._index is not None:
            # Already built during this instance's lifetime.
            return self._index

        # Ensure cache directory exists
        self._cache_dir.mkdir(parents=True, exist_ok=True)

        # Check if we need full rebuild or incremental update.
        # Package versions are only relevant when packages are scanned.
        package_versions = self._get_package_versions() if include_packages else {}

        if self._needs_full_rebuild(package_versions, include_packages):
            # Full rebuild: rescan every .meta file and persist the result.
            self._index, db_entries = self._build_full_index(
                include_packages,
                progress_callback=progress_callback,
                max_workers=max_workers,
            )
            self._save_to_db(db_entries, package_versions, include_packages)
        else:
            # Try incremental update (only re-parse changed .meta files).
            self._index = self._incremental_update(
                include_packages,
                progress_callback=progress_callback,
                max_workers=max_workers,
            )

        return self._index
847
+
848
    def invalidate(self) -> None:
        """Invalidate the cache.

        Drops the in-memory index and deletes the on-disk database so the
        next get_index() call performs a full rebuild.
        """
        self._index = None
        if self.cache_db.exists():
            self.cache_db.unlink()
        # Also remove WAL and SHM files if they exist
        # (WAL journal mode leaves "<db>-wal"/"<db>-shm" sidecars next to the DB).
        wal_file = Path(str(self.cache_db) + "-wal")
        shm_file = Path(str(self.cache_db) + "-shm")
        if wal_file.exists():
            wal_file.unlink()
        if shm_file.exists():
            shm_file.unlink()
860
+
861
+ def _get_db_connection(self) -> sqlite3.Connection:
862
+ """Get a database connection with WAL mode enabled."""
863
+ conn = sqlite3.connect(str(self.cache_db), timeout=30.0)
864
+ conn.execute("PRAGMA journal_mode=WAL")
865
+ conn.execute("PRAGMA synchronous=NORMAL")
866
+ conn.execute("PRAGMA cache_size=-64000") # 64MB cache
867
+ return conn
868
+
869
    def _init_db(self, conn: sqlite3.Connection) -> None:
        """Initialize database schema.

        Creates (idempotently) the two tables the cache uses:
        ``metadata`` — key/value pairs for cache version and flags — and
        ``guid_cache`` — guid -> path plus the source .meta file's mtime,
        with an index on path for reverse lookups.
        """
        conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS metadata (
                key TEXT PRIMARY KEY,
                value TEXT
            );

            CREATE TABLE IF NOT EXISTS guid_cache (
                guid TEXT PRIMARY KEY,
                path TEXT NOT NULL,
                mtime REAL NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_path ON guid_cache(path);
            """
        )
        conn.commit()
888
+
889
    def _needs_full_rebuild(
        self,
        current_package_versions: dict[str, str],
        include_packages: bool,
    ) -> bool:
        """Check if cache needs full rebuild.

        Rebuild is required when the DB is missing, its cache version or
        include_packages flag differs from the current request, or the
        recorded package versions no longer match. Any DB/parse error is
        treated conservatively as "rebuild".
        """
        if not self.cache_db.exists():
            return True

        try:
            with self._db_lock, self._get_db_connection() as conn:
                # Schema/version mismatch invalidates the whole cache.
                cursor = conn.execute("SELECT value FROM metadata WHERE key = 'version'")
                row = cursor.fetchone()
                if not row or int(row[0]) != CACHE_VERSION:
                    return True

                # A cache built with a different package scope is unusable.
                cursor = conn.execute("SELECT value FROM metadata WHERE key = 'include_packages'")
                row = cursor.fetchone()
                if not row or (row[0] == "1") != include_packages:
                    return True

                # Package upgrades/downgrades change GUID contents wholesale.
                cursor = conn.execute("SELECT value FROM metadata WHERE key = 'package_versions'")
                row = cursor.fetchone()
                cached_versions = json.loads(row[0]) if row else {}
                if cached_versions != current_package_versions:
                    return True

                return False
        except (sqlite3.Error, ValueError, json.JSONDecodeError):
            # Corrupted or unreadable cache: rebuild from scratch.
            return True
919
+
920
    def _save_to_db(
        self,
        db_entries: list[tuple[str, str, float]],
        package_versions: dict[str, str],
        include_packages: bool,
    ) -> None:
        """Save cache to SQLite database.

        Replaces the entire cache (guid rows and metadata). Write failures
        are swallowed: the cache is a best-effort optimization, not a
        source of truth.

        Args:
            db_entries: List of (guid, path_str, mtime) tuples
            package_versions: Dict of package name -> version
            include_packages: Whether packages were included in scan
        """
        try:
            with self._db_lock, self._get_db_connection() as conn:
                self._init_db(conn)

                # Clear existing data
                conn.execute("DELETE FROM guid_cache")
                conn.execute("DELETE FROM metadata")

                # Save metadata (read back by _needs_full_rebuild)
                conn.execute("INSERT INTO metadata (key, value) VALUES (?, ?)", ("version", str(CACHE_VERSION)))
                conn.execute(
                    "INSERT INTO metadata (key, value) VALUES (?, ?)",
                    ("include_packages", "1" if include_packages else "0"),
                )
                conn.execute(
                    "INSERT INTO metadata (key, value) VALUES (?, ?)",
                    ("package_versions", json.dumps(package_versions)),
                )

                # Batch insert GUIDs with mtime (already calculated during scan)
                conn.executemany("INSERT OR REPLACE INTO guid_cache (guid, path, mtime) VALUES (?, ?, ?)", db_entries)
                conn.commit()
        except sqlite3.Error:
            pass  # Ignore cache write errors
957
+
958
+ def _load_from_db(self) -> GUIDIndex | None:
959
+ """Load cache from SQLite database."""
960
+ if not self.cache_db.exists():
961
+ return None
962
+
963
+ try:
964
+ index = GUIDIndex(project_root=self.project_root)
965
+ with self._db_lock, self._get_db_connection() as conn:
966
+ cursor = conn.execute("SELECT guid, path FROM guid_cache")
967
+ for guid, path_str in cursor:
968
+ path = Path(path_str)
969
+ index.guid_to_path[guid] = path
970
+ index.path_to_guid[path] = guid
971
+ return index
972
+ except sqlite3.Error:
973
+ return None
974
+
975
    def _incremental_update(
        self,
        include_packages: bool,
        progress_callback: ProgressCallback = None,
        max_workers: int | None = None,
    ) -> GUIDIndex:
        """Perform incremental cache update based on mtime changes.

        Falls back to a full rebuild when the cached index cannot be
        loaded, or when more than 30% of the .meta files changed (at that
        point rescanning is cheaper than per-file bookkeeping).
        """
        # Load existing cache
        index = self._load_from_db()
        if index is None:
            # Cache unreadable: rebuild everything from scratch.
            index, _ = self._build_full_index(
                include_packages,
                progress_callback=progress_callback,
                max_workers=max_workers,
            )
            return index

        # Get all meta files and their current mtimes
        meta_files = self._collect_meta_files(include_packages)
        total = len(meta_files)

        # Load cached mtimes
        cached_mtimes: dict[str, float] = {}
        try:
            with self._db_lock, self._get_db_connection() as conn:
                cursor = conn.execute("SELECT path, mtime FROM guid_cache")
                for path_str, mtime in cursor:
                    cached_mtimes[path_str] = mtime
        except sqlite3.Error:
            pass  # best effort: empty mtimes means every file looks changed

        # Find files that need updating (new, modified, or deleted)
        current_paths = set()
        files_to_process: list[Path] = []

        for meta_path in meta_files:
            asset_path = meta_path.with_suffix("")
            try:
                rel_path = asset_path.relative_to(self.project_root)
            except ValueError:
                # Outside the project tree (e.g. a local package).
                rel_path = asset_path

            path_str = str(rel_path)
            current_paths.add(path_str)

            try:
                current_mtime = meta_path.stat().st_mtime
            except OSError:
                continue  # file vanished between listing and stat()

            # -1 sentinel: unknown paths always differ from a real mtime.
            cached_mtime = cached_mtimes.get(path_str, -1)
            if current_mtime != cached_mtime:
                files_to_process.append(meta_path)

        # Find deleted files
        deleted_paths = set(cached_mtimes.keys()) - current_paths

        # If too many changes, do full rebuild
        change_ratio = (len(files_to_process) + len(deleted_paths)) / max(total, 1)
        if change_ratio > 0.3:  # More than 30% changed
            index, _ = self._build_full_index(
                include_packages,
                progress_callback=progress_callback,
                max_workers=max_workers,
            )
            return index

        # Process changed files
        db_updates: list[tuple[str, str, float]] = []
        if files_to_process:
            updates = self._parse_meta_files(
                files_to_process,
                progress_callback=progress_callback,
                max_workers=max_workers,
            )

            # Update index and collect DB entries
            for guid, path, mtime in updates:
                # Remove old entry if guid changed for this path
                old_guid = index.path_to_guid.get(path)
                if old_guid and old_guid != guid:
                    del index.guid_to_path[old_guid]

                index.guid_to_path[guid] = path
                index.path_to_guid[path] = guid
                db_updates.append((guid, str(path), mtime))

        # Remove deleted files from index (both directions of the mapping)
        for path_str in deleted_paths:
            path = Path(path_str)
            if path in index.path_to_guid:
                guid = index.path_to_guid.pop(path)
                if guid in index.guid_to_path:
                    del index.guid_to_path[guid]

        # Update cache with changes
        self._update_db_entries(db_updates, deleted_paths)

        return index
1074
+
1075
+ def _update_db_entries(
1076
+ self,
1077
+ db_updates: list[tuple[str, str, float]],
1078
+ deleted_paths: set[str],
1079
+ ) -> None:
1080
+ """Update specific database entries.
1081
+
1082
+ Args:
1083
+ db_updates: List of (guid, path_str, mtime) tuples to upsert
1084
+ deleted_paths: Set of path strings to delete
1085
+ """
1086
+ try:
1087
+ with self._db_lock, self._get_db_connection() as conn:
1088
+ # Delete removed entries
1089
+ if deleted_paths:
1090
+ placeholders = ",".join("?" * len(deleted_paths))
1091
+ conn.execute(f"DELETE FROM guid_cache WHERE path IN ({placeholders})", list(deleted_paths))
1092
+
1093
+ # Update changed entries (already have mtime from parse)
1094
+ if db_updates:
1095
+ conn.executemany(
1096
+ "INSERT OR REPLACE INTO guid_cache (guid, path, mtime) VALUES (?, ?, ?)", db_updates
1097
+ )
1098
+
1099
+ conn.commit()
1100
+ except sqlite3.Error:
1101
+ pass
1102
+
1103
+ def _collect_meta_files(self, include_packages: bool) -> list[Path]:
1104
+ """Collect all .meta files from relevant directories.
1105
+
1106
+ Scans:
1107
+ - Assets/ folder (always)
1108
+ - Packages/ folder (always, for embedded packages)
1109
+ - Library/PackageCache/ (when include_packages=True, for registry packages)
1110
+ - Local package paths from manifest.json file: references (when include_packages=True)
1111
+ """
1112
+ meta_files: list[Path] = []
1113
+
1114
+ # Scan Assets folder
1115
+ assets_dir = self.project_root / "Assets"
1116
+ if assets_dir.is_dir():
1117
+ meta_files.extend(assets_dir.rglob("*.meta"))
1118
+
1119
+ # Scan Packages folder (embedded packages)
1120
+ packages_dir = self.project_root / "Packages"
1121
+ if packages_dir.is_dir():
1122
+ meta_files.extend(packages_dir.rglob("*.meta"))
1123
+
1124
+ # Scan Library/PackageCache (downloaded packages from Unity registry)
1125
+ if include_packages:
1126
+ package_cache_dir = self.project_root / "Library" / "PackageCache"
1127
+ if package_cache_dir.is_dir():
1128
+ meta_files.extend(package_cache_dir.rglob("*.meta"))
1129
+
1130
+ # Scan local packages referenced via file: in manifest.json
1131
+ # e.g., "file:../../NK.Packages/com.domybest.mybox@1.7.0"
1132
+ local_package_paths = self._get_local_package_paths()
1133
+ for package_path in local_package_paths:
1134
+ if package_path.is_dir():
1135
+ meta_files.extend(package_path.rglob("*.meta"))
1136
+
1137
+ return meta_files
1138
+
1139
+ def _get_local_package_paths(self) -> list[Path]:
1140
+ """Get paths to local packages referenced via file: in manifest.json.
1141
+
1142
+ Uses the shared get_local_package_paths() utility function.
1143
+
1144
+ Returns:
1145
+ List of resolved absolute paths to local package directories.
1146
+ """
1147
+ return get_local_package_paths(self.project_root)
1148
+
1149
+ def _parse_meta_files_sequential(
1150
+ self,
1151
+ meta_files: list[Path],
1152
+ progress_callback: ProgressCallback = None,
1153
+ ) -> list[tuple[str, Path, float]]:
1154
+ """Parse meta files sequentially (faster for local storage).
1155
+
1156
+ Args:
1157
+ meta_files: List of .meta file paths to parse
1158
+ progress_callback: Optional callback for progress (current, total)
1159
+
1160
+ Returns:
1161
+ List of (guid, path, mtime) tuples
1162
+ """
1163
+ if not meta_files:
1164
+ return []
1165
+
1166
+ results: list[tuple[str, Path, float]] = []
1167
+ total = len(meta_files)
1168
+
1169
+ for i, meta_path in enumerate(meta_files):
1170
+ if progress_callback:
1171
+ progress_callback(i + 1, total)
1172
+
1173
+ result = _parse_meta_file(meta_path, self.project_root)
1174
+ if result:
1175
+ results.append(result)
1176
+
1177
+ return results
1178
+
1179
+ def _parse_meta_files_parallel(
1180
+ self,
1181
+ meta_files: list[Path],
1182
+ progress_callback: ProgressCallback = None,
1183
+ max_workers: int | None = None,
1184
+ ) -> list[tuple[str, Path, float]]:
1185
+ """Parse meta files in parallel using ThreadPoolExecutor.
1186
+
1187
+ Note: Parallel processing has significant overhead and is only
1188
+ beneficial for network storage or very slow disks. For local SSDs,
1189
+ sequential processing is typically 2-3x faster.
1190
+
1191
+ Args:
1192
+ meta_files: List of .meta file paths to parse
1193
+ progress_callback: Optional callback for progress (current, total)
1194
+ max_workers: Max threads (default: min(32, cpu_count + 4))
1195
+
1196
+ Returns:
1197
+ List of (guid, path, mtime) tuples
1198
+ """
1199
+ if not meta_files:
1200
+ return []
1201
+
1202
+ results: list[tuple[str, Path, float]] = []
1203
+ total = len(meta_files)
1204
+ completed = 0
1205
+
1206
+ if max_workers is None:
1207
+ max_workers = min(32, (os.cpu_count() or 1) + 4)
1208
+
1209
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
1210
+ futures = {
1211
+ executor.submit(_parse_meta_file, meta_path, self.project_root): meta_path for meta_path in meta_files
1212
+ }
1213
+
1214
+ for future in as_completed(futures):
1215
+ completed += 1
1216
+ if progress_callback:
1217
+ progress_callback(completed, total)
1218
+
1219
+ result = future.result()
1220
+ if result:
1221
+ results.append(result)
1222
+
1223
+ return results
1224
+
1225
+ def _parse_meta_files(
1226
+ self,
1227
+ meta_files: list[Path],
1228
+ progress_callback: ProgressCallback = None,
1229
+ max_workers: int | None = None,
1230
+ ) -> list[tuple[str, Path, float]]:
1231
+ """Parse meta files with automatic strategy selection.
1232
+
1233
+ Uses sequential processing by default (faster for local storage).
1234
+ Set max_workers > 1 to force parallel processing (useful for network storage).
1235
+
1236
+ Args:
1237
+ meta_files: List of .meta file paths to parse
1238
+ progress_callback: Optional callback for progress (current, total)
1239
+ max_workers: Set to > 1 to force parallel processing
1240
+
1241
+ Returns:
1242
+ List of (guid, path, mtime) tuples
1243
+ """
1244
+ # Use parallel only if explicitly requested with max_workers > 1
1245
+ if max_workers is not None and max_workers > 1:
1246
+ return self._parse_meta_files_parallel(
1247
+ meta_files,
1248
+ progress_callback=progress_callback,
1249
+ max_workers=max_workers,
1250
+ )
1251
+
1252
+ # Default: sequential processing (faster for local storage)
1253
+ return self._parse_meta_files_sequential(
1254
+ meta_files,
1255
+ progress_callback=progress_callback,
1256
+ )
1257
+
1258
+ def _build_full_index(
1259
+ self,
1260
+ include_packages: bool,
1261
+ progress_callback: ProgressCallback = None,
1262
+ max_workers: int | None = None,
1263
+ ) -> tuple[GUIDIndex, list[tuple[str, str, float]]]:
1264
+ """Build full GUID index by scanning directories.
1265
+
1266
+ Returns:
1267
+ Tuple of (GUIDIndex, list of (guid, path_str, mtime) for DB save)
1268
+ """
1269
+ index = GUIDIndex(project_root=self.project_root)
1270
+
1271
+ # Collect all meta files
1272
+ meta_files = self._collect_meta_files(include_packages)
1273
+
1274
+ # Parse files (sequential by default, parallel if max_workers > 1)
1275
+ results = self._parse_meta_files(
1276
+ meta_files,
1277
+ progress_callback=progress_callback,
1278
+ max_workers=max_workers,
1279
+ )
1280
+
1281
+ # Build index and DB entries from results
1282
+ db_entries: list[tuple[str, str, float]] = []
1283
+ for guid, path, mtime in results:
1284
+ index.guid_to_path[guid] = path
1285
+ index.path_to_guid[path] = guid
1286
+ db_entries.append((guid, str(path), mtime))
1287
+
1288
+ return index, db_entries
1289
+
1290
+ def _get_package_versions(self) -> dict[str, str]:
1291
+ """Get installed package versions from Library/PackageCache and manifest.json.
1292
+
1293
+ Includes:
1294
+ - Registry packages from Library/PackageCache (e.g., "com.unity.ugui@1.0.0")
1295
+ - Local packages from manifest.json file: references (e.g., "file:../../path@1.0.0")
1296
+
1297
+ This ensures cache invalidation when any package changes.
1298
+ """
1299
+ versions = {}
1300
+
1301
+ # Get versions from Library/PackageCache (registry packages)
1302
+ package_cache_dir = self.project_root / "Library" / "PackageCache"
1303
+ if package_cache_dir.is_dir():
1304
+ # Parse directory names like "com.unity.ugui@1.0.0"
1305
+ for entry in package_cache_dir.iterdir():
1306
+ if entry.is_dir() and "@" in entry.name:
1307
+ parts = entry.name.rsplit("@", 1)
1308
+ if len(parts) == 2:
1309
+ package_name, version = parts
1310
+ versions[package_name] = version
1311
+
1312
+ # Get versions from manifest.json file: references (local packages)
1313
+ manifest_path = self.project_root / "Packages" / "manifest.json"
1314
+ if manifest_path.exists():
1315
+ try:
1316
+ manifest_data = json.loads(manifest_path.read_text(encoding="utf-8"))
1317
+ dependencies = manifest_data.get("dependencies", {})
1318
+
1319
+ for dep_name, dep_value in dependencies.items():
1320
+ if isinstance(dep_value, str) and dep_value.startswith("file:"):
1321
+ # Use the full file: path as "version" to detect changes
1322
+ # e.g., "file:../../NK.Packages/pkg@1.0.0" -> track the whole path
1323
+ versions[f"local:{dep_name}"] = dep_value
1324
+ except (OSError, json.JSONDecodeError):
1325
+ pass
1326
+
1327
+ return versions
1328
+
1329
+
1330
def get_cached_guid_index(
    project_root: Path,
    include_packages: bool = True,
    progress_callback: ProgressCallback = None,
    max_workers: int | None = None,
) -> GUIDIndex:
    """Get GUID index with SQLite caching support.

    This is the recommended way to get a GUID index for performance.
    Uses SQLite with WAL mode for:
    - Faster queries for large projects (170k+ assets)
    - Better concurrent read/write access
    - Incremental updates based on file mtime (only re-parse changed files)

    Performance characteristics:
    - First run: Scans all .meta files and builds SQLite cache
    - Subsequent runs: Loads from cache (~2x faster than rescan)
    - Incremental updates: Only processes changed files (~1.5x faster)

    Args:
        project_root: Path to Unity project root
        include_packages: Whether to include Library/PackageCache/
        progress_callback: Optional callback for progress (current, total)
        max_workers: Set to > 1 to force parallel processing
            (only useful for network storage; local SSDs are faster sequential)

    Returns:
        GUIDIndex with GUID to path mappings
    """
    # Facade: all caching logic lives in CachedGUIDIndex.
    return CachedGUIDIndex(project_root=project_root).get_index(
        include_packages=include_packages,
        progress_callback=progress_callback,
        max_workers=max_workers,
    )
1365
+
1366
+
1367
+ # ============================================================================
1368
+ # Lazy GUID Index (Memory-Optimized)
1369
+ # ============================================================================
1370
+
1371
+
1372
@dataclass
class LazyGUIDIndex:
    """Memory-efficient GUID index that queries SQLite directly.

    Unlike GUIDIndex which loads all entries into memory, LazyGUIDIndex
    queries the SQLite database on-demand. This is ideal for large projects
    (170k+ assets) where loading the entire index would be slow and
    memory-intensive.

    Features:
    - O(1) initialization (no upfront loading)
    - O(log N) lookups via SQLite index
    - LRU cache for frequently accessed GUIDs
    - Compatible with GUIDIndex API

    Performance characteristics:
    - Initial loading: O(1) vs O(N) for GUIDIndex
    - Memory usage: O(cache_size) vs O(N) for GUIDIndex
    - Lookup: O(log N) database query vs O(1) dict lookup
    - For typical usage patterns where only a subset of GUIDs are accessed,
      LazyGUIDIndex provides better overall performance.

    Thread-safety: all database access is serialized through _db_lock, and
    the shared connection is opened with check_same_thread=False so it can
    legally be used from any thread under that lock.

    Example:
        >>> lazy_index = get_lazy_guid_index("/path/to/unity/project")
        >>> path = lazy_index.get_path("f4afdcb1cbadf954ba8b1cf465429e17")
        >>> print(path)  # Assets/Scripts/PlayerController.cs
    """

    project_root: Path
    # Resolved in __post_init__; points at the shared SQLite cache file.
    _db_path: Path = field(init=False)
    # Lazily opened connection, shared across threads, guarded by _db_lock.
    _conn: sqlite3.Connection | None = field(default=None, repr=False)
    # Insertion-ordered dict used as a simple LRU: guid -> path.
    _cache: dict[str, Path] = field(default_factory=dict, repr=False)
    # Mirror of _cache for path -> guid lookups.
    _reverse_cache: dict[Path, str] = field(default_factory=dict, repr=False)
    # Maximum number of entries kept in the in-memory LRU.
    _cache_size: int = field(default=1000, repr=False)
    _db_lock: Lock = field(default_factory=Lock, repr=False)

    def __post_init__(self) -> None:
        # Same cache location CachedGUIDIndex writes to.
        self._db_path = self.project_root / CACHE_DIR_NAME / CACHE_DB_NAME

    def __len__(self) -> int:
        """Return the total number of entries in the database."""
        if not self._db_path.exists():
            return 0
        try:
            with self._db_lock:
                conn = self._get_connection()
                cursor = conn.execute("SELECT COUNT(*) FROM guid_cache")
                row = cursor.fetchone()
                return row[0] if row else 0
        except sqlite3.Error:
            return 0

    def _get_connection(self) -> sqlite3.Connection:
        """Get or create the shared database connection.

        check_same_thread=False is required: this instance holds ONE
        connection that may be reached from multiple threads (access is
        serialized by self._db_lock). With the default setting, sqlite3
        raises ProgrammingError (a sqlite3.Error subclass) when any thread
        other than the creator touches the connection, and the broad
        `except sqlite3.Error` handlers in this class would silently turn
        every such lookup into a cache miss.
        """
        if self._conn is None:
            self._conn = sqlite3.connect(
                str(self._db_path),
                timeout=30.0,
                check_same_thread=False,
            )
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA synchronous=NORMAL")
            self._conn.execute("PRAGMA cache_size=-16000")  # 16MB page cache
        return self._conn

    def _add_to_cache(self, guid: str, path: Path) -> None:
        """Add entry to LRU cache, evicting the oldest entry if full."""
        if len(self._cache) >= self._cache_size:
            # dicts preserve insertion order, so the first key is the
            # least recently inserted/promoted entry.
            oldest_guid = next(iter(self._cache))
            oldest_path = self._cache.pop(oldest_guid)
            self._reverse_cache.pop(oldest_path, None)

        self._cache[guid] = path
        self._reverse_cache[path] = guid

    def get_path(self, guid: str) -> Path | None:
        """Get the asset path for a GUID.

        Checks the LRU cache first, then queries the SQLite database.

        Args:
            guid: The GUID to look up

        Returns:
            Asset path, or None if not found
        """
        # Cache hit: re-insert to promote the entry (LRU behavior).
        if guid in self._cache:
            path = self._cache.pop(guid)
            self._cache[guid] = path
            return path

        if not self._db_path.exists():
            return None

        try:
            with self._db_lock:
                conn = self._get_connection()
                cursor = conn.execute("SELECT path FROM guid_cache WHERE guid = ?", (guid,))
                row = cursor.fetchone()
                if row:
                    path = Path(row[0])
                    self._add_to_cache(guid, path)
                    return path
        except sqlite3.Error:
            pass  # Best-effort: treat DB errors as a miss

        return None

    def get_guid(self, path: Path) -> str | None:
        """Get the GUID for an asset path.

        Args:
            path: The asset path to look up

        Returns:
            GUID string, or None if not found
        """
        # Try the path as given, and also relative to the project root,
        # since the DB stores project-relative paths when possible.
        paths_to_check = [path]
        if self.project_root:
            try:
                paths_to_check.append(path.relative_to(self.project_root))
            except ValueError:
                pass

        # Check the in-memory reverse cache first.
        for candidate in paths_to_check:
            if candidate in self._reverse_cache:
                return self._reverse_cache[candidate]

        if not self._db_path.exists():
            return None

        try:
            with self._db_lock:
                conn = self._get_connection()
                for candidate in paths_to_check:
                    cursor = conn.execute("SELECT guid FROM guid_cache WHERE path = ?", (str(candidate),))
                    row = cursor.fetchone()
                    if row:
                        guid = row[0]
                        self._add_to_cache(guid, candidate)
                        return guid
        except sqlite3.Error:
            pass  # Best-effort: treat DB errors as a miss

        return None

    def resolve_name(self, guid: str) -> str | None:
        """Resolve a GUID to an asset name (filename without extension).

        This is particularly useful for resolving MonoBehaviour script names
        from their m_Script GUID references.

        Args:
            guid: The GUID to resolve

        Returns:
            The asset name (stem), or None if GUID is not found
        """
        path = self.get_path(guid)
        return path.stem if path is not None else None

    def resolve_path(self, guid: str) -> Path | None:
        """Resolve a GUID to an asset path.

        Alias for get_path() with a more descriptive name for LLM usage.

        Args:
            guid: The GUID to resolve

        Returns:
            The asset path, or None if GUID is not found
        """
        return self.get_path(guid)

    def batch_resolve_names(self, guids: set[str]) -> dict[str, str]:
        """Batch resolve multiple GUIDs to asset names with few SQL queries.

        This is significantly faster than calling resolve_name() repeatedly
        when processing many components (e.g., in build_hierarchy).

        Performance: O(1) query per 500 GUIDs instead of one query per GUID.
        Typical improvement: 1600ms -> 80ms for large prefabs with 100+ components.

        Args:
            guids: Set of GUIDs to resolve

        Returns:
            Dict mapping GUID to asset name (filename without extension).
            GUIDs that couldn't be resolved are omitted from the result.

        Example:
            >>> names = lazy_index.batch_resolve_names({"abc123...", "def456..."})
            >>> print(names)  # {"abc123...": "PlayerController", "def456...": "EnemyAI"}
        """
        if not guids:
            return {}

        result: dict[str, str] = {}

        # Serve what we can from the in-memory cache.
        uncached_guids: list[str] = []
        for guid in guids:
            if guid in self._cache:
                result[guid] = self._cache[guid].stem
            else:
                uncached_guids.append(guid)

        if not uncached_guids:
            return result

        if not self._db_path.exists():
            return result

        try:
            with self._db_lock:
                conn = self._get_connection()
                # Batch the IN clause to stay under SQLite's host-parameter
                # limit (999 in older builds).
                batch_size = 500
                for start in range(0, len(uncached_guids), batch_size):
                    batch = uncached_guids[start : start + batch_size]
                    placeholders = ",".join("?" * len(batch))
                    cursor = conn.execute(
                        f"SELECT guid, path FROM guid_cache WHERE guid IN ({placeholders})",
                        batch,
                    )
                    for guid, path_str in cursor:
                        path = Path(path_str)
                        self._add_to_cache(guid, path)
                        result[guid] = path.stem
        except sqlite3.Error:
            pass  # Best-effort: unresolved GUIDs are simply omitted

        return result

    def close(self) -> None:
        """Close the database connection."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    def clear_cache(self) -> None:
        """Clear the in-memory LRU cache."""
        self._cache.clear()
        self._reverse_cache.clear()

    def __del__(self) -> None:
        """Best-effort cleanup of the database connection on deletion.

        Exceptions are suppressed because __del__ may run during interpreter
        shutdown, when sqlite3 internals can already be torn down.
        """
        try:
            self.close()
        except Exception:
            pass
1633
+
1634
+
1635
def get_lazy_guid_index(
    project_root: Path,
    include_packages: bool = True,
    progress_callback: ProgressCallback = None,
    max_workers: int | None = None,
    cache_size: int = 1000,
) -> LazyGUIDIndex:
    """Get a memory-efficient lazy GUID index.

    Ensures the SQLite cache exists (building it if necessary) and returns
    a LazyGUIDIndex that queries the database on-demand instead of loading
    the whole mapping into memory. This is the recommended approach for
    large projects (170k+ assets).

    Performance comparison with get_cached_guid_index():
    - Initial loading: O(1) vs O(N) - LazyGUIDIndex is instant
    - Memory usage: O(cache_size) vs O(N) - LazyGUIDIndex uses minimal memory
    - Lookup: O(log N) vs O(1) - GUIDIndex is faster for individual lookups
    - Overall: LazyGUIDIndex is better when accessing a subset of GUIDs

    Args:
        project_root: Path to Unity project root
        include_packages: Whether to include Library/PackageCache/
        progress_callback: Optional callback for progress during cache build
        max_workers: Set to > 1 to force parallel processing during cache build
        cache_size: Maximum number of entries to keep in memory cache (default: 1000)

    Returns:
        LazyGUIDIndex for memory-efficient GUID lookups

    Example:
        >>> lazy_index = get_lazy_guid_index("/path/to/unity/project")
        >>> path = lazy_index.get_path("f4afdcb1cbadf954ba8b1cf465429e17")
        >>> name = lazy_index.resolve_name("f4afdcb1cbadf954ba8b1cf465429e17")
    """
    root = Path(project_root)
    db_file = root / CACHE_DIR_NAME / CACHE_DB_NAME

    # The lazy index only reads the SQLite cache, so build it on first use.
    if not db_file.exists():
        CachedGUIDIndex(project_root=root).get_index(
            include_packages=include_packages,
            progress_callback=progress_callback,
            max_workers=max_workers,
        )

    result = LazyGUIDIndex(project_root=root)
    result._cache_size = cache_size
    return result
+ return lazy_index