stravinsky 0.4.18__py3-none-any.whl → 0.4.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of stravinsky might be problematic.

Files changed (184)
  1. mcp_bridge/__init__.py +1 -1
  2. mcp_bridge/auth/__init__.py +16 -6
  3. mcp_bridge/auth/cli.py +202 -11
  4. mcp_bridge/auth/oauth.py +1 -2
  5. mcp_bridge/auth/openai_oauth.py +4 -7
  6. mcp_bridge/auth/token_store.py +0 -1
  7. mcp_bridge/cli/__init__.py +1 -1
  8. mcp_bridge/cli/install_hooks.py +503 -107
  9. mcp_bridge/cli/session_report.py +0 -3
  10. mcp_bridge/config/__init__.py +2 -2
  11. mcp_bridge/config/hook_config.py +3 -5
  12. mcp_bridge/config/rate_limits.py +108 -13
  13. mcp_bridge/hooks/HOOKS_SETTINGS.json +17 -4
  14. mcp_bridge/hooks/__init__.py +14 -4
  15. mcp_bridge/hooks/agent_reminder.py +4 -4
  16. mcp_bridge/hooks/auto_slash_command.py +5 -5
  17. mcp_bridge/hooks/budget_optimizer.py +2 -2
  18. mcp_bridge/hooks/claude_limits_hook.py +114 -0
  19. mcp_bridge/hooks/comment_checker.py +3 -4
  20. mcp_bridge/hooks/compaction.py +2 -2
  21. mcp_bridge/hooks/context.py +2 -1
  22. mcp_bridge/hooks/context_monitor.py +2 -2
  23. mcp_bridge/hooks/delegation_policy.py +85 -0
  24. mcp_bridge/hooks/directory_context.py +3 -3
  25. mcp_bridge/hooks/edit_recovery.py +3 -2
  26. mcp_bridge/hooks/edit_recovery_policy.py +49 -0
  27. mcp_bridge/hooks/empty_message_sanitizer.py +2 -2
  28. mcp_bridge/hooks/events.py +160 -0
  29. mcp_bridge/hooks/git_noninteractive.py +4 -4
  30. mcp_bridge/hooks/keyword_detector.py +8 -10
  31. mcp_bridge/hooks/manager.py +35 -22
  32. mcp_bridge/hooks/notification_hook.py +13 -6
  33. mcp_bridge/hooks/parallel_enforcement_policy.py +67 -0
  34. mcp_bridge/hooks/parallel_enforcer.py +5 -5
  35. mcp_bridge/hooks/parallel_execution.py +22 -10
  36. mcp_bridge/hooks/post_tool/parallel_validation.py +103 -0
  37. mcp_bridge/hooks/pre_compact.py +8 -9
  38. mcp_bridge/hooks/pre_tool/agent_spawn_validator.py +115 -0
  39. mcp_bridge/hooks/preemptive_compaction.py +2 -3
  40. mcp_bridge/hooks/routing_notifications.py +80 -0
  41. mcp_bridge/hooks/rules_injector.py +11 -19
  42. mcp_bridge/hooks/session_idle.py +4 -4
  43. mcp_bridge/hooks/session_notifier.py +4 -4
  44. mcp_bridge/hooks/session_recovery.py +4 -5
  45. mcp_bridge/hooks/stravinsky_mode.py +1 -1
  46. mcp_bridge/hooks/subagent_stop.py +1 -3
  47. mcp_bridge/hooks/task_validator.py +2 -2
  48. mcp_bridge/hooks/tmux_manager.py +7 -8
  49. mcp_bridge/hooks/todo_delegation.py +4 -1
  50. mcp_bridge/hooks/todo_enforcer.py +180 -10
  51. mcp_bridge/hooks/truncation_policy.py +37 -0
  52. mcp_bridge/hooks/truncator.py +1 -2
  53. mcp_bridge/metrics/cost_tracker.py +115 -0
  54. mcp_bridge/native_search.py +93 -0
  55. mcp_bridge/native_watcher.py +118 -0
  56. mcp_bridge/notifications.py +3 -4
  57. mcp_bridge/orchestrator/enums.py +11 -0
  58. mcp_bridge/orchestrator/router.py +165 -0
  59. mcp_bridge/orchestrator/state.py +32 -0
  60. mcp_bridge/orchestrator/visualization.py +14 -0
  61. mcp_bridge/orchestrator/wisdom.py +34 -0
  62. mcp_bridge/prompts/__init__.py +1 -8
  63. mcp_bridge/prompts/dewey.py +1 -1
  64. mcp_bridge/prompts/planner.py +2 -4
  65. mcp_bridge/prompts/stravinsky.py +53 -31
  66. mcp_bridge/proxy/__init__.py +0 -0
  67. mcp_bridge/proxy/client.py +70 -0
  68. mcp_bridge/proxy/model_server.py +157 -0
  69. mcp_bridge/routing/__init__.py +43 -0
  70. mcp_bridge/routing/config.py +250 -0
  71. mcp_bridge/routing/model_tiers.py +135 -0
  72. mcp_bridge/routing/provider_state.py +261 -0
  73. mcp_bridge/routing/task_classifier.py +190 -0
  74. mcp_bridge/server.py +363 -34
  75. mcp_bridge/server_tools.py +298 -6
  76. mcp_bridge/tools/__init__.py +19 -8
  77. mcp_bridge/tools/agent_manager.py +549 -799
  78. mcp_bridge/tools/background_tasks.py +13 -17
  79. mcp_bridge/tools/code_search.py +54 -51
  80. mcp_bridge/tools/continuous_loop.py +0 -1
  81. mcp_bridge/tools/dashboard.py +19 -0
  82. mcp_bridge/tools/find_code.py +296 -0
  83. mcp_bridge/tools/init.py +1 -0
  84. mcp_bridge/tools/list_directory.py +42 -0
  85. mcp_bridge/tools/lsp/__init__.py +8 -8
  86. mcp_bridge/tools/lsp/manager.py +51 -28
  87. mcp_bridge/tools/lsp/tools.py +98 -65
  88. mcp_bridge/tools/model_invoke.py +1047 -152
  89. mcp_bridge/tools/mux_client.py +75 -0
  90. mcp_bridge/tools/project_context.py +1 -2
  91. mcp_bridge/tools/query_classifier.py +132 -49
  92. mcp_bridge/tools/read_file.py +84 -0
  93. mcp_bridge/tools/replace.py +45 -0
  94. mcp_bridge/tools/run_shell_command.py +38 -0
  95. mcp_bridge/tools/search_enhancements.py +347 -0
  96. mcp_bridge/tools/semantic_search.py +677 -92
  97. mcp_bridge/tools/session_manager.py +0 -2
  98. mcp_bridge/tools/skill_loader.py +0 -1
  99. mcp_bridge/tools/task_runner.py +5 -7
  100. mcp_bridge/tools/templates.py +3 -3
  101. mcp_bridge/tools/tool_search.py +331 -0
  102. mcp_bridge/tools/write_file.py +29 -0
  103. mcp_bridge/update_manager.py +33 -37
  104. mcp_bridge/update_manager_pypi.py +6 -8
  105. mcp_bridge/utils/cache.py +82 -0
  106. mcp_bridge/utils/process.py +71 -0
  107. mcp_bridge/utils/session_state.py +51 -0
  108. mcp_bridge/utils/truncation.py +76 -0
  109. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/METADATA +84 -35
  110. stravinsky-0.4.66.dist-info/RECORD +198 -0
  111. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/entry_points.txt +1 -0
  112. stravinsky_claude_assets/HOOKS_INTEGRATION.md +316 -0
  113. stravinsky_claude_assets/agents/HOOKS.md +437 -0
  114. stravinsky_claude_assets/agents/code-reviewer.md +210 -0
  115. stravinsky_claude_assets/agents/comment_checker.md +580 -0
  116. stravinsky_claude_assets/agents/debugger.md +254 -0
  117. stravinsky_claude_assets/agents/delphi.md +495 -0
  118. stravinsky_claude_assets/agents/dewey.md +248 -0
  119. stravinsky_claude_assets/agents/explore.md +1198 -0
  120. stravinsky_claude_assets/agents/frontend.md +472 -0
  121. stravinsky_claude_assets/agents/implementation-lead.md +164 -0
  122. stravinsky_claude_assets/agents/momus.md +464 -0
  123. stravinsky_claude_assets/agents/research-lead.md +141 -0
  124. stravinsky_claude_assets/agents/stravinsky.md +730 -0
  125. stravinsky_claude_assets/commands/delphi.md +9 -0
  126. stravinsky_claude_assets/commands/dewey.md +54 -0
  127. stravinsky_claude_assets/commands/git-master.md +112 -0
  128. stravinsky_claude_assets/commands/index.md +49 -0
  129. stravinsky_claude_assets/commands/publish.md +86 -0
  130. stravinsky_claude_assets/commands/review.md +73 -0
  131. stravinsky_claude_assets/commands/str/agent_cancel.md +70 -0
  132. stravinsky_claude_assets/commands/str/agent_list.md +56 -0
  133. stravinsky_claude_assets/commands/str/agent_output.md +92 -0
  134. stravinsky_claude_assets/commands/str/agent_progress.md +74 -0
  135. stravinsky_claude_assets/commands/str/agent_retry.md +94 -0
  136. stravinsky_claude_assets/commands/str/cancel.md +51 -0
  137. stravinsky_claude_assets/commands/str/clean.md +97 -0
  138. stravinsky_claude_assets/commands/str/continue.md +38 -0
  139. stravinsky_claude_assets/commands/str/index.md +199 -0
  140. stravinsky_claude_assets/commands/str/list_watchers.md +96 -0
  141. stravinsky_claude_assets/commands/str/search.md +205 -0
  142. stravinsky_claude_assets/commands/str/start_filewatch.md +136 -0
  143. stravinsky_claude_assets/commands/str/stats.md +71 -0
  144. stravinsky_claude_assets/commands/str/stop_filewatch.md +89 -0
  145. stravinsky_claude_assets/commands/str/unwatch.md +42 -0
  146. stravinsky_claude_assets/commands/str/watch.md +45 -0
  147. stravinsky_claude_assets/commands/strav.md +53 -0
  148. stravinsky_claude_assets/commands/stravinsky.md +292 -0
  149. stravinsky_claude_assets/commands/verify.md +60 -0
  150. stravinsky_claude_assets/commands/version.md +5 -0
  151. stravinsky_claude_assets/hooks/README.md +248 -0
  152. stravinsky_claude_assets/hooks/comment_checker.py +193 -0
  153. stravinsky_claude_assets/hooks/context.py +38 -0
  154. stravinsky_claude_assets/hooks/context_monitor.py +153 -0
  155. stravinsky_claude_assets/hooks/dependency_tracker.py +73 -0
  156. stravinsky_claude_assets/hooks/edit_recovery.py +46 -0
  157. stravinsky_claude_assets/hooks/execution_state_tracker.py +68 -0
  158. stravinsky_claude_assets/hooks/notification_hook.py +103 -0
  159. stravinsky_claude_assets/hooks/notification_hook_v2.py +96 -0
  160. stravinsky_claude_assets/hooks/parallel_execution.py +241 -0
  161. stravinsky_claude_assets/hooks/parallel_reinforcement.py +106 -0
  162. stravinsky_claude_assets/hooks/parallel_reinforcement_v2.py +112 -0
  163. stravinsky_claude_assets/hooks/pre_compact.py +123 -0
  164. stravinsky_claude_assets/hooks/ralph_loop.py +173 -0
  165. stravinsky_claude_assets/hooks/session_recovery.py +263 -0
  166. stravinsky_claude_assets/hooks/stop_hook.py +89 -0
  167. stravinsky_claude_assets/hooks/stravinsky_metrics.py +164 -0
  168. stravinsky_claude_assets/hooks/stravinsky_mode.py +146 -0
  169. stravinsky_claude_assets/hooks/subagent_stop.py +98 -0
  170. stravinsky_claude_assets/hooks/todo_continuation.py +111 -0
  171. stravinsky_claude_assets/hooks/todo_delegation.py +96 -0
  172. stravinsky_claude_assets/hooks/tool_messaging.py +281 -0
  173. stravinsky_claude_assets/hooks/truncator.py +23 -0
  174. stravinsky_claude_assets/rules/deployment_safety.md +51 -0
  175. stravinsky_claude_assets/rules/integration_wiring.md +89 -0
  176. stravinsky_claude_assets/rules/pypi_deployment.md +220 -0
  177. stravinsky_claude_assets/rules/stravinsky_orchestrator.md +32 -0
  178. stravinsky_claude_assets/settings.json +152 -0
  179. stravinsky_claude_assets/skills/chrome-devtools/SKILL.md +81 -0
  180. stravinsky_claude_assets/skills/sqlite/SKILL.md +77 -0
  181. stravinsky_claude_assets/skills/supabase/SKILL.md +74 -0
  182. stravinsky_claude_assets/task_dependencies.json +34 -0
  183. stravinsky-0.4.18.dist-info/RECORD +0 -88
  184. {stravinsky-0.4.18.dist-info → stravinsky-0.4.66.dist-info}/WHEEL +0 -0
@@ -18,20 +18,27 @@ Architecture:
 - Chunking strategy: function/class level with context
 """
 
+import asyncio
 import atexit
 import hashlib
 import logging
+import signal
 import sys
 import threading
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
+
+if TYPE_CHECKING:
+    import pathspec
 
 import httpx
 from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
 
 from mcp_bridge.auth.token_store import TokenStore
 from mcp_bridge.tools.query_classifier import QueryCategory, classify_query
+from mcp_bridge.native_search import native_chunk_code
+from mcp_bridge.native_watcher import NativeFileWatcher
 
 logger = logging.getLogger(__name__)
 
@@ -295,7 +302,10 @@ class GitIgnoreManager:
             rel_path_str = str(rel_path).replace("\\", "/")
 
             # Check against patterns
-            return self.spec.match_file(rel_path_str)
+            spec = self.spec
+            if spec is None:
+                return False  # No patterns loaded, nothing is ignored
+            return spec.match_file(rel_path_str)
         except ValueError:
             # Path is outside project - not ignored by gitignore (but may be ignored for other reasons)
             return False
@@ -925,40 +935,120 @@ class CodebaseVectorStore:
         ".toml",
     }
 
-    # Directories to skip
-    SKIP_DIRS = {
-        "node_modules",
-        ".git",
+    # Directories to skip (non-code related)
+    SKIP_DUW = {
+        # Python
         "__pycache__",
         ".venv",
         "venv",
         "env",
-        "dist",
-        "build",
-        ".next",
-        ".nuxt",
-        "target",
+        ".env",
+        "virtualenv",
+        ".virtualenv",
         ".tox",
+        ".nox",
         ".pytest_cache",
         ".mypy_cache",
         ".ruff_cache",
+        ".pytype",
+        ".pyre",
+        "*.egg-info",
+        ".eggs",
+        "pip-wheel-metadata",
+        # Node.js
+        "node_modules",
+        ".npm",
+        ".yarn",
+        ".pnpm-store",
+        "bower_components",
+        # Build outputs
+        "dist",
+        "build",
+        "out",
+        "_build",
+        ".next",
+        ".nuxt",
+        ".output",
+        ".cache",
+        ".parcel-cache",
+        ".turbo",
+        # Version control
+        ".git",
+        ".svn",
+        ".hg",
+        # IDE/Editor
+        ".idea",
+        ".vscode",
+        ".vs",
+        # Test/coverage
         "coverage",
+        "htmlcov",
+        ".coverage",
+        ".nyc_output",
+        # Rust/Go/Java
+        "target",
+        "vendor",
+        "Godeps",
+        # Misc
         ".stravinsky",
         "scratches",
        "consoles",
-        ".idea",
+        "logs",
+        "tmp",
+        "temp",
     }
 
-    def __init__(self, project_path: str, provider: EmbeddingProvider = "ollama"):
-        self.project_path = Path(project_path).resolve()
-        self.project_hash = hashlib.md5(str(self.project_path).encode()).hexdigest()[:12]
+    @staticmethod
+    def _normalize_project_path(path: str) -> Path:
+        """
+        Normalize project path to git root if available.
+
+        This ensures one index per repo regardless of invocation directory.
+        If not a git repo, returns resolved absolute path.
+        """
+        import subprocess
+
+        resolved = Path(path).resolve()
+
+        # Try to find git root
+        try:
+            result = subprocess.run(
+                ["git", "-C", str(resolved), "rev-parse", "--show-toplevel"],
+                capture_output=True,
+                text=True,
+                timeout=2,
+                check=False,
+            )
+            if result.returncode == 0:
+                git_root = Path(result.stdout.strip())
+                logger.debug(f"Normalized {resolved} → {git_root} (git root)")
+                return git_root
+        except (subprocess.TimeoutExpired, FileNotFoundError):
+            pass
+
+        # Not a git repo or git not available, use resolved path
+        return resolved
+
+    def __init__(
+        self,
+        project_path: str,
+        provider: EmbeddingProvider = "ollama",
+        base_path: Path | None = None,
+    ):
+        self.project_path = self._normalize_project_path(project_path)
+        self.repo_name = self.project_path.name
 
         # Initialize embedding provider
         self.provider_name = provider
         self.provider = get_embedding_provider(provider)
 
-        # Store in user's home directory, separate by provider to avoid dimension mismatch
-        self.db_path = Path.home() / ".stravinsky" / "vectordb" / f"{self.project_hash}_{provider}"
+        # Store in provided base_path or user's home directory
+        # Separate by provider to avoid dimension mismatch
+        if base_path:
+            self.db_path = base_path / f"{self.repo_name}_{provider}"
+        else:
+            self.db_path = Path.home() / ".stravinsky" / "vectordb" / f"{self.repo_name}_{provider}"
+
         self.db_path.mkdir(parents=True, exist_ok=True)
 
         # File lock for single-process access to ChromaDB (prevents corruption)
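Editor's note: the `_normalize_project_path` staticmethod above keys the index by git root, so any invocation directory inside the same checkout resolves to one vector DB. A minimal sketch of the effect, assuming the class lives in `mcp_bridge/tools/semantic_search.py` (the file this diff modifies) and that `/home/user/myrepo` is a hypothetical git checkout:

```python
from mcp_bridge.tools.semantic_search import CodebaseVectorStore

# Both paths normalize to the checkout root, so both map to the same
# vector DB directory, e.g. ~/.stravinsky/vectordb/myrepo_ollama
root_a = CodebaseVectorStore._normalize_project_path("/home/user/myrepo/src/utils")
root_b = CodebaseVectorStore._normalize_project_path("/home/user/myrepo")
assert root_a == root_b
```

Per the docstring, a directory that is not a git repo simply resolves to its absolute path.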
@@ -1073,7 +1163,7 @@ class CodebaseVectorStore:
         sorted_results = sorted(results, key=lambda x: x[0])
         return [emb for _, emb in sorted_results]
 
-    def _chunk_file(self, file_path: Path) -> list[dict]:
+    async def _chunk_file(self, file_path: Path) -> list[dict]:
         """Split a file into chunks with metadata.
 
         Uses AST-aware chunking for Python files to respect function/class
@@ -1091,7 +1181,42 @@ class CodebaseVectorStore:
         rel_path = str(file_path.resolve().relative_to(self.project_path.resolve()))
         language = file_path.suffix.lstrip(".")
 
-        # Use AST-aware chunking for Python files
+        # Try native AST-aware chunking first
+        native_results = await native_chunk_code(content, language)
+        if native_results:
+            chunks = []
+            for nc in native_results:
+                start_line = nc["start_line"]
+                end_line = nc["end_line"]
+                chunk_text = nc["content"]
+                content_hash = hashlib.md5(chunk_text.encode("utf-8")).hexdigest()[:12]
+
+                node_type = nc.get("node_type", "unknown")
+                name = nc.get("name")
+
+                if name:
+                    header = f"File: {rel_path}\n{node_type.capitalize()}: {name}\nLines: {start_line}-{end_line}"
+                else:
+                    header = f"File: {rel_path}\nLines: {start_line}-{end_line}"
+
+                document = f"{header}\n\n{chunk_text}"
+
+                chunks.append({
+                    "id": f"{rel_path}:{start_line}-{end_line}:{content_hash}",
+                    "document": document,
+                    "metadata": {
+                        "file_path": rel_path,
+                        "start_line": start_line,
+                        "end_line": end_line,
+                        "language": language,
+                        "node_type": node_type,
+                        "name": name or "",
+                    }
+                })
+            if chunks:
+                return chunks
+
+        # Use AST-aware chunking for Python files (fallback)
         if language == "py":
             chunks = self._chunk_python_ast(content, rel_path, language)
             if chunks:  # If AST parsing succeeded
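Editor's note: for orientation, one element of the `chunks` list built by the native branch above would look roughly like this (values hypothetical, shape taken directly from the code):

```python
example_chunk = {
    "id": "mcp_bridge/server.py:10-42:ab12cd34ef56",
    "document": "File: mcp_bridge/server.py\nFunction: main\nLines: 10-42\n\ndef main():\n    ...",
    "metadata": {
        "file_path": "mcp_bridge/server.py",
        "start_line": 10,
        "end_line": 42,
        "language": "py",
        "node_type": "function",
        "name": "main",
    },
}
```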
@@ -1307,8 +1432,80 @@ class CodebaseVectorStore:
 
         return chunks
 
+    def _load_whitelist(self) -> set[Path] | None:
+        """Load whitelist from .stravinskyadd file if present.
+
+        File format:
+        - One path per line (relative to project root)
+        - Lines starting with # are comments
+        - Empty lines are ignored
+        - Glob patterns are supported (e.g., src/**/*.py)
+        - Directories implicitly include all files within (src/ includes src/**/*.*)
+
+        Returns:
+            Set of resolved file paths to include, or None if no whitelist file exists.
+        """
+        whitelist_file = self.project_path / ".stravinskyadd"
+        if not whitelist_file.exists():
+            return None
+
+        whitelist_paths: set[Path] = set()
+        try:
+            content = whitelist_file.read_text(encoding="utf-8")
+            for line in content.splitlines():
+                line = line.strip()
+                # Skip empty lines and comments
+                if not line or line.startswith("#"):
+                    continue
+
+                # Handle glob patterns
+                if "*" in line or "?" in line:
+                    for matched_path in self.project_path.glob(line):
+                        if (
+                            matched_path.is_file()
+                            and matched_path.suffix.lower() in self.CODE_EXTENSIONS
+                        ):
+                            whitelist_paths.add(matched_path.resolve())
+                else:
+                    target = self.project_path / line
+                    if target.exists():
+                        if target.is_file():
+                            # Direct file reference
+                            if target.suffix.lower() in self.CODE_EXTENSIONS:
+                                whitelist_paths.add(target.resolve())
+                        elif target.is_dir():
+                            # Directory: include all code files recursively
+                            for file_path in target.rglob("*"):
+                                if (
+                                    file_path.is_file()
+                                    and file_path.suffix.lower() in self.CODE_EXTENSIONS
+                                ):
+                                    # Apply SKIP_DUW even within whitelisted directories
+                                    if not any(
+                                        skip_dir in file_path.parts for skip_dir in self.SKIP_DUW
+                                    ):
+                                        whitelist_paths.add(file_path.resolve())
+
+            logger.info(f"Loaded whitelist from .stravinskyadd: {len(whitelist_paths)} files")
+            return whitelist_paths
+
+        except Exception as e:
+            logger.warning(f"Failed to parse .stravinskyadd: {e}")
+            return None
+
     def _get_files_to_index(self) -> list[Path]:
-        """Get all indexable files in the project."""
+        """Get all indexable files in the project.
+
+        If a .stravinskyadd whitelist file exists, ONLY those paths are indexed.
+        Otherwise, all code files are indexed (excluding SKIP_DUW).
+        """
+        # Check for whitelist mode
+        whitelist = self._load_whitelist()
+        if whitelist is not None:
+            logger.info(f"Whitelist mode: indexing {len(whitelist)} files from .stravinskyadd")
+            return sorted(whitelist)  # Return sorted for deterministic order
+
+        # Standard mode: crawl entire project
         files = []
         for file_path in self.project_path.rglob("*"):
             if file_path.is_file():
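Editor's note: per the format documented in `_load_whitelist`, a `.stravinskyadd` placed at the project root might look like this (contents illustrative; note the parser only treats whole lines starting with `#` as comments):

```
# .stravinskyadd - whitelist for semantic indexing (example)
# A directory includes all code files beneath it:
mcp_bridge/
# Glob patterns work too:
tests/**/*.py
# Individual files can be listed directly:
pyproject.toml
```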
@@ -1345,7 +1542,7 @@ class CodebaseVectorStore:
                     continue
 
                 # Skip excluded directories
-                if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DIRS):
+                if any(skip_dir in file_path.parts for skip_dir in self.SKIP_DUW):
                     continue
 
                 # Only include code files
@@ -1374,6 +1571,38 @@ class CodebaseVectorStore:
         with self._cancel_lock:
             return self._cancel_indexing
 
+    def _get_manifest_path(self) -> Path:
+        """Get the path to the incremental indexing manifest."""
+        return self.db_path / "manifest.json"
+
+    def _load_manifest(self) -> dict:
+        """Load the indexing manifest."""
+        manifest_path = self._get_manifest_path()
+        if not manifest_path.exists():
+            return {}
+        try:
+            import json
+
+            with open(manifest_path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except Exception as e:
+            logger.warning(f"Failed to load manifest: {e}")
+            return {}
+
+    def _save_manifest(self, manifest: dict) -> None:
+        """Save the indexing manifest."""
+        manifest_path = self._get_manifest_path()
+        try:
+            import json
+
+            # Atomic write
+            temp_path = manifest_path.with_suffix(".tmp")
+            with open(temp_path, "w", encoding="utf-8") as f:
+                json.dump(manifest, f, indent=2)
+            temp_path.replace(manifest_path)
+        except Exception as e:
+            logger.warning(f"Failed to save manifest: {e}")
+
     async def index_codebase(self, force: bool = False) -> dict:
         """
         Index the entire codebase into the vector store.
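Editor's note: together with the per-file bookkeeping added to `index_codebase` below, these helpers persist `manifest.json` beside the vector DB. Each entry maps an absolute file path to its last-seen stat snapshot and the chunk IDs derived from it; a sketch of the on-disk shape (values hypothetical):

```json
{
  "/home/user/myrepo/mcp_bridge/server.py": {
    "mtime": 1718000000.0,
    "size": 20480,
    "chunk_ids": [
      "mcp_bridge/server.py:10-42:ab12cd34ef56"
    ]
  }
}
```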
@@ -1398,7 +1627,7 @@ class CodebaseVectorStore:
         print(f"🔍 SEMANTIC-INDEX: {self.project_path}", file=sys.stderr)
 
         # Notify reindex start (non-blocking)
-        notifier = None  # Initialize to avoid NameError in error handlers
+        notifier = None
         try:
             from mcp_bridge.notifications import get_notification_manager
 
@@ -1410,7 +1639,6 @@ class CodebaseVectorStore:
         try:
             if not await self.check_embedding_service():
                 error_msg = "Embedding service not available"
-                # Notify error
                 try:
                     if notifier:
                         await notifier.notify_reindex_error(error_msg)
@@ -1427,25 +1655,74 @@ class CodebaseVectorStore:
            except Exception:
                pass
 
+            manifest = {}
            if force:
-                # Clear existing collection
+                # Clear existing collection and manifest
                try:
                    self.client.delete_collection("codebase")
                    self._collection = None
                    existing_ids = set()
                except Exception:
                    pass
+            else:
+                manifest = self._load_manifest()
 
            files = self._get_files_to_index()
            all_chunks = []
            current_chunk_ids = set()
 
+            # Track manifest updates
+            new_manifest = {}
+
+            # Stats
+            reused_files = 0
+
            # Mark: Generate all chunks for current codebase
            for file_path in files:
-                chunks = self._chunk_file(file_path)
+                str_path = str(file_path.resolve())
+
+                # Get file stats
+                try:
+                    stat = file_path.stat()
+                    mtime = stat.st_mtime
+                    size = stat.st_size
+                except OSError:
+                    continue  # File might have been deleted during iteration
+
+                # Check manifest
+                manifest_entry = manifest.get(str_path)
+
+                # Reuse chunks if file hasn't changed AND chunks exist in DB
+                if (
+                    not force
+                    and manifest_entry
+                    and manifest_entry.get("mtime") == mtime
+                    and manifest_entry.get("size") == size
+                ):
+                    chunk_ids = manifest_entry.get("chunk_ids", [])
+
+                    # Verify all chunks actually exist in DB (integrity check)
+                    if chunk_ids and all(cid in existing_ids for cid in chunk_ids):
+                        current_chunk_ids.update(chunk_ids)
+                        new_manifest[str_path] = manifest_entry
+                        reused_files += 1
+                        continue
+
+                # If we get here: file changed, new, or chunks missing from DB
+                chunks = await self._chunk_file(file_path)
                all_chunks.extend(chunks)
+
+                new_chunk_ids = []
                for c in chunks:
-                    current_chunk_ids.add(c["id"])
+                    cid = c["id"]
+                    current_chunk_ids.add(cid)
+                    new_chunk_ids.append(cid)
+
+                # Update manifest
+                new_manifest[str_path] = {"mtime": mtime, "size": size, "chunk_ids": new_chunk_ids}
+
+            # Save updated manifest
+            self._save_manifest(new_manifest)
 
            # Sweep: Identify stale chunks to remove
            to_delete = existing_ids - current_chunk_ids
@@ -1464,10 +1741,10 @@ class CodebaseVectorStore:
                    "indexed": 0,
                    "pruned": len(to_delete),
                    "total_files": len(files),
-                    "message": "No new chunks to index",
+                    "reused_files": reused_files,
+                    "message": f"No new chunks to index (reused {reused_files} files)",
                    "time_taken": round(time.time() - start_time, 1),
                }
-                # Notify completion
                try:
                    if notifier:
                        await notifier.notify_reindex_complete(stats)
@@ -1492,7 +1769,6 @@ class CodebaseVectorStore:
                        "cancelled": True,
                        "message": f"Cancelled after {total_indexed}/{len(chunks_to_add)} chunks",
                    }
-                    # Notify cancellation
                    try:
                        if notifier:
                            await notifier.notify_reindex_error(
@@ -1520,11 +1796,11 @@ class CodebaseVectorStore:
                "indexed": total_indexed,
                "pruned": len(to_delete),
                "total_files": len(files),
+                "reused_files": reused_files,
                "db_path": str(self.db_path),
                "time_taken": round(time.time() - start_time, 1),
            }
 
-            # Notify completion
            try:
                if notifier:
                    await notifier.notify_reindex_complete(stats)
@@ -1536,14 +1812,11 @@ class CodebaseVectorStore:
        except Exception as e:
            error_msg = str(e)
            logger.error(f"Reindexing failed: {error_msg}")
-
-            # Notify error
            try:
                if notifier:
                    await notifier.notify_reindex_error(error_msg)
            except Exception as notify_error:
                logger.warning(f"Failed to send reindex error notification: {notify_error}")
-
            raise
 
    async def search(
@@ -1749,6 +2022,87 @@ def _cleanup_watchers():
 atexit.register(_cleanup_watchers)
 
 
+def _check_index_exists(store: "CodebaseVectorStore") -> bool:
+    """Check if semantic index exists for this project."""
+    try:
+        doc_count = store.collection.count()
+        return doc_count > 0
+    except Exception as e:
+        logger.warning(f"Could not check index status: {e}")
+        return False
+
+
+def _prompt_with_timeout(prompt_text: str, timeout: int = 30) -> str:
+    """
+    Prompt user with timeout. Returns 'n' if timeout or non-interactive.
+
+    Args:
+        prompt_text: The prompt to display
+        timeout: Timeout in seconds (default: 30)
+
+    Returns:
+        User response or 'n' if timeout/non-interactive
+    """
+    # Check if stdin is interactive
+    if not sys.stdin.isatty():
+        return "n"  # Non-interactive, skip prompt
+
+    # Windows doesn't support SIGALRM, so we need a different approach
+    if sys.platform == "win32":
+        try:
+            import msvcrt
+            import time
+
+            print(prompt_text, end="", flush=True, file=sys.stderr)
+            start_time = time.time()
+            response = []
+
+            while time.time() - start_time < timeout:
+                if msvcrt.kbhit():
+                    char = msvcrt.getwche()
+                    if char in ("\r", "\n"):
+                        print(file=sys.stderr)  # Newline after input
+                        return "".join(response)
+                    response.append(char)
+                time.sleep(0.1)
+
+            print("\n⏱️ Timeout - skipping index creation", file=sys.stderr)
+            return "n"
+        except (ImportError, Exception):
+            # Fallback: just use input() without timeout on Windows
+            try:
+                return input(prompt_text)
+            except EOFError:
+                return "n"
+
+    # Unix-like systems (Linux, macOS)
+    def timeout_handler(signum, frame):
+        raise TimeoutError()
+
+    try:
+        # Save old handler
+        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+        signal.alarm(timeout)
+        response = input(prompt_text)
+        signal.alarm(0)  # Cancel alarm
+        # Restore old handler
+        signal.signal(signal.SIGALRM, old_handler)
+        return response
+    except (TimeoutError, EOFError):
+        signal.alarm(0)  # Cancel alarm
+        # Restore old handler
+        try:
+            signal.signal(signal.SIGALRM, old_handler)
+        except Exception:
+            pass
+        print("\n⏱️ Timeout - skipping index creation", file=sys.stderr)
+        return "n"
+    except Exception as e:
+        signal.alarm(0)  # Cancel alarm
+        logger.warning(f"Error during prompt: {e}")
+        return "n"
+
+
 def get_store(project_path: str, provider: EmbeddingProvider = "ollama") -> CodebaseVectorStore:
     """Get or create a vector store for a project.
 
@@ -1794,6 +2148,58 @@ async def semantic_search(
        Formatted search results with file paths and code snippets.
    """
    store = get_store(project_path, provider)
+
+    # Check if index exists before searching
+    if not _check_index_exists(store):
+        print("\n⚠️ No semantic index found for this project.", file=sys.stderr)
+        print(f"📁 Project: {project_path}", file=sys.stderr)
+        print(f"🔍 Provider: {provider}", file=sys.stderr)
+
+        # Interactive prompt with timeout
+        response = _prompt_with_timeout("\n🤔 Create semantic index now? [Y/n] (30s timeout): ")
+
+        if response.lower() in ["", "y", "yes"]:
+            print("\n📋 Creating semantic index...", file=sys.stderr)
+            try:
+                # Call index_codebase function
+                index_result = await index_codebase(project_path, provider=provider, force=False)
+                print(f"✅ {index_result}", file=sys.stderr)
+
+                # Auto-start file watcher
+                print("🔄 Starting file watcher for auto-updates...", file=sys.stderr)
+                await start_file_watcher(project_path, provider)
+                print("✅ File watcher started - index will auto-update on changes", file=sys.stderr)
+
+            except Exception as e:
+                logger.error(f"Failed to create index: {e}")
+                return (
+                    f"❌ Failed to create index: {e}\n\n"
+                    "**Manual fix:**\n"
+                    "```python\n"
+                    f'index_codebase(project_path="{project_path}", provider="{provider}")\n'
+                    "```"
+                )
+        else:
+            return (
+                "❌ Index required for semantic search.\n\n"
+                "**To create index manually:**\n"
+                "```python\n"
+                f'index_codebase(project_path="{project_path}", provider="{provider}")\n'
+                "```\n\n"
+                "This indexes your codebase for natural language search. "
+                "Run it once per project (takes 30s-2min depending on size)."
+            )
+    else:
+        # Index exists, ensure watcher is running
+        # We don't await this to avoid blocking search if it takes a moment
+        # But for tests we might need to await it or mock it properly
+        # The test expects it to be called.
+        # Let's just call it. start_file_watcher is async.
+        try:
+            await start_file_watcher(project_path, provider)
+        except Exception as e:
+            logger.warning(f"Failed to auto-start watcher: {e}")
+
    results = await store.search(
        query,
        n_results,
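Editor's note: the interactive prompt used above relies, on Unix, on `SIGALRM` interrupting the blocking `input()` call. A condensed, standalone sketch of that pattern (not Stravinsky's exact code):

```python
import signal

def read_line_with_timeout(prompt: str, seconds: int = 30) -> str | None:
    """Return a line of user input, or None if nothing arrives in time (Unix only)."""
    def _on_alarm(signum, frame):
        raise TimeoutError

    old_handler = signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(seconds)          # schedule SIGALRM after `seconds`
    try:
        return input(prompt)
    except (TimeoutError, EOFError):
        return None
    finally:
        signal.alarm(0)            # cancel any pending alarm
        signal.signal(signal.SIGALRM, old_handler)  # restore previous handler
```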
@@ -1810,6 +2216,17 @@ async def semantic_search(
    if "error" in results[0]:
        return f"Error: {results[0]['error']}\nHint: {results[0].get('hint', 'Check Ollama is running')}"
 
+    # Auto-start file watcher if not already running (index exists and search succeeded)
+    try:
+        active_watcher = get_file_watcher(project_path)
+        if active_watcher is None:
+            # Index exists but no watcher - start it silently in background
+            logger.info(f"Auto-starting file watcher for {project_path}")
+            await start_file_watcher(project_path, provider, debounce_seconds=2.0)
+    except Exception as e:
+        # Don't fail the search if watcher fails to start
+        logger.warning(f"Could not auto-start file watcher: {e}")
+
    lines = [f"Found {len(results)} results for: '{query}'\n"]
    for i, r in enumerate(results, 1):
        lines.append(f"{i}. {r['file']}:{r['lines']} (relevance: {r['relevance']})")
@@ -2050,16 +2467,16 @@ def delete_index(
        except Exception as e:
            return f"❌ Error deleting all indexes: {e}"
 
-    # Generate project hash
+    # Generate repo name
    project_path_resolved = Path(project_path).resolve()
-    project_hash = hashlib.md5(str(project_path_resolved).encode()).hexdigest()[:12]
+    repo_name = project_path_resolved.name
 
    deleted = []
    errors = []
 
    if provider:
        # Delete specific provider index for this project
-        index_path = vectordb_base / f"{project_hash}_{provider}"
+        index_path = vectordb_base / f"{repo_name}_{provider}"
        if index_path.exists():
            try:
                shutil.rmtree(index_path)
@@ -2072,7 +2489,7 @@ def delete_index(
        # Delete all provider indexes for this project
        providers: list[EmbeddingProvider] = ["ollama", "mxbai", "gemini", "openai", "huggingface"]
        for prov in providers:
-            index_path = vectordb_base / f"{project_hash}_{prov}"
+            index_path = vectordb_base / f"{repo_name}_{prov}"
            if index_path.exists():
                try:
                    shutil.rmtree(index_path)
@@ -2081,7 +2498,7 @@ def delete_index(
                    errors.append(f"{prov}: {e}")
 
    if not deleted and not errors:
-        return f"⚠️ No indexes found for project: {project_path_resolved}\nProject hash: {project_hash}"
+        return f"⚠️ No indexes found for project: {project_path_resolved}\nRepo name: {repo_name}"
 
    result = []
    if deleted:
@@ -2144,45 +2561,44 @@ async def start_file_watcher(
    Returns:
        The started CodebaseFileWatcher instance
    """
-    path = str(Path(project_path).resolve())
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
    with _watchers_lock:
-        if path not in _watchers:
+        if path_key not in _watchers:
            store = get_store(project_path, provider)
 
-            # Check if index exists - CRITICAL: Must have index before watching
+            # Check if index exists - create if missing, update if stale
            try:
                stats = store.get_stats()
                chunks_indexed = stats.get("chunks_indexed", 0)
-                if chunks_indexed == 0:
-                    raise ValueError(
-                        f"No semantic index found for '{path}'. "
-                        f"Run semantic_index(project_path='{path}', provider='{provider}') "
-                        f"before starting the file watcher."
-                    )
 
-                # Index exists - catch up on any missed changes
-                print(f"📋 Catching up on changes since last index...", file=sys.stderr)
-                await store.index_codebase(force=False)
-                print(f"✅ Index updated, starting file watcher", file=sys.stderr)
+                if chunks_indexed == 0:
+                    # No index exists - create initial index
+                    print("📋 No index found, creating initial index...", file=sys.stderr)
+                    await store.index_codebase(force=False)
+                    print("✅ Initial index created, starting file watcher", file=sys.stderr)
+                else:
+                    # Index exists - catch up on any missed changes since watcher was off
+                    print("📋 Catching up on changes since last index...", file=sys.stderr)
+                    await store.index_codebase(force=False)
+                    print("✅ Index updated, starting file watcher", file=sys.stderr)
 
-            except ValueError:
-                # Re-raise ValueError (our intentional error)
-                raise
            except Exception as e:
-                # Collection doesn't exist or other error
-                raise ValueError(
-                    f"No semantic index found for '{path}'. "
-                    f"Run semantic_index(project_path='{path}', provider='{provider}') "
-                    f"before starting the file watcher."
-                ) from e
+                # Failed to index - log and create watcher anyway (it will index on file changes)
+                logger.warning(f"Failed to index before starting watcher: {e}")
+                print(f"⚠️ Warning: Could not index project: {e}", file=sys.stderr)
+                print(
+                    "🔄 Starting watcher anyway - will index on first file change", file=sys.stderr
+                )
 
            watcher = store.start_watching(debounce_seconds=debounce_seconds)
-            _watchers[path] = watcher
+            _watchers[path_key] = watcher
        else:
-            watcher = _watchers[path]
+            watcher = _watchers[path_key]
            if not watcher.is_running():
                watcher.start()
-    return _watchers[path]
+    return _watchers[path_key]
 
 
 def stop_file_watcher(project_path: str) -> bool:
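Editor's note: taken together, the module-level watcher API now normalizes its registry key the same way the store does. A hedged usage sketch (repo path hypothetical; assumes a reachable embedding service so the initial index can be built):

```python
import asyncio

from mcp_bridge.tools.semantic_search import (
    get_file_watcher,
    start_file_watcher,
    stop_file_watcher,
)

async def main() -> None:
    watcher = await start_file_watcher("/home/user/myrepo", provider="ollama", debounce_seconds=2.0)
    # Same git-root key, so lookup from a subdirectory finds the same watcher:
    assert get_file_watcher("/home/user/myrepo/src") is watcher
    stop_file_watcher("/home/user/myrepo")

asyncio.run(main())
```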
@@ -2194,12 +2610,14 @@ def stop_file_watcher(project_path: str) -> bool:
    Returns:
        True if watcher was stopped, False if no watcher was active
    """
-    path = str(Path(project_path).resolve())
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
    with _watchers_lock:
-        if path in _watchers:
-            watcher = _watchers[path]
+        if path_key in _watchers:
+            watcher = _watchers[path_key]
            watcher.stop()
-            del _watchers[path]
+            del _watchers[path_key]
            return True
    return False
 
@@ -2213,9 +2631,11 @@ def get_file_watcher(project_path: str) -> "CodebaseFileWatcher | None":
    Returns:
        The CodebaseFileWatcher if active, None otherwise
    """
-    path = str(Path(project_path).resolve())
+    normalized_path = CodebaseVectorStore._normalize_project_path(project_path)
+    path_key = str(normalized_path)
+
    with _watchers_lock:
-        watcher = _watchers.get(path)
+        watcher = _watchers.get(path_key)
        if watcher is not None and watcher.is_running():
            return watcher
    return None
@@ -2281,7 +2701,7 @@ db session factory pattern"""
    result = await invoke_gemini(
        token_store=TokenStore(),
        prompt=prompt,
-        model="gemini-2.0-flash",
+        model="gemini-3-flash",
        temperature=0.7,
        max_tokens=200,
    )
@@ -2335,7 +2755,7 @@ authentication logic"""
    result = await invoke_gemini(
        token_store=TokenStore(),
        prompt=prompt,
-        model="gemini-2.0-flash",
+        model="gemini-3-flash",
        temperature=0.3,  # Lower temperature for more consistent decomposition
        max_tokens=150,
    )
@@ -2674,6 +3094,145 @@ async def enhanced_search(
 # ========================
 
 
+class DedicatedIndexingWorker:
+    """Single-threaded worker for all indexing operations.
+
+    Prevents concurrent indexing by serializing all operations through a queue.
+    Uses asyncio.run() for each operation to avoid event loop reuse issues.
+    """
+
+    def __init__(self, store: "CodebaseVectorStore"):
+        """Initialize the indexing worker.
+
+        Args:
+            store: CodebaseVectorStore instance for reindexing
+        """
+        import queue
+
+        self.store = store
+        self._queue: queue.Queue = queue.Queue(maxsize=1)  # Max 1 pending request (debouncing)
+        self._thread: threading.Thread | None = None
+        self._shutdown = threading.Event()
+        self._log_file = Path.home() / ".stravinsky" / "logs" / "file_watcher.log"
+        self._log_file.parent.mkdir(parents=True, exist_ok=True)
+
+    def start(self) -> None:
+        """Start the worker thread."""
+        if self._thread is not None and self._thread.is_alive():
+            logger.warning("Indexing worker already running")
+            return
+
+        self._shutdown.clear()
+        self._thread = threading.Thread(
+            target=self._run_worker, daemon=False, name="IndexingWorker"
+        )
+        self._thread.start()
+        logger.info(f"Started indexing worker for {self.store.project_path}")
+
+    def _log_error(self, msg: str, exc: Exception | None = None):
+        """Write error to log file with timestamp and full traceback."""
+        import traceback
+        from datetime import datetime
+
+        timestamp = datetime.now().isoformat()
+        try:
+            with open(self._log_file, "a") as f:
+                f.write(f"\n{'=' * 80}\n")
+                f.write(f"[{timestamp}] {msg}\n")
+                if exc:
+                    f.write(f"Exception: {type(exc).__name__}: {exc}\n")
+                    f.write(traceback.format_exc())
+                f.write(f"{'=' * 80}\n")
+        except Exception as log_exc:
+            logger.error(f"Failed to write to log file: {log_exc}")
+        logger.error(f"{msg} (logged to {self._log_file})")
+
+    def _run_worker(self) -> None:
+        """Worker thread entry point - processes queue with asyncio.run() per operation."""
+        import queue
+
+        self._log_error(f"🟢 File watcher started for {self.store.project_path}")
+
+        try:
+            while not self._shutdown.is_set():
+                try:
+                    # Wait for reindex request (blocking with timeout)
+                    self._queue.get(timeout=0.5)
+                    self._queue.task_done()
+
+                    # Use asyncio.run() for each operation (creates fresh loop)
+                    # This avoids "event loop already running" errors
+                    try:
+                        asyncio.run(self._do_reindex())
+                        self._log_error(f"✅ Reindex completed for {self.store.project_path}")
+                    except Exception as e:
+                        self._log_error(f"⚠️ Reindex failed for {self.store.project_path}", e)
+
+                except queue.Empty:
+                    continue  # No work, check shutdown flag
+                except Exception as e:
+                    self._log_error(f"⚠️ Queue processing error for {self.store.project_path}", e)
+
+        except Exception as e:
+            self._log_error(f"⚠️ Worker thread crashed for {self.store.project_path}", e)
+        finally:
+            self._log_error(f"🔴 File watcher stopped for {self.store.project_path}")
+
+    async def _do_reindex(self) -> None:
+        """Execute reindex with retry logic for ALL error types."""
+        import sqlite3
+
+        from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+
+        @retry(
+            stop=stop_after_attempt(3),
+            wait=wait_exponential(multiplier=1, min=2, max=10),
+            retry=retry_if_exception_type(
+                (
+                    httpx.HTTPError,
+                    ConnectionError,
+                    TimeoutError,
+                    sqlite3.OperationalError,  # Database locked
+                    OSError,  # File system errors
+                )
+            ),
+            reraise=True,
+        )
+        async def _indexed():
+            await self.store.index_codebase(force=False)
+
+        await _indexed()
+
+    def request_reindex(self, files: list[Path]) -> None:
+        """Request reindex from any thread (thread-safe).
+
+        Args:
+            files: List of files that changed (for logging only)
+        """
+        import queue
+
+        try:
+            # Non-blocking put - drops if queue full (natural debouncing)
+            self._queue.put_nowait("reindex")
+            logger.debug(f"📥 Queued reindex for {len(files)} files: {[f.name for f in files[:5]]}")
+        except queue.Full:
+            # Already have pending reindex - this is fine (debouncing)
+            logger.debug(f"Reindex already queued, skipping {len(files)} files")
+
+    def shutdown(self) -> None:
+        """Graceful shutdown of worker thread."""
+        if self._shutdown.is_set():
+            return  # Already shutting down
+
+        self._shutdown.set()
+        if self._thread is not None and self._thread.is_alive():
+            self._thread.join(timeout=10)  # Wait up to 10 seconds
+            if self._thread.is_alive():
+                self._log_error("⚠️ Worker thread failed to stop within timeout")
+            self._thread = None
+        logger.info("Indexing worker shut down")
+
+
 class CodebaseFileWatcher:
    """Watch a project directory for file changes and trigger reindexing.
 
@@ -2684,6 +3243,7 @@ class CodebaseFileWatcher:
    - Debounces rapid changes to batch them into a single reindex
    - Thread-safe with daemon threads for clean shutdown
    - Integrates with CodebaseVectorStore for incremental indexing
+    - Uses dedicated worker thread to prevent concurrent indexing
    """
 
    # Default debounce time in seconds
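Editor's note: the "dedicated worker thread" bullet above is backed by `DedicatedIndexingWorker`'s `maxsize=1` queue, so a burst of change events collapses into at most one pending reindex. A minimal, runnable illustration of that debouncing idea:

```python
import queue

q: "queue.Queue[str]" = queue.Queue(maxsize=1)

for _ in range(100):  # burst of file-change notifications
    try:
        q.put_nowait("reindex")
    except queue.Full:
        pass  # a reindex is already pending; drop the duplicate request

print(q.qsize())  # -> 1: the burst coalesced into a single queued operation
```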
@@ -2709,6 +3269,9 @@ class CodebaseFileWatcher:
        # Observer and handler for watchdog
        self._observer = None
        self._event_handler = None
+
+        # Native watcher
+        self._native_watcher: NativeFileWatcher | None = None
 
        # Thread safety
        self._lock = threading.Lock()
@@ -2719,10 +3282,14 @@ class CodebaseFileWatcher:
        self._pending_files: set[Path] = set()
        self._pending_lock = threading.Lock()
 
+        # Dedicated indexing worker (prevents concurrent access)
+        self._indexing_worker = DedicatedIndexingWorker(store)
+
    def start(self) -> None:
        """Start watching the project directory.
 
        Creates and starts a watchdog observer in a daemon thread.
+        Also starts the dedicated indexing worker thread.
        """
        with self._lock:
            if self._running:
@@ -2730,6 +3297,23 @@ class CodebaseFileWatcher:
                return
 
            try:
+                # Start indexing worker first (must be running before file events arrive)
+                self._indexing_worker.start()
+
+                # Try native watcher first
+                try:
+                    self._native_watcher = NativeFileWatcher(
+                        str(self.project_path),
+                        on_change=lambda type, path: self._on_file_changed(Path(path))
+                    )
+                    self._native_watcher.start()
+                    self._running = True
+                    logger.info(f"Native file watcher started for {self.project_path}")
+                    return
+                except (FileNotFoundError, Exception) as e:
+                    logger.info(f"Native watcher not available, falling back to watchdog: {e}")
+                    self._native_watcher = None
+
                watchdog = get_watchdog()
                Observer = watchdog["Observer"]
 
@@ -2755,25 +3339,37 @@ class CodebaseFileWatcher:
            except Exception as e:
                logger.error(f"Failed to start file watcher: {e}")
                self._running = False
+                # Clean up worker if observer failed
+                self._indexing_worker.shutdown()
                raise
 
    def stop(self) -> None:
        """Stop watching the project directory.
 
-        Cancels any pending reindex timers and stops the observer.
+        Cancels any pending reindex timers, stops the observer, and shuts down the indexing worker.
        """
        with self._lock:
-            # Cancel pending reindex
-            if self._pending_reindex_timer is not None:
-                self._pending_reindex_timer.cancel()
-                self._pending_reindex_timer = None
+            # Cancel pending reindex timer
+            with self._pending_lock:
+                if self._pending_reindex_timer:
+                    self._pending_reindex_timer.cancel()
+                    self._pending_reindex_timer = None
+                self._pending_files.clear()
+
+            # Stop native watcher
+            if self._native_watcher:
+                self._native_watcher.stop()
+                self._native_watcher = None
 
            # Stop observer
-            if self._observer is not None:
+            if self._observer:
                self._observer.stop()
                self._observer.join(timeout=5)  # Wait up to 5 seconds for shutdown
                self._observer = None
 
+            # Shutdown indexing worker
+            self._indexing_worker.shutdown()
+
            self._event_handler = None
            self._running = False
            logger.info(f"File watcher stopped for {self.project_path}")
@@ -2820,11 +3416,9 @@ class CodebaseFileWatcher:
    def _trigger_reindex(self) -> None:
        """Trigger reindexing of accumulated changed files.
 
-        This is called after the debounce period expires. It performs an
-        incremental reindex focusing on the changed files.
+        This is called after the debounce period expires. Delegates to the
+        dedicated indexing worker to prevent concurrent access.
        """
-        import asyncio
-
        with self._pending_lock:
            if not self._pending_files:
                self._pending_reindex_timer = None
@@ -2834,17 +3428,8 @@ class CodebaseFileWatcher:
            self._pending_files.clear()
            self._pending_reindex_timer = None
 
-        # Run async reindex in a new event loop
-        try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                loop.run_until_complete(self.store.index_codebase(force=False))
-                logger.debug(f"Reindexed {len(files_to_index)} changed files")
-            finally:
-                loop.close()
-        except Exception as e:
-            logger.error(f"Error during file watcher reindex: {e}")
+        # Delegate to dedicated worker (prevents concurrent indexing)
+        self._indexing_worker.request_reindex(files_to_index)
 
 
 def _create_file_change_handler_class():